LLVM 23.0.0git
AMDGPURegBankLegalizeRules.cpp
Go to the documentation of this file.
1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of RegBankLegalize Rules for all opcodes.
10/// Implementation of container for all the Rules and search.
11/// Fast search for most common case when Rule.Predicate checks LLT and
12/// uniformity of register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
17#include "AMDGPUInstrInfo.h"
18#include "GCNSubtarget.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
32
34 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
35 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
37 : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
39
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
44
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(0, 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(1, 64);
63 case P2:
64 return MRI.getType(Reg) == LLT::pointer(2, 32);
65 case P3:
66 return MRI.getType(Reg) == LLT::pointer(3, 32);
67 case P4:
68 return MRI.getType(Reg) == LLT::pointer(4, 64);
69 case P5:
70 return MRI.getType(Reg) == LLT::pointer(5, 32);
71 case P8:
72 return MRI.getType(Reg) == LLT::pointer(8, 128);
73 case Ptr32:
74 return isAnyPtr(MRI.getType(Reg), 32);
75 case Ptr64:
76 return isAnyPtr(MRI.getType(Reg), 64);
77 case Ptr128:
78 return isAnyPtr(MRI.getType(Reg), 128);
79 case V2S16:
80 return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
81 case V2S32:
82 return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
83 case V3S32:
84 return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
85 case V4S32:
86 return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
87 case B32:
88 return MRI.getType(Reg).getSizeInBits() == 32;
89 case B64:
90 return MRI.getType(Reg).getSizeInBits() == 64;
91 case B96:
92 return MRI.getType(Reg).getSizeInBits() == 96;
93 case B128:
94 return MRI.getType(Reg).getSizeInBits() == 128;
95 case B160:
96 return MRI.getType(Reg).getSizeInBits() == 160;
97 case B256:
98 return MRI.getType(Reg).getSizeInBits() == 256;
99 case B512:
100 return MRI.getType(Reg).getSizeInBits() == 512;
101 case DivAnyTy:
102 return MUI.isDivergentAtDef(Reg);
103 case UniS1:
104 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniformAtDef(Reg);
105 case UniS16:
106 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniformAtDef(Reg);
107 case UniS32:
108 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniformAtDef(Reg);
109 case UniS64:
110 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniformAtDef(Reg);
111 case UniS128:
112 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniformAtDef(Reg);
113 case UniP0:
114 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniformAtDef(Reg);
115 case UniP1:
116 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniformAtDef(Reg);
117 case UniP2:
118 return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniformAtDef(Reg);
119 case UniP3:
120 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniformAtDef(Reg);
121 case UniP4:
122 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniformAtDef(Reg);
123 case UniP5:
124 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniformAtDef(Reg);
125 case UniP8:
126 return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniformAtDef(Reg);
127 case UniPtr32:
128 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniformAtDef(Reg);
129 case UniPtr64:
130 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniformAtDef(Reg);
131 case UniPtr128:
132 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniformAtDef(Reg);
133 case UniV2S16:
134 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
135 MUI.isUniformAtDef(Reg);
136 case UniV2S32:
137 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
138 MUI.isUniformAtDef(Reg);
139 case UniB32:
140 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniformAtDef(Reg);
141 case UniB64:
142 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniformAtDef(Reg);
143 case UniB96:
144 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniformAtDef(Reg);
145 case UniB128:
146 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniformAtDef(Reg);
147 case UniB160:
148 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniformAtDef(Reg);
149 case UniB256:
150 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniformAtDef(Reg);
151 case UniB512:
152 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniformAtDef(Reg);
153 case UniBRC: {
154 if (MUI.isDivergentAtDef(Reg))
155 return false;
156 // Check if there is SGPR register class of same size as the LLT.
157 const SIRegisterInfo *TRI =
158 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
159 // There is no 16 bit SGPR register class. Extra size check is required
160 // since getSGPRClassForBitWidth returns SReg_32RegClass for Size 16.
161 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
162 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
163 }
164 case DivS1:
165 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergentAtDef(Reg);
166 case DivS16:
167 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergentAtDef(Reg);
168 case DivS32:
169 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergentAtDef(Reg);
170 case DivS64:
171 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergentAtDef(Reg);
172 case DivS128:
173 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergentAtDef(Reg);
174 case DivP0:
175 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergentAtDef(Reg);
176 case DivP1:
177 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergentAtDef(Reg);
178 case DivP2:
179 return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergentAtDef(Reg);
180 case DivP3:
181 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergentAtDef(Reg);
182 case DivP4:
183 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergentAtDef(Reg);
184 case DivP5:
185 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergentAtDef(Reg);
186 case DivPtr32:
187 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergentAtDef(Reg);
188 case DivPtr64:
189 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergentAtDef(Reg);
190 case DivPtr128:
191 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergentAtDef(Reg);
192 case DivV2S16:
193 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
195 case DivV2S32:
196 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
198 case DivV3S32:
199 return MRI.getType(Reg) == LLT::fixed_vector(3, 32) &&
201 case DivV4S16:
202 return MRI.getType(Reg) == LLT::fixed_vector(4, 16) &&
204 case DivV6S32:
205 return MRI.getType(Reg) == LLT::fixed_vector(6, 32) &&
207 case DivB32:
208 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergentAtDef(Reg);
209 case DivB64:
210 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergentAtDef(Reg);
211 case DivB96:
212 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergentAtDef(Reg);
213 case DivB128:
214 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergentAtDef(Reg);
215 case DivB160:
216 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergentAtDef(Reg);
217 case DivB256:
218 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergentAtDef(Reg);
219 case DivB512:
220 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergentAtDef(Reg);
221 case DivBRC: {
222 if (MUI.isUniformAtDef(Reg))
223 return false;
224 // Check if there is VGPR register class of same size as the LLT.
225 const SIRegisterInfo *TRI =
226 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
227 return TRI->getSGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
228 }
229 case BRC: {
230 // Check if there is SGPR and VGPR register class of same size as the LLT.
231 const SIRegisterInfo *TRI =
232 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
233 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
234 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize) &&
235 TRI->getVGPRClassForBitWidth(LLTSize);
236 }
237 case _:
238 return true;
239 default:
240 llvm_unreachable("missing matchUniformityAndLLT");
241 }
242}
243
245 const MachineUniformityInfo &MUI,
246 const MachineRegisterInfo &MRI) const {
247 // Check LLT signature.
248 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
249 const MachineOperand &MO = MI.getOperand(i);
250 if (OpUniformityAndTypes[i] == _) {
251 assert((!MI.getOperand(i).isReg() ||
252 !MI.getOperand(i).getReg().isVirtual()) &&
253 "_ is for non-register and physical register operands only");
254 continue;
255 }
256
257 // Remaining IDs check registers.
258 if (!MO.isReg())
259 return false;
260
261 if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
262 return false;
263 }
264
265 // More complex check.
266 if (TestFunc)
267 return TestFunc(MI);
268
269 return true;
270}
271
273
275 : FastTypes(FastTypes) {}
276
278 if (Ty == LLT::scalar(16))
279 return S16;
280 if (Ty == LLT::scalar(32))
281 return S32;
282 if (Ty == LLT::scalar(64))
283 return S64;
284 if (Ty == LLT::fixed_vector(2, 16))
285 return V2S16;
286 if (Ty == LLT::fixed_vector(2, 32))
287 return V2S32;
288 if (Ty == LLT::fixed_vector(3, 32))
289 return V3S32;
290 if (Ty == LLT::fixed_vector(4, 32))
291 return V4S32;
292 return _;
293}
294
296 if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
297 isAnyPtr(Ty, 32))
298 return B32;
299 if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
300 Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
301 return B64;
302 if (Ty == LLT::fixed_vector(3, 32))
303 return B96;
304 if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
305 Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
306 return B128;
307 return _;
308}
309
310const RegBankLLTMapping *
312 const MachineRegisterInfo &MRI,
313 const MachineUniformityInfo &MUI) const {
314 // Search in "Fast Rules".
315 // Note: if fast rules are enabled, RegBankLLTMapping must be added in each
316 // slot that could "match fast Predicate". If not, InvalidMapping is
317 // returned which results in failure, does not search "Slow Rules".
318 if (FastTypes != NoFastRules) {
319 Register Reg = MI.getOperand(0).getReg();
320 int Slot;
321 if (FastTypes == StandardB)
322 Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
323 else
324 Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));
325
326 if (Slot != -1)
327 return MUI.isUniformAtDef(Reg) ? &Uni[Slot] : &Div[Slot];
328 }
329
330 // Slow search for more complex rules.
331 for (const RegBankLegalizeRule &Rule : Rules) {
332 if (Rule.Predicate.match(MI, MUI, MRI))
333 return &Rule.OperandMapping;
334 }
335
336 return nullptr;
337}
338
340 Rules.push_back(Rule);
341}
342
344 RegBankLLTMapping RuleApplyIDs) {
345 int Slot = getFastPredicateSlot(Ty);
346 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
347 Div[Slot] = std::move(RuleApplyIDs);
348}
349
351 RegBankLLTMapping RuleApplyIDs) {
352 int Slot = getFastPredicateSlot(Ty);
353 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
354 Uni[Slot] = std::move(RuleApplyIDs);
355}
356
357int SetOfRulesForOpcode::getFastPredicateSlot(
359 switch (FastTypes) {
360 case Standard: {
361 switch (Ty) {
362 case S32:
363 return 0;
364 case S16:
365 return 1;
366 case S64:
367 return 2;
368 case V2S16:
369 return 3;
370 default:
371 return -1;
372 }
373 }
374 case StandardB: {
375 switch (Ty) {
376 case B32:
377 return 0;
378 case B64:
379 return 1;
380 case B96:
381 return 2;
382 case B128:
383 return 3;
384 default:
385 return -1;
386 }
387 }
388 case Vector: {
389 switch (Ty) {
390 case S32:
391 return 0;
392 case V2S32:
393 return 1;
394 case V3S32:
395 return 2;
396 case V4S32:
397 return 3;
398 default:
399 return -1;
400 }
401 }
402 default:
403 return -1;
404 }
405}
406
407RegBankLegalizeRules::RuleSetInitializer
408RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
409 FastRulesTypes FastTypes) {
410 return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
411}
412
413RegBankLegalizeRules::RuleSetInitializer
414RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
415 FastRulesTypes FastTypes) {
416 return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
417}
418
421 unsigned Opc = MI.getOpcode();
422 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
423 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
424 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
425 unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
426 auto IRAIt = IRulesAlias.find(IntrID);
427 if (IRAIt == IRulesAlias.end())
428 return nullptr;
429 return &IRules.at(IRAIt->second);
430 }
431
432 auto GRAIt = GRulesAlias.find(Opc);
433 if (GRAIt == GRulesAlias.end())
434 return nullptr;
435 return &GRules.at(GRAIt->second);
436}
437
438// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
439class Predicate {
440private:
441 struct Elt {
442 // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
443 // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
444 // Sequences of && and || will be represented by jumps, for example:
445 // (A && B && ... X) or (A && B && ... X) || Y
446 // A == true jump to B
447 // A == false jump to end or Y, result is A(false) or Y
448 // (A || B || ... X) or (A || B || ... X) && Y
449 // A == true jump to end or Y, result is A(true) or Y
450 // A == false jump to B
451 // Notice that when negating expression, we simply flip Neg on each Pred
452 // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
453 std::function<bool(const MachineInstr &)> Pred;
454 bool Neg; // Neg of Pred is calculated before jump
455 unsigned TJumpOffset;
456 unsigned FJumpOffset;
457 };
458
459 SmallVector<Elt, 8> Expression;
460
461 Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };
462
463public:
464 Predicate(std::function<bool(const MachineInstr &)> Pred) {
465 Expression.push_back({Pred, false, 1, 1});
466 };
467
468 bool operator()(const MachineInstr &MI) const {
469 unsigned Idx = 0;
470 unsigned ResultIdx = Expression.size();
471 bool Result;
472 do {
473 Result = Expression[Idx].Pred(MI);
474 Result = Expression[Idx].Neg ? !Result : Result;
475 if (Result) {
476 Idx += Expression[Idx].TJumpOffset;
477 } else {
478 Idx += Expression[Idx].FJumpOffset;
479 }
480 } while ((Idx != ResultIdx));
481
482 return Result;
483 };
484
485 Predicate operator!() const {
486 SmallVector<Elt, 8> NegExpression;
487 for (const Elt &ExprElt : Expression) {
488 NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
489 ExprElt.TJumpOffset});
490 }
491 return Predicate(std::move(NegExpression));
492 };
493
494 Predicate operator&&(const Predicate &RHS) const {
495 SmallVector<Elt, 8> AndExpression = Expression;
496
497 unsigned RHSSize = RHS.Expression.size();
498 unsigned ResultIdx = Expression.size();
499 for (unsigned i = 0; i < ResultIdx; ++i) {
500 // LHS results in false, whole expression results in false.
501 if (i + AndExpression[i].FJumpOffset == ResultIdx)
502 AndExpression[i].FJumpOffset += RHSSize;
503 }
504
505 AndExpression.append(RHS.Expression);
506
507 return Predicate(std::move(AndExpression));
508 }
509
510 Predicate operator||(const Predicate &RHS) const {
511 SmallVector<Elt, 8> OrExpression = Expression;
512
513 unsigned RHSSize = RHS.Expression.size();
514 unsigned ResultIdx = Expression.size();
515 for (unsigned i = 0; i < ResultIdx; ++i) {
516 // LHS results in true, whole expression results in true.
517 if (i + OrExpression[i].TJumpOffset == ResultIdx)
518 OrExpression[i].TJumpOffset += RHSSize;
519 }
520
521 OrExpression.append(RHS.Expression);
522
523 return Predicate(std::move(OrExpression));
524 }
525};
526
527// Initialize rules
530 : ST(&_ST), MRI(&_MRI) {
531
532 addRulesForGOpcs({G_ADD, G_SUB}, Standard)
533 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
534 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
535 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
536 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
538 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
539 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
540 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
541
542 addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
543 .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
544 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
545
546 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
548 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
549
550 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
551 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
552 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
553 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
554 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
556 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
557
558 bool HasVecMulU64 = ST->hasVMulU64Inst();
559 addRulesForGOpcs({G_MUL}, Standard)
560 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
561 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
562 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
563 .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
565 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
566 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
567 .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
568 .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);
569
570 bool hasMulHi = ST->hasScalarMulHiInsts();
571 addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
572 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
573 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
574 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);
575
576 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
577 .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
579
580 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
581 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
582 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
583 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});
584
585 addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
587 .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
588 .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
589 .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
590 .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
591 .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
592 .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
593 .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
594
595 addRulesForGOpcs({G_SHL}, Standard)
596 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
597 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
599 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
600 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
601 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
602 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
603 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
604
605 addRulesForGOpcs({G_LSHR}, Standard)
606 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
607 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
609 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
610 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
611 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
612 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
613 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
614
615 addRulesForGOpcs({G_ASHR}, Standard)
616 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
617 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
619 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
620 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
621 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
622 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
623 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
624
625 addRulesForGOpcs({G_FSHR}, Standard)
626 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
627 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
628
629 addRulesForGOpcs({G_BSWAP}, Standard)
630 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
631 .Div(S16, {{Vgpr16}, {Vgpr16}})
632 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
633 .Div(S32, {{Vgpr32}, {Vgpr32}})
634 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
635 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}});
636
637 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
638 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
639 G_AMDGPU_RCP_IFLAG},
640 Standard)
641 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
642 .Div(S32, {{Vgpr32}, {Vgpr32}});
643
644 addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
645
646 addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
647 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
648 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
649 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
650 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
651
652 addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
653 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
654 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
655 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
656 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
658 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
659
660 addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
661 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
662 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
663 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
664 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
666 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
667
668 addRulesForGOpcs({G_IMPLICIT_DEF})
669 .Any({{UniS1}, {{Sgpr32Trunc}, {}}})
670 .Any({{UniS16}, {{Sgpr16}, {}}})
671 .Any({{UniBRC}, {{SgprBRC}, {}}});
672
673 addRulesForGOpcs({G_CONSTANT}, Standard)
674 .Any({{UniS1, _}, {{Sgpr32Trunc}, {}, UniCstExt}})
675 .Uni(S16, {{Sgpr16}, {}})
676 .Uni(S32, {{Sgpr32}, {}})
677 .Uni(S64, {{Sgpr64}, {}})
678 .Any({{UniPtr32, _}, {{SgprPtr32}, {}}})
679 .Any({{UniPtr64, _}, {{SgprPtr64}, {}}});
680
681 addRulesForGOpcs({G_FCONSTANT}, Standard)
682 .Uni(S16, {{Sgpr16}, {}})
683 .Uni(S32, {{Sgpr32}, {}})
684 .Uni(S64, {{Sgpr64}, {}});
685
686 addRulesForGOpcs({G_FREEZE})
687 .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
688 .Any({{DivS1}, {{Vcc}, {Vcc}}})
689 .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
690 .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
691 .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});
692
693 addRulesForGOpcs({G_BITCAST})
694 .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
695 .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});
696
697 addRulesForGOpcs({G_UNMERGE_VALUES})
698 .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
699 .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
700 .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});
701
702 addRulesForGOpcs({G_BUILD_VECTOR})
703 .Any({{UniBRC, S16}, {{}, {}, VerifyAllSgpr}})
704 .Any({{UniBRC, BRC}, {{}, {}, VerifyAllSgpr}})
705 .Any({{DivBRC, S16}, {{}, {}, ApplyAllVgpr}})
706 .Any({{DivBRC, BRC}, {{}, {}, ApplyAllVgpr}});
707
708 addRulesForGOpcs({G_MERGE_VALUES, G_CONCAT_VECTORS})
709 .Any({{UniBRC, BRC}, {{}, {}, VerifyAllSgpr}})
710 .Any({{DivBRC, BRC}, {{}, {}, ApplyAllVgpr}});
711
712 addRulesForGOpcs({G_PHI})
713 .Any({{UniS1}, {{}, {}, AextToS32InIncomingBlockGPHI}})
714 .Any({{UniS16}, {{}, {}, VerifyAllSgprGPHI}})
715 .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}})
716 .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}});
717
718 addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
719 .Any({{UniB32, UniBRC, UniS32}, {{SgprB32}, {SgprBRC, Sgpr32}}})
720 .Any({{DivB32, DivBRC, UniS32}, {{VgprB32}, {VgprBRC, Sgpr32}}})
721 .Any({{DivB32, BRC, DivS32},
723 .Any({{UniB64, UniBRC, UniS32}, {{SgprB64}, {SgprBRC, Sgpr32}}})
724 .Any({{DivB64, DivBRC, UniS32},
726 .Any({{DivB64, BRC, DivS32},
728
729 addRulesForGOpcs({G_INSERT_VECTOR_ELT})
731 {{SgprBRC}, {SgprBRC, SgprB32, Sgpr32}}})
732 .Any(
733 {{DivBRC, BRC, B32, UniS32}, {{VgprBRC}, {VgprBRC, VgprB32, Sgpr32}}})
734 .Any({{DivBRC, BRC, B32, DivS32},
738 .Any({{DivBRC, BRC, B64, UniS32},
740 .Any({{DivBRC, BRC, B64, DivS32},
742
743 // INTERSECT_RAY {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
744 // INTERSECT_RAY {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
745 addRulesForGOpcs({G_AMDGPU_BVH_INTERSECT_RAY, G_AMDGPU_BVH_DUAL_INTERSECT_RAY,
746 G_AMDGPU_BVH8_INTERSECT_RAY})
747 .Any({{}, {{}, {}, ApplyBVH_INTERSECT_RAY}});
748
749 // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
750 // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
751 // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
752 // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
753 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
754 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
755 G_AMDGPU_INTRIN_IMAGE_STORE,
756 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
757 .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});
758
759 Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
760 auto Pred =
761 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
762 return CmpInst::isSigned(Pred);
763 });
764
765 Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
766 auto Pred =
767 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
768 return ICmpInst::isEquality(Pred);
769 });
770
771 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
772 // clang-format off
773 addRulesForGOpcs({G_ICMP})
774 .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
775 .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
776 .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
777 .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
778 .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
779 .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
780 .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
781 .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
782 .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
783 .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
784 .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
785 .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
786 .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
787 .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
788 .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
789 .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
790 // clang-format on
791
792 addRulesForGOpcs({G_BRCOND})
793 .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
794 .Any({{DivS1}, {{}, {Vcc}}});
795
796 addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
797
798 addRulesForGOpcs({G_SELECT}, StandardB)
799 .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
801 .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
805
806 addRulesForGOpcs({G_ANYEXT})
807 .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
808 .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
809 .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
810 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
811 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
812 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
813 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
814 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
815 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
816 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
817
818 bool Has16bitCmp = ST->has16BitInsts();
819
820 // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
821 // It is up to user to deal with truncated bits.
822 // S1, S16, S32 and S64 results are handled with specific rules. Remaining
823 // (result, source) pairs with valid register classes are covered by the
824 // generic UniBRC/DivBRC wildcard rules.
825 addRulesForGOpcs({G_TRUNC})
826 .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
827 .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
828 .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
829 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
830 .Any({{UniBRC, UniBRC}, {{SgprBRC}, {SgprBRC}}})
831 .Any({{DivBRC, DivBRC}, {{VgprBRC}, {VgprBRC}}})
832 .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
833 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
834 // This is non-trivial. VgprToVccCopy is done using compare instruction.
835 .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
837 !Has16bitCmp)
838 .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
839 .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});
840
841 addRulesForGOpcs({G_ZEXT})
845 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
846 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
847 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
848 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
849 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
850 // not extending S16 to S32 is questionable.
851 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
852 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
853 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
854 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
855
856 addRulesForGOpcs({G_SEXT})
860 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
861 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
862 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
863 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
864 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
865 // not extending S16 to S32 is questionable.
866 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
867 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
868 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
869 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
870
871 addRulesForGOpcs({G_SEXT_INREG})
872 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
873 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
874 .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
876
877 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
878 .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
879 .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
880 .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
881 .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
882
883 addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
884 .Uni(S32, {{Sgpr32}, {Sgpr32}})
885 .Div(S32, {{Vgpr32}, {Vgpr32}})
886 .Uni(S64, {{Sgpr64}, {Sgpr64}})
887 .Div(S64, {{Vgpr64}, {Vgpr64}})
888 .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
889 .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
890 .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
891 .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});
892
893 // Atomic read-modify-write operations: result and value are always VGPR,
894 // pointer varies by address space.
895 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
896 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
897 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
898 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
899 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
900 .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
901 .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
902 .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
903 .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
904 .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
905 .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});
906
907 addRulesForGOpcs({G_ATOMICRMW_USUB_SAT, G_ATOMICRMW_USUB_COND})
908 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
909 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
910 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}});
911
912 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
913 bool HasAtomicBufferGlobalPkAddF16Insts =
914 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
915 ST->hasAtomicBufferGlobalPkAddF16Insts();
916 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
917 addRulesForGOpcs({G_ATOMICRMW_FADD})
918 .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
919 .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
920 .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
921 .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
922 .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
923 .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
924 .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
925 HasAtomicFlatPkAdd16Insts)
926 .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
927 HasAtomicBufferGlobalPkAddF16Insts)
928 .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
929 HasAtomicDsPkAdd16Insts);
930
931 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
932 .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
933 .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
934 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
935 .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});
936
937 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
938 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
939 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
940 .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
941 .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});
942
943 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
944 .Div(S32, {{Vgpr32},
946 .Div(S64, {{Vgpr64},
948
949 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
950 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
951 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
952 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
953 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
954 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
955 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
956 Standard)
959
960 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
961 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
962 bool usesTrue16 = ST->useRealTrue16Insts();
963
964 Predicate isAlign16([](const MachineInstr &MI) -> bool {
965 return (*MI.memoperands_begin())->getAlign() >= Align(16);
966 });
967
968 Predicate isAlign4([](const MachineInstr &MI) -> bool {
969 return (*MI.memoperands_begin())->getAlign() >= Align(4);
970 });
971
972 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
973 return (*MI.memoperands_begin())->isAtomic();
974 });
975
976 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
977 return AMDGPU::isUniformMMO(*MI.memoperands_begin());
978 });
979
980 Predicate isConst([](const MachineInstr &MI) -> bool {
981 // The address space in the MMO may differ from the address space of the pointer.
982 const MachineMemOperand *MMO = *MI.memoperands_begin();
983 const unsigned AS = MMO->getAddrSpace();
984 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
986 });
987
988 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
989 return (*MI.memoperands_begin())->isVolatile();
990 });
991
992 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
993 return (*MI.memoperands_begin())->isInvariant();
994 });
995
996 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
997 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
998 });
999
1000 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
1001 const MachineMemOperand *MMO = *MI.memoperands_begin();
1002 return MMO->getAlign() >= Align(MMO->getSize().getValue());
1003 });
1004
1005 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
1006 const MachineMemOperand *MMO = *MI.memoperands_begin();
1007 const unsigned MemSize = 8 * MMO->getSize().getValue();
1008 return MemSize == 16 || MemSize == 8;
1009 });
1010
1011 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
1012 const MachineMemOperand *MMO = *MI.memoperands_begin();
1013 return 8 * MMO->getSize().getValue() == 32;
1014 });
1015
1016 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
1017 (isConst || isInvMMO || isNoClobberMMO);
1018
1019 // clang-format off
1020 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
1021 addRulesForGOpcs({G_LOAD})
1022 // flat, addrspace(0), never uniform - flat_load
1023 .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
1024 .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1025 .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
1026 .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
1027 .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})
1028
1029 // global, addrspace(1)
1030 // divergent - global_load
1031 .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
1032 .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
1033 .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
1034 .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
1035 .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
1036 .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
1037 .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})
1038
1039 // uniform - s_load
1040 .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
1041 .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1042 .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1043 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
1044 .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1045 .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) //32-bit load
1046 .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
1047 .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
1048 .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
1049 .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
1050 .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
1051 .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
1052 .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
1053
1054 // Uniform via global or buffer load, for example volatile or non-aligned
1055 // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
1056 // selected as global_load, use SgprP1 for pointer instead to match
1057 // patterns without flat-for-global, default for GFX7 and older.
1058 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
1059 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
1060 .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
1061 .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
1062 .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1063 .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1064 .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
1065 .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
1066 .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
1067 .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
1068 .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})
1069
1070 // local, addrspace(3) - ds_load
1071 .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
1072 .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1073 .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
1074 .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
1075 .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})
1076
1077 .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
1078 .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1079 .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
1080 .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
1081 .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})
1082
1083 // constant, addrspace(4)
1084 // divergent - global_load
1085 .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
1086 .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
1087 .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
1088 .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
1089 .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
1090 .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
1091 .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})
1092
1093 // uniform - s_load
1094 .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
1095 .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1096 .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1097 .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1098 .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) //32-bit load
1099 .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
1100 .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
1101 .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
1102 .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
1103 .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
1104 .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
1105 .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
1106
1107 // uniform in vgpr - global_load or buffer_load
1108 .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
1109 .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
1110 .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1111 .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1112 .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
1113 .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
1114 .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
1115 .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
1116 .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})
1117
1118 // private, addrspace(5), never uniform - scratch_load
1119 .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
1120 .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1121 .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
1122 .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
1123 .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})
1124
1125 .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});
1126
1127
1128 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
1129 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})
1130
1131 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
1132 .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
1133 .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
1134 .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
1135 .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)
1136
1137 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
1138 .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})
1139
1140 .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
1141 .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
1142 .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
1143 .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
1144 .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)
1145
1146 .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});
1147
1148 addRulesForGOpcs({G_STORE})
1149 // addrspace(0)
1150 .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
1151 .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
1152 .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
1153 .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
1154 .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})
1155
1156 // addrspace(1), there are no stores to addrspace(4)
1157 // For targets:
1158 // - with "+flat-for-global" - global_store
1159 // - without(-flat-for-global) - buffer_store addr64
1160 .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
1161 .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1162 .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
1163 .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
1164 .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})
1165
1166 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
1167 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
1168 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
1169 .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
1170 .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1171 .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
1172 .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
1173 .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})
1174
1175 // addrspace(3) and addrspace(5)
1176 .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
1177 .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
1178 .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
1179 .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
1180 .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
1181
1182 // clang-format on
1183
1184 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1185 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1186 StandardB)
1195
1196 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1197 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1198 StandardB)
1201
1202 addRulesForGOpcs(
1203 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1204 StandardB)
1207
1208 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1209 StandardB)
1217 .Any({{UniB160},
1219
1220 addRulesForGOpcs(
1221 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1222 StandardB)
1229
1230 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1231 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1232 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1233 G_AMDGPU_TBUFFER_STORE_FORMAT,
1234 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1235 .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1236 .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1237 .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1238 .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1239
1240 // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
1241 // address components are VGPR.
1242 //
1243 // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
1244 // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
1245 // idxen_imm
1246 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1247 .Any({{S32, S32, V4S32, S32, S32, S32},
1249 .Any({{S64, S64, V4S32, S32, S32, S32},
1251 .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
1252 {{VgprV2S16},
1254
1255 addRulesForGOpcs({G_PTR_ADD})
1256 .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
1257 .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
1258 .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
1259 .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
1260
1261 addRulesForGOpcs({G_INTTOPTR})
1262 .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
1263 .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
1264 .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
1265 .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
1266 .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
1267 .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});
1268
1269 addRulesForGOpcs({G_PTRTOINT})
1270 .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
1271 .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
1272 .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
1273 .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
1274 .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
1275 .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});
1276
1277 // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
1278 // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
1279 addRulesForGOpcs({G_PTRMASK})
1280 .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
1281 .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
1282 .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
1283 .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});
1284
1285 addRulesForGOpcs({G_ABS}, Standard)
1286 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}})
1287 .Div(S16, {{Vgpr16}, {Vgpr16}, AbsToNegMax})
1288 .Uni(S32, {{Sgpr32}, {Sgpr32}})
1289 .Div(S32, {{Vgpr32}, {Vgpr32}, AbsToNegMax})
1290 .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, AbsToS32})
1291 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}, AbsToNegMax});
1292
1293 addRulesForGOpcs({G_BITREVERSE}, Standard)
1294 .Uni(S32, {{Sgpr32}, {Sgpr32}})
1295 .Div(S32, {{Vgpr32}, {Vgpr32}})
1296 .Uni(S64, {{Sgpr64}, {Sgpr64}})
1297 .Div(S64, {{Vgpr64}, {Vgpr64}});
1298
1299 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_POISON,
1300 G_CTTZ_ZERO_POISON})
1301 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
1302 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1303 .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
1305
1306 addRulesForGOpcs({G_CTPOP})
1307 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
1308 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1309 .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
1310 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}, CtPop64To32}});
1311
1312 addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
1313
1314 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
1315 .Uni(S64, {{Sgpr64}, {}});
1316
1317 addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
1318
1319 addRulesForGOpcs({G_GLOBAL_VALUE})
1320 .Any({{UniP0}, {{SgprP0}, {}}})
1321 .Any({{UniP1}, {{SgprP1}, {}}})
1322 .Any({{UniP3}, {{SgprP3}, {}}})
1323 .Any({{UniP4}, {{SgprP4}, {}}})
1324 .Any({{UniP8}, {{SgprP8}, {}}});
1325
1326 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
1327
1328 addRulesForGOpcs({G_SI_CALL})
1329 .Any({{_, UniP0}, {{None}, {SgprP0}}})
1330 .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
1331 .Any({{_, UniP4}, {{None}, {SgprP4}}})
1332 .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
1333
1334 bool hasSALUFloat = ST->hasSALUFloatInsts();
1335
1336 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
1337 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1338 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1339 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1340 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1341 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
1342 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1343 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1344 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1345 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
1347 hasSALUFloat)
1348 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1349
1350 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
1351 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1352 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1353 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1354 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1355 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1356 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1357
1358 addRulesForGOpcs({G_FMAD}, Standard)
1359 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1360 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1361 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1362 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1363
1364 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
1365 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1366 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1367 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
1368 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1369 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
1370 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
1371
1372 addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
1373 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1374 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
1375 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
1376 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
1380 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
1381 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
1382 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
1383 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
1384 .Uni(V2S16,
1386 hasSALUFloat)
1388 !hasSALUFloat);
1389
1390 addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
1391 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1392 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1393 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1394 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1395
1396 // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
1397 // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
1398 // instructions on SALU.
1399 addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
1400 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1401 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1402
1403 // FNEG and FABS are either folded as source modifiers or can be selected as
1404 // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
1405 // targets without SALU float we still select them as VGPR since there would
1406 // be no real sgpr use.
1407 addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
1408 .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
1409 .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
1410 .Div(S16, {{Vgpr16}, {Vgpr16}})
1411 .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
1412 .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
1413 .Div(S32, {{Vgpr32}, {Vgpr32}})
1414 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1415 .Div(S64, {{Vgpr64}, {Vgpr64}})
1416 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
1417 .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
1418 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1419 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1420 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1421
1422 addRulesForGOpcs({G_FCANONICALIZE}, Standard)
1423 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1424 .Div(S32, {{Vgpr32}, {Vgpr32}})
1425 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1426 .Div(S16, {{Vgpr16}, {Vgpr16}})
1427 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1428 .Div(S64, {{Vgpr64}, {Vgpr64}})
1429 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
1430 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1431 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1432 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1433
1434 bool hasPST = ST->hasPseudoScalarTrans();
1435 addRulesForGOpcs({G_FSQRT}, Standard)
1436 .Div(S16, {{Vgpr16}, {Vgpr16}})
1437 .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
1438 .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);
1439
1440 addRulesForGOpcs({G_FPTOUI, G_FPTOSI, G_FPTOUI_SAT, G_FPTOSI_SAT})
1441 .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
1442 .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
1443 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1444 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
1445 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1446 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1447 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
1448 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1449 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1450 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
1451
1452 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1453 .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
1454 .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
1455 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1456 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
1457 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1458 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1459 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
1460 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1461 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1462 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
1463
1464 addRulesForGOpcs({G_AMDGPU_S_BUFFER_PREFETCH})
1466
1467 addRulesForGOpcs({G_FPEXT})
1468 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1469 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1470 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
1471 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1472 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);
1473
1474 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
1475 .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1476 .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});
1477
1478 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
1479 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1480 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
1481
1482 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1483
1484 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
1485 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
1486 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
1487 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1488 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
1489 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
1490 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1491 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1492 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1494 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1495
1496 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM,
1497 G_FMINIMUMNUM, G_FMAXIMUMNUM},
1498 Standard)
1499 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1500 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1501 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1502 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1504 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1505 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1506 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1507 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1508 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1509
1510 addRulesForGOpcs({G_FPTRUNC})
1511 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1512 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1513 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1515 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1516 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1517 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1518
1519 addRulesForGOpcs({G_IS_FPCLASS})
1520 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1521 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1522 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1523 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1524 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1525 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1526
1527 addRulesForGOpcs({G_FCMP}, Standard)
1528 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1529 hasSALUFloat)
1530 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1531 !hasSALUFloat)
1532 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1533 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1534 hasSALUFloat)
1535 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1536 !hasSALUFloat)
1537 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1538 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1539 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
1540
1541 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1542 G_FEXP2, G_FLOG2},
1543 Standard)
1544 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1545 .Div(S16, {{Vgpr16}, {Vgpr16}})
1546 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1547 .Div(S32, {{Vgpr32}, {Vgpr32}})
1548 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1549 .Div(S64, {{Vgpr64}, {Vgpr64}});
1550
1551 addRulesForGOpcs({G_AMDGPU_GLOBAL_LOAD_MONITOR, G_AMDGPU_FLAT_LOAD_MONITOR},
1552 StandardB)
1553 .Uni(B32, {{UniInVgprB32}, {SgprPtr64}})
1554 .Div(B32, {{VgprB32}, {VgprPtr64}})
1555 .Uni(B64, {{UniInVgprB64}, {SgprPtr64}})
1556 .Div(B64, {{VgprB64}, {VgprPtr64}})
1557 .Uni(B128, {{UniInVgprB128}, {SgprPtr64}})
1558 .Div(B128, {{VgprB128}, {VgprPtr64}});
1559
1560 using namespace Intrinsic;
1561
1562 addRulesForIOpcs({returnaddress}).Any({{UniP0}, {{SgprP0}, {}}});
1563
1564 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1565
1566 addRulesForIOpcs({amdgcn_s_getreg}).Any({{}, {{Sgpr32}, {IntrId, Imm}}});
1567
1568 addRulesForIOpcs({amdgcn_s_setreg})
1569 .Any({{_, _, S32}, {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
1570
1571 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1572 .Any({{}, {{}, {IntrId, Imm, SgprB32_M0}}});
1573
1574 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1575 .Any({{S32}, {{Sgpr32}, {}}})
1576 .Any({{S64}, {{Sgpr64}, {}}});
1577
1578 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime}, Standard)
1579 .Uni(S64, {{Sgpr64}, {IntrId}});
1580
1581 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1582 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1583 Standard)
1584 .Uni(S32, {{Sgpr32}, {IntrId}});
1585
1586 // Intrinsics with no register operands.
1587 addRulesForIOpcs({amdgcn_asyncmark,
1588 amdgcn_endpgm,
1589 amdgcn_init_exec,
1590 amdgcn_s_barrier,
1591 amdgcn_s_barrier_leave,
1592 amdgcn_s_barrier_signal,
1593 amdgcn_s_barrier_wait,
1594 amdgcn_s_monitor_sleep,
1595 amdgcn_s_nop,
1596 amdgcn_s_sethalt,
1597 amdgcn_s_setprio,
1598 amdgcn_s_setprio_inc_wg,
1599 amdgcn_s_sleep,
1600 amdgcn_s_ttracedata_imm,
1601 amdgcn_s_wait_asynccnt,
1602 amdgcn_s_wait_bvhcnt,
1603 amdgcn_s_wait_dscnt,
1604 amdgcn_s_wait_event,
1605 amdgcn_s_wait_event_export_ready,
1606 amdgcn_s_wait_expcnt,
1607 amdgcn_s_wait_kmcnt,
1608 amdgcn_s_wait_loadcnt,
1609 amdgcn_s_wait_samplecnt,
1610 amdgcn_s_wait_storecnt,
1611 amdgcn_s_wait_tensorcnt,
1612 amdgcn_s_waitcnt,
1613 amdgcn_unreachable,
1614 amdgcn_wait_asyncmark,
1615 amdgcn_wave_barrier})
1616 .Any({{}, {{}, {}}});
1617
1618 addRulesForIOpcs({amdgcn_init_exec_from_input})
1619 .Any({{}, {{}, {IntrId, Sgpr32, Imm}}});
1620
1621 addRulesForIOpcs({amdgcn_s_ttracedata}).Any({{}, {{}, {IntrId, SgprB32_M0}}});
1622
1623 addRulesForIOpcs({amdgcn_s_sleep_var})
1624 .Any({{}, {{}, {IntrId, SgprB32_ReadFirstLane}}});
1625
1626 addRulesForIOpcs({amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier})
1627 .Any({{}, {{}, {IntrId, SgprB32_M0}}});
1628
1629 addRulesForIOpcs({amdgcn_s_barrier_signal_var, amdgcn_s_barrier_init})
1630 .Any({{}, {{}, {IntrId, SgprB32_M0, SgprB32_M0}}});
1631
1632 addRulesForIOpcs({amdgcn_s_barrier_signal_isfirst})
1633 .Any({{UniS1}, {{Sgpr32Trunc}, {}}});
1634
1635 addRulesForIOpcs(
1636 {amdgcn_s_get_named_barrier_state, amdgcn_s_get_barrier_state}, Standard)
1637 .Uni(S32, {{Sgpr32}, {IntrId, SgprB32_M0}});
1638
1639 addRulesForIOpcs({amdgcn_flat_prefetch}).Any({{}, {{}, {IntrId, VgprP0}}});
1640
1641 addRulesForIOpcs({amdgcn_global_prefetch}).Any({{}, {{}, {IntrId, VgprP1}}});
1642
1643 addRulesForIOpcs({amdgcn_s_prefetch_data})
1645
1646 addRulesForIOpcs({amdgcn_class})
1647 .Any({{UniS1, _, S16}, {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
1648 .Any({{DivS1, _, S16}, {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
1649 .Any({{UniS1, _, S32}, {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
1650 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
1651 .Any({{UniS1, _, S64}, {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
1652 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
1653
1654 // This is the "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1655 addRulesForIOpcs({amdgcn_end_cf})
1656 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1657 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
1658
1659 addRulesForIOpcs({amdgcn_if_break}, Standard)
1660 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1661 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1662
1663 addRulesForIOpcs({amdgcn_exp})
1664 .Any({{_, _, _, S32, S32, S32, S32},
1665 {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1666
1667 addRulesForIOpcs({amdgcn_exp_compr})
1668 .Any({{_, _, _, V2S16}, {{}, {IntrId, Imm, Imm, VgprV2S16, VgprV2S16}}});
1669
1670 addRulesForIOpcs({amdgcn_exp_row})
1671 .Any({{_, _, _, S32, S32, S32, S32, _, S32},
1672 {{},
1674 SgprB32_M0}}});
1675
1676 addRulesForIOpcs({amdgcn_lds_direct_load}, StandardB)
1677 .Div(B32, {{VgprB32}, {IntrId, SgprB32_M0}});
1678
1679 addRulesForIOpcs({amdgcn_lds_param_load}, Standard)
1680 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1681
1682 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1683 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1684
1685 addRulesForIOpcs({amdgcn_readfirstlane})
1686 .Any({{UniB32, _, DivB32}, {{}, {SgprB32, None, VgprB32}}})
1687 // This rule should not be needed in the first place: it comes from call
1688 // lowering, which does a readfirstlane just in case the register is not in an SGPR.
1689 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1690
1691 addRulesForIOpcs({amdgcn_readlane}, StandardB)
1693
1694 addRulesForIOpcs({amdgcn_writelane}, StandardB)
1695 .Div(B32,
1696 {{VgprB32},
1698
1699 addRulesForIOpcs({amdgcn_add_max_i32, amdgcn_add_max_u32, amdgcn_add_min_i32,
1700 amdgcn_add_min_u32},
1701 Standard)
1702 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1703 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1704
1705 addRulesForIOpcs({amdgcn_pk_add_max_i16, amdgcn_pk_add_max_u16,
1706 amdgcn_pk_add_min_i16, amdgcn_pk_add_min_u16},
1707 Standard)
1710
1711 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16}, Standard)
1712 .Div(S32, {{Vgpr32},
1715
1716 addRulesForIOpcs({amdgcn_permlane_bcast, amdgcn_permlane_up,
1717 amdgcn_permlane_down, amdgcn_permlane_xor},
1718 StandardB)
1719 .Div(B32,
1720 {{VgprB32},
1722
1723 addRulesForIOpcs({amdgcn_permlane_idx_gen}, Standard)
1725
1726 addRulesForIOpcs({amdgcn_perm}, Standard)
1727 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1728 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1729
1730 addRulesForIOpcs(
1731 {amdgcn_wave_reduce_add, amdgcn_wave_reduce_and, amdgcn_wave_reduce_fadd,
1732 amdgcn_wave_reduce_fmax, amdgcn_wave_reduce_fmin,
1733 amdgcn_wave_reduce_fsub, amdgcn_wave_reduce_max, amdgcn_wave_reduce_min,
1734 amdgcn_wave_reduce_or, amdgcn_wave_reduce_sub, amdgcn_wave_reduce_umax,
1735 amdgcn_wave_reduce_umin, amdgcn_wave_reduce_xor},
1736 Standard)
1737 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1738 .Div(S32, {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1739 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64}})
1740 .Div(S64, {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
1741
1742 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz}, Standard)
1743 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1744 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1745 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1746 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1747
1748 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1749 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1750 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1751 Standard)
1752 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1753 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1754
1755 addRulesForIOpcs({amdgcn_rsq, amdgcn_rsq_clamp}, Standard)
1756 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
1757 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
1758 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1759 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
1760 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST)
1761 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1762 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1763 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1764
1765 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1766 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1767 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1768 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1769 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1770
1771 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1772 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1773 amdgcn_mulhi_u24},
1774 Standard)
1775 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1776 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1777
1778 addRulesForIOpcs({amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
1779 amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
1780 amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
1781 Standard)
1782 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1783 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1784
1785 addRulesForIOpcs({amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
1786 amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
1787 Standard)
1788 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1789 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1790
1791 addRulesForIOpcs({amdgcn_cvt_pk_f32_bf8, amdgcn_cvt_pk_f32_fp8})
1792 .Any({{UniV2S32}, {{UniInVgprV2S32}, {IntrId, Vgpr32}}})
1793 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, Vgpr32}}});
1794
1795 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1796 amdgcn_fma_legacy},
1797 Standard)
1798 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1799 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1800
1801 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1802 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1803 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1804 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1805 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1806 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1807 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1808
1809 addRulesForIOpcs({amdgcn_prng_b32})
1810 .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
1811 .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});
1812
1813 addRulesForIOpcs({amdgcn_sffbh}, Standard)
1814 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1815 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1816
1817 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1818 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1819 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1820 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1821 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1822
1823 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1824 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1825 Standard)
1826 .Uni(V2S16, {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1827 .Div(V2S16, {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1828
1829 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
1830 amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
1831 amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
1832 amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
1833 Standard)
1835
1836 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
1837 amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
1838 Standard)
1840
1841 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1842 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1}}})
1843 .Any({{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1}}})
1844 .Any({{DivB32, _, UniP1}, {{VgprB32}, {IntrId, SgprP1}}})
1845 .Any({{DivB32, _, DivP1}, {{VgprB32}, {IntrId, VgprP1}}});
1846
1847 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1848 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1}}})
1849 .Any({{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1}}})
1850 .Any({{DivB128, _, UniP1}, {{VgprB128}, {IntrId, SgprP1}}})
1851 .Any({{DivB128, _, DivP1}, {{VgprB128}, {IntrId, VgprP1}}});
1852
1853 addRulesForIOpcs({amdgcn_global_load_tr4_b64})
1854 .Any({{DivV2S32, _, UniP1}, {{VgprV2S32}, {IntrId, SgprP1}}})
1855 .Any({{DivV2S32, _, DivP1}, {{VgprV2S32}, {IntrId, VgprP1}}});
1856
1857 addRulesForIOpcs({amdgcn_global_load_tr6_b96})
1858 .Any({{DivV3S32, _, UniP1}, {{VgprV3S32}, {IntrId, SgprP1}}})
1859 .Any({{DivV3S32, _, DivP1}, {{VgprV3S32}, {IntrId, VgprP1}}});
1860
1861 addRulesForIOpcs({amdgcn_ds_load_tr4_b64, amdgcn_ds_load_tr8_b64})
1862 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
1863
1864 addRulesForIOpcs({amdgcn_ds_load_tr6_b96})
1865 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
1866
1867 addRulesForIOpcs({amdgcn_ds_load_tr16_b128})
1868 .Any({{DivB128}, {{VgprB128}, {IntrId, VgprP3}}});
1869
1870 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1871 .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1872
1873 addRulesForIOpcs(
1874 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
1875 .Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1876
1877 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1878 Standard)
1879 .Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1880
1881 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1882 .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1883
1884 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1885 .Any({{_},
1886 {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1887
1888 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1889 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1890
1891 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1892 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1893
1894 addRulesForIOpcs({amdgcn_global_load_lds})
1895 .Any({{}, {{}, {IntrId, VgprP1, SgprB32_M0}}});
1896
1897 addRulesForIOpcs({amdgcn_global_load_async_to_lds_b8,
1898 amdgcn_global_load_async_to_lds_b32,
1899 amdgcn_global_load_async_to_lds_b64,
1900 amdgcn_global_load_async_to_lds_b128,
1901 amdgcn_global_store_async_from_lds_b8,
1902 amdgcn_global_store_async_from_lds_b32,
1903 amdgcn_global_store_async_from_lds_b64,
1904 amdgcn_global_store_async_from_lds_b128})
1905 .Any({{}, {{}, {IntrId, VgprP1, VgprP3}}});
1906
1907 addRulesForIOpcs({amdgcn_cluster_load_b32})
1909 .Any({{DivB32, _, UniP1}, {{VgprB32}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1910 .Any(
1911 {{DivB32, _, DivP1}, {{VgprB32}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1912
1913 addRulesForIOpcs({amdgcn_cluster_load_b64})
1915 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1916 .Any(
1917 {{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1918
1919 addRulesForIOpcs({amdgcn_cluster_load_b128})
1921 .Any({{DivB128, _, UniP1},
1922 {{VgprB128}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1923 .Any({{DivB128, _, DivP1},
1924 {{VgprB128}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1925
1926 addRulesForIOpcs({amdgcn_cluster_load_async_to_lds_b8,
1927 amdgcn_cluster_load_async_to_lds_b32,
1928 amdgcn_cluster_load_async_to_lds_b64,
1929 amdgcn_cluster_load_async_to_lds_b128})
1930 .Any({{}, {{}, {IntrId, VgprP1, VgprP3, Imm, Imm, SgprB32_M0}}});
1931
1932 addRulesForIOpcs({amdgcn_perm_pk16_b4_u4}, StandardB)
1933 .Uni(B64, {{UniInVgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}})
1934 .Div(B64, {{VgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}});
1935
1936 addRulesForIOpcs({amdgcn_perm_pk16_b6_u4}, StandardB)
1938 .Div(B96, {{VgprB96}, {IntrId, Vgpr32, VgprB64, VgprV2S32}});
1939
1940 addRulesForIOpcs({amdgcn_perm_pk16_b8_u4}, StandardB)
1942 .Div(B128, {{VgprB128}, {IntrId, VgprB64, VgprB64, VgprV2S32}});
1943
1944 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1945 amdgcn_strict_wqm},
1946 StandardB)
1947 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1948 .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
1949 .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
1950 .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
1951 .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
1952 .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
1953 .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
1954 .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
1955 .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
1956 .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
1957 .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
1958 .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
1959
1960 addRulesForIOpcs({amdgcn_kill, amdgcn_wqm_demote})
1961 .Any({{}, {{}, {IntrId, Vcc}}});
1962
1963 addRulesForIOpcs({amdgcn_ballot}, Standard)
1964 .Uni(S64, {{Sgpr64}, {IntrId, Vcc}})
1965 .Uni(S32, {{Sgpr32}, {IntrId, Vcc}});
1966
1967 addRulesForIOpcs({amdgcn_inverse_ballot})
1968 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}})
1969 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}});
1970
1971 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1972 .Any({{DivS1}, {{Vcc}, {}}});
1973
1974 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8}, StandardB)
1975 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1976 .Div(B64, {{VgprB64}, {IntrId, VgprB64}});
1977
1978 addRulesForIOpcs({amdgcn_update_dpp}, StandardB)
1979 .Div(B32, {{VgprB32}, {IntrId, VgprB32, VgprB32}})
1980 .Div(B64, {{VgprB64}, {IntrId, VgprB64, VgprB64}});
1981
1982 addRulesForIOpcs({amdgcn_sin, amdgcn_cos}, Standard)
1983 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1984 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1985 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1986 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}});
1987
1988 addRulesForIOpcs({amdgcn_trig_preop}, Standard)
1989 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32}})
1990 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr32}});
1991
1992 addRulesForIOpcs({amdgcn_exp2}, Standard)
1993 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1994 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
1995 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
1996 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1997 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
1998 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
1999
2000 addRulesForIOpcs({amdgcn_rcp, amdgcn_sqrt}, Standard)
2001 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2002 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
2003 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
2004 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2005 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2006 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST)
2007 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}})
2008 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}});
2009
2010 addRulesForIOpcs({amdgcn_log}, Standard)
2011 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2012 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
2013 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
2014 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2015 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2016 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
2017
2018 addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
2019 .Any({{}, {{}, {IntrId, VgprP3}}});
2020
2021 addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64}, Standard)
2022 .Div(S64, {{Vgpr64}, {IntrId, VgprP3, Vgpr64}});
2023
2024 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
2025 Standard)
2026 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2027 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32}});
2028
2029 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume}, Standard)
2030 .Uni(S32, {{UniInVgprS32}, {IntrId, SgprB32_M0}})
2031 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0}});
2032
2033 addRulesForIOpcs(
2034 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard)
2035 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
2036
2037 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn}, Standard)
2038 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2039
2040 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn}, Standard)
2041 .Div(S64, {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2042
2043 addRulesForIOpcs({amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
2044 amdgcn_ds_gws_sema_release_all})
2045 .Any({{}, {{}, {IntrId, SgprB32_M0}}});
2046
2047 addRulesForIOpcs(
2048 {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
2049 .Any({{}, {{}, {IntrId, Vgpr32, SgprB32_M0}}});
2050
2051 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap}, Standard)
2052 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0, Vgpr32}});
2053
2054 addRulesForIOpcs({amdgcn_ds_swizzle}, Standard)
2055 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
2056 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
2057
2058 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var}, Standard)
2059 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2060
2061 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap}, Standard)
2062 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
2063
2064 addRulesForIOpcs({amdgcn_permlane64}, StandardB)
2065 .Div(B32, {{VgprB32}, {IntrId, VgprB32}});
2066
2067 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
2068 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
2069
2070 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
2071 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
2072
2073 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
2074 .Any({{DivV4S16}, {{VgprV4S16}, {IntrId, VgprP3}}});
2075
2076 addRulesForIOpcs({amdgcn_interp_p1}, Standard)
2077 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, SgprB32_M0}});
2078
2079 addRulesForIOpcs({amdgcn_interp_p1_f16}, Standard)
2080 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, Imm, SgprB32_M0}});
2081
2082 addRulesForIOpcs({amdgcn_interp_p2}, Standard)
2083 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Imm, Imm, SgprB32_M0}});
2084
2085 addRulesForIOpcs({amdgcn_interp_p2_f16}, Standard)
2086 .Div(S16,
2088
2089 addRulesForIOpcs({amdgcn_interp_mov}, Standard)
2090 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, Imm, SgprB32_M0}});
2091
2092 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
2093 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
2094 Standard)
2095 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2096 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2097
2098 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
2099 Standard)
2100 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2101 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2102
2103 addRulesForIOpcs({amdgcn_div_fmas}, Standard)
2104 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2105 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2106 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}})
2107 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}});
2108
2109 addRulesForIOpcs({amdgcn_div_fixup}, Standard)
2110 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2111 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2112 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2113 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2114 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}})
2115 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}});
2116
2117 addRulesForIOpcs({amdgcn_div_scale}, Standard)
2118 .Div(S32, {{Vgpr32, Vcc}, {IntrId, Vgpr32, Vgpr32}})
2119 .Uni(S32, {{UniInVgprS32, UniInVcc}, {IntrId, Vgpr32, Vgpr32}})
2120 .Div(S64, {{Vgpr64, Vcc}, {IntrId, Vgpr64, Vgpr64}})
2121 .Uni(S64, {{UniInVgprS64, UniInVcc}, {IntrId, Vgpr64, Vgpr64}});
2122
2123 addRulesForIOpcs({amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2}, Standard)
2125 .Div(S32, {{Vgpr32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
2126
2127 addRulesForIOpcs({amdgcn_fdot2_f16_f16}, Standard)
2129 .Div(S16, {{Vgpr16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr16}});
2130
2131 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8}, Standard)
2132 .Uni(S32, {{UniInVgprS32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}})
2133 .Div(S32, {{Vgpr32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}});
2134
2135 addRulesForIOpcs({amdgcn_s_alloc_vgpr})
2137
2138 addRulesForIOpcs({amdgcn_sat_pk4_i4_i8, amdgcn_sat_pk4_u4_u8}, Standard)
2139 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32}})
2140 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32}});
2141
2142 // TODO: Add handling for GFX90A+ which should use VGPRs instead of AGPRs.
2143 bool HasGFX90AInsts = ST->hasGFX90AInsts();
2144 addRulesForIOpcs({amdgcn_mfma_f32_32x32x1f32, amdgcn_mfma_f32_16x16x1f32,
2145 amdgcn_mfma_f32_4x4x1f32, amdgcn_mfma_f32_32x32x2f32,
2146 amdgcn_mfma_f32_16x16x4f32, amdgcn_mfma_f32_32x32x4f16,
2147 amdgcn_mfma_f32_16x16x4f16, amdgcn_mfma_f32_4x4x4f16,
2148 amdgcn_mfma_f32_32x32x8f16, amdgcn_mfma_f32_16x16x16f16,
2149 amdgcn_mfma_i32_32x32x4i8, amdgcn_mfma_i32_16x16x4i8,
2150 amdgcn_mfma_i32_4x4x4i8, amdgcn_mfma_i32_32x32x8i8,
2151 amdgcn_mfma_i32_16x16x16i8, amdgcn_mfma_f32_32x32x2bf16,
2152 amdgcn_mfma_f32_16x16x2bf16, amdgcn_mfma_f32_4x4x2bf16,
2153 amdgcn_mfma_f32_32x32x4bf16, amdgcn_mfma_f32_16x16x8bf16})
2154 .Any({{DivAnyTy},
2156 !HasGFX90AInsts);
2157
2158 // WMMA/SWMMAC intrinsics: all register operands map to VGPR.
2159 addRulesForIOpcs(
2160 {// WMMA GFX11+
2161 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
2162 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
2163 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
2164 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
2165 // WMMA GFX12
2166 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
2167 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
2168 amdgcn_wmma_i32_16x16x32_iu4,
2169 // WMMA GFX1250
2170 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
2171 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
2172 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
2173 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
2174 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
2175 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
2176 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
2177 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
2178 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
2179 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
2180 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
2181 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
2182 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
2183 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
2184 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
2185 // SWMMAC GFX12
2186 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
2187 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
2188 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
2189 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
2190 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
2191 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
2192 // SWMMAC GFX1250
2193 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
2194 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
2195 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
2196 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
2197 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
2198 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
2199 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})
2200 .Any({{}, {{}, {}, ApplyAllVgpr}});
2201
2202} // end initialize rules
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
constexpr LLT S16
constexpr LLT S1
constexpr LLT V2S16
constexpr LLT S32
constexpr LLT V4S32
constexpr LLT V3S32
constexpr LLT S64
constexpr LLT V2S32
constexpr LLT S128
UniformityLLTOpPredicateID LLTToBId(LLT Ty)
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, const MachineUniformityInfo &MUI, const MachineRegisterInfo &MRI)
UniformityLLTOpPredicateID LLTToId(LLT Ty)
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
#define _
IRTranslator LLVM IR MI
Register Reg
Register const TargetRegisterInfo * TRI
Machine IR instance of the generic uniformity analysis.
bool operator()(const MachineInstr &MI) const
Predicate operator||(const Predicate &RHS) const
Predicate operator&&(const Predicate &RHS) const
Predicate(std::function< bool(const MachineInstr &)> Pred)
Predicate operator!() const
RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
const SetOfRulesForOpcode * getRulesForOpc(MachineInstr &MI) const
const RegBankLLTMapping * findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineUniformityInfo &MUI) const
void addFastRuleDivergent(UniformityLLTOpPredicateID Ty, RegBankLLTMapping RuleApplyIDs)
void addFastRuleUniform(UniformityLLTOpPredicateID Ty, RegBankLLTMapping RuleApplyIDs)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
bool isSigned() const
Definition InstrTypes.h:993
bool isDivergentAtDef(ConstValueRefT V) const
Whether V is divergent at its definition.
bool isUniformAtDef(ConstValueRefT V) const
Whether V is uniform/non-divergent at its definition.
bool isEquality() const
Return true if this predicate is either EQ or NE.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
TypeSize getValue() const
Representation of each machine instruction.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const TargetRegisterInfo * getTargetRegisterInfo() const
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void swap(SmallVectorImpl &RHS)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
bool isAnyPtr(LLT Ty, unsigned Width)
bool isUniformMMO(const MachineMemOperand *MMO)
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of an entry function to this load.
Definition SIInstrInfo.h:44
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
SmallVector< UniformityLLTOpPredicateID, 4 > OpUniformityAndTypes
PredicateMapping(std::initializer_list< UniformityLLTOpPredicateID > OpList, std::function< bool(const MachineInstr &)> TestFunc=nullptr)
bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI, const MachineRegisterInfo &MRI) const
std::function< bool(const MachineInstr &)> TestFunc
RegBankLLTMapping(std::initializer_list< RegBankLLTMappingApplyID > DstOpMappingList, std::initializer_list< RegBankLLTMappingApplyID > SrcOpMappingList, LoweringMethodID LoweringMethod=DoNotLower)
SmallVector< RegBankLLTMappingApplyID, 2 > DstOpMapping
SmallVector< RegBankLLTMappingApplyID, 4 > SrcOpMapping
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39