//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize rules for all opcodes.
/// Implementation of the container for all the rules and of the rule search.
/// The fast search handles the most common case, where Rule.Predicate checks
/// the LLT and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

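// Check for a pointer LLT of the given size in bits, in any address space.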
bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

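// Naming scheme of the predicate IDs, as matched below: S*, P* and V* IDs
// match an exact scalar, pointer or vector LLT; Ptr* IDs match any pointer of
// the given size; B* IDs match any type of the given size in bits. A Uni/Div
// prefix additionally requires the register to be uniform/divergent, and the
// *BRC variants require that a register class exists for the type's size.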
static bool matchUniformityAndLLT(Register Reg,
                                  UniformityLLTOpPredicateID UniID,
                                  const MachineUniformityInfo &MUI,
                                  const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B160:
    return MRI.getType(Reg).getSizeInBits() == 160;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case UniBRC: {
    if (!MUI.isUniform(Reg))
      return false;
    // Check if there is an SGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. The extra size check is needed
    // since getSGPRClassForBitWidth returns SReg_32RegClass for a size of 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
  case DivV3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32) && MUI.isDivergent(Reg);
  case DivV4S16:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case DivBRC: {
    if (!MUI.isDivergent(Reg))
      return false;
    // Check if there is a VGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case BRC: {
    // Check if there are SGPR and VGPR register classes of the same size as
    // the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize) &&
           TRI->getVGPRClassForBitWidth(LLTSize);
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (OpUniformityAndTypes[i] == _) {
      assert((!MI.getOperand(i).isReg() ||
              !MI.getOperand(i).getReg().isVirtual()) &&
             "_ is for non-register and physical register operands only");
      continue;
    }

    // Remaining IDs check registers.
    if (!MO.isReg())
      return false;

    if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

static UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

static UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
      Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}
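
// Fast rules cover the common predicates that only inspect operand 0: the
// LLT (or B-type size) is mapped to a slot index, and the Uni/Div arrays
// give the mapping directly. For example, with Standard fast rules a uniform
// S32 result selects Uni[0]. Everything else goes through the slow rule list.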

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to each
  // slot that could match the fast predicate. Otherwise, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for a predicate lambda that enables '&&', '||' and
// '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example: !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A (false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A (true) or Y
    //   A == false jump to B
    // Notice that when negating an expression, we simply flip Neg on each
    // Pred and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump.
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); }

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  }

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  }

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  }

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
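
// For example, (A && B) has the jump table {A: T->+1, F->+2}, {B: T->+1,
// F->+1}: a false A jumps past B (the whole expression is false), a true A
// falls through to B, whose result is the result of the expression.
// operator&& builds this by retargeting every LHS false-jump that pointed at
// the end to point past the appended RHS instead.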

// Initialize rules.
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

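  // Rule-building DSL: Uni(Ty, ...) and Div(Ty, ...) add a fast rule keyed on
  // the type and uniformity of operand 0; Any({Predicate, Mapping}) appends a
  // slow rule. A mapping is {{DstOpMappings}, {SrcOpMappings}} plus an
  // optional lowering method, and a trailing bool argument makes the rule
  // conditional on a subtarget feature.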
  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
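
  // Uniform S1 is legalized to a 32-bit SGPR (the Sgpr32Trunc dst mapping),
  // while divergent S1 is a lane mask held in Vcc.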
  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
538
539 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
540 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
541 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
542 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
543 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
545 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVectorMulU64();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_BSWAP}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}});

  addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
                    G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
                    G_AMDGPU_RCP_IFLAG},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and
  // G_FCONSTANT here; the rest is trivially regbank-selected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});

  addRulesForGOpcs({G_FREEZE})
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  addRulesForGOpcs({G_PHI})
      .Any({{UniS1}, {{}, {}, AextToS32InIncomingBlockGPHI}})
      .Any({{UniS16}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}});

  addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
      .Any({{UniB32, UniBRC, UniS32}, {{SgprB32}, {SgprBRC, Sgpr32}}})
      .Any({{DivB32, DivBRC, UniS32}, {{VgprB32}, {VgprBRC, Sgpr32}}})
      .Any({{DivB32, BRC, DivS32},
            {{VgprB32}, {VgprBRC, Vgpr32}}})
      .Any({{UniB64, UniBRC, UniS32}, {{SgprB64}, {SgprBRC, Sgpr32}}})
      .Any({{DivB64, DivBRC, UniS32},
            {{VgprB64}, {VgprBRC, Sgpr32}}})
      .Any({{DivB64, BRC, DivS32},
            {{VgprB64}, {VgprBRC, Vgpr32}}});

  // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
                    G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
                    G_AMDGPU_INTRIN_IMAGE_STORE,
                    G_AMDGPU_INTRIN_IMAGE_STORE_D16})
      .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In GlobalISel, an in-reg G_TRUNC is treated as a no-op and is instruction
  // selected into a COPY. It is up to the user to deal with the truncated
  // bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
           !Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});

  // Atomic read-modify-write operations: result and value are always VGPR,
  // pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
                    G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
                    G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
                    G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
  bool HasAtomicBufferGlobalPkAddF16Insts =
      ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
      ST->hasAtomicBufferGlobalPkAddF16Insts();
  bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
  addRulesForGOpcs({G_ATOMICRMW_FADD})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
      .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
           HasAtomicFlatPkAdd16Insts)
      .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
           HasAtomicBufferGlobalPkAddF16Insts)
      .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
           HasAtomicDsPkAdd16Insts);

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64},
                 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
                    G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
                    G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
                    G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
                    G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
                    G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
                    G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
                   Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may be different than the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

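  // "Uniform load" predicate: a load that may be selected to a scalar (s_)
  // load. The MMO must be uniform and non-atomic, and either constant, or
  // both non-volatile and known not to be clobbered (invariant or marked
  // MONoClobber).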
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example volatile or non-aligned
      // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
      // selected as global_load, use SgprP1 for pointer instead to match
      // patterns without flat-for-global, default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use an sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
      StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
                   StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Any({{DivB160},
            {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{UniB160},
            {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
      StandardB);

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
                    G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  // Buffer atomics: the resource descriptor and the scalar offset are SGPR;
  // data and address components are VGPR.
  //
  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
  // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
  //       idxen_imm
  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
      .Any({{S32, S32, V4S32, S32, S32, S32},
            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{S64, S64, V4S32, S32, S32, S32},
            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
            {{VgprV2S16},
             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on P8 (buffer resource) tests due to a legalizer issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
                    G_CTTZ_ZERO_UNDEF})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  addRulesForGOpcs({G_SI_CALL})
      .Any({{_, UniP0}, {{None}, {SgprP0}}})
      .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
      .Any({{_, UniP4}, {{None}, {SgprP4}}})
      .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
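
  // The _WF suffix marks mappings that place a divergent value into SGPRs
  // via a waterfall loop, e.g. a divergent call target for G_SI_CALL or a
  // divergent buffer resource descriptor (SgprV4S32_WF above).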

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
  // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
  // instructions on SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with a mask. XOR and AND are available on the SALU,
  // but for targets without SALU float we still select them as VGPR since
  // there would be no real SGPR use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  bool hasPST = ST->hasPseudoScalarTrans();
  addRulesForGOpcs({G_FSQRT}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);

  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_UITOFP, G_SITOFP})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
      .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});

1399 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
1400 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1401 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
1402
1403 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1404
1405 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
1406 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
1407 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
1408 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1409 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
1410 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
1411 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1412 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1413 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1414 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1415 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1416
1417 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
1418 Standard)
1419 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1420 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1421 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1422 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1423 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1424 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1425 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1426 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1427 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1428 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1429
1430 addRulesForGOpcs({G_FPTRUNC})
1431 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1432 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1433 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1434 .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
1435 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1436 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1437 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1438
1439 addRulesForGOpcs({G_IS_FPCLASS})
1440 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1441 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1442 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1443 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1444 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1445 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
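// Reading note (annotation): Vcc keeps a divergent s1 def in the
// condition-code bank. UniInVcc is assumed to mean the result is still
// produced in VCC (the instruction only writes a lane mask) and then
// transferred to a scalar register because the def is known uniform.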
1446
1447 addRulesForGOpcs({G_FCMP}, Standard)
1448 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1449 hasSALUFloat)
1450 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1451 !hasSALUFloat)
1452 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1453 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1454 hasSALUFloat)
1455 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1456 !hasSALUFloat)
1457 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1458 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1459 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
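// Reading note (annotation): operand 1 of G_FCMP is the compare predicate,
// not a register, so it is mapped as None. Sgpr32Trunc is assumed to widen
// the uniform s1 result to an s32 SALU value that is truncated back to s1.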
1460
1461 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1462 G_FEXP2, G_FLOG2},
1463 Standard)
1464 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1465 .Div(S16, {{Vgpr16}, {Vgpr16}})
1466 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1467 .Div(S32, {{Vgpr32}, {Vgpr32}})
1468 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1469 .Div(S64, {{Vgpr64}, {Vgpr64}});
1470
1471 using namespace Intrinsic;
1472
1473 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1474
1475 addRulesForIOpcs({amdgcn_s_getreg}).Any({{}, {{Sgpr32}, {IntrId, Imm}}});
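// Reading note (annotation): in intrinsic rules the IntrId slot stands for
// the intrinsic-ID operand and Imm for immediate operands; neither is
// assigned a register bank.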
1476
1477 addRulesForIOpcs({amdgcn_s_setreg})
1478 .Any({{_, _, S32}, {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
1479
1480 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1481 .Any({{}, {{}, {IntrId, Imm, SgprB32_M0}}});
1482
1483 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1484 .Any({{S32}, {{Sgpr32}, {}}})
1485 .Any({{S64}, {{Sgpr64}, {}}});
1486
1487 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime}, Standard)
1488 .Uni(S64, {{Sgpr64}, {IntrId}});
1489
1490 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1491 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1492 Standard)
1493 .Uni(S32, {{Sgpr32}, {IntrId}});
1494
1495 // Intrinsics with no register operands.
1496 addRulesForIOpcs({amdgcn_endpgm,
1497 amdgcn_s_barrier,
1498 amdgcn_s_barrier_signal,
1499 amdgcn_s_barrier_wait,
1500 amdgcn_s_nop,
1501 amdgcn_s_sethalt,
1502 amdgcn_s_setprio,
1503 amdgcn_s_sleep,
1504 amdgcn_s_ttracedata_imm,
1505 amdgcn_s_wait_asynccnt,
1506 amdgcn_s_wait_bvhcnt,
1507 amdgcn_s_wait_dscnt,
1508 amdgcn_s_wait_event,
1509 amdgcn_s_wait_event_export_ready,
1510 amdgcn_s_wait_expcnt,
1511 amdgcn_s_wait_kmcnt,
1512 amdgcn_s_wait_loadcnt,
1513 amdgcn_s_wait_samplecnt,
1514 amdgcn_s_wait_storecnt,
1515 amdgcn_s_wait_tensorcnt,
1516 amdgcn_s_waitcnt,
1517 amdgcn_wave_barrier})
1518 .Any({{}, {{}, {}}});
1519
1520 addRulesForIOpcs({amdgcn_s_ttracedata}).Any({{}, {{}, {IntrId, SgprB32_M0}}});
1521
1522 addRulesForIOpcs({amdgcn_s_sleep_var})
1523 .Any({{}, {{}, {IntrId, SgprB32_ReadFirstLane}}});
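// Reading note (annotation): SgprB32_ReadFirstLane is assumed to force an
// operand that must be scalar at the ISA level into an SGPR, inserting a
// readfirstlane when the incoming value happens to live in a VGPR.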
1524
1525 addRulesForIOpcs({amdgcn_s_prefetch_data})
1527
1528 addRulesForIOpcs({amdgcn_class})
1529 .Any({{UniS1, _, S16}, {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
1530 .Any({{DivS1, _, S16}, {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
1531 .Any({{UniS1, _, S32}, {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
1532 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
1533 .Any({{UniS1, _, S64}, {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
1534 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
1535
1536 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1537 addRulesForIOpcs({amdgcn_end_cf})
1538 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1539 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
1540
1541 addRulesForIOpcs({amdgcn_if_break}, Standard)
1542 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1543 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1544
1545 addRulesForIOpcs({amdgcn_exp})
1546 .Any({{_, _, _, S32, S32, S32, S32},
1547 {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1548
1549 addRulesForIOpcs({amdgcn_exp_compr})
1550 .Any({{_, _, _, V2S16}, {{}, {IntrId, Imm, Imm, VgprV2S16, VgprV2S16}}});
1551
1552 addRulesForIOpcs({amdgcn_exp_row})
1553 .Any({{_, _, _, S32, S32, S32, S32, _, S32},
1554 {{},
1555 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1556 SgprB32_M0}}});
1557
1558 addRulesForIOpcs({amdgcn_lds_direct_load}, StandardB)
1559 .Div(B32, {{VgprB32}, {IntrId, SgprB32_M0}});
1560
1561 addRulesForIOpcs({amdgcn_lds_param_load}, Standard)
1562 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1563
1564 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1565 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1566
1567 addRulesForIOpcs({amdgcn_readfirstlane})
1568 .Any({{UniB32, _, DivB32}, {{}, {SgprB32, None, VgprB32}}})
1569 // This should not exist in the first place; it comes from call lowering,
1570 // which inserts a readfirstlane just in case the register is not in an sgpr.
1571 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1572
1573 addRulesForIOpcs({amdgcn_readlane}, StandardB)
1575
1576 addRulesForIOpcs({amdgcn_writelane}, StandardB)
1577 .Div(B32,
1578 {{VgprB32},
1580
1581 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16}, Standard)
1582 .Div(S32, {{Vgpr32},
1585
1586 addRulesForIOpcs({amdgcn_perm}, Standard)
1587 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1588 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1589
1590 addRulesForIOpcs({amdgcn_wave_reduce_umax, amdgcn_wave_reduce_umin}, Standard)
1591 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1592 .Div(S32, {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1593 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64}})
1594 .Div(S64, {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
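// Reading note (annotation): a wave reduction produces a wave-uniform value,
// so even the divergent-def rows compute in SGPRs; Sgpr32ToVgprDst and
// Sgpr64ToVgprDst are assumed to copy that scalar result into the VGPR-class
// def afterwards.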
1595
1596 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz}, Standard)
1597 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1598 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1599 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1600 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1601
1602 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1603 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1604 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1605 Standard)
1606 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1607 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1608
1609 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1610 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1611 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1612 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1613 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1614
1615 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1616 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1617 amdgcn_mulhi_u24},
1618 Standard)
1619 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1620 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1621
1622 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1623 amdgcn_fma_legacy},
1624 Standard)
1625 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1626 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1627
1628 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1629 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1630 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1631 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1632 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1633 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1634 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1635
1636 addRulesForIOpcs({amdgcn_prng_b32})
1637 .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
1638 .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});
1639
1640 addRulesForIOpcs({amdgcn_sffbh}, Standard)
1641 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1642 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1643
1644 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1645 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1646 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1647 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1648 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1649
1650 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1651 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1652 Standard)
1653 .Uni(V2S16, {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1654 .Div(V2S16, {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1655
1656 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1657 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1658 .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});
1659
1660 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1661 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1662 .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});
1663
1664 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1665 .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1666
1667 addRulesForIOpcs(
1668 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
1669 .Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1670
1671 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1672 Standard)
1673 .Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1674
1675 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1676 .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1677
1678 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1679 .Any({{_},
1680 {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1681
1682 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1683 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1684
1685 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1686 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1687
1688 addRulesForIOpcs({amdgcn_global_load_lds})
1689 .Any({{}, {{}, {IntrId, VgprP1, SgprB32_M0}}});
1690
1691 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1692 amdgcn_strict_wqm},
1693 StandardB)
1694 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1695 .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
1696 .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
1697 .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
1698 .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
1699 .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
1700 .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
1701 .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
1702 .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
1703 .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
1704 .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
1705 .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
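// Reading note (annotation): the B-typed predicates appear to be keyed on
// bit width only, so a single B32 row covers s32, v2s16, and 32-bit pointers
// alike, with SgprB32 / VgprB32 choosing the bank without changing the LLT.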
1706
1707 addRulesForIOpcs({amdgcn_wqm_demote}).Any({{}, {{}, {IntrId, Vcc}}});
1708
1709 addRulesForIOpcs({amdgcn_inverse_ballot})
1710 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}})
1711 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}});
1712
1713 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1714 .Any({{DivS1}, {{Vcc}, {}}});
1715
1716 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8}, StandardB)
1717 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1718 .Div(B64, {{VgprB64}, {IntrId, VgprB64}});
1719
1720 addRulesForIOpcs({amdgcn_update_dpp}, StandardB)
1721 .Div(B32, {{VgprB32}, {IntrId, VgprB32, VgprB32}})
1722 .Div(B64, {{VgprB64}, {IntrId, VgprB64, VgprB64}});
1723
1724 addRulesForIOpcs({amdgcn_sin, amdgcn_cos}, Standard)
1725 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1726 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1727 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1728 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}});
1729
1730 addRulesForIOpcs({amdgcn_trig_preop}, Standard)
1731 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32}})
1732 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr32}});
1733
1734 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
1735 Standard)
1736 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1737 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32}});
1738
1739 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume}, Standard)
1740 .Uni(S32, {{UniInVgprS32}, {IntrId, SgprB32_M0}})
1741 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0}});
1742
1743 addRulesForIOpcs(
1744 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard)
1745 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
1746
1747 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn}, Standard)
1748 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1749
1750 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn}, Standard)
1751 .Div(S64, {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
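// Reading note (annotation): rows with two entries in the dst list describe
// intrinsics with two defs, mapped in def order, e.g. for the stack-pop
// variants above presumably the popped value plus the updated stack address:
//   .Div(S64, {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});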
1752
1753 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap}, Standard)
1754 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0, Vgpr32}});
1755
1756 addRulesForIOpcs({amdgcn_ds_swizzle}, Standard)
1757 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1758 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1759
1760 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var}, Standard)
1761 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1762
1763 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap}, Standard)
1764 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1765
1766 addRulesForIOpcs({amdgcn_permlane64}, StandardB)
1767 .Div(B32, {{VgprB32}, {IntrId, VgprB32}});
1768
1769 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1770 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
1771
1772 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
1773 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
1774
1775 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
1776 .Any({{DivV4S16}, {{VgprV4S16}, {IntrId, VgprP3}}});
1777
1778 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
1779 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
1780 Standard)
1781 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1782 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1783
1784 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
1785 Standard)
1786 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1787 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1788
1789 addRulesForIOpcs({amdgcn_div_fmas}, Standard)
1790 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
1791 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
1792 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}})
1793 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}});
1794
1795 addRulesForIOpcs({amdgcn_div_fixup}, Standard)
1796 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1797 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1798 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1799 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1800 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}})
1801 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}});
1802
1803 addRulesForIOpcs({amdgcn_div_scale}, Standard)
1804 .Div(S32, {{Vgpr32, Vcc}, {IntrId, Vgpr32, Vgpr32}})
1805 .Uni(S32, {{UniInVgprS32, UniInVcc}, {IntrId, Vgpr32, Vgpr32}})
1806 .Div(S64, {{Vgpr64, Vcc}, {IntrId, Vgpr64, Vgpr64}})
1807 .Uni(S64, {{UniInVgprS64, UniInVcc}, {IntrId, Vgpr64, Vgpr64}});
1808
1809 addRulesForIOpcs({amdgcn_udot2, amdgcn_sdot2}, Standard)
1810 .Uni(S32, {{UniInVgprS32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}})
1811 .Div(S32, {{Vgpr32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
1812
1813 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8}, Standard)
1814 .Uni(S32, {{UniInVgprS32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}})
1815 .Div(S32, {{Vgpr32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}});
1816
1817} // end initialize rules