LLVM 22.0.0git
AMDGPURegBankLegalizeRules.cpp
1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of the RegBankLegalize rules for all opcodes.
10/// Implements the container that holds all the rules and the search over it.
11/// There is a fast search for the most common case, where Rule.Predicate only
12/// checks the LLT and uniformity of the register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
17#include "AMDGPUInstrInfo.h"
18#include "GCNSubtarget.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
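// For illustration: isAnyPtr(LLT::pointer(3, 32), 32) and
// isAnyPtr(LLT::pointer(5, 32), 32) are both true; the check only looks at the
// pointer width, not at the address space.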
32
33RegBankLLTMapping::RegBankLLTMapping(
34 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
35 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
36 LoweringMethodID LoweringMethod)
37 : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
38 LoweringMethod(LoweringMethod) {}
39
40PredicateMapping::PredicateMapping(
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
43 : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}
44
45bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(0, 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(1, 64);
63 case P3:
64 return MRI.getType(Reg) == LLT::pointer(3, 32);
65 case P4:
66 return MRI.getType(Reg) == LLT::pointer(4, 64);
67 case P5:
68 return MRI.getType(Reg) == LLT::pointer(5, 32);
69 case P8:
70 return MRI.getType(Reg) == LLT::pointer(8, 128);
71 case Ptr32:
72 return isAnyPtr(MRI.getType(Reg), 32);
73 case Ptr64:
74 return isAnyPtr(MRI.getType(Reg), 64);
75 case Ptr128:
76 return isAnyPtr(MRI.getType(Reg), 128);
77 case V2S32:
78 return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
79 case V4S32:
80 return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
81 case B32:
82 return MRI.getType(Reg).getSizeInBits() == 32;
83 case B64:
84 return MRI.getType(Reg).getSizeInBits() == 64;
85 case B96:
86 return MRI.getType(Reg).getSizeInBits() == 96;
87 case B128:
88 return MRI.getType(Reg).getSizeInBits() == 128;
89 case B256:
90 return MRI.getType(Reg).getSizeInBits() == 256;
91 case B512:
92 return MRI.getType(Reg).getSizeInBits() == 512;
93 case UniS1:
94 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
95 case UniS16:
96 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
97 case UniS32:
98 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
99 case UniS64:
100 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
101 case UniS128:
102 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
103 case UniP0:
104 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
105 case UniP1:
106 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
107 case UniP3:
108 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
109 case UniP4:
110 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
111 case UniP5:
112 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
113 case UniP8:
114 return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
115 case UniPtr32:
116 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
117 case UniPtr64:
118 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
119 case UniPtr128:
120 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
121 case UniV2S16:
122 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
123 case UniV2S32:
124 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
125 case UniB32:
126 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
127 case UniB64:
128 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
129 case UniB96:
130 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
131 case UniB128:
132 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
133 case UniB256:
134 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
135 case UniB512:
136 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
137 case DivS1:
138 return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
139 case DivS16:
140 return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
141 case DivS32:
142 return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
143 case DivS64:
144 return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
145 case DivS128:
146 return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
147 case DivP0:
148 return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
149 case DivP1:
150 return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
151 case DivP3:
152 return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
153 case DivP4:
154 return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
155 case DivP5:
156 return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
157 case DivPtr32:
158 return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
159 case DivPtr64:
160 return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
161 case DivPtr128:
162 return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
163 case DivV2S16:
164 return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
165 case DivV2S32:
166 return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
167 case DivB32:
168 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
169 case DivB64:
170 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
171 case DivB96:
172 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
173 case DivB128:
174 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
175 case DivB256:
176 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
177 case DivB512:
178 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
179 case _:
180 return true;
181 default:
182 llvm_unreachable("missing matchUniformityAndLLT");
183 }
184}
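// Rough reading of the predicate IDs above: S*/P*/V* match one exact LLT,
// B<N> matches any type whose size is N bits (scalars, vectors or pointers),
// and the Uni*/Div* variants additionally require the register to be uniform
// or divergent. E.g. UniB64 matches a uniform s64, v2s32, v4s16 or any 64-bit
// pointer.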
185
187 const MachineUniformityInfo &MUI,
188 const MachineRegisterInfo &MRI) const {
189 // Check LLT signature.
190 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
191 if (OpUniformityAndTypes[i] == _) {
192 if (MI.getOperand(i).isReg())
193 return false;
194 continue;
195 }
196
197 // Remaining IDs check registers.
198 if (!MI.getOperand(i).isReg())
199 return false;
200
201 if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
202 OpUniformityAndTypes[i], MUI, MRI))
203 return false;
204 }
205
206 // More complex check.
207 if (TestFunc)
208 return TestFunc(MI);
209
210 return true;
211}
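// Example of how a rule predicate is matched operand by operand: a rule with
// {DivS1, _, S32} applied to a G_ICMP requires operand 0 to be a divergent s1,
// operand 1 to be a non-register operand (the compare predicate) and operand 2
// to be an s32 register; trailing operands are not checked.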
212
214
215SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
216 : FastTypes(FastTypes) {}
217
218UniformityLLTOpPredicateID LLTToId(LLT Ty) {
219 if (Ty == LLT::scalar(16))
220 return S16;
221 if (Ty == LLT::scalar(32))
222 return S32;
223 if (Ty == LLT::scalar(64))
224 return S64;
225 if (Ty == LLT::fixed_vector(2, 16))
226 return V2S16;
227 if (Ty == LLT::fixed_vector(2, 32))
228 return V2S32;
229 if (Ty == LLT::fixed_vector(3, 32))
230 return V3S32;
231 if (Ty == LLT::fixed_vector(4, 32))
232 return V4S32;
233 return _;
234}
235
236UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
237 if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
238 isAnyPtr(Ty, 32))
239 return B32;
240 if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
241 Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
242 return B64;
243 if (Ty == LLT::fixed_vector(3, 32))
244 return B96;
245 if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
246 return B128;
247 return _;
248}
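// Note: the B-form IDs deliberately merge types of the same size, e.g. B32
// stands for s32, v2s16 and any 32-bit pointer, so a single fast-rule slot can
// serve all of them.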
249
250const RegBankLLTMapping *
251SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
252 const MachineRegisterInfo &MRI,
253 const MachineUniformityInfo &MUI) const {
254 // Search the "fast rules" first.
255 // Note: if fast rules are enabled, a RegBankLLTMapping must be added to each
256 // slot that could match the fast predicate. Otherwise an invalid (empty)
257 // mapping is returned, which results in failure; the "slow rules" are not searched.
258 if (FastTypes != NoFastRules) {
259 Register Reg = MI.getOperand(0).getReg();
260 int Slot;
261 if (FastTypes == StandardB)
262 Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
263 else
264 Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));
265
266 if (Slot != -1)
267 return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
268 }
269
270 // Slow search for more complex rules.
271 for (const RegBankLegalizeRule &Rule : Rules) {
272 if (Rule.Predicate.match(MI, MUI, MRI))
273 return &Rule.OperandMapping;
274 }
275
276 return nullptr;
277}
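// Sketch of the fast path above: for a Standard rule set, a uniform G_ADD with
// an s32 result maps LLTToId(s32) == S32 to slot 0 and returns Uni[0]; the
// divergent version of the same instruction returns Div[0]. Anything that does
// not hit a fast slot falls back to the linear scan over Rules.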
278
279void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
280 Rules.push_back(Rule);
281}
282
283void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
284 RegBankLLTMapping RuleApplyIDs) {
285 int Slot = getFastPredicateSlot(Ty);
286 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
287 Div[Slot] = RuleApplyIDs;
288}
289
290void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
291 RegBankLLTMapping RuleApplyIDs) {
292 int Slot = getFastPredicateSlot(Ty);
293 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
294 Uni[Slot] = RuleApplyIDs;
295}
296
297int SetOfRulesForOpcode::getFastPredicateSlot(
298 UniformityLLTOpPredicateID Ty) const {
299 switch (FastTypes) {
300 case Standard: {
301 switch (Ty) {
302 case S32:
303 return 0;
304 case S16:
305 return 1;
306 case S64:
307 return 2;
308 case V2S16:
309 return 3;
310 default:
311 return -1;
312 }
313 }
314 case StandardB: {
315 switch (Ty) {
316 case B32:
317 return 0;
318 case B64:
319 return 1;
320 case B96:
321 return 2;
322 case B128:
323 return 3;
324 default:
325 return -1;
326 }
327 }
328 case Vector: {
329 switch (Ty) {
330 case S32:
331 return 0;
332 case V2S32:
333 return 1;
334 case V3S32:
335 return 2;
336 case V4S32:
337 return 3;
338 default:
339 return -1;
340 }
341 }
342 default:
343 return -1;
344 }
345}
346
347RegBankLegalizeRules::RuleSetInitializer
348RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
349 FastRulesTypes FastTypes) {
350 return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
351}
352
353RegBankLegalizeRules::RuleSetInitializer
354RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
355 FastRulesTypes FastTypes) {
356 return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
357}
358
359const SetOfRulesForOpcode *
360RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
361 unsigned Opc = MI.getOpcode();
362 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
363 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
364 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
365 unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
366 auto IRAIt = IRulesAlias.find(IntrID);
367 if (IRAIt == IRulesAlias.end())
368 return nullptr;
369 return &IRules.at(IRAIt->second);
370 }
371
372 auto GRAIt = GRulesAlias.find(Opc);
373 if (GRAIt == GRulesAlias.end())
374 return nullptr;
375 return &GRules.at(GRAIt->second);
376}
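// Generic opcodes are looked up through GRulesAlias/GRules and intrinsics
// through IRulesAlias/IRules (keyed by intrinsic ID). The alias maps let
// several opcodes share one rule set, which is how
// addRulesForGOpcs({G_ADD, G_SUB}, ...) below registers both opcodes at once.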
377
378// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
379class Predicate {
380private:
381 struct Elt {
382 // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
383 // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
384 // Sequences of && and || will be represented by jumps, for example:
385 // (A && B && ... X) or (A && B && ... X) || Y
386 // A == true jump to B
387 // A == false jump to end or Y, result is A(false) or Y
388 // (A || B || ... X) or (A || B || ... X) && Y
389 // A == true jump to end or Y, result is A(true) or Y
390 // A == false jump to B
391 // Notice that when negating expression, we simply flip Neg on each Pred
392 // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
393 std::function<bool(const MachineInstr &)> Pred;
394 bool Neg; // Neg of Pred is calculated before jump
395 unsigned TJumpOffset;
396 unsigned FJumpOffset;
397 };
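// Worked example of the encoding (illustrative, for two leaf predicates A and
// B): the expression A && B is stored as
//   { {A, Neg=false, TJump=1, FJump=2}, {B, Neg=false, TJump=1, FJump=1} }
// so a true A falls through to B, while a false A jumps straight past B to the
// end, yielding false; operator!() flips Neg and swaps the two jump offsets.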
398
399 SmallVector<Elt, 8> Expression;
400
401 Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };
402
403public:
404 Predicate(std::function<bool(const MachineInstr &)> Pred) {
405 Expression.push_back({Pred, false, 1, 1});
406 };
407
408 bool operator()(const MachineInstr &MI) const {
409 unsigned Idx = 0;
410 unsigned ResultIdx = Expression.size();
411 bool Result;
412 do {
413 Result = Expression[Idx].Pred(MI);
414 Result = Expression[Idx].Neg ? !Result : Result;
415 if (Result) {
416 Idx += Expression[Idx].TJumpOffset;
417 } else {
418 Idx += Expression[Idx].FJumpOffset;
419 }
420 } while ((Idx != ResultIdx));
421
422 return Result;
423 };
424
425 Predicate operator!() const {
426 SmallVector<Elt, 8> NegExpression;
427 for (const Elt &ExprElt : Expression) {
428 NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
429 ExprElt.TJumpOffset});
430 }
431 return Predicate(std::move(NegExpression));
432 };
433
434 Predicate operator&&(const Predicate &RHS) const {
435 SmallVector<Elt, 8> AndExpression = Expression;
436
437 unsigned RHSSize = RHS.Expression.size();
438 unsigned ResultIdx = Expression.size();
439 for (unsigned i = 0; i < ResultIdx; ++i) {
440 // LHS results in false, whole expression results in false.
441 if (i + AndExpression[i].FJumpOffset == ResultIdx)
442 AndExpression[i].FJumpOffset += RHSSize;
443 }
444
445 AndExpression.append(RHS.Expression);
446
447 return Predicate(std::move(AndExpression));
448 }
449
450 Predicate operator||(const Predicate &RHS) const {
451 SmallVector<Elt, 8> OrExpression = Expression;
452
453 unsigned RHSSize = RHS.Expression.size();
454 unsigned ResultIdx = Expression.size();
455 for (unsigned i = 0; i < ResultIdx; ++i) {
456 // LHS results in true, whole expression results in true.
457 if (i + OrExpression[i].TJumpOffset == ResultIdx)
458 OrExpression[i].TJumpOffset += RHSSize;
459 }
460
461 OrExpression.append(RHS.Expression);
462
463 return Predicate(std::move(OrExpression));
464 }
465};
466
467// Initialize rules
468RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
469 MachineRegisterInfo &_MRI)
470 : ST(&_ST), MRI(&_MRI) {
471
472 addRulesForGOpcs({G_ADD, G_SUB}, Standard)
473 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
474 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
475 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
476 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
478 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
479 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
480 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
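// How to read these rules: the first brace list maps the definitions and the
// second maps the uses. For example the Uni(S16, ...) rule above says a
// uniform 16-bit add/sub is done on the SALU in 32 bits: both sources are
// any-extended to s32 (Sgpr32AExt) and the 32-bit result is truncated back to
// s16 (Sgpr32Trunc).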
481
482 addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
483 .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
484 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
485
486 addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
488 .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
489
490 addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
491
492 addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
494 .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
495 .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
496 .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
497 .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
498 .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
499 .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
500 .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
501
502 addRulesForGOpcs({G_SHL}, Standard)
503 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
504 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
506 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
507 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
508 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
509 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
510 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
511
512 addRulesForGOpcs({G_LSHR}, Standard)
513 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
514 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
516 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
517 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
518 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
519 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
520 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
521
522 addRulesForGOpcs({G_ASHR}, Standard)
523 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
524 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
526 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
527 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
528 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
529 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
530 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
531
532 addRulesForGOpcs({G_FSHR}, Standard)
533 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
534 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
535
536 addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
537
538 addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
539 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
540 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
541 .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
542 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
543
544 addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
545 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
546 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
547 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
548 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
550 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
551
552 addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
553 .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
554 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
555 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
556 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
558 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
559
560 // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
561 // and G_FREEZE here; the rest is trivially regbankselected earlier.
562 addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
563 addRulesForGOpcs({G_CONSTANT})
564 .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
565 addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
566
567 addRulesForGOpcs({G_ICMP})
568 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
569 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
570 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
571
572 addRulesForGOpcs({G_FCMP})
573 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
574 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});
575
576 addRulesForGOpcs({G_BRCOND})
577 .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
578 .Any({{DivS1}, {{}, {Vcc}}});
579
580 addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
581
582 addRulesForGOpcs({G_SELECT}, StandardB)
583 .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
585 .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
589
590 addRulesForGOpcs({G_ANYEXT})
591 .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
592 .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
593 .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
594 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
595 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
596 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
597 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
598 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
599 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
600 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
601
602 // In GlobalISel an in-register G_TRUNC is treated as a no-op and selected into
603 // a COPY. It is up to the user to deal with the truncated bits.
604 addRulesForGOpcs({G_TRUNC})
605 .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
606 .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
607 .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
608 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
609 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
610 .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
611 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
612 .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
613 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
614 // This is non-trivial. VgprToVccCopy is done using compare instruction.
615 .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
616 .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
617 .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});
618
619 addRulesForGOpcs({G_ZEXT})
623 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
624 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
625 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
626 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
627 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
628 // not extending S16 to S32 is questionable.
629 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
630 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
631 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
632 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
633
634 addRulesForGOpcs({G_SEXT})
638 .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
639 .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
640 .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
641 .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
642 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
643 // not extending S16 to S32 is questionable.
644 .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
645 .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
646 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
647 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
648
649 addRulesForGOpcs({G_SEXT_INREG})
650 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
651 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
652 .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
654
655 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
656 .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
657 .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
658 .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
659 .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
660
661 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
662 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
663 bool usesTrue16 = ST->useRealTrue16Insts();
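// These three subtarget features gate the load/store rules below: scalar
// dwordx3 loads, scalar sub-dword (8/16-bit) loads, and true16 mode where
// 16-bit values live in real 16-bit registers.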
664
665 Predicate isAlign16([](const MachineInstr &MI) -> bool {
666 return (*MI.memoperands_begin())->getAlign() >= Align(16);
667 });
668
669 Predicate isAlign4([](const MachineInstr &MI) -> bool {
670 return (*MI.memoperands_begin())->getAlign() >= Align(4);
671 });
672
673 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
674 return (*MI.memoperands_begin())->isAtomic();
675 });
676
677 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
678 return AMDGPU::isUniformMMO(*MI.memoperands_begin());
679 });
680
681 Predicate isConst([](const MachineInstr &MI) -> bool {
682 // The address space in the MMO can be different from the one on the pointer.
683 const MachineMemOperand *MMO = *MI.memoperands_begin();
684 const unsigned AS = MMO->getAddrSpace();
685 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
686 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
687 });
688
689 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
690 return (*MI.memoperands_begin())->isVolatile();
691 });
692
693 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
694 return (*MI.memoperands_begin())->isInvariant();
695 });
696
697 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
698 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
699 });
700
701 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
702 const MachineMemOperand *MMO = *MI.memoperands_begin();
703 return MMO->getAlign() >= Align(MMO->getSize().getValue());
704 });
705
706 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
707 const MachineMemOperand *MMO = *MI.memoperands_begin();
708 const unsigned MemSize = 8 * MMO->getSize().getValue();
709 return MemSize == 16 || MemSize == 8;
710 });
711
712 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
713 const MachineMemOperand *MMO = *MI.memoperands_begin();
714 return 8 * MMO->getSize().getValue() == 32;
715 });
716
717 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
718 (isConst || isInvMMO || isNoClobberMMO);
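// isUL roughly approximates "safe uniform load": not atomic, known-uniform
// memory access, and either a constant address space or a location that is
// not volatile and cannot be clobbered (invariant or marked MONoClobber).
// Only loads that satisfy it (plus an alignment check) use scalar s_load in
// the rules below; everything else goes through VMEM.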
719
720 // clang-format off
721 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
722 addRulesForGOpcs({G_LOAD})
723 // flat, addrspace(0), never uniform - flat_load
724 .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
725 .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
726 .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
727 .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
728 .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})
729
730 // global, addrspace(1)
731 // divergent - global_load
732 .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
733 .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
734 .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
735 .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
736 .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
737 .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
738 .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})
739
740 // uniform - s_load
741 .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
742 .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
743 .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
744 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
745 .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
746 .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) //32-bit load
747 .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
748 .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
749 .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
750 .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
751 .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
752 .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
753 .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
754
755 // Uniform loads done via a global or buffer load, for example volatile or
756 // non-aligned uniform loads. Not using the standard {{UniInVgprTy}, {VgprP1}}
757 // since that is selected as global_load; use SgprP1 for the pointer instead to
758 // match patterns without flat-for-global, the default for GFX7 and older.
759 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
760 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
761 .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
762 .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
763 .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
764 .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
765 .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
766 .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
767 .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
768 .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
769 .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})
770
771 // local, addrspace(3) - ds_load
772 .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
773 .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
774 .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
775 .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
776 .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})
777
778 .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
779 .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
780 .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
781 .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
782 .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})
783
784 // constant, addrspace(4)
785 // divergent - global_load
786 .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
787 .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
788 .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
789 .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
790 .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
791 .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
792 .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})
793
794 // uniform - s_load
795 .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
796 .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
797 .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
798 .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
799 .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) //32-bit load
800 .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
801 .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
802 .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
803 .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
804 .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
805 .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
806 .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
807
808 // uniform in vgpr - global_load or buffer_load
809 .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
810 .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
811 .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
812 .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
813 .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
814 .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
815 .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
816 .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
817 .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})
818
819 // private, addrspace(5), never uniform - scratch_load
820 .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
821 .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
822 .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
823 .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
824 .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})
825
826 .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});
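// Summary of the uniform-load strategy encoded above (informal): if the MMO is
// sufficiently aligned and isUL holds, the load stays on the scalar unit
// (s_load, possibly widened or split); otherwise the value is loaded with a
// VMEM instruction into VGPRs and the uniform result is copied back to SGPRs
// (the UniInVgpr* mappings). Divergent loads always use VMEM.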
827
828
829 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zeroextending loads
830 .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})
831
832 .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
833 .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
834 .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
835 .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
836 .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)
837
838 .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
839 .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})
840
841 .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
842 .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
843 .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
844 .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
845 .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)
846
847 .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});
848
849 addRulesForGOpcs({G_STORE})
850 // addrspace(0)
851 .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
852 .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
853 .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
854 .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
855 .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})
856
857 // addrspace(1), there are no stores to addrspace(4)
858 // For targets:
859 // - with "+flat-for-global" - global_store
860 // - without ("-flat-for-global") - buffer_store addr64
861 .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
862 .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
863 .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
864 .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
865 .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})
866
867 // For UniP1, use an sgpr pointer to match flat-for-global patterns. Targets:
868 // - with "+flat-for-global" - global_store for both sgpr and vgpr pointers
869 // - without ("-flat-for-global") - need an sgpr pointer to select buffer_store
870 .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
871 .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
872 .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
873 .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
874 .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})
875
876 // addrspace(3) and addrspace(5)
877 .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
878 .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
879 .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
880 .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
881 .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
882 // clang-format on
883
884 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
885 G_AMDGPU_TBUFFER_LOAD_FORMAT},
886 StandardB)
895
896 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
897 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
898 StandardB)
901
902 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
903 .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
904
905 addRulesForGOpcs({G_PTR_ADD})
906 .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
907 .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
908 .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
909 .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
910
911 addRulesForGOpcs({G_INTTOPTR})
912 .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
913 .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
914 .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
915 .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
916 .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
917 .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});
918
919 addRulesForGOpcs({G_PTRTOINT})
920 .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
921 .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
922 .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
923 .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
924 .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
925 .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});
926
927 addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
928
929 addRulesForGOpcs({G_BITREVERSE}, Standard)
930 .Uni(S32, {{Sgpr32}, {Sgpr32}})
931 .Div(S32, {{Vgpr32}, {Vgpr32}})
932 .Uni(S64, {{Sgpr64}, {Sgpr64}})
933 .Div(S64, {{Vgpr64}, {Vgpr64}});
934
935 addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
936
937 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
938 .Uni(S64, {{Sgpr64}, {}});
939
940 addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
941
942 addRulesForGOpcs({G_GLOBAL_VALUE})
943 .Any({{UniP0}, {{SgprP0}, {}}})
944 .Any({{UniP1}, {{SgprP1}, {}}})
945 .Any({{UniP3}, {{SgprP3}, {}}})
946 .Any({{UniP4}, {{SgprP4}, {}}})
947 .Any({{UniP8}, {{SgprP8}, {}}});
948
949 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
950
951 bool hasSALUFloat = ST->hasSALUFloatInsts();
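// Pattern used by the FP rules below: when the subtarget has SALU float
// instructions, uniform f16/f32 operations stay on SGPRs; otherwise they are
// executed on the VALU and the uniform result is copied back to SGPRs via the
// UniInVgpr* mappings. 64-bit FP has no SALU form, so uniform f64 always goes
// through VGPRs.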
952
953 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
954 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
955 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
956 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
957 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
958 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
959 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
960 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
961 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
962 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
964 hasSALUFloat)
965 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
966
967 addRulesForGOpcs({G_FSUB}, Standard)
968 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
969 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
970 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
971 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
972 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
973 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
974
975 addRulesForGOpcs({G_FMAD}, Standard)
976 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
977 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
978 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
979 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
980
981 addRulesForGOpcs({G_FMA}, Standard)
982 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
983 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
984 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
985 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
989 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
990 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
991 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
992 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
993 .Uni(V2S16,
995 hasSALUFloat)
997 !hasSALUFloat);
998
999 // FNEG and FABS are either folded as source modifiers or selected as bitwise
1000 // XOR and AND with a mask. XOR and AND are available on the SALU, but for
1001 // targets without SALU float we still select them on VGPRs since there would
1002 // be no real sgpr use.
1003 addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
1004 .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
1005 .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
1006 .Div(S16, {{Vgpr16}, {Vgpr16}})
1007 .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
1008 .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
1009 .Div(S32, {{Vgpr32}, {Vgpr32}})
1010 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1011 .Div(S64, {{Vgpr64}, {Vgpr64}})
1012 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
1013 .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
1014 .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
1015 .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
1016 .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
1017
1018 addRulesForGOpcs({G_FPTOUI})
1019 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1020 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
1021
1022 addRulesForGOpcs({G_UITOFP})
1023 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1024 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1025 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
1026
1027 addRulesForGOpcs({G_FPEXT})
1028 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1029 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1030 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
1031 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1032 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);
1033
1034 addRulesForGOpcs({G_FPTRUNC})
1035 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1036 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1037 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1039 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1040 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1041 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1042
1043 addRulesForGOpcs({G_IS_FPCLASS})
1044 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1045 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1046 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1047 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1048 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1049 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1050
1051 using namespace Intrinsic;
1052
1053 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1054
1055 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1056 addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});
1057
1058 addRulesForIOpcs({amdgcn_if_break}, Standard)
1059 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1060
1061 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1062 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1063
1064 addRulesForIOpcs({amdgcn_readfirstlane})
1065 .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
1066 // This should not exist in the first place; it comes from call lowering,
1067 // which readfirstlanes the value just in case the register is not in an sgpr.
1068 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1069
1070} // end initialize rules