AMDGPURegBankLegalizeRules.cpp
//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of container for all the Rules and search.
/// Fast search for most common case when Rule.Predicate checks LLT and
/// uniformity of register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool AMDGPU::matchUniformityAndLLT(Register Reg,
                                   UniformityLLTOpPredicateID UniID,
                                   const MachineUniformityInfo &MUI,
                                   const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
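
// For example, the G_ICMP predicate {UniS1, _, S32} used in the rules below
// matches an instruction whose def is a uniform S1, whose second operand is a
// non-register operand (the '_' slot) and whose compare operands are 32-bit
// scalars.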

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID AMDGPU::LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID AMDGPU::LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}
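
// For example, LLT::scalar(32), LLT::fixed_vector(2, 16) and any 32-bit
// pointer all map to B32, so one StandardB fast-rule slot covers all 32-bit
// types.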

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in each
  // slot that could "match fast Predicate". If not, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
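
// For example, the load rules below combine memory-operand predicates as
//   !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
//   (isConst || isInvMMO || isNoClobberMMO)
// and the resulting Predicate is evaluated left to right through the jump
// offsets rather than through a tree of nested std::function calls.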

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
  // It is up to user to deal with truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // Address space in the MMO may be different than address space on the
    // pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example volatile or non-aligned
      // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
      // selected as global_load, use SgprP1 for pointer instead to match
      // patterns without flat-for-global, default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without(-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without(-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB);

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
  // targets without SALU float we still select them as VGPR since there would
  // be no real sgpr use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_IS_FPCLASS})
      .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
      .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
      .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
      .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
      .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
      .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it is from call lowering
      // readfirstlaning just in case register is not in sgpr.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules
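
// A rough sketch of how these tables are meant to be consumed by the
// RegBankLegalize pass (local variable names here are illustrative only):
//   const SetOfRulesForOpcode *RuleSet = RBLRules.getRulesForOpc(MI);
//   if (const RegBankLLTMapping *Mapping =
//           RuleSet->findMappingForMI(MI, MRI, MUI))
//     // ... apply Mapping to MI's operands ...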