Line data Source code
1 : //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : /// \file
10 : /// This file implements the targeting of the MachineLegalizer class for
11 : /// AArch64.
12 : /// \todo This should be generated by TableGen.
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "AArch64LegalizerInfo.h"
16 : #include "AArch64Subtarget.h"
17 : #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18 : #include "llvm/CodeGen/MachineInstr.h"
19 : #include "llvm/CodeGen/MachineRegisterInfo.h"
20 : #include "llvm/CodeGen/TargetOpcodes.h"
21 : #include "llvm/CodeGen/ValueTypes.h"
22 : #include "llvm/IR/DerivedTypes.h"
23 : #include "llvm/IR/Type.h"
24 :
25 : using namespace llvm;
26 : using namespace LegalizeActions;
27 : using namespace LegalityPredicates;
28 : /// Registers the AArch64 legalization rules for each generic opcode handled below, then calls computeTables() and verify().
29 1570 : AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
30 : using namespace TargetOpcode; // Lets the G_* opcode names below be written unqualified.
31 : const LLT p0 = LLT::pointer(0, 64); // Pointer LLT built from (0, 64); presumably address space 0, 64 bits wide.
32 1570 : const LLT s1 = LLT::scalar(1); // sN: scalar LLT constructed with N as its argument.
33 1570 : const LLT s8 = LLT::scalar(8);
34 1570 : const LLT s16 = LLT::scalar(16);
35 1570 : const LLT s32 = LLT::scalar(32);
36 1570 : const LLT s64 = LLT::scalar(64);
37 : const LLT s128 = LLT::scalar(128);
38 1570 : const LLT s256 = LLT::scalar(256);
39 1570 : const LLT s512 = LLT::scalar(512);
40 : const LLT v16s8 = LLT::vector(16, 8); // vNsM: vector LLT constructed with (N, M) as its arguments.
41 : const LLT v8s8 = LLT::vector(8, 8);
42 : const LLT v4s8 = LLT::vector(4, 8);
43 : const LLT v8s16 = LLT::vector(8, 16);
44 : const LLT v4s16 = LLT::vector(4, 16);
45 : const LLT v2s16 = LLT::vector(2, 16);
46 1570 : const LLT v2s32 = LLT::vector(2, 32);
47 1570 : const LLT v4s32 = LLT::vector(4, 32);
48 1570 : const LLT v2s64 = LLT::vector(2, 64);
49 : // G_IMPLICIT_DEF: p0 and s1..s64 are legal; further rules clamp type 0 to [s1, s64] and widen to a power of two (minimum 8).
50 1570 : getActionDefinitionsBuilder(G_IMPLICIT_DEF)
51 1570 : .legalFor({p0, s1, s8, s16, s32, s64})
52 1570 : .clampScalar(0, s1, s64)
53 1570 : .widenScalarToNextPow2(0, 8);
54 : // G_PHI: p0, s16, s32 and s64 are legal; further rules clamp type 0 to [s16, s64] and widen to a power of two.
55 1570 : getActionDefinitionsBuilder(G_PHI)
56 1570 : .legalFor({p0, s16, s32, s64})
57 1570 : .clampScalar(0, s16, s64)
58 1570 : .widenScalarToNextPow2(0);
59 : // G_BSWAP: only s32 and s64 are legal. NOTE(review): the clamp lower bound is s16, which is not in the legal set - confirm intended.
60 1570 : getActionDefinitionsBuilder(G_BSWAP)
61 1570 : .legalFor({s32, s64})
62 1570 : .clampScalar(0, s16, s64)
63 1570 : .widenScalarToNextPow2(0);
64 : // G_ADD/G_SUB/G_MUL/G_AND/G_OR/G_XOR/G_SHL: legal for s32, s64, v2s32, v4s32 and v2s64, with scalar and element-count clamps for other types.
65 1570 : getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL})
66 3140 : .legalFor({s32, s64, v2s32, v4s32, v2s64})
67 1570 : .clampScalar(0, s32, s64)
68 1570 : .widenScalarToNextPow2(0)
69 1570 : .clampNumElements(0, v2s32, v4s32)
70 1570 : .clampNumElements(0, v2s64, v2s64)
71 1570 : .moreElementsToNextPow2(0);
72 : // G_GEP: legal only for the {p0, s64} type pair; type index 1 is clamped to exactly s64.
73 1570 : getActionDefinitionsBuilder(G_GEP)
74 1570 : .legalFor({{p0, s64}})
75 1570 : .clampScalar(1, s64, s64);
76 : // G_PTR_MASK: legal for p0 only.
77 1570 : getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
78 : // G_LSHR/G_ASHR/G_SDIV/G_UDIV: legal for s32 and s64; further rules clamp type 0 to [s32, s64] and widen to a power of two.
79 1570 : getActionDefinitionsBuilder({G_LSHR, G_ASHR, G_SDIV, G_UDIV})
80 3140 : .legalFor({s32, s64})
81 1570 : .clampScalar(0, s32, s64)
82 1570 : .widenScalarToNextPow2(0);
83 : // G_SREM/G_UREM: marked for lowering for s1, s8, s16, s32 and s64.
84 1570 : getActionDefinitionsBuilder({G_SREM, G_UREM})
85 1570 : .lowerFor({s1, s8, s16, s32, s64});
86 : // G_SMULO/G_UMULO: marked for lowering for the {s64, s1} type pair.
87 1570 : getActionDefinitionsBuilder({G_SMULO, G_UMULO})
88 1570 : .lowerFor({{s64, s1}});
89 : // G_SMULH/G_UMULH: legal for s32 and s64.
90 1570 : getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
91 : // G_UADDE/G_USUBE/G_SADDO/G_SSUBO: legal for the {s32, s1} and {s64, s1} type pairs.
92 1570 : getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO})
93 1570 : .legalFor({{s32, s1}, {s64, s1}});
94 : // G_FADD/G_FSUB/G_FMA/G_FMUL/G_FDIV: legal for s32 and s64.
95 1570 : getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV})
96 1570 : .legalFor({s32, s64});
97 : // G_FREM/G_FPOW: handled through the libcall action for s32 and s64.
98 1570 : getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
99 : // G_INSERT: unsupported unless type 0 is strictly wider than type 1; legal when type 0 is s32, s64 or p0 and type 1 is 1 bit or a power-of-two size of at least 8 bits.
100 1570 : getActionDefinitionsBuilder(G_INSERT)
101 : .unsupportedIf([=](const LegalityQuery &Query) {
102 : return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
103 1570 : })
104 : .legalIf([=](const LegalityQuery &Query) {
105 : const LLT &Ty0 = Query.Types[0];
106 : const LLT &Ty1 = Query.Types[1];
107 : if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
108 : return false;
109 : return isPowerOf2_32(Ty1.getSizeInBits()) &&
110 : (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
111 3140 : })
112 : .clampScalar(0, s32, s64)
113 1570 : .widenScalarToNextPow2(0)
114 3140 : .maxScalarIf(typeInSet(0, {s32}), 1, s16)
115 3140 : .maxScalarIf(typeInSet(0, {s64}), 1, s32)
116 1570 : .widenScalarToNextPow2(1);
117 : // G_EXTRACT: unsupported unless type 1 is strictly wider than type 0; legal when type 1 is s32 or s64 and type 0 is 1 bit or a power-of-two size of at least 8 bits.
118 1570 : getActionDefinitionsBuilder(G_EXTRACT)
119 : .unsupportedIf([=](const LegalityQuery &Query) {
120 : return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
121 1570 : })
122 : .legalIf([=](const LegalityQuery &Query) {
123 : const LLT &Ty0 = Query.Types[0];
124 : const LLT &Ty1 = Query.Types[1];
125 : if (Ty1 != s32 && Ty1 != s64)
126 : return false;
127 : if (Ty1 == p0) // NOTE(review): appears unreachable - the check above already rejected everything except s32/s64, whereas G_INSERT lists p0 in its first check. Confirm intent.
128 : return true;
129 : return isPowerOf2_32(Ty0.getSizeInBits()) &&
130 : (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
131 3140 : })
132 : .clampScalar(1, s32, s64)
133 1570 : .widenScalarToNextPow2(1)
134 3140 : .maxScalarIf(typeInSet(1, {s32}), 0, s16)
135 3140 : .maxScalarIf(typeInSet(1, {s64}), 0, s32)
136 1570 : .widenScalarToNextPow2(0);
137 : // G_SEXTLOAD/G_ZEXTLOAD: legal for the {value type, pointer type, memory size} combinations listed below.
138 1570 : getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
139 : .legalForTypesWithMemSize({{s32, p0, 8},
140 : {s32, p0, 16},
141 : {s32, p0, 32},
142 : {s64, p0, 64},
143 : {p0, p0, 64},
144 1570 : {v2s32, p0, 64}})
145 1570 : .clampScalar(0, s32, s64)
146 1570 : .widenScalarToNextPow2(0)
147 : // TODO: We could support sum-of-pow2's but the lowering code doesn't know
148 : // how to do that yet.
149 1570 : .unsupportedIfMemSizeNotPow2()
150 : // Lower anything left over into G_*EXT and G_LOAD
151 1570 : .lower();
152 : // G_LOAD: legal for the {value type, pointer type, memory size} combinations in the two tables below.
153 1570 : getActionDefinitionsBuilder(G_LOAD)
154 : .legalForTypesWithMemSize({{s8, p0, 8},
155 : {s16, p0, 16},
156 : {s32, p0, 32},
157 : {s64, p0, 64},
158 : {p0, p0, 64},
159 1570 : {v2s32, p0, 64}})
160 : // These extends are also legal
161 : .legalForTypesWithMemSize({{s32, p0, 8},
162 1570 : {s32, p0, 16}})
163 1570 : .clampScalar(0, s8, s64)
164 1570 : .widenScalarToNextPow2(0)
165 : // TODO: We could support sum-of-pow2's but the lowering code doesn't know
166 : // how to do that yet.
167 1570 : .unsupportedIfMemSizeNotPow2()
168 : // Lower any any-extending loads left into G_ANYEXT and G_LOAD
169 : .lowerIf([=](const LegalityQuery &Query) {
170 1 : return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8; // True when the value width differs from MMO Size scaled by 8.
171 1570 : })
172 1570 : .clampNumElements(0, v2s32, v2s32);
173 : // G_STORE: same exact-size table as the first G_LOAD table (no extending entries).
174 1570 : getActionDefinitionsBuilder(G_STORE)
175 : .legalForTypesWithMemSize({{s8, p0, 8},
176 : {s16, p0, 16},
177 : {s32, p0, 32},
178 : {s64, p0, 64},
179 : {p0, p0, 64},
180 1570 : {v2s32, p0, 64}})
181 1570 : .clampScalar(0, s8, s64)
182 1570 : .widenScalarToNextPow2(0)
183 : // TODO: We could support sum-of-pow2's but the lowering code doesn't know
184 : // how to do that yet.
185 1570 : .unsupportedIfMemSizeNotPow2()
186 : .lowerIf([=](const LegalityQuery &Query) { // Lower scalar stores whose value width differs from MMO Size scaled by 8.
187 : return Query.Types[0].isScalar() &&
188 : Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
189 1570 : })
190 1570 : .clampNumElements(0, v2s32, v2s32);
191 :
192 : // Constants
193 1570 : getActionDefinitionsBuilder(G_CONSTANT)
194 1570 : .legalFor({p0, s32, s64})
195 1570 : .clampScalar(0, s32, s64)
196 1570 : .widenScalarToNextPow2(0);
197 1570 : getActionDefinitionsBuilder(G_FCONSTANT)
198 1570 : .legalFor({s32, s64})
199 1570 : .clampScalar(0, s32, s64);
200 : // G_ICMP: type 0 is clamped to exactly s32; legal pairs have s32, s64 or p0 as type 1.
201 1570 : getActionDefinitionsBuilder(G_ICMP)
202 1570 : .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
203 1570 : .clampScalar(0, s32, s32)
204 : .clampScalar(1, s32, s64)
205 1570 : .widenScalarToNextPow2(1);
206 : // G_FCMP: type 0 is clamped to exactly s32; legal pairs have s32 or s64 as type 1.
207 1570 : getActionDefinitionsBuilder(G_FCMP)
208 1570 : .legalFor({{s32, s32}, {s32, s64}})
209 1570 : .clampScalar(0, s32, s32)
210 : .clampScalar(1, s32, s64)
211 1570 : .widenScalarToNextPow2(1);
212 :
213 : // Extensions
214 1570 : getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
215 1570 : .legalForCartesianProduct({s8, s16, s32, s64}, {s1, s8, s16, s32});
216 :
217 : // FP conversions
218 1570 : getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
219 1570 : {{s16, s32}, {s16, s64}, {s32, s64}});
220 1570 : getActionDefinitionsBuilder(G_FPEXT).legalFor(
221 1570 : {{s32, s16}, {s64, s16}, {s64, s32}});
222 :
223 : // Conversions
224 1570 : getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
225 3140 : .legalForCartesianProduct({s32, s64})
226 1570 : .clampScalar(0, s32, s64)
227 1570 : .widenScalarToNextPow2(0)
228 : .clampScalar(1, s32, s64)
229 1570 : .widenScalarToNextPow2(1);
230 : // G_SITOFP/G_UITOFP: same type set as above, with the rules for type 1 listed before those for type 0.
231 1570 : getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
232 3140 : .legalForCartesianProduct({s32, s64})
233 1570 : .clampScalar(1, s32, s64)
234 1570 : .widenScalarToNextPow2(1)
235 : .clampScalar(0, s32, s64)
236 1570 : .widenScalarToNextPow2(0);
237 :
238 : // Control-flow
239 1570 : getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
240 1570 : getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
241 :
242 : // Select
243 1570 : getActionDefinitionsBuilder(G_SELECT)
244 1570 : .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
245 1570 : .clampScalar(0, s32, s64)
246 1570 : .widenScalarToNextPow2(0);
247 :
248 : // Pointer-handling
249 1570 : getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
250 1570 : getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
251 : // G_PTRTOINT: legal for type 0 in s1..s64 with p0 as type 1; type 0 is capped at s64 and widened to a power of two (minimum 8).
252 1570 : getActionDefinitionsBuilder(G_PTRTOINT)
253 1570 : .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
254 1570 : .maxScalar(0, s64)
255 1570 : .widenScalarToNextPow2(0, /*Min*/ 8);
256 : // G_INTTOPTR: unsupported when the two types differ in size; legal for the {p0, s64} type pair.
257 1570 : getActionDefinitionsBuilder(G_INTTOPTR)
258 : .unsupportedIf([&](const LegalityQuery &Query) { // NOTE(review): uses [&] while every other predicate here uses [=]; the body names no locals.
259 : return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
260 1570 : })
261 1570 : .legalFor({{p0, s64}});
262 :
263 : // Casts for 32 and 64-bit width type are just copies.
264 : // Same for 128-bit width type, except they are on the FPR bank.
265 1570 : getActionDefinitionsBuilder(G_BITCAST)
266 : // FIXME: This is wrong since G_BITCAST is not allowed to change the
267 : // number of bits but it's what the previous code described and fixing
268 : // it breaks tests.
269 1570 : .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
270 1570 : v8s16, v4s16, v2s16, v4s32, v2s32, v2s64});
271 : // G_VASTART: legal for p0.
272 1570 : getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
273 :
274 : // va_list must be a pointer, but most sized types are pretty easy to handle
275 : // as the destination.
276 1570 : getActionDefinitionsBuilder(G_VAARG)
277 1570 : .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
278 1570 : .clampScalar(0, s8, s64)
279 1570 : .widenScalarToNextPow2(0, /*Min*/ 8);
280 : // The atomic rules below are registered only when ST.hasLSE() returns true.
281 1570 : if (ST.hasLSE()) {
282 33 : getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
283 66 : .lowerIf(all(
284 132 : typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
285 66 : atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
286 : // The G_ATOMICRMW_* opcodes and G_ATOMIC_CMPXCHG are legal for s8..s64 as type 0, p0 as type 1, and an ordering of at least Monotonic.
287 : getActionDefinitionsBuilder(
288 : {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
289 : G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
290 33 : G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
291 66 : .legalIf(all(
292 99 : typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
293 99 : atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
294 : }
295 : // G_BLOCK_ADDR: legal for p0.
296 1570 : getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
297 :
298 : // Merge/Unmerge
299 4710 : for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
300 3140 : unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; // Type index of the "big" type for this opcode.
301 3140 : unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; // Type index of the "little" type for this opcode.
302 : // notValidElt: true when the type at TypeIdx is a vector whose element size is below 8 bits, above 64 bits, or not a power of two.
303 : auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
304 : const LLT &Ty = Query.Types[TypeIdx];
305 : if (Ty.isVector()) {
306 : const LLT &EltTy = Ty.getElementType();
307 : if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
308 : return true;
309 : if (!isPowerOf2_32(EltTy.getSizeInBits()))
310 : return true;
311 : }
312 : return false;
313 : };
314 : auto scalarize = // Yields the pair (TypeIdx, element type of the type at TypeIdx).
315 : [](const LegalityQuery &Query, unsigned TypeIdx) {
316 : const LLT &Ty = Query.Types[TypeIdx];
317 : return std::make_pair(TypeIdx, Ty.getElementType());
318 : };
319 :
320 : // FIXME: This rule is horrible, but specifies the same as what we had
321 : // before with the particularly strange definitions removed (e.g.
322 : // s8 = G_MERGE_VALUES s32, s32).
323 : // Part of the complexity comes from these ops being extremely flexible. For
324 : // example, you can build/decompose vectors with it, concatenate vectors,
325 : // etc. and in addition to this you can also bitcast with it at the same
326 : // time. We've been considering breaking it up into multiple ops to make it
327 : // more manageable throughout the backend.
328 3140 : getActionDefinitionsBuilder(Op)
329 : // Break up vectors with weird elements into scalars
330 : .fewerElementsIf(
331 11 : [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
332 9420 : [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
333 : .fewerElementsIf(
334 11 : [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
335 9420 : [=](const LegalityQuery &Query) { return scalarize(Query, 1); })
336 : // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
337 : // or 384.
338 : .clampScalar(BigTyIdx, s8, s512)
339 : .widenScalarIf(
340 : [=](const LegalityQuery &Query) {
341 : const LLT &Ty = Query.Types[BigTyIdx];
342 : return !isPowerOf2_32(Ty.getSizeInBits()) &&
343 : Ty.getSizeInBits() % 64 != 0;
344 : },
345 : [=](const LegalityQuery &Query) {
346 : // Pick the next power of 2, or a multiple of 64 over 128.
347 : // Whichever is smaller.
348 : const LLT &Ty = Query.Types[BigTyIdx];
349 : unsigned NewSizeInBits = 1
350 : << Log2_32_Ceil(Ty.getSizeInBits() + 1);
351 : if (NewSizeInBits >= 256) {
352 : unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
353 : if (RoundedTo < NewSizeInBits)
354 : NewSizeInBits = RoundedTo;
355 : }
356 : return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
357 9420 : })
358 : // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
359 : // worth considering the multiples of 64 since 2*192 and 2*384 are not
360 : // valid.
361 : .clampScalar(LitTyIdx, s8, s256)
362 3140 : .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
363 : // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
364 : // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
365 : // At this point it's simple enough to accept the legal types.
366 : .legalIf([=](const LegalityQuery &Query) {
367 : const LLT &BigTy = Query.Types[BigTyIdx];
368 : const LLT &LitTy = Query.Types[LitTyIdx];
369 : if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
370 : return false;
371 : if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
372 : return false;
373 : return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; // Legal when the big size is a whole multiple of the little size.
374 6280 : })
375 : // Any vectors left are the wrong size. Scalarize them.
376 : .fewerElementsIf([](const LegalityQuery &Query) { return true; }, // Unconditional: the predicate always returns true.
377 : [](const LegalityQuery &Query) {
378 : return std::make_pair(
379 : 0, Query.Types[0].getElementType());
380 9420 : })
381 : .fewerElementsIf([](const LegalityQuery &Query) { return true; },
382 : [](const LegalityQuery &Query) {
383 : return std::make_pair(
384 : 1, Query.Types[1].getElementType());
385 9420 : });
386 : }
387 : // All rules are registered: call computeTables(), then verify() with the subtarget's instruction info.
388 1570 : computeTables();
389 1570 : verify(*ST.getInstrInfo());
390 1570 : }
391 : /// Dispatches custom legalization on MI's opcode: G_VAARG is forwarded to legalizeVaArg; any other opcode returns false.
392 3 : bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
393 : MachineRegisterInfo &MRI,
394 : MachineIRBuilder &MIRBuilder) const {
395 6 : switch (MI.getOpcode()) {
396 : default:
397 : // No idea what to do.
398 : return false;
399 3 : case TargetOpcode::G_VAARG:
400 3 : return legalizeVaArg(MI, MRI, MIRBuilder);
401 : }
402 : // NOTE(review): every path through the switch above returns, so the statement below is not reachable.
403 : llvm_unreachable("expected switch to return");
404 : }
405 : /// Replaces a G_VAARG instruction with explicit loads, pointer arithmetic and a store back through the list pointer, then erases MI; always returns true.
406 3 : bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
407 : MachineRegisterInfo &MRI,
408 : MachineIRBuilder &MIRBuilder) const {
409 3 : MIRBuilder.setInstr(MI);
410 3 : MachineFunction &MF = MIRBuilder.getMF();
411 3 : unsigned Align = MI.getOperand(2).getImm(); // Operand 2: immediate used as the required alignment.
412 3 : unsigned Dst = MI.getOperand(0).getReg(); // Operand 0: register that receives the loaded value.
413 3 : unsigned ListPtr = MI.getOperand(1).getReg(); // Operand 1: register holding the address the list pointer is loaded from.
414 :
415 3 : LLT PtrTy = MRI.getType(ListPtr);
416 3 : LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); // Scalar type with the same bit width as the pointer type.
417 : // Load the current list value from ListPtr into List, using a pointer-sized access with pointer-sized alignment.
418 3 : const unsigned PtrSize = PtrTy.getSizeInBits() / 8; // Pointer size in bytes.
419 3 : unsigned List = MRI.createGenericVirtualRegister(PtrTy);
420 : MIRBuilder.buildLoad(
421 : List, ListPtr,
422 6 : *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
423 3 : PtrSize, /* Align = */ PtrSize));
424 : // DstPtr is the address the value is read from: List itself, or a realigned copy when Align exceeds PtrSize.
425 : unsigned DstPtr;
426 3 : if (Align > PtrSize) {
427 : // Realign the list to the actual required alignment.
428 1 : auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
429 : // ListTmp = List + (Align - 1).
430 1 : unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
431 1 : MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());
432 : // DstPtr = ListTmp passed through buildPtrMask with Log2_64(Align); presumably this clears the low bits - confirm against the builder API.
433 1 : DstPtr = MRI.createGenericVirtualRegister(PtrTy);
434 2 : MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
435 : } else
436 : DstPtr = List;
437 : // Load the value from DstPtr into Dst; ValSize is the destination type's size in bytes.
438 3 : uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
439 : MIRBuilder.buildLoad(
440 : Dst, DstPtr,
441 9 : *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
442 3 : ValSize, std::max(Align, PtrSize)));
443 : // SizeReg holds alignTo(ValSize, PtrSize), the amount added to DstPtr to form the new list value.
444 3 : unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
445 3 : MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
446 :
447 3 : unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
448 3 : MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
449 : // Store the advanced list value back through ListPtr.
450 : MIRBuilder.buildStore(
451 : NewList, ListPtr,
452 3 : *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
453 3 : PtrSize, /* Align = */ PtrSize));
454 : // The replacement sequence is complete; remove the original G_VAARG instruction.
455 3 : MI.eraseFromParent();
456 3 : return true;
457 : }
|