1//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
20#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 enum BaseKind { RegBase, FrameIndexBase };
85
86 private:
87 BaseKind Kind = RegBase;
88 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
89 union {
90 unsigned Reg;
91 int FI;
92 } Base;
93 Register OffsetReg;
94 unsigned Shift = 0;
95 int64_t Offset = 0;
96 const GlobalValue *GV = nullptr;
97
98 public:
99 Address() { Base.Reg = 0; }
100
101 void setKind(BaseKind K) { Kind = K; }
102 BaseKind getKind() const { return Kind; }
103 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
104 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
105 bool isRegBase() const { return Kind == RegBase; }
106 bool isFIBase() const { return Kind == FrameIndexBase; }
107
108 void setReg(Register Reg) {
109 assert(isRegBase() && "Invalid base register access!");
110 Base.Reg = Reg.id();
111 }
112
113 Register getReg() const {
114 assert(isRegBase() && "Invalid base register access!");
115 return Base.Reg;
116 }
117
118 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
119
120 Register getOffsetReg() const { return OffsetReg; }
121
122 void setFI(unsigned FI) {
123 assert(isFIBase() && "Invalid base frame index access!");
124 Base.FI = FI;
125 }
126
127 unsigned getFI() const {
128 assert(isFIBase() && "Invalid base frame index access!");
129 return Base.FI;
130 }
131
132 void setOffset(int64_t O) { Offset = O; }
133 int64_t getOffset() { return Offset; }
134 void setShift(unsigned S) { Shift = S; }
135 unsigned getShift() { return Shift; }
136
137 void setGlobalValue(const GlobalValue *G) { GV = G; }
138 const GlobalValue *getGlobalValue() { return GV; }
139 };
140
141 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
142 /// make the right decision when generating code for different targets.
143 const AArch64Subtarget *Subtarget;
144 LLVMContext *Context;
145
146 bool fastLowerArguments() override;
147 bool fastLowerCall(CallLoweringInfo &CLI) override;
148 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
149
150private:
151 // Selection routines.
152 bool selectAddSub(const Instruction *I);
153 bool selectLogicalOp(const Instruction *I);
154 bool selectLoad(const Instruction *I);
155 bool selectStore(const Instruction *I);
156 bool selectBranch(const Instruction *I);
157 bool selectIndirectBr(const Instruction *I);
158 bool selectCmp(const Instruction *I);
159 bool selectSelect(const Instruction *I);
160 bool selectFPExt(const Instruction *I);
161 bool selectFPTrunc(const Instruction *I);
162 bool selectFPToInt(const Instruction *I, bool Signed);
163 bool selectIntToFP(const Instruction *I, bool Signed);
164 bool selectRem(const Instruction *I, unsigned ISDOpcode);
165 bool selectRet(const Instruction *I);
166 bool selectTrunc(const Instruction *I);
167 bool selectIntExt(const Instruction *I);
168 bool selectMul(const Instruction *I);
169 bool selectShift(const Instruction *I);
170 bool selectBitCast(const Instruction *I);
171 bool selectFRem(const Instruction *I);
172 bool selectSDiv(const Instruction *I);
173 bool selectGetElementPtr(const Instruction *I);
174 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
175
176 // Utility helper routines.
177 bool isTypeLegal(Type *Ty, MVT &VT);
178 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
179 bool isValueAvailable(const Value *V) const;
180 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
181 bool computeCallAddress(const Value *V, Address &Addr);
182 bool simplifyAddress(Address &Addr, MVT VT);
183 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
184 MachineMemOperand::Flags Flags,
185 unsigned ScaleFactor, MachineMemOperand *MMO);
186 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
187 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
188 MaybeAlign Alignment);
189 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
190 const Value *Cond);
191 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
192 bool optimizeSelect(const SelectInst *SI);
193 Register getRegForGEPIndex(const Value *Idx);
194
195 // Emit helper routines.
196 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
197 const Value *RHS, bool SetFlags = false,
198 bool WantResult = true, bool IsZExt = false);
199 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
200 Register RHSReg, bool SetFlags = false,
201 bool WantResult = true);
202 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
203 bool SetFlags = false, bool WantResult = true);
204 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
205 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
206 uint64_t ShiftImm, bool SetFlags = false,
207 bool WantResult = true);
208 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
209 Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
210 uint64_t ShiftImm, bool SetFlags = false,
211 bool WantResult = true);
212
213 // Emit functions.
214 bool emitCompareAndBranch(const BranchInst *BI);
215 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
216 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
218 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
219 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
220 MachineMemOperand *MMO = nullptr);
221 bool emitStore(MVT VT, Register SrcReg, Address Addr,
222 MachineMemOperand *MMO = nullptr);
223 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
224 MachineMemOperand *MMO = nullptr);
225 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
227 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
228 bool SetFlags = false, bool WantResult = true,
229 bool IsZExt = false);
230 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
231 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
232 bool SetFlags = false, bool WantResult = true,
233 bool IsZExt = false);
234 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
235 bool WantResult = true);
236 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
237 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
238 bool WantResult = true);
239 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
240 const Value *RHS);
241 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
242 uint64_t Imm);
243 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
244 Register RHSReg, uint64_t ShiftImm);
245 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
246 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
247 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
250 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
251 bool IsZExt = true);
252 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = false);
258
259 Register materializeInt(const ConstantInt *CI, MVT VT);
260 Register materializeFP(const ConstantFP *CFP, MVT VT);
261 Register materializeGV(const GlobalValue *GV);
262
263 // Call handling routines.
264private:
265 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
266 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
267 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
268 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
269
270public:
271 // Backend specific FastISel code.
272 Register fastMaterializeAlloca(const AllocaInst *AI) override;
273 Register fastMaterializeConstant(const Constant *C) override;
274 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
275
276 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
277 const TargetLibraryInfo *LibInfo,
278 const LibcallLoweringInfo *libcallLowering)
279 : FastISel(FuncInfo, LibInfo, libcallLowering,
280 /*SkipTargetIndependentISel=*/true) {
281 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
282 Context = &FuncInfo.Fn->getContext();
283 }
284
285 bool fastSelectInstruction(const Instruction *I) override;
286
287#include "AArch64GenFastISel.inc"
288};
289
290} // end anonymous namespace
291
292/// Check if the sign-/zero-extend will be a noop.
293static bool isIntExtFree(const Instruction *I) {
294 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
295 "Unexpected integer extend instruction.");
296 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
297 "Unexpected value type.");
298 bool IsZExt = isa<ZExtInst>(I);
299
300 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
301 if (LI->hasOneUse())
302 return true;
303
304 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
305 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
306 return true;
307
308 return false;
309}
310
311/// Determine the implicit scale factor that is applied by a memory
312/// operation for a given value type.
313static unsigned getImplicitScaleFactor(MVT VT) {
314 switch (VT.SimpleTy) {
315 default:
316 return 0; // invalid
317 case MVT::i1: // fall-through
318 case MVT::i8:
319 return 1;
320 case MVT::i16:
321 return 2;
322 case MVT::i32: // fall-through
323 case MVT::f32:
324 return 4;
325 case MVT::i64: // fall-through
326 case MVT::f64:
327 return 8;
328 }
329}
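// Illustrative note (not part of the upstream file): the scale factor is what
// relates byte offsets to the scaled, unsigned 12-bit immediate of the "ui"
// load/store forms. Assuming an i32 load at byte offset 20, emitLoad would
// pick LDRWui and encode imm = 20 / 4 = 5; an offset that is negative or not
// a multiple of 4 instead falls back to the unscaled LDURWi form with its
// signed 9-bit immediate.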
330
331CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
332 if (CC == CallingConv::GHC)
333 return CC_AArch64_GHC;
334 if (CC == CallingConv::CFGuard_Check)
335 return CC_AArch64_Win64_CFGuard_Check;
336 if (Subtarget->isTargetDarwin())
337 return CC_AArch64_DarwinPCS;
338 if (Subtarget->isTargetWindows())
339 return CC_AArch64_Win64PCS;
340 return CC_AArch64_AAPCS;
341}
342
343Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
344 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
345 "Alloca should always return a pointer.");
346
347 // Don't handle dynamic allocas.
348 auto SI = FuncInfo.StaticAllocaMap.find(AI);
349 if (SI == FuncInfo.StaticAllocaMap.end())
350 return Register();
351
352 if (SI != FuncInfo.StaticAllocaMap.end()) {
353 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
354 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
355 ResultReg)
356 .addFrameIndex(SI->second)
357 .addImm(0)
358 .addImm(0);
359 return ResultReg;
360 }
361
362 return Register();
363}
364
365Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
366 if (VT > MVT::i64)
367 return Register();
368
369 if (!CI->isZero())
370 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
371
372 // Create a copy from the zero register to materialize a "0" value.
373 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
374 : &AArch64::GPR32RegClass;
375 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
376 Register ResultReg = createResultReg(RC);
377 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
378 ResultReg).addReg(ZeroReg, getKillRegState(true));
379 return ResultReg;
380}
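// Example (illustrative sketch): for "ret i32 0" this emits a plain COPY from
// WZR into a GPR32 virtual register, while a non-zero value such as 42 goes
// through fastEmit_i and the tablegen-generated move-immediate patterns.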
381
382Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
383 // Positive zero (+0.0) has to be materialized with a fmov from the zero
384 // register, because the immediate version of fmov cannot encode zero.
385 if (CFP->isNullValue())
386 return fastMaterializeFloatZero(CFP);
387
388 if (VT != MVT::f32 && VT != MVT::f64)
389 return Register();
390
391 const APFloat Val = CFP->getValueAPF();
392 bool Is64Bit = (VT == MVT::f64);
393 // This checks to see if we can use FMOV instructions to materialize
394 // a constant, otherwise we have to materialize via the constant pool.
395 int Imm =
396 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
397 if (Imm != -1) {
398 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
399 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
400 }
401
402 // For the large code model materialize the FP constant in code.
403 if (TM.getCodeModel() == CodeModel::Large) {
404 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
405 const TargetRegisterClass *RC = Is64Bit ?
406 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
407
408 Register TmpReg = createResultReg(RC);
409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
410 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
411
412 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
413 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
414 TII.get(TargetOpcode::COPY), ResultReg)
415 .addReg(TmpReg, getKillRegState(true));
416
417 return ResultReg;
418 }
419
420 // Materialize via constant pool. MachineConstantPool wants an explicit
421 // alignment.
422 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
423
424 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
425 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
427 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
428
429 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
430 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
432 .addReg(ADRPReg)
433 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
434 return ResultReg;
435}
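// Worked example (illustrative): AArch64_AM::getFP64Imm succeeds only for
// values that fit the 8-bit FMOV immediate encoding (sign, 3-bit exponent,
// 4-bit fraction), so 2.0 or -0.625 become a single FMOVDi. 0.1 is not
// exactly representable and +0.0 is not encodable, so those take the
// constant-pool and zero-register paths above.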
436
437Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
438 // We can't handle thread-local variables quickly yet.
439 if (GV->isThreadLocal())
440 return Register();
441
442 // MachO still uses GOT for large code-model accesses, but ELF requires
443 // movz/movk sequences, which FastISel doesn't handle yet.
444 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
445 return Register();
446
447 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
448 return Register();
449
450 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
451
452 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
453 if (!DestEVT.isSimple())
454 return Register();
455
456 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
457 Register ResultReg;
458
459 if (OpFlags & AArch64II::MO_GOT) {
460 // ADRP + LDRX
461 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
462 ADRPReg)
463 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
464
465 unsigned LdrOpc;
466 if (Subtarget->isTargetILP32()) {
467 ResultReg = createResultReg(&AArch64::GPR32RegClass);
468 LdrOpc = AArch64::LDRWui;
469 } else {
470 ResultReg = createResultReg(&AArch64::GPR64RegClass);
471 LdrOpc = AArch64::LDRXui;
472 }
473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
474 ResultReg)
475 .addReg(ADRPReg)
476 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
477 AArch64II::MO_NC | OpFlags);
478 if (!Subtarget->isTargetILP32())
479 return ResultReg;
480
481 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
482 // so we must extend the result on ILP32.
483 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
485 TII.get(TargetOpcode::SUBREG_TO_REG))
486 .addDef(Result64)
487 .addReg(ResultReg, RegState::Kill)
488 .addImm(AArch64::sub_32);
489 return Result64;
490 } else {
491 // ADRP + ADDX
492 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
493 ADRPReg)
494 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
495
496 if (OpFlags & AArch64II::MO_TAGGED) {
497 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
498 // We do so by creating a MOVK that sets bits 48-63 of the register to
499 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
500 // the small code model so we can assume a binary size of <= 4GB, which
501 // makes the untagged PC relative offset positive. The binary must also be
502 // loaded into address range [0, 2^48). Both of these properties need to
503 // be ensured at runtime when using tagged addresses.
504 //
505 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
506 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
507 // are not exactly 1:1 with FastISel so we cannot easily abstract this
508 // out. At some point, it would be nice to find a way to not have this
509 // duplicate code.
510 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
512 DstReg)
513 .addReg(ADRPReg)
514 .addGlobalAddress(GV, /*Offset=*/0x100000000,
515 AArch64II::MO_PREL | AArch64II::MO_G3)
516 .addImm(48);
517 ADRPReg = DstReg;
518 }
519
520 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
522 ResultReg)
523 .addReg(ADRPReg)
524 .addGlobalAddress(GV, 0,
525 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
526 .addImm(0);
527 }
528 return ResultReg;
529}
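// Illustrative expansions (small code model assumed, not from the upstream
// source):
//   GOT access:    adrp x0, :got:sym   ; ldr x0, [x0, :got_lo12:sym]
//   direct access: adrp x0, sym        ; add x0, x0, :lo12:sym
// The MO_TAGGED path additionally emits a MOVK to place the memory tag into
// bits [63:48] of the page address before the final ADD.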
530
531Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
532 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
533
534 // Only handle simple types.
535 if (!CEVT.isSimple())
536 return Register();
537 MVT VT = CEVT.getSimpleVT();
538 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
539 // 'null' pointers need to have a somewhat special treatment.
540 if (isa<ConstantPointerNull>(C)) {
541 assert(VT == MVT::i64 && "Expected 64-bit pointers");
542 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
543 }
544
545 if (const auto *CI = dyn_cast<ConstantInt>(C))
546 return materializeInt(CI, VT);
547 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
548 return materializeFP(CFP, VT);
549 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
550 return materializeGV(GV);
551
552 return Register();
553}
554
555Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
556 assert(CFP->isNullValue() &&
557 "Floating-point constant is not a positive zero.");
558 MVT VT;
559 if (!isTypeLegal(CFP->getType(), VT))
560 return Register();
561
562 if (VT != MVT::f32 && VT != MVT::f64)
563 return Register();
564
565 bool Is64Bit = (VT == MVT::f64);
566 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
567 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
568 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
569}
570
571/// Check if the multiply is by a power-of-2 constant.
572static bool isMulPowOf2(const Value *I) {
573 if (const auto *MI = dyn_cast<MulOperator>(I)) {
574 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
575 if (C->getValue().isPowerOf2())
576 return true;
577 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
578 if (C->getValue().isPowerOf2())
579 return true;
580 }
581 return false;
582}
583
584// Computes the address to get to an object.
585bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
586{
587 const User *U = nullptr;
588 unsigned Opcode = Instruction::UserOp1;
589 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
590 // Don't walk into other basic blocks unless the object is an alloca from
591 // another block, otherwise it may not have a virtual register assigned.
592 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
593 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
594 Opcode = I->getOpcode();
595 U = I;
596 }
597 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
598 Opcode = C->getOpcode();
599 U = C;
600 }
601
602 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
603 if (Ty->getAddressSpace() > 255)
604 // Fast instruction selection doesn't support the special
605 // address spaces.
606 return false;
607
608 switch (Opcode) {
609 default:
610 break;
611 case Instruction::BitCast:
612 // Look through bitcasts.
613 return computeAddress(U->getOperand(0), Addr, Ty);
614
615 case Instruction::IntToPtr:
616 // Look past no-op inttoptrs.
617 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
618 TLI.getPointerTy(DL))
619 return computeAddress(U->getOperand(0), Addr, Ty);
620 break;
621
622 case Instruction::PtrToInt:
623 // Look past no-op ptrtoints.
624 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
625 return computeAddress(U->getOperand(0), Addr, Ty);
626 break;
627
628 case Instruction::GetElementPtr: {
629 Address SavedAddr = Addr;
630 uint64_t TmpOffset = Addr.getOffset();
631
632 // Iterate through the GEP folding the constants into offsets where
633 // we can.
634 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
635 GTI != E; ++GTI) {
636 const Value *Op = GTI.getOperand();
637 if (StructType *STy = GTI.getStructTypeOrNull()) {
638 const StructLayout *SL = DL.getStructLayout(STy);
639 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
640 TmpOffset += SL->getElementOffset(Idx);
641 } else {
642 uint64_t S = GTI.getSequentialElementStride(DL);
643 while (true) {
644 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
645 // Constant-offset addressing.
646 TmpOffset += CI->getSExtValue() * S;
647 break;
648 }
649 if (canFoldAddIntoGEP(U, Op)) {
650 // A compatible add with a constant operand. Fold the constant.
651 ConstantInt *CI =
652 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
653 TmpOffset += CI->getSExtValue() * S;
654 // Iterate on the other operand.
655 Op = cast<AddOperator>(Op)->getOperand(0);
656 continue;
657 }
658 // Unsupported
659 goto unsupported_gep;
660 }
661 }
662 }
663
664 // Try to grab the base operand now.
665 Addr.setOffset(TmpOffset);
666 if (computeAddress(U->getOperand(0), Addr, Ty))
667 return true;
668
669 // We failed, restore everything and try the other options.
670 Addr = SavedAddr;
671
672 unsupported_gep:
673 break;
674 }
675 case Instruction::Alloca: {
676 const AllocaInst *AI = cast<AllocaInst>(Obj);
677 DenseMap<const AllocaInst *, int>::iterator SI =
678 FuncInfo.StaticAllocaMap.find(AI);
679 if (SI != FuncInfo.StaticAllocaMap.end()) {
680 Addr.setKind(Address::FrameIndexBase);
681 Addr.setFI(SI->second);
682 return true;
683 }
684 break;
685 }
686 case Instruction::Add: {
687 // Adds of constants are common and easy enough.
688 const Value *LHS = U->getOperand(0);
689 const Value *RHS = U->getOperand(1);
690
691 if (isa<ConstantInt>(LHS))
692 std::swap(LHS, RHS);
693
694 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
695 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
696 return computeAddress(LHS, Addr, Ty);
697 }
698
699 Address Backup = Addr;
700 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
701 return true;
702 Addr = Backup;
703
704 break;
705 }
706 case Instruction::Sub: {
707 // Subs of constants are common and easy enough.
708 const Value *LHS = U->getOperand(0);
709 const Value *RHS = U->getOperand(1);
710
711 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
712 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
713 return computeAddress(LHS, Addr, Ty);
714 }
715 break;
716 }
717 case Instruction::Shl: {
718 if (Addr.getOffsetReg())
719 break;
720
721 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
722 if (!CI)
723 break;
724
725 unsigned Val = CI->getZExtValue();
726 if (Val < 1 || Val > 3)
727 break;
728
729 uint64_t NumBytes = 0;
730 if (Ty && Ty->isSized()) {
731 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
732 NumBytes = NumBits / 8;
733 if (!isPowerOf2_64(NumBits))
734 NumBytes = 0;
735 }
736
737 if (NumBytes != (1ULL << Val))
738 break;
739
740 Addr.setShift(Val);
741 Addr.setExtendType(AArch64_AM::LSL);
742
743 const Value *Src = U->getOperand(0);
744 if (const auto *I = dyn_cast<Instruction>(Src)) {
745 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
746 // Fold the zext or sext when it won't become a noop.
747 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
748 if (!isIntExtFree(ZE) &&
749 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
750 Addr.setExtendType(AArch64_AM::UXTW);
751 Src = ZE->getOperand(0);
752 }
753 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
754 if (!isIntExtFree(SE) &&
755 SE->getOperand(0)->getType()->isIntegerTy(32)) {
756 Addr.setExtendType(AArch64_AM::SXTW);
757 Src = SE->getOperand(0);
758 }
759 }
760 }
761 }
762
763 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
764 if (AI->getOpcode() == Instruction::And) {
765 const Value *LHS = AI->getOperand(0);
766 const Value *RHS = AI->getOperand(1);
767
768 if (const auto *C = dyn_cast<ConstantInt>(LHS))
769 if (C->getValue() == 0xffffffff)
770 std::swap(LHS, RHS);
771
772 if (const auto *C = dyn_cast<ConstantInt>(RHS))
773 if (C->getValue() == 0xffffffff) {
774 Addr.setExtendType(AArch64_AM::UXTW);
775 Register Reg = getRegForValue(LHS);
776 if (!Reg)
777 return false;
778 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
779 Addr.setOffsetReg(Reg);
780 return true;
781 }
782 }
783
784 Register Reg = getRegForValue(Src);
785 if (!Reg)
786 return false;
787 Addr.setOffsetReg(Reg);
788 return true;
789 }
790 case Instruction::Mul: {
791 if (Addr.getOffsetReg())
792 break;
793
794 if (!isMulPowOf2(U))
795 break;
796
797 const Value *LHS = U->getOperand(0);
798 const Value *RHS = U->getOperand(1);
799
800 // Canonicalize power-of-2 value to the RHS.
801 if (const auto *C = dyn_cast<ConstantInt>(LHS))
802 if (C->getValue().isPowerOf2())
803 std::swap(LHS, RHS);
804
805 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
806 const auto *C = cast<ConstantInt>(RHS);
807 unsigned Val = C->getValue().logBase2();
808 if (Val < 1 || Val > 3)
809 break;
810
811 uint64_t NumBytes = 0;
812 if (Ty && Ty->isSized()) {
813 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
814 NumBytes = NumBits / 8;
815 if (!isPowerOf2_64(NumBits))
816 NumBytes = 0;
817 }
818
819 if (NumBytes != (1ULL << Val))
820 break;
821
822 Addr.setShift(Val);
823 Addr.setExtendType(AArch64_AM::LSL);
824
825 const Value *Src = LHS;
826 if (const auto *I = dyn_cast<Instruction>(Src)) {
827 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
828 // Fold the zext or sext when it won't become a noop.
829 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
830 if (!isIntExtFree(ZE) &&
831 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
832 Addr.setExtendType(AArch64_AM::UXTW);
833 Src = ZE->getOperand(0);
834 }
835 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
836 if (!isIntExtFree(SE) &&
837 SE->getOperand(0)->getType()->isIntegerTy(32)) {
838 Addr.setExtendType(AArch64_AM::SXTW);
839 Src = SE->getOperand(0);
840 }
841 }
842 }
843 }
844
845 Register Reg = getRegForValue(Src);
846 if (!Reg)
847 return false;
848 Addr.setOffsetReg(Reg);
849 return true;
850 }
851 case Instruction::And: {
852 if (Addr.getOffsetReg())
853 break;
854
855 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
856 break;
857
858 const Value *LHS = U->getOperand(0);
859 const Value *RHS = U->getOperand(1);
860
861 if (const auto *C = dyn_cast<ConstantInt>(LHS))
862 if (C->getValue() == 0xffffffff)
863 std::swap(LHS, RHS);
864
865 if (const auto *C = dyn_cast<ConstantInt>(RHS))
866 if (C->getValue() == 0xffffffff) {
867 Addr.setShift(0);
868 Addr.setExtendType(AArch64_AM::LSL);
869 Addr.setExtendType(AArch64_AM::UXTW);
870
871 Register Reg = getRegForValue(LHS);
872 if (!Reg)
873 return false;
874 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
875 Addr.setOffsetReg(Reg);
876 return true;
877 }
878 break;
879 }
880 case Instruction::SExt:
881 case Instruction::ZExt: {
882 if (!Addr.getReg() || Addr.getOffsetReg())
883 break;
884
885 const Value *Src = nullptr;
886 // Fold the zext or sext when it won't become a noop.
887 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
888 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
889 Addr.setExtendType(AArch64_AM::UXTW);
890 Src = ZE->getOperand(0);
891 }
892 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
893 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
894 Addr.setExtendType(AArch64_AM::SXTW);
895 Src = SE->getOperand(0);
896 }
897 }
898
899 if (!Src)
900 break;
901
902 Addr.setShift(0);
903 Register Reg = getRegForValue(Src);
904 if (!Reg)
905 return false;
906 Addr.setOffsetReg(Reg);
907 return true;
908 }
909 } // end switch
910
911 if (Addr.isRegBase() && !Addr.getReg()) {
912 Register Reg = getRegForValue(Obj);
913 if (!Reg)
914 return false;
915 Addr.setReg(Reg);
916 return true;
917 }
918
919 if (!Addr.getOffsetReg()) {
920 Register Reg = getRegForValue(Obj);
921 if (!Reg)
922 return false;
923 Addr.setOffsetReg(Reg);
924 return true;
925 }
926
927 return false;
928}
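// Example (illustrative): for IR such as
//   %p = getelementptr inbounds i32, ptr %base, i64 3
//   %v = load i32, ptr %p
// the GetElementPtr case folds the constant index into Addr (register base =
// %base, Offset = 12). A single-use shl/zext feeding the index is instead
// turned into a register offset with an LSL or UXTW/SXTW extend, e.g.
//   ldr w0, [x1, w2, uxtw #2]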
929
930bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
931 const User *U = nullptr;
932 unsigned Opcode = Instruction::UserOp1;
933 bool InMBB = true;
934
935 if (const auto *I = dyn_cast<Instruction>(V)) {
936 Opcode = I->getOpcode();
937 U = I;
938 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
939 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
940 Opcode = C->getOpcode();
941 U = C;
942 }
943
944 switch (Opcode) {
945 default: break;
946 case Instruction::BitCast:
947 // Look past bitcasts if its operand is in the same BB.
948 if (InMBB)
949 return computeCallAddress(U->getOperand(0), Addr);
950 break;
951 case Instruction::IntToPtr:
952 // Look past no-op inttoptrs if its operand is in the same BB.
953 if (InMBB &&
954 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
955 TLI.getPointerTy(DL))
956 return computeCallAddress(U->getOperand(0), Addr);
957 break;
958 case Instruction::PtrToInt:
959 // Look past no-op ptrtoints if its operand is in the same BB.
960 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
961 return computeCallAddress(U->getOperand(0), Addr);
962 break;
963 }
964
965 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
966 Addr.setGlobalValue(GV);
967 return true;
968 }
969
970 // If all else fails, try to materialize the value in a register.
971 if (!Addr.getGlobalValue()) {
972 Addr.setReg(getRegForValue(V));
973 return Addr.getReg().isValid();
974 }
975
976 return false;
977}
978
979bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
980 EVT evt = TLI.getValueType(DL, Ty, true);
981
982 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
983 return false;
984
985 // Only handle simple types.
986 if (evt == MVT::Other || !evt.isSimple())
987 return false;
988 VT = evt.getSimpleVT();
989
990 // This is a legal type, but it's not something we handle in fast-isel.
991 if (VT == MVT::f128)
992 return false;
993
994 // Handle all other legal types, i.e. a register that will directly hold this
995 // value.
996 return TLI.isTypeLegal(VT);
997}
998
999/// Determine if the value type is supported by FastISel.
1000///
1001/// FastISel for AArch64 can handle more value types than are legal. This adds
1002/// simple value types such as i1, i8, and i16.
1003bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1004 if (Ty->isVectorTy() && !IsVectorAllowed)
1005 return false;
1006
1007 if (isTypeLegal(Ty, VT))
1008 return true;
1009
1010 // If this is a type that can be sign- or zero-extended to a basic operation,
1011 // go ahead and accept it now.
1012 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1013 return true;
1014
1015 return false;
1016}
1017
1018bool AArch64FastISel::isValueAvailable(const Value *V) const {
1019 if (!isa<Instruction>(V))
1020 return true;
1021
1022 const auto *I = cast<Instruction>(V);
1023 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1024}
1025
1026bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1027 if (Subtarget->isTargetILP32())
1028 return false;
1029
1030 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1031 if (!ScaleFactor)
1032 return false;
1033
1034 bool ImmediateOffsetNeedsLowering = false;
1035 bool RegisterOffsetNeedsLowering = false;
1036 int64_t Offset = Addr.getOffset();
1037 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1038 ImmediateOffsetNeedsLowering = true;
1039 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1040 !isUInt<12>(Offset / ScaleFactor))
1041 ImmediateOffsetNeedsLowering = true;
1042
1043 // Cannot encode an offset register and an immediate offset in the same
1044 // instruction. Fold the immediate offset into the load/store instruction and
1045 // emit an additional add to take care of the offset register.
1046 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1047 RegisterOffsetNeedsLowering = true;
1048
1049 // Cannot encode zero register as base.
1050 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1051 RegisterOffsetNeedsLowering = true;
1052
1053 // If this is a stack pointer and the offset needs to be simplified then put
1054 // the alloca address into a register, set the base type back to register and
1055 // continue. This should almost never happen.
1056 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1057 {
1058 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1059 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1060 ResultReg)
1061 .addFrameIndex(Addr.getFI())
1062 .addImm(0)
1063 .addImm(0);
1064 Addr.setKind(Address::RegBase);
1065 Addr.setReg(ResultReg);
1066 }
1067
1068 if (RegisterOffsetNeedsLowering) {
1069 Register ResultReg;
1070 if (Addr.getReg()) {
1071 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1072 Addr.getExtendType() == AArch64_AM::UXTW )
1073 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1074 Addr.getOffsetReg(), Addr.getExtendType(),
1075 Addr.getShift());
1076 else
1077 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1078 Addr.getOffsetReg(), AArch64_AM::LSL,
1079 Addr.getShift());
1080 } else {
1081 if (Addr.getExtendType() == AArch64_AM::UXTW)
1082 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1083 Addr.getShift(), /*IsZExt=*/true);
1084 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1085 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1086 Addr.getShift(), /*IsZExt=*/false);
1087 else
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1089 Addr.getShift());
1090 }
1091 if (!ResultReg)
1092 return false;
1093
1094 Addr.setReg(ResultReg);
1095 Addr.setOffsetReg(0);
1096 Addr.setShift(0);
1097 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1098 }
1099
1100 // Since the offset is too large for the load/store instruction get the
1101 // reg+offset into a register.
1102 if (ImmediateOffsetNeedsLowering) {
1103 Register ResultReg;
1104 if (Addr.getReg())
1105 // Try to fold the immediate into the add instruction.
1106 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1107 else
1108 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1109
1110 if (!ResultReg)
1111 return false;
1112 Addr.setReg(ResultReg);
1113 Addr.setOffset(0);
1114 }
1115 return true;
1116}
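// Worked example (illustrative): for an i32 access (ScaleFactor = 4) the
// encodable immediates are a scaled unsigned offset in [0, 4095 * 4] that is
// a multiple of 4, or an unscaled signed offset in [-256, 255]. An offset of
// 16388 (4097 * 4) fits neither form, so the code above folds base + 16388
// into a new register via emitAdd_ri_ and clears Addr's offset.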
1117
1118void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1119 const MachineInstrBuilder &MIB,
1120 MachineMemOperand::Flags Flags,
1121 unsigned ScaleFactor,
1122 MachineMemOperand *MMO) {
1123 int64_t Offset = Addr.getOffset() / ScaleFactor;
1124 // Frame base works a bit differently. Handle it separately.
1125 if (Addr.isFIBase()) {
1126 int FI = Addr.getFI();
1127 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1128 // and alignment should be based on the VT.
1129 MMO = FuncInfo.MF->getMachineMemOperand(
1130 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1131 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1132 // Now add the rest of the operands.
1133 MIB.addFrameIndex(FI).addImm(Offset);
1134 } else {
1135 assert(Addr.isRegBase() && "Unexpected address kind.");
1136 const MCInstrDesc &II = MIB->getDesc();
1137 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1138 Addr.setReg(
1139 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1140 Addr.setOffsetReg(
1141 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1142 if (Addr.getOffsetReg()) {
1143 assert(Addr.getOffset() == 0 && "Unexpected offset");
1144 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1145 Addr.getExtendType() == AArch64_AM::SXTX;
1146 MIB.addReg(Addr.getReg());
1147 MIB.addReg(Addr.getOffsetReg());
1148 MIB.addImm(IsSigned);
1149 MIB.addImm(Addr.getShift() != 0);
1150 } else
1151 MIB.addReg(Addr.getReg()).addImm(Offset);
1152 }
1153
1154 if (MMO)
1155 MIB.addMemOperand(MMO);
1156}
1157
1158Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1159 const Value *RHS, bool SetFlags,
1160 bool WantResult, bool IsZExt) {
1161 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1162 bool NeedExtend = false;
1163 switch (RetVT.SimpleTy) {
1164 default:
1165 return Register();
1166 case MVT::i1:
1167 NeedExtend = true;
1168 break;
1169 case MVT::i8:
1170 NeedExtend = true;
1171 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1172 break;
1173 case MVT::i16:
1174 NeedExtend = true;
1175 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1176 break;
1177 case MVT::i32: // fall-through
1178 case MVT::i64:
1179 break;
1180 }
1181 MVT SrcVT = RetVT;
1182 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1183
1184 // Canonicalize immediates to the RHS first.
1185 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1186 std::swap(LHS, RHS);
1187
1188 // Canonicalize mul by power of 2 to the RHS.
1189 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1190 if (isMulPowOf2(LHS))
1191 std::swap(LHS, RHS);
1192
1193 // Canonicalize shift immediate to the RHS.
1194 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1195 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1196 if (isa<ConstantInt>(SI->getOperand(1)))
1197 if (SI->getOpcode() == Instruction::Shl ||
1198 SI->getOpcode() == Instruction::LShr ||
1199 SI->getOpcode() == Instruction::AShr )
1200 std::swap(LHS, RHS);
1201
1202 Register LHSReg = getRegForValue(LHS);
1203 if (!LHSReg)
1204 return Register();
1205
1206 if (NeedExtend)
1207 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1208
1209 Register ResultReg;
1210 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1211 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1212 if (C->isNegative())
1213 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1214 WantResult);
1215 else
1216 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1217 WantResult);
1218 } else if (const auto *C = dyn_cast<Constant>(RHS))
1219 if (C->isNullValue())
1220 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1221
1222 if (ResultReg)
1223 return ResultReg;
1224
1225 // Only extend the RHS within the instruction if there is a valid extend type.
1226 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1227 isValueAvailable(RHS)) {
1228 Register RHSReg = getRegForValue(RHS);
1229 if (!RHSReg)
1230 return Register();
1231 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1232 SetFlags, WantResult);
1233 }
1234
1235 // Check if the mul can be folded into the instruction.
1236 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1237 if (isMulPowOf2(RHS)) {
1238 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1239 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1240
1241 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1242 if (C->getValue().isPowerOf2())
1243 std::swap(MulLHS, MulRHS);
1244
1245 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1246 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1247 Register RHSReg = getRegForValue(MulLHS);
1248 if (!RHSReg)
1249 return Register();
1250 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1251 ShiftVal, SetFlags, WantResult);
1252 if (ResultReg)
1253 return ResultReg;
1254 }
1255 }
1256
1257 // Check if the shift can be folded into the instruction.
1258 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1259 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1260 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1261 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1262 switch (SI->getOpcode()) {
1263 default: break;
1264 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1265 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1266 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1267 }
1268 uint64_t ShiftVal = C->getZExtValue();
1269 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1270 Register RHSReg = getRegForValue(SI->getOperand(0));
1271 if (!RHSReg)
1272 return Register();
1273 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1274 ShiftVal, SetFlags, WantResult);
1275 if (ResultReg)
1276 return ResultReg;
1277 }
1278 }
1279 }
1280 }
1281
1282 Register RHSReg = getRegForValue(RHS);
1283 if (!RHSReg)
1284 return Register();
1285
1286 if (NeedExtend)
1287 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1288
1289 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1290}
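// Example of the folds above (illustrative): for
//   %s = shl i32 %b, 2
//   %r = add i32 %a, %s
// the single-use shift is folded into the arithmetic instruction as
//   add w0, w1, w2, lsl #2
// and for i8/i16 operands the extended-register form is used instead, e.g.
//   add w0, w1, w2, uxtb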
1291
1292Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1293 Register RHSReg, bool SetFlags,
1294 bool WantResult) {
1295 assert(LHSReg && RHSReg && "Invalid register number.");
1296
1297 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1298 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1299 return Register();
1300
1301 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1302 return Register();
1303
1304 static const unsigned OpcTable[2][2][2] = {
1305 { { AArch64::SUBWrr, AArch64::SUBXrr },
1306 { AArch64::ADDWrr, AArch64::ADDXrr } },
1307 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1308 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1309 };
1310 bool Is64Bit = RetVT == MVT::i64;
1311 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1312 const TargetRegisterClass *RC =
1313 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1314 Register ResultReg;
1315 if (WantResult)
1316 ResultReg = createResultReg(RC);
1317 else
1318 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1319
1320 const MCInstrDesc &II = TII.get(Opc);
1321 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1322 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1323 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1324 .addReg(LHSReg)
1325 .addReg(RHSReg);
1326 return ResultReg;
1327}
1328
1329Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1330 uint64_t Imm, bool SetFlags,
1331 bool WantResult) {
1332 assert(LHSReg && "Invalid register number.");
1333
1334 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1335 return Register();
1336
1337 unsigned ShiftImm;
1338 if (isUInt<12>(Imm))
1339 ShiftImm = 0;
1340 else if ((Imm & 0xfff000) == Imm) {
1341 ShiftImm = 12;
1342 Imm >>= 12;
1343 } else
1344 return Register();
1345
1346 static const unsigned OpcTable[2][2][2] = {
1347 { { AArch64::SUBWri, AArch64::SUBXri },
1348 { AArch64::ADDWri, AArch64::ADDXri } },
1349 { { AArch64::SUBSWri, AArch64::SUBSXri },
1350 { AArch64::ADDSWri, AArch64::ADDSXri } }
1351 };
1352 bool Is64Bit = RetVT == MVT::i64;
1353 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1354 const TargetRegisterClass *RC;
1355 if (SetFlags)
1356 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1357 else
1358 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1359 Register ResultReg;
1360 if (WantResult)
1361 ResultReg = createResultReg(RC);
1362 else
1363 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1364
1365 const MCInstrDesc &II = TII.get(Opc);
1366 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1367 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1368 .addReg(LHSReg)
1369 .addImm(Imm)
1370 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1371 return ResultReg;
1372}
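// Worked example (illustrative):
//   Imm = 7         -> add w0, w1, #7
//   Imm = 0x3000    -> add w0, w1, #3, lsl #12
//   Imm = 0x1234567 -> not encodable here; callers such as emitAdd_ri_
//                      materialize the constant and use the "rr" form.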
1373
1374Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1375 Register RHSReg,
1376 AArch64_AM::ShiftExtendType ShiftType,
1377 uint64_t ShiftImm, bool SetFlags,
1378 bool WantResult) {
1379 assert(LHSReg && RHSReg && "Invalid register number.");
1380 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1381 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1382
1383 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1384 return Register();
1385
1386 // Don't deal with undefined shifts.
1387 if (ShiftImm >= RetVT.getSizeInBits())
1388 return Register();
1389
1390 static const unsigned OpcTable[2][2][2] = {
1391 { { AArch64::SUBWrs, AArch64::SUBXrs },
1392 { AArch64::ADDWrs, AArch64::ADDXrs } },
1393 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1394 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1395 };
1396 bool Is64Bit = RetVT == MVT::i64;
1397 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1398 const TargetRegisterClass *RC =
1399 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1400 Register ResultReg;
1401 if (WantResult)
1402 ResultReg = createResultReg(RC);
1403 else
1404 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1405
1406 const MCInstrDesc &II = TII.get(Opc);
1407 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1408 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1410 .addReg(LHSReg)
1411 .addReg(RHSReg)
1412 .addImm(getShifterImm(ShiftType, ShiftImm));
1413 return ResultReg;
1414}
1415
1416Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1417 Register RHSReg,
1418 AArch64_AM::ShiftExtendType ExtType,
1419 uint64_t ShiftImm, bool SetFlags,
1420 bool WantResult) {
1421 assert(LHSReg && RHSReg && "Invalid register number.");
1422 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1423 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1424
1425 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1426 return Register();
1427
1428 if (ShiftImm >= 4)
1429 return Register();
1430
1431 static const unsigned OpcTable[2][2][2] = {
1432 { { AArch64::SUBWrx, AArch64::SUBXrx },
1433 { AArch64::ADDWrx, AArch64::ADDXrx } },
1434 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1435 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1436 };
1437 bool Is64Bit = RetVT == MVT::i64;
1438 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1439 const TargetRegisterClass *RC = nullptr;
1440 if (SetFlags)
1441 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1442 else
1443 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1444 Register ResultReg;
1445 if (WantResult)
1446 ResultReg = createResultReg(RC);
1447 else
1448 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1449
1450 const MCInstrDesc &II = TII.get(Opc);
1451 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1452 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1453 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1454 .addReg(LHSReg)
1455 .addReg(RHSReg)
1456 .addImm(getArithExtendImm(ExtType, ShiftImm));
1457 return ResultReg;
1458}
1459
1460bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1461 Type *Ty = LHS->getType();
1462 EVT EVT = TLI.getValueType(DL, Ty, true);
1463 if (!EVT.isSimple())
1464 return false;
1465 MVT VT = EVT.getSimpleVT();
1466
1467 switch (VT.SimpleTy) {
1468 default:
1469 return false;
1470 case MVT::i1:
1471 case MVT::i8:
1472 case MVT::i16:
1473 case MVT::i32:
1474 case MVT::i64:
1475 return emitICmp(VT, LHS, RHS, IsZExt);
1476 case MVT::f32:
1477 case MVT::f64:
1478 return emitFCmp(VT, LHS, RHS);
1479 }
1480}
1481
1482bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1483 bool IsZExt) {
1484 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1485 IsZExt)
1486 .isValid();
1487}
1488
1489bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1490 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1491 /*SetFlags=*/true, /*WantResult=*/false)
1492 .isValid();
1493}
1494
1495bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1496 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1497 return false;
1498
1499 // Check to see if the 2nd operand is a constant that we can encode directly
1500 // in the compare.
1501 bool UseImm = false;
1502 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1503 if (CFP->isZero() && !CFP->isNegative())
1504 UseImm = true;
1505
1506 Register LHSReg = getRegForValue(LHS);
1507 if (!LHSReg)
1508 return false;
1509
1510 if (UseImm) {
1511 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1512 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1513 .addReg(LHSReg);
1514 return true;
1515 }
1516
1517 Register RHSReg = getRegForValue(RHS);
1518 if (!RHSReg)
1519 return false;
1520
1521 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1523 .addReg(LHSReg)
1524 .addReg(RHSReg);
1525 return true;
1526}
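// Example (illustrative): comparing against positive zero avoids
// materializing the constant:
//   fcmp olt float %x, 0.0  ->  fcmp s0, #0.0
// Any other FP constant is loaded into a register first and compared with the
// two-register form (fcmp s0, s1).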
1527
1528Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1529 bool SetFlags, bool WantResult, bool IsZExt) {
1530 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1531 IsZExt);
1532}
1533
1534/// This method is a wrapper to simplify add emission.
1535///
1536/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1537/// that fails, then try to materialize the immediate into a register and use
1538/// emitAddSub_rr instead.
1539Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1540 Register ResultReg;
1541 if (Imm < 0)
1542 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1543 else
1544 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1545
1546 if (ResultReg)
1547 return ResultReg;
1548
1549 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1550 if (!CReg)
1551 return Register();
1552
1553 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1554 return ResultReg;
1555}
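// Usage sketch (illustrative): simplifyAddress passes the raw byte offset
// here, so emitAdd_ri_(MVT::i64, Base, 16388) first tries ADDXri (16388 is
// neither an imm12 nor an imm12 shifted by 12), then materializes the
// constant with fastEmit_i and falls back to ADDXrr.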
1556
1557Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1558 bool SetFlags, bool WantResult, bool IsZExt) {
1559 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1560 IsZExt);
1561}
1562
1563Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1564 Register RHSReg, bool WantResult) {
1565 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1566 /*SetFlags=*/true, WantResult);
1567}
1568
1569Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1570 Register RHSReg,
1571 AArch64_AM::ShiftExtendType ShiftType,
1572 uint64_t ShiftImm, bool WantResult) {
1573 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1574 ShiftImm, /*SetFlags=*/true, WantResult);
1575}
1576
1577Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1578 const Value *LHS, const Value *RHS) {
1579 // Canonicalize immediates to the RHS first.
1580 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1581 std::swap(LHS, RHS);
1582
1583 // Canonicalize mul by power-of-2 to the RHS.
1584 if (LHS->hasOneUse() && isValueAvailable(LHS))
1585 if (isMulPowOf2(LHS))
1586 std::swap(LHS, RHS);
1587
1588 // Canonicalize shift immediate to the RHS.
1589 if (LHS->hasOneUse() && isValueAvailable(LHS))
1590 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1591 if (isa<ConstantInt>(SI->getOperand(1)))
1592 std::swap(LHS, RHS);
1593
1594 Register LHSReg = getRegForValue(LHS);
1595 if (!LHSReg)
1596 return Register();
1597
1598 Register ResultReg;
1599 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1600 uint64_t Imm = C->getZExtValue();
1601 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1602 }
1603 if (ResultReg)
1604 return ResultReg;
1605
1606 // Check if the mul can be folded into the instruction.
1607 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1608 if (isMulPowOf2(RHS)) {
1609 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1610 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1611
1612 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1613 if (C->getValue().isPowerOf2())
1614 std::swap(MulLHS, MulRHS);
1615
1616 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1617 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1618
1619 Register RHSReg = getRegForValue(MulLHS);
1620 if (!RHSReg)
1621 return Register();
1622 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623 if (ResultReg)
1624 return ResultReg;
1625 }
1626 }
1627
1628 // Check if the shift can be folded into the instruction.
1629 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632 uint64_t ShiftVal = C->getZExtValue();
1633 Register RHSReg = getRegForValue(SI->getOperand(0));
1634 if (!RHSReg)
1635 return Register();
1636 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1637 if (ResultReg)
1638 return ResultReg;
1639 }
1640 }
1641
1642 Register RHSReg = getRegForValue(RHS);
1643 if (!RHSReg)
1644 return Register();
1645
1646 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1647 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1648 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1649 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1650 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1651 }
1652 return ResultReg;
1653}
1654
1655Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1656 Register LHSReg, uint64_t Imm) {
1657 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658 "ISD nodes are not consecutive!");
1659 static const unsigned OpcTable[3][2] = {
1660 { AArch64::ANDWri, AArch64::ANDXri },
1661 { AArch64::ORRWri, AArch64::ORRXri },
1662 { AArch64::EORWri, AArch64::EORXri }
1663 };
1664 const TargetRegisterClass *RC;
1665 unsigned Opc;
1666 unsigned RegSize;
1667 switch (RetVT.SimpleTy) {
1668 default:
1669 return Register();
1670 case MVT::i1:
1671 case MVT::i8:
1672 case MVT::i16:
1673 case MVT::i32: {
1674 unsigned Idx = ISDOpc - ISD::AND;
1675 Opc = OpcTable[Idx][0];
1676 RC = &AArch64::GPR32spRegClass;
1677 RegSize = 32;
1678 break;
1679 }
1680 case MVT::i64:
1681 Opc = OpcTable[ISDOpc - ISD::AND][1];
1682 RC = &AArch64::GPR64spRegClass;
1683 RegSize = 64;
1684 break;
1685 }
1686
1687 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688 return Register();
1689
1690 Register ResultReg =
1691 fastEmitInst_ri(Opc, RC, LHSReg,
1692 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1696 }
1697 return ResultReg;
1698}
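// Example (illustrative): Imm must be a valid bitmask immediate, i.e. a
// rotated run of ones replicated across the register. 0xff, 0xff00 and 0xfffc
// all encode directly (e.g. and w0, w1, #0xff), whereas 0x101 does not, so
// emitLogicalOp falls back to materializing the constant and using the
// register form.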
1699
1700Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701 Register LHSReg, Register RHSReg,
1702 uint64_t ShiftImm) {
1703 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1704 "ISD nodes are not consecutive!");
1705 static const unsigned OpcTable[3][2] = {
1706 { AArch64::ANDWrs, AArch64::ANDXrs },
1707 { AArch64::ORRWrs, AArch64::ORRXrs },
1708 { AArch64::EORWrs, AArch64::EORXrs }
1709 };
1710
1711 // Don't deal with undefined shifts.
1712 if (ShiftImm >= RetVT.getSizeInBits())
1713 return Register();
1714
1715 const TargetRegisterClass *RC;
1716 unsigned Opc;
1717 switch (RetVT.SimpleTy) {
1718 default:
1719 return Register();
1720 case MVT::i1:
1721 case MVT::i8:
1722 case MVT::i16:
1723 case MVT::i32:
1724 Opc = OpcTable[ISDOpc - ISD::AND][0];
1725 RC = &AArch64::GPR32RegClass;
1726 break;
1727 case MVT::i64:
1728 Opc = OpcTable[ISDOpc - ISD::AND][1];
1729 RC = &AArch64::GPR64RegClass;
1730 break;
1731 }
1732 Register ResultReg =
1733 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1734 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1735 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1736 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1737 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1738 }
1739 return ResultReg;
1740}
1741
1742Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1743 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1744}
1745
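/// Emit a load of type VT, optionally widening the result to RetVT so that a
/// following sign-/zero-extend can be folded away. The addressing mode is
/// chosen between unscaled-immediate, scaled-immediate, and register-offset
/// forms based on the simplified address.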
1746Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1747 bool WantZExt, MachineMemOperand *MMO) {
1748 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1749 return Register();
1750
1751 // Simplify this down to something we can handle.
1752 if (!simplifyAddress(Addr, VT))
1753 return Register();
1754
1755 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1756 if (!ScaleFactor)
1757 llvm_unreachable("Unexpected value type.");
1758
1759 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1760 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1761 bool UseScaled = true;
1762 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1763 UseScaled = false;
1764 ScaleFactor = 1;
1765 }
1766
1767 static const unsigned GPOpcTable[2][8][4] = {
1768 // Sign-extend.
1769 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1770 AArch64::LDURXi },
1771 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1772 AArch64::LDURXi },
1773 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1774 AArch64::LDRXui },
1775 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1776 AArch64::LDRXui },
1777 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1778 AArch64::LDRXroX },
1779 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1780 AArch64::LDRXroX },
1781 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1782 AArch64::LDRXroW },
1783 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1784 AArch64::LDRXroW }
1785 },
1786 // Zero-extend.
1787 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1788 AArch64::LDURXi },
1789 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790 AArch64::LDURXi },
1791 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1792 AArch64::LDRXui },
1793 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794 AArch64::LDRXui },
1795 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1796 AArch64::LDRXroX },
1797 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798 AArch64::LDRXroX },
1799 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1800 AArch64::LDRXroW },
1801 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802 AArch64::LDRXroW }
1803 }
1804 };
1805
1806 static const unsigned FPOpcTable[4][2] = {
1807 { AArch64::LDURSi, AArch64::LDURDi },
1808 { AArch64::LDRSui, AArch64::LDRDui },
1809 { AArch64::LDRSroX, AArch64::LDRDroX },
1810 { AArch64::LDRSroW, AArch64::LDRDroW }
1811 };
1812
1813 unsigned Opc;
1814 const TargetRegisterClass *RC;
1815 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1816 Addr.getOffsetReg();
1817 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1818 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1819 Addr.getExtendType() == AArch64_AM::SXTW)
1820 Idx++;
1821
1822 bool IsRet64Bit = RetVT == MVT::i64;
1823 switch (VT.SimpleTy) {
1824 default:
1825 llvm_unreachable("Unexpected value type.");
1826 case MVT::i1: // Intentional fall-through.
1827 case MVT::i8:
1828 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1829 RC = (IsRet64Bit && !WantZExt) ?
1830 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1831 break;
1832 case MVT::i16:
1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1834 RC = (IsRet64Bit && !WantZExt) ?
1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836 break;
1837 case MVT::i32:
1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1839 RC = (IsRet64Bit && !WantZExt) ?
1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841 break;
1842 case MVT::i64:
1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1844 RC = &AArch64::GPR64RegClass;
1845 break;
1846 case MVT::f32:
1847 Opc = FPOpcTable[Idx][0];
1848 RC = &AArch64::FPR32RegClass;
1849 break;
1850 case MVT::f64:
1851 Opc = FPOpcTable[Idx][1];
1852 RC = &AArch64::FPR64RegClass;
1853 break;
1854 }
1855
1856 // Create the base instruction, then add the operands.
1857 Register ResultReg = createResultReg(RC);
1858 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1859 TII.get(Opc), ResultReg);
1860 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1861
1862 // Loading an i1 requires special handling.
1863 if (VT == MVT::i1) {
1864 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1865 assert(ANDReg && "Unexpected AND instruction emission failure.");
1866 ResultReg = ANDReg;
1867 }
1868
1869 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1870 // the 32bit reg to a 64bit reg.
1871 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1872 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1873 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1874 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1875 .addReg(ResultReg, getKillRegState(true))
1876 .addImm(AArch64::sub_32);
1877 ResultReg = Reg64;
1878 }
1879 return ResultReg;
1880}
1881
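/// Select an IR add/sub. Vector cases are forwarded to the generic operator
/// selection path.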
1882bool AArch64FastISel::selectAddSub(const Instruction *I) {
1883 MVT VT;
1884 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1885 return false;
1886
1887 if (VT.isVector())
1888 return selectOperator(I, I->getOpcode());
1889
1890 Register ResultReg;
1891 switch (I->getOpcode()) {
1892 default:
1893 llvm_unreachable("Unexpected instruction.");
1894 case Instruction::Add:
1895 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1896 break;
1897 case Instruction::Sub:
1898 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1899 break;
1900 }
1901 if (!ResultReg)
1902 return false;
1903
1904 updateValueMap(I, ResultReg);
1905 return true;
1906}
1907
1908bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1909 MVT VT;
1910 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1911 return false;
1912
1913 if (VT.isVector())
1914 return selectOperator(I, I->getOpcode());
1915
1916 Register ResultReg;
1917 switch (I->getOpcode()) {
1918 default:
1919 llvm_unreachable("Unexpected instruction.");
1920 case Instruction::And:
1921 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1922 break;
1923 case Instruction::Or:
1924 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1925 break;
1926 case Instruction::Xor:
1927 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 }
1930 if (!ResultReg)
1931 return false;
1932
1933 updateValueMap(I, ResultReg);
1934 return true;
1935}
1936
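/// Select an IR load, folding a single sign-/zero-extend user into the load
/// where possible and removing any extend code that was already emitted.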
1937bool AArch64FastISel::selectLoad(const Instruction *I) {
1938 MVT VT;
1939 // Verify we have a legal type before going any further. Currently, we handle
1940 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1941 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1942 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1943 cast<LoadInst>(I)->isAtomic())
1944 return false;
1945
1946 const Value *SV = I->getOperand(0);
1947 if (TLI.supportSwiftError()) {
1948 // Swifterror values can come from either a function parameter with
1949 // swifterror attribute or an alloca with swifterror attribute.
1950 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1951 if (Arg->hasSwiftErrorAttr())
1952 return false;
1953 }
1954
1955 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1956 if (Alloca->isSwiftError())
1957 return false;
1958 }
1959 }
1960
1961 // See if we can handle this address.
1962 Address Addr;
1963 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1964 return false;
1965
1966 // Fold the following sign-/zero-extend into the load instruction.
1967 bool WantZExt = true;
1968 MVT RetVT = VT;
1969 const Value *IntExtVal = nullptr;
1970 if (I->hasOneUse()) {
1971 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1972 if (isTypeSupported(ZE->getType(), RetVT))
1973 IntExtVal = ZE;
1974 else
1975 RetVT = VT;
1976 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1977 if (isTypeSupported(SE->getType(), RetVT))
1978 IntExtVal = SE;
1979 else
1980 RetVT = VT;
1981 WantZExt = false;
1982 }
1983 }
1984
1985 Register ResultReg =
1986 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1987 if (!ResultReg)
1988 return false;
1989
1990 // There are a few different cases we have to handle, because the load or the
1991 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1992 // SelectionDAG. There is also an ordering issue when both instructions are in
1993 // different basic blocks.
1994 // 1.) The load instruction is selected by FastISel, but the integer extend
1995 // not. This usually happens when the integer extend is in a different
1996 // basic block and SelectionDAG took over for that basic block.
1997 // 2.) The load instruction is selected before the integer extend. This only
1998 // happens when the integer extend is in a different basic block.
1999 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2000 // by FastISel. This happens if there are instructions between the load
2001 // and the integer extend that couldn't be selected by FastISel.
2002 if (IntExtVal) {
2003 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2004 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2005 // it when it selects the integer extend.
2006 Register Reg = lookUpRegForValue(IntExtVal);
2007 auto *MI = MRI.getUniqueVRegDef(Reg);
2008 if (!MI) {
2009 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2010 if (WantZExt) {
2011 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2012 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2013 ResultReg = std::prev(I)->getOperand(0).getReg();
2014 removeDeadCode(I, std::next(I));
2015 } else
2016 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2017 AArch64::sub_32);
2018 }
2019 updateValueMap(I, ResultReg);
2020 return true;
2021 }
2022
2023 // The integer extend has already been emitted - delete all the instructions
2024 // that have been emitted by the integer extend lowering code and use the
2025 // result from the load instruction directly.
2026 while (MI) {
2027 Reg = 0;
2028 for (auto &Opnd : MI->uses()) {
2029 if (Opnd.isReg()) {
2030 Reg = Opnd.getReg();
2031 break;
2032 }
2033 }
2034 MachineBasicBlock::iterator I(MI);
2035 removeDeadCode(I, std::next(I));
2036 MI = nullptr;
2037 if (Reg)
2038 MI = MRI.getUniqueVRegDef(Reg);
2039 }
2040 updateValueMap(IntExtVal, ResultReg);
2041 return true;
2042 }
2043
2044 updateValueMap(I, ResultReg);
2045 return true;
2046}
2047
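/// Emit a store-release (STLRB/STLRH/STLRW/STLRX) of SrcReg to the address in
/// AddrReg; used for atomic stores with release or stronger ordering.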
2048bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2049 Register AddrReg,
2050 MachineMemOperand *MMO) {
2051 unsigned Opc;
2052 switch (VT.SimpleTy) {
2053 default: return false;
2054 case MVT::i8: Opc = AArch64::STLRB; break;
2055 case MVT::i16: Opc = AArch64::STLRH; break;
2056 case MVT::i32: Opc = AArch64::STLRW; break;
2057 case MVT::i64: Opc = AArch64::STLRX; break;
2058 }
2059
2060 const MCInstrDesc &II = TII.get(Opc);
2061 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2062 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2063 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2064 .addReg(SrcReg)
2065 .addReg(AddrReg)
2066 .addMemOperand(MMO);
2067 return true;
2068}
2069
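/// Emit a store of SrcReg to the given address, choosing between the
/// unscaled-immediate, scaled-immediate, and register-offset store forms.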
2070bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2071 MachineMemOperand *MMO) {
2072 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2073 return false;
2074
2075 // Simplify this down to something we can handle.
2076 if (!simplifyAddress(Addr, VT))
2077 return false;
2078
2079 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2080 if (!ScaleFactor)
2081 llvm_unreachable("Unexpected value type.");
2082
2083 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2084 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2085 bool UseScaled = true;
2086 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2087 UseScaled = false;
2088 ScaleFactor = 1;
2089 }
2090
2091 static const unsigned OpcTable[4][6] = {
2092 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2093 AArch64::STURSi, AArch64::STURDi },
2094 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2095 AArch64::STRSui, AArch64::STRDui },
2096 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2097 AArch64::STRSroX, AArch64::STRDroX },
2098 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2099 AArch64::STRSroW, AArch64::STRDroW }
2100 };
2101
2102 unsigned Opc;
2103 bool VTIsi1 = false;
2104 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2105 Addr.getOffsetReg();
2106 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2107 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2108 Addr.getExtendType() == AArch64_AM::SXTW)
2109 Idx++;
2110
2111 switch (VT.SimpleTy) {
2112 default: llvm_unreachable("Unexpected value type.");
2113 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2114 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2115 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2116 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2117 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2118 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2119 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2120 }
2121
2122 // Storing an i1 requires special handling.
2123 if (VTIsi1 && SrcReg != AArch64::WZR) {
2124 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2125 assert(ANDReg && "Unexpected AND instruction emission failure.");
2126 SrcReg = ANDReg;
2127 }
2128 // Create the base instruction, then add the operands.
2129 const MCInstrDesc &II = TII.get(Opc);
2130 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2131 MachineInstrBuilder MIB =
2132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2133 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2134
2135 return true;
2136}
2137
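/// Select an IR store. Stores of zero use WZR/XZR directly, and release or
/// seq_cst atomic stores are lowered to a store-release.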
2138bool AArch64FastISel::selectStore(const Instruction *I) {
2139 MVT VT;
2140 const Value *Op0 = I->getOperand(0);
2141 // Verify we have a legal type before going any further. Currently, we handle
2142 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2143 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2144 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2145 return false;
2146
2147 const Value *PtrV = I->getOperand(1);
2148 if (TLI.supportSwiftError()) {
2149 // Swifterror values can come from either a function parameter with
2150 // swifterror attribute or an alloca with swifterror attribute.
2151 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2152 if (Arg->hasSwiftErrorAttr())
2153 return false;
2154 }
2155
2156 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2157 if (Alloca->isSwiftError())
2158 return false;
2159 }
2160 }
2161
2162 // Get the value to be stored into a register. Use the zero register directly
2163 // when possible to avoid an unnecessary copy and a wasted register.
2164 Register SrcReg;
2165 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2166 if (CI->isZero())
2167 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2168 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2169 if (CF->isZero() && !CF->isNegative()) {
2170 VT = MVT::getIntegerVT(VT.getSizeInBits());
2171 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172 }
2173 }
2174
2175 if (!SrcReg)
2176 SrcReg = getRegForValue(Op0);
2177
2178 if (!SrcReg)
2179 return false;
2180
2181 auto *SI = cast<StoreInst>(I);
2182
2183 // Try to emit a STLR for seq_cst/release.
2184 if (SI->isAtomic()) {
2185 AtomicOrdering Ord = SI->getOrdering();
2186 // The non-atomic instructions are sufficient for relaxed stores.
2187 if (isReleaseOrStronger(Ord)) {
2188 // The STLR addressing mode only supports a base reg; pass that directly.
2189 Register AddrReg = getRegForValue(PtrV);
2190 if (!AddrReg)
2191 return false;
2192 return emitStoreRelease(VT, SrcReg, AddrReg,
2193 createMachineMemOperandFor(I));
2194 }
2195 }
2196
2197 // See if we can handle this address.
2198 Address Addr;
2199 if (!computeAddress(PtrV, Addr, Op0->getType()))
2200 return false;
2201
2202 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2203 return false;
2204 return true;
2205}
2206
2207 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2208 switch (Pred) {
2209 case CmpInst::FCMP_ONE:
2210 case CmpInst::FCMP_UEQ:
2211 default:
2212 // AL is our "false" for now. The other two need more compares.
2213 return AArch64CC::AL;
2214 case CmpInst::ICMP_EQ:
2215 case CmpInst::FCMP_OEQ:
2216 return AArch64CC::EQ;
2217 case CmpInst::ICMP_SGT:
2218 case CmpInst::FCMP_OGT:
2219 return AArch64CC::GT;
2220 case CmpInst::ICMP_SGE:
2221 case CmpInst::FCMP_OGE:
2222 return AArch64CC::GE;
2223 case CmpInst::ICMP_UGT:
2224 case CmpInst::FCMP_UGT:
2225 return AArch64CC::HI;
2226 case CmpInst::FCMP_OLT:
2227 return AArch64CC::MI;
2228 case CmpInst::ICMP_ULE:
2229 case CmpInst::FCMP_OLE:
2230 return AArch64CC::LS;
2231 case CmpInst::FCMP_ORD:
2232 return AArch64CC::VC;
2233 case CmpInst::FCMP_UNO:
2234 return AArch64CC::VS;
2235 case CmpInst::FCMP_UGE:
2236 return AArch64CC::PL;
2237 case CmpInst::ICMP_SLT:
2238 case CmpInst::FCMP_ULT:
2239 return AArch64CC::LT;
2240 case CmpInst::ICMP_SLE:
2241 case CmpInst::FCMP_ULE:
2242 return AArch64CC::LE;
2243 case CmpInst::FCMP_UNE:
2244 case CmpInst::ICMP_NE:
2245 return AArch64CC::NE;
2246 case CmpInst::ICMP_UGE:
2247 return AArch64CC::HS;
2248 case CmpInst::ICMP_ULT:
2249 return AArch64CC::LO;
2250 }
2251}
2252
2253/// Try to emit a combined compare-and-branch instruction.
2254bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2255 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2256 // will not be produced, as they are conditional branch instructions that do
2257 // not set flags.
2258 if (FuncInfo.MF->getFunction().hasFnAttribute(
2259 Attribute::SpeculativeLoadHardening))
2260 return false;
2261
2262 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2263 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2264 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2265
2266 const Value *LHS = CI->getOperand(0);
2267 const Value *RHS = CI->getOperand(1);
2268
2269 MVT VT;
2270 if (!isTypeSupported(LHS->getType(), VT))
2271 return false;
2272
2273 unsigned BW = VT.getSizeInBits();
2274 if (BW > 64)
2275 return false;
2276
2277 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2278 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2279
2280 // Try to take advantage of fallthrough opportunities.
2281 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2282 std::swap(TBB, FBB);
2283 Predicate = CmpInst::getInversePredicate(Predicate);
2284 }
2285
2286 int TestBit = -1;
2287 bool IsCmpNE;
2288 switch (Predicate) {
2289 default:
2290 return false;
2291 case CmpInst::ICMP_EQ:
2292 case CmpInst::ICMP_NE:
2293 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2294 std::swap(LHS, RHS);
2295
2296 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2297 return false;
2298
2299 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2300 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2301 const Value *AndLHS = AI->getOperand(0);
2302 const Value *AndRHS = AI->getOperand(1);
2303
2304 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2305 if (C->getValue().isPowerOf2())
2306 std::swap(AndLHS, AndRHS);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2309 if (C->getValue().isPowerOf2()) {
2310 TestBit = C->getValue().logBase2();
2311 LHS = AndLHS;
2312 }
2313 }
2314
2315 if (VT == MVT::i1)
2316 TestBit = 0;
2317
2318 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2319 break;
2320 case CmpInst::ICMP_SLT:
2321 case CmpInst::ICMP_SGE:
2322 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2323 return false;
2324
2325 TestBit = BW - 1;
2326 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2327 break;
2328 case CmpInst::ICMP_SGT:
2329 case CmpInst::ICMP_SLE:
2330 if (!isa<ConstantInt>(RHS))
2331 return false;
2332
2333 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2334 return false;
2335
2336 TestBit = BW - 1;
2337 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2338 break;
2339 } // end switch
2340
2341 static const unsigned OpcTable[2][2][2] = {
2342 { {AArch64::CBZW, AArch64::CBZX },
2343 {AArch64::CBNZW, AArch64::CBNZX} },
2344 { {AArch64::TBZW, AArch64::TBZX },
2345 {AArch64::TBNZW, AArch64::TBNZX} }
2346 };
2347
2348 bool IsBitTest = TestBit != -1;
2349 bool Is64Bit = BW == 64;
2350 if (TestBit < 32 && TestBit >= 0)
2351 Is64Bit = false;
2352
2353 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2354 const MCInstrDesc &II = TII.get(Opc);
2355
2356 Register SrcReg = getRegForValue(LHS);
2357 if (!SrcReg)
2358 return false;
2359
2360 if (BW == 64 && !Is64Bit)
2361 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2362
2363 if ((BW < 32) && !IsBitTest)
2364 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2365
2366 // Emit the combined compare and branch instruction.
2367 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2368 MachineInstrBuilder MIB =
2369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2370 .addReg(SrcReg);
2371 if (IsBitTest)
2372 MIB.addImm(TestBit);
2373 MIB.addMBB(TBB);
2374
2375 finishCondBranch(BI->getParent(), TBB, FBB);
2376 return true;
2377}
2378
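/// Select a conditional or unconditional branch, preferring a fused
/// compare-and-branch (CB(N)Z/TB(N)Z) or a flag-reusing B.cc when the
/// condition allows it.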
2379bool AArch64FastISel::selectBranch(const Instruction *I) {
2380 const BranchInst *BI = cast<BranchInst>(I);
2381 if (BI->isUnconditional()) {
2382 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2383 fastEmitBranch(MSucc, BI->getDebugLoc());
2384 return true;
2385 }
2386
2387 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2388 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2389
2390 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2391 if (CI->hasOneUse() && isValueAvailable(CI)) {
2392 // Try to optimize or fold the cmp.
2393 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2394 switch (Predicate) {
2395 default:
2396 break;
2397 case CmpInst::FCMP_FALSE:
2398 fastEmitBranch(FBB, MIMD.getDL());
2399 return true;
2400 case CmpInst::FCMP_TRUE:
2401 fastEmitBranch(TBB, MIMD.getDL());
2402 return true;
2403 }
2404
2405 // Try to emit a combined compare-and-branch first.
2406 if (emitCompareAndBranch(BI))
2407 return true;
2408
2409 // Try to take advantage of fallthrough opportunities.
2410 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2411 std::swap(TBB, FBB);
2412 Predicate = CmpInst::getInversePredicate(Predicate);
2413 }
2414
2415 // Emit the cmp.
2416 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2417 return false;
2418
2419 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2420 // instruction.
2421 AArch64CC::CondCode CC = getCompareCC(Predicate);
2422 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2423 switch (Predicate) {
2424 default:
2425 break;
2426 case CmpInst::FCMP_UEQ:
2427 ExtraCC = AArch64CC::EQ;
2428 CC = AArch64CC::VS;
2429 break;
2430 case CmpInst::FCMP_ONE:
2431 ExtraCC = AArch64CC::MI;
2432 CC = AArch64CC::GT;
2433 break;
2434 }
2435 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2436
2437 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2438 if (ExtraCC != AArch64CC::AL) {
2439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2440 .addImm(ExtraCC)
2441 .addMBB(TBB);
2442 }
2443
2444 // Emit the branch.
2445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2446 .addImm(CC)
2447 .addMBB(TBB);
2448
2449 finishCondBranch(BI->getParent(), TBB, FBB);
2450 return true;
2451 }
2452 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2453 uint64_t Imm = CI->getZExtValue();
2454 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2456 .addMBB(Target);
2457
2458 // Obtain the branch probability and add the target to the successor list.
2459 if (FuncInfo.BPI) {
2460 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2461 BI->getParent(), Target->getBasicBlock());
2462 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2463 } else
2464 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2465 return true;
2466 } else {
2467 AArch64CC::CondCode CC = AArch64CC::NE;
2468 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2469 // Fake request the condition, otherwise the intrinsic might be completely
2470 // optimized away.
2471 Register CondReg = getRegForValue(BI->getCondition());
2472 if (!CondReg)
2473 return false;
2474
2475 // Emit the branch.
2476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2477 .addImm(CC)
2478 .addMBB(TBB);
2479
2480 finishCondBranch(BI->getParent(), TBB, FBB);
2481 return true;
2482 }
2483 }
2484
2485 Register CondReg = getRegForValue(BI->getCondition());
2486 if (!CondReg)
2487 return false;
2488
2489 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2490 unsigned Opcode = AArch64::TBNZW;
2491 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492 std::swap(TBB, FBB);
2493 Opcode = AArch64::TBZW;
2494 }
2495
2496 const MCInstrDesc &II = TII.get(Opcode);
2497 Register ConstrainedCondReg
2498 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2500 .addReg(ConstrainedCondReg)
2501 .addImm(0)
2502 .addMBB(TBB);
2503
2504 finishCondBranch(BI->getParent(), TBB, FBB);
2505 return true;
2506}
2507
2508bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510 Register AddrReg = getRegForValue(BI->getOperand(0));
2511 if (!AddrReg)
2512 return false;
2513
2514 // Authenticated indirectbr is not implemented yet.
2515 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2526
2527 return true;
2528}
2529
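/// Select an integer or floating-point compare, materializing the i1 result
/// with CSINC (two CSINCs for FCMP_UEQ/FCMP_ONE, which need two condition
/// codes).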
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 Register ResultReg;
2540 switch (Predicate) {
2541 default:
2542 break;
2543 case CmpInst::FCMP_FALSE:
2544 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566 // condition codes are inverted, because they are used by CSINC.
2567 static unsigned CondCodeTable[2][2] = {
2568 { AArch64CC::NE, AArch64CC::VC },
2569 { AArch64CC::PL, AArch64CC::LE }
2570 };
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
2601 AArch64CC::CondCode CC = getCompareCC(Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
2609
2610 updateValueMap(I, ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(SI, ResultReg);
2665 return true;
2666}
2667
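/// Select an IR select into CSEL/FCSEL, reusing the flags from a foldable
/// compare or overflow intrinsic when possible; otherwise the condition is
/// tested with ANDS against #1.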
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(I);
2701 const Value *Cond = SI->getCondition();
2702 AArch64CC::CondCode CC = AArch64CC::NE;
2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708 // Try to pickup the flags, so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710 // Fake request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2715 isValueAvailable(Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723 case CmpInst::FCMP_FALSE:
2724 FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, CondReg, 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772 }
2773
2774 Register Src1Reg = getRegForValue(SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2784 updateValueMap(I, ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(I->getOperand(0));
2827 if (!SrcReg)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2849 .addReg(SrcReg);
2850 updateValueMap(I, ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2857 return false;
2858 // Let regular ISEL handle FP16
2859 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(I->getOperand(0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2893 updateValueMap(I, ResultReg);
2894 return true;
2895}
2896
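/// Lower simple formal arguments directly to their ABI registers: up to eight
/// GPR and eight FPR/SIMD arguments for the C and Swift calling conventions.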
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906 if (CC != CallingConv::C && CC != CallingConv::Swift)
2907 return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3002 TII.get(TargetOpcode::COPY), ResultReg)
3003 .addReg(DstReg, getKillRegState(true));
3004 updateValueMap(&Arg, ResultReg);
3005 }
3006 return true;
3007}
3008
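/// Lower outgoing call arguments: run the calling-convention analysis, emit
/// CALLSEQ_START, extend values as required, and copy or store each argument
/// to its assigned register or stack slot.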
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 SmallVectorImpl<Type *> &OrigTys,
3012 unsigned &NumBytes) {
3013 CallingConv::ID CC = CLI.CallConv;
3014 SmallVector<CCValAssign, 16> ArgLocs;
3015 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3016 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3017 CCAssignFnForCall(CC));
3018
3019 // Get a count of how many bytes are to be pushed on the stack.
3020 NumBytes = CCInfo.getStackSize();
3021
3022 // Issue CALLSEQ_START
3023 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3024 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3025 .addImm(NumBytes).addImm(0);
3026
3027 // Process the args.
3028 for (CCValAssign &VA : ArgLocs) {
3029 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3030 MVT ArgVT = OutVTs[VA.getValNo()];
3031
3032 Register ArgReg = getRegForValue(ArgVal);
3033 if (!ArgReg)
3034 return false;
3035
3036 // Handle arg promotion: SExt, ZExt, AExt.
3037 switch (VA.getLocInfo()) {
3038 case CCValAssign::Full:
3039 break;
3040 case CCValAssign::SExt: {
3041 MVT DestVT = VA.getLocVT();
3042 MVT SrcVT = ArgVT;
3043 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3044 if (!ArgReg)
3045 return false;
3046 break;
3047 }
3048 case CCValAssign::AExt:
3049 // Intentional fall-through.
3050 case CCValAssign::ZExt: {
3051 MVT DestVT = VA.getLocVT();
3052 MVT SrcVT = ArgVT;
3053 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3054 if (!ArgReg)
3055 return false;
3056 break;
3057 }
3058 default:
3059 llvm_unreachable("Unknown arg promotion!");
3060 }
3061
3062 // Now copy/store arg to correct locations.
3063 if (VA.isRegLoc() && !VA.needsCustom()) {
3064 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3065 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3066 CLI.OutRegs.push_back(VA.getLocReg());
3067 } else if (VA.needsCustom()) {
3068 // FIXME: Handle custom args.
3069 return false;
3070 } else {
3071 assert(VA.isMemLoc() && "Assuming store on stack.");
3072
3073 // Don't emit stores for undef values.
3074 if (isa<UndefValue>(ArgVal))
3075 continue;
3076
3077 // Need to store on the stack.
3078 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3079
3080 unsigned BEAlign = 0;
3081 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3082 BEAlign = 8 - ArgSize;
3083
3084 Address Addr;
3085 Addr.setKind(Address::RegBase);
3086 Addr.setReg(AArch64::SP);
3087 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3088
3089 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3090 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3091 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3092 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3093
3094 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3095 return false;
3096 }
3097 }
3098 return true;
3099}
3100
3101bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3102 CallingConv::ID CC = CLI.CallConv;
3103
3104 // Issue CALLSEQ_END
3105 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3106 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3107 .addImm(NumBytes).addImm(0);
3108
3109 // Now the return values.
3110 SmallVector<CCValAssign, 16> RVLocs;
3111 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3112 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3113
3114 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3115 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3116 CCValAssign &VA = RVLocs[i];
3117 MVT CopyVT = VA.getValVT();
3118 Register CopyReg = ResultReg + i;
3119
3120 // TODO: Handle big-endian results
3121 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3122 return false;
3123
3124 // Copy result out of their specified physreg.
3125 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3126 CopyReg)
3127 .addReg(VA.getLocReg());
3128 CLI.InRegs.push_back(VA.getLocReg());
3129 }
3130
3131 CLI.ResultReg = ResultReg;
3132 CLI.NumResultRegs = RVLocs.size();
3133
3134 return true;
3135}
3136
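/// Lower a direct or indirect call. Cases such as tail calls, varargs, ILP32,
/// KCFI-checked indirect calls, and large-code-model ELF are left to
/// SelectionDAG.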
3137bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3138 CallingConv::ID CC = CLI.CallConv;
3139 bool IsTailCall = CLI.IsTailCall;
3140 bool IsVarArg = CLI.IsVarArg;
3141 const Value *Callee = CLI.Callee;
3142 MCSymbol *Symbol = CLI.Symbol;
3143
3144 if (!Callee && !Symbol)
3145 return false;
3146
3147 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3148 // a bti instruction following the call.
3149 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3150 !Subtarget->noBTIAtReturnTwice() &&
3151 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3152 return false;
3153
3154 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3155 if (CLI.CB && CLI.CB->isIndirectCall() &&
3156 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3157 return false;
3158
3159 // Allow SelectionDAG isel to handle tail calls.
3160 if (IsTailCall)
3161 return false;
3162
3163 // FIXME: we could and should support this, but for now correctness at -O0 is
3164 // more important.
3165 if (Subtarget->isTargetILP32())
3166 return false;
3167
3168 CodeModel::Model CM = TM.getCodeModel();
3169 // Only support the small-addressing and large code models.
3170 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171 return false;
3172
3173 // FIXME: Add large code model support for ELF.
3174 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175 return false;
3176
3177 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3178 // attribute. Check "RtLibUseGOT" instead.
3179 if (MF->getFunction().getParent()->getRtLibUseGOT())
3180 return false;
3181
3182 // Let SDISel handle vararg functions.
3183 if (IsVarArg)
3184 return false;
3185
3186 if (Subtarget->isWindowsArm64EC())
3187 return false;
3188
3189 for (auto Flag : CLI.OutFlags)
3190 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3191 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3192 return false;
3193
3194 // Set up the argument vectors.
3195 SmallVector<MVT, 16> OutVTs;
3196 SmallVector<Type *, 16> OrigTys;
3197 OutVTs.reserve(CLI.OutVals.size());
3198
3199 for (auto *Val : CLI.OutVals) {
3200 MVT VT;
3201 if (!isTypeLegal(Val->getType(), VT) &&
3202 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3203 return false;
3204
3205 // We don't handle vector parameters yet.
3206 if (VT.isVector() || VT.getSizeInBits() > 64)
3207 return false;
3208
3209 OutVTs.push_back(VT);
3210 OrigTys.push_back(Val->getType());
3211 }
3212
3213 Address Addr;
3214 if (Callee && !computeCallAddress(Callee, Addr))
3215 return false;
3216
3217 // The weak function target may be zero; in that case we must use indirect
3218 // addressing via a stub on windows as it may be out of range for a
3219 // PC-relative jump.
3220 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3221 Addr.getGlobalValue()->hasExternalWeakLinkage())
3222 return false;
3223
3224 // Handle the arguments now that we've gotten them.
3225 unsigned NumBytes;
3226 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3227 return false;
3228
3229 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3230 if (RegInfo->isAnyArgRegReserved(*MF))
3231 RegInfo->emitReservedArgRegCallError(*MF);
3232
3233 // Issue the call.
3234 MachineInstrBuilder MIB;
3235 if (Subtarget->useSmallAddressing()) {
3236 const MCInstrDesc &II =
3237 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3238 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3239 if (Symbol)
3240 MIB.addSym(Symbol, 0);
3241 else if (Addr.getGlobalValue())
3242 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3243 else if (Addr.getReg()) {
3244 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3245 MIB.addReg(Reg);
3246 } else
3247 return false;
3248 } else {
3249 Register CallReg;
3250 if (Symbol) {
3251 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3252 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3253 ADRPReg)
3254 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3255
3256 CallReg = createResultReg(&AArch64::GPR64RegClass);
3257 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3258 TII.get(AArch64::LDRXui), CallReg)
3259 .addReg(ADRPReg)
3260 .addSym(Symbol,
3261 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3262 } else if (Addr.getGlobalValue())
3263 CallReg = materializeGV(Addr.getGlobalValue());
3264 else if (Addr.getReg())
3265 CallReg = Addr.getReg();
3266
3267 if (!CallReg)
3268 return false;
3269
3270 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3271 CallReg = constrainOperandRegClass(II, CallReg, 0);
3272 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3273 }
3274
3275 // Add implicit physical register uses to the call.
3276 for (auto Reg : CLI.OutRegs)
3277 MIB.addReg(Reg, RegState::Implicit);
3278
3279 // Add a register mask with the call-preserved registers.
3280 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3281 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3282
3283 CLI.Call = MIB;
3284
3285 // Finish off the call including any return values.
3286 return finishCall(CLI, NumBytes);
3287}
3288
3289bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3290 if (Alignment)
3291 return Len / Alignment->value() <= 4;
3292 else
3293 return Len < 32;
3294}
3295
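/// Inline a small memcpy as a sequence of loads and stores, using the widest
/// access size the remaining length and alignment allow.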
3296bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3297 uint64_t Len, MaybeAlign Alignment) {
3298 // Make sure we don't bloat code by inlining very large memcpy's.
3299 if (!isMemCpySmall(Len, Alignment))
3300 return false;
3301
3302 int64_t UnscaledOffset = 0;
3303 Address OrigDest = Dest;
3304 Address OrigSrc = Src;
3305
3306 while (Len) {
3307 MVT VT;
3308 if (!Alignment || *Alignment >= 8) {
3309 if (Len >= 8)
3310 VT = MVT::i64;
3311 else if (Len >= 4)
3312 VT = MVT::i32;
3313 else if (Len >= 2)
3314 VT = MVT::i16;
3315 else {
3316 VT = MVT::i8;
3317 }
3318 } else {
3319 assert(Alignment && "Alignment is set in this branch");
3320 // Bound based on alignment.
3321 if (Len >= 4 && *Alignment == 4)
3322 VT = MVT::i32;
3323 else if (Len >= 2 && *Alignment == 2)
3324 VT = MVT::i16;
3325 else {
3326 VT = MVT::i8;
3327 }
3328 }
3329
3330 Register ResultReg = emitLoad(VT, VT, Src);
3331 if (!ResultReg)
3332 return false;
3333
3334 if (!emitStore(VT, ResultReg, Dest))
3335 return false;
3336
3337 int64_t Size = VT.getSizeInBits() / 8;
3338 Len -= Size;
3339 UnscaledOffset += Size;
3340
3341 // We need to recompute the unscaled offset for each iteration.
3342 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3343 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3344 }
3345
3346 return true;
3347}
3348
3349/// Check if it is possible to fold the condition from the XALU intrinsic
3350/// into the user. The condition code will only be updated on success.
3351bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3352 const Instruction *I,
3353 const Value *Cond) {
3354 if (!isa<ExtractValueInst>(Cond))
3355 return false;
3356
3357 const auto *EV = cast<ExtractValueInst>(Cond);
3358 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3359 return false;
3360
3361 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3362 MVT RetVT;
3363 const Function *Callee = II->getCalledFunction();
3364 Type *RetTy =
3365 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3366 if (!isTypeLegal(RetTy, RetVT))
3367 return false;
3368
3369 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3370 return false;
3371
3372 const Value *LHS = II->getArgOperand(0);
3373 const Value *RHS = II->getArgOperand(1);
3374
3375 // Canonicalize immediate to the RHS.
3376 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3377 std::swap(LHS, RHS);
3378
3379 // Simplify multiplies.
3380 Intrinsic::ID IID = II->getIntrinsicID();
3381 switch (IID) {
3382 default:
3383 break;
3384 case Intrinsic::smul_with_overflow:
3385 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3386 if (C->getValue() == 2)
3387 IID = Intrinsic::sadd_with_overflow;
3388 break;
3389 case Intrinsic::umul_with_overflow:
3390 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3391 if (C->getValue() == 2)
3392 IID = Intrinsic::uadd_with_overflow;
3393 break;
3394 }
3395
3396 AArch64CC::CondCode TmpCC;
3397 switch (IID) {
3398 default:
3399 return false;
3400 case Intrinsic::sadd_with_overflow:
3401 case Intrinsic::ssub_with_overflow:
3402 TmpCC = AArch64CC::VS;
3403 break;
3404 case Intrinsic::uadd_with_overflow:
3405 TmpCC = AArch64CC::HS;
3406 break;
3407 case Intrinsic::usub_with_overflow:
3408 TmpCC = AArch64CC::LO;
3409 break;
3410 case Intrinsic::smul_with_overflow:
3411 case Intrinsic::umul_with_overflow:
3412 TmpCC = AArch64CC::NE;
3413 break;
3414 }
3415
3416 // Check if both instructions are in the same basic block.
3417 if (!isValueAvailable(II))
3418 return false;
3419
3420 // Make sure nothing is in the way
3421 BasicBlock::const_iterator Start(I);
3422 BasicBlock::const_iterator End(II);
3423 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3424 // We only expect extractvalue instructions between the intrinsic and the
3425 // instruction to be selected.
3426 if (!isa<ExtractValueInst>(Itr))
3427 return false;
3428
3429 // Check that the extractvalue operand comes from the intrinsic.
3430 const auto *EVI = cast<ExtractValueInst>(Itr);
3431 if (EVI->getAggregateOperand() != II)
3432 return false;
3433 }
3434
3435 CC = TmpCC;
3436 return true;
3437}
3438
3439bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3440 // FIXME: Handle more intrinsics.
3441 switch (II->getIntrinsicID()) {
3442 default: return false;
3443 case Intrinsic::frameaddress: {
3444 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3445 MFI.setFrameAddressIsTaken(true);
3446
3447 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3448 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3449 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3451 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3452 // Recursively load frame address
3453 // ldr x0, [fp]
3454 // ldr x0, [x0]
3455 // ldr x0, [x0]
3456 // ...
3457 Register DestReg;
3458 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3459 while (Depth--) {
3460 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3461 SrcReg, 0);
3462 assert(DestReg && "Unexpected LDR instruction emission failure.");
3463 SrcReg = DestReg;
3464 }
3465
3466 updateValueMap(II, SrcReg);
3467 return true;
3468 }
3469 case Intrinsic::sponentry: {
3470 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3471
3472 // SP = FP + Fixed Object + 16
3473 int FI = MFI.CreateFixedObject(4, 0, false);
3474 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3475 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3476 TII.get(AArch64::ADDXri), ResultReg)
3477 .addFrameIndex(FI)
3478 .addImm(0)
3479 .addImm(0);
3480
3481 updateValueMap(II, ResultReg);
3482 return true;
3483 }
3484 case Intrinsic::memcpy:
3485 case Intrinsic::memmove: {
3486 const auto *MTI = cast<MemTransferInst>(II);
3487 // Don't handle volatile.
3488 if (MTI->isVolatile())
3489 return false;
3490
3491 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3492 // we would emit dead code because we don't currently handle memmoves.
3493 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3494 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3495 // Small memcpys are common enough that we want to do them without a call
3496 // if possible.
3497 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3498 MaybeAlign Alignment;
3499 if (MTI->getDestAlign() || MTI->getSourceAlign())
3500 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3501 MTI->getSourceAlign().valueOrOne());
3502 if (isMemCpySmall(Len, Alignment)) {
3503 Address Dest, Src;
3504 if (!computeAddress(MTI->getRawDest(), Dest) ||
3505 !computeAddress(MTI->getRawSource(), Src))
3506 return false;
3507 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3508 return true;
3509 }
3510 }
3511
3512 if (!MTI->getLength()->getType()->isIntegerTy(64))
3513 return false;
3514
3515 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3516 // Fast instruction selection doesn't support the special
3517 // address spaces.
3518 return false;
3519
3520 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3521 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3522 }
3523 case Intrinsic::memset: {
3524 const MemSetInst *MSI = cast<MemSetInst>(II);
3525 // Don't handle volatile.
3526 if (MSI->isVolatile())
3527 return false;
3528
3529 if (!MSI->getLength()->getType()->isIntegerTy(64))
3530 return false;
3531
3532 if (MSI->getDestAddressSpace() > 255)
3533 // Fast instruction selection doesn't support the special
3534 // address spaces.
3535 return false;
3536
3537 return lowerCallTo(II, "memset", II->arg_size() - 1);
3538 }
3539 case Intrinsic::sin:
3540 case Intrinsic::cos:
3541 case Intrinsic::tan:
3542 case Intrinsic::pow: {
3543 MVT RetVT;
3544 if (!isTypeLegal(II->getType(), RetVT))
3545 return false;
3546
3547 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3548 return false;
3549
3550 static const RTLIB::Libcall LibCallTable[4][2] = {
3551 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3552 {RTLIB::COS_F32, RTLIB::COS_F64},
3553 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3554 {RTLIB::POW_F32, RTLIB::POW_F64}};
3555 RTLIB::Libcall LC;
3556 bool Is64Bit = RetVT == MVT::f64;
3557 switch (II->getIntrinsicID()) {
3558 default:
3559 llvm_unreachable("Unexpected intrinsic.");
3560 case Intrinsic::sin:
3561 LC = LibCallTable[0][Is64Bit];
3562 break;
3563 case Intrinsic::cos:
3564 LC = LibCallTable[1][Is64Bit];
3565 break;
3566 case Intrinsic::tan:
3567 LC = LibCallTable[2][Is64Bit];
3568 break;
3569 case Intrinsic::pow:
3570 LC = LibCallTable[3][Is64Bit];
3571 break;
3572 }
3573
3574 ArgListTy Args;
3575 Args.reserve(II->arg_size());
3576
3577 // Populate the argument list.
3578 for (auto &Arg : II->args())
3579 Args.emplace_back(Arg);
3580
3581 CallLoweringInfo CLI;
3582 MCContext &Ctx = MF->getContext();
3583
3584 RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(LC);
3585 if (LCImpl == RTLIB::Unsupported)
3586 return false;
3587
3588 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
3589 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
3590 CLI.setCallee(DL, Ctx, CC, II->getType(), FuncName, std::move(Args));
3591 if (!lowerCallTo(CLI))
3592 return false;
3593 updateValueMap(II, CLI.ResultReg);
3594 return true;
3595 }
3596 case Intrinsic::fabs: {
3597 MVT VT;
3598 if (!isTypeLegal(II->getType(), VT))
3599 return false;
3600
3601 unsigned Opc;
3602 switch (VT.SimpleTy) {
3603 default:
3604 return false;
3605 case MVT::f32:
3606 Opc = AArch64::FABSSr;
3607 break;
3608 case MVT::f64:
3609 Opc = AArch64::FABSDr;
3610 break;
3611 }
3612 Register SrcReg = getRegForValue(II->getOperand(0));
3613 if (!SrcReg)
3614 return false;
3615 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3616 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3617 .addReg(SrcReg);
3618 updateValueMap(II, ResultReg);
3619 return true;
3620 }
3621 case Intrinsic::trap:
3622 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3623 .addImm(1);
3624 return true;
3625 case Intrinsic::debugtrap:
3626 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3627 .addImm(0xF000);
3628 return true;
3629
3630 case Intrinsic::sqrt: {
3631 Type *RetTy = II->getCalledFunction()->getReturnType();
3632
3633 MVT VT;
3634 if (!isTypeLegal(RetTy, VT))
3635 return false;
3636
3637 Register Op0Reg = getRegForValue(II->getOperand(0));
3638 if (!Op0Reg)
3639 return false;
3640
3641 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3642 if (!ResultReg)
3643 return false;
3644
3645 updateValueMap(II, ResultReg);
3646 return true;
3647 }
3648 case Intrinsic::sadd_with_overflow:
3649 case Intrinsic::uadd_with_overflow:
3650 case Intrinsic::ssub_with_overflow:
3651 case Intrinsic::usub_with_overflow:
3652 case Intrinsic::smul_with_overflow:
3653 case Intrinsic::umul_with_overflow: {
3654 // This implements the basic lowering of the xalu with overflow intrinsics.
3655 const Function *Callee = II->getCalledFunction();
3656 auto *Ty = cast<StructType>(Callee->getReturnType());
3657 Type *RetTy = Ty->getTypeAtIndex(0U);
3658
3659 MVT VT;
3660 if (!isTypeLegal(RetTy, VT))
3661 return false;
3662
3663 if (VT != MVT::i32 && VT != MVT::i64)
3664 return false;
3665
3666 const Value *LHS = II->getArgOperand(0);
3667 const Value *RHS = II->getArgOperand(1);
3668 // Canonicalize immediate to the RHS.
3669 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3670 std::swap(LHS, RHS);
3671
3672 // Simplify multiplies.
3673 Intrinsic::ID IID = II->getIntrinsicID();
3674 switch (IID) {
3675 default:
3676 break;
3677 case Intrinsic::smul_with_overflow:
3678 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3679 if (C->getValue() == 2) {
3680 IID = Intrinsic::sadd_with_overflow;
3681 RHS = LHS;
3682 }
3683 break;
3684 case Intrinsic::umul_with_overflow:
3685 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3686 if (C->getValue() == 2) {
3687 IID = Intrinsic::uadd_with_overflow;
3688 RHS = LHS;
3689 }
3690 break;
3691 }
3692
3693 Register ResultReg1, ResultReg2, MulReg;
3694 AArch64CC::CondCode CC = AArch64CC::Invalid;
3695 switch (IID) {
3696 default: llvm_unreachable("Unexpected intrinsic!");
3697 case Intrinsic::sadd_with_overflow:
3698 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3699 CC = AArch64CC::VS;
3700 break;
3701 case Intrinsic::uadd_with_overflow:
3702 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3703 CC = AArch64CC::HS;
3704 break;
3705 case Intrinsic::ssub_with_overflow:
3706 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3707 CC = AArch64CC::VS;
3708 break;
3709 case Intrinsic::usub_with_overflow:
3710 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3711 CC = AArch64CC::LO;
3712 break;
3713 case Intrinsic::smul_with_overflow: {
3714 CC = AArch64CC::NE;
3715 Register LHSReg = getRegForValue(LHS);
3716 if (!LHSReg)
3717 return false;
3718
3719 Register RHSReg = getRegForValue(RHS);
3720 if (!RHSReg)
3721 return false;
3722
3723 if (VT == MVT::i32) {
3724 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3725 Register MulSubReg =
3726 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3727 // cmp xreg, wreg, sxtw
3728 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3729 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3730 /*WantResult=*/false);
3731 MulReg = MulSubReg;
3732 } else {
3733 assert(VT == MVT::i64 && "Unexpected value type.");
3734 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3735 // reused in the next instruction.
3736 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3737 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3738 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3739 /*WantResult=*/false);
3740 }
3741 break;
3742 }
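// Sketch of the signed overflow check emitted above (exact registers may
// differ): for i32 the 64-bit SMULL product overflows iff it is not equal to
// the sign-extension of its low 32 bits, e.g. roughly
//   smull   x8, w0, w1
//   cmp     x8, w8, sxtw
// and for i64 it overflows iff SMULH differs from the sign bits of the low
// product, e.g. roughly
//   mul     x8, x0, x1
//   smulh   x9, x0, x1
//   cmp     x9, x8, asr #63
// In both cases the NE condition then reports overflow.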
3743 case Intrinsic::umul_with_overflow: {
3744 CC = AArch64CC::NE;
3745 Register LHSReg = getRegForValue(LHS);
3746 if (!LHSReg)
3747 return false;
3748
3749 Register RHSReg = getRegForValue(RHS);
3750 if (!RHSReg)
3751 return false;
3752
3753 if (VT == MVT::i32) {
3754 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3755 // tst xreg, #0xffffffff00000000
3756 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3757 TII.get(AArch64::ANDSXri), AArch64::XZR)
3758 .addReg(MulReg)
3759 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3760 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3761 } else {
3762 assert(VT == MVT::i64 && "Unexpected value type.");
3763 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3764 // reused in the next instruction.
3765 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3766 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3767 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3768 }
3769 break;
3770 }
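// Sketch of the unsigned check: for i32 the UMULL product overflows iff any
// of its top 32 bits are set (the tst above), and for i64 it overflows iff
// UMULH is non-zero, e.g. roughly
//   mul     x8, x0, x1
//   umulh   x9, x0, x1
//   cmp     xzr, x9
// Again the NE condition reports overflow.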
3771 }
3772
3773 if (MulReg) {
3774 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3775 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3776 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3777 }
3778
3779 if (!ResultReg1)
3780 return false;
3781
3782 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3783 AArch64::WZR, AArch64::WZR,
3784 getInvertedCondCode(CC));
3785 (void)ResultReg2;
3786 assert((ResultReg1 + 1) == ResultReg2 &&
3787 "Nonconsecutive result registers.");
3788 updateValueMap(II, ResultReg1, 2);
3789 return true;
3790 }
3791 case Intrinsic::aarch64_crc32b:
3792 case Intrinsic::aarch64_crc32h:
3793 case Intrinsic::aarch64_crc32w:
3794 case Intrinsic::aarch64_crc32x:
3795 case Intrinsic::aarch64_crc32cb:
3796 case Intrinsic::aarch64_crc32ch:
3797 case Intrinsic::aarch64_crc32cw:
3798 case Intrinsic::aarch64_crc32cx: {
3799 if (!Subtarget->hasCRC())
3800 return false;
3801
3802 unsigned Opc;
3803 switch (II->getIntrinsicID()) {
3804 default:
3805 llvm_unreachable("Unexpected intrinsic!");
3806 case Intrinsic::aarch64_crc32b:
3807 Opc = AArch64::CRC32Brr;
3808 break;
3809 case Intrinsic::aarch64_crc32h:
3810 Opc = AArch64::CRC32Hrr;
3811 break;
3812 case Intrinsic::aarch64_crc32w:
3813 Opc = AArch64::CRC32Wrr;
3814 break;
3815 case Intrinsic::aarch64_crc32x:
3816 Opc = AArch64::CRC32Xrr;
3817 break;
3818 case Intrinsic::aarch64_crc32cb:
3819 Opc = AArch64::CRC32CBrr;
3820 break;
3821 case Intrinsic::aarch64_crc32ch:
3822 Opc = AArch64::CRC32CHrr;
3823 break;
3824 case Intrinsic::aarch64_crc32cw:
3825 Opc = AArch64::CRC32CWrr;
3826 break;
3827 case Intrinsic::aarch64_crc32cx:
3828 Opc = AArch64::CRC32CXrr;
3829 break;
3830 }
3831
3832 Register LHSReg = getRegForValue(II->getArgOperand(0));
3833 Register RHSReg = getRegForValue(II->getArgOperand(1));
3834 if (!LHSReg || !RHSReg)
3835 return false;
3836
3837 Register ResultReg =
3838 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3839 updateValueMap(II, ResultReg);
3840 return true;
3841 }
3842 }
3843 return false;
3844}
3845
3846bool AArch64FastISel::selectRet(const Instruction *I) {
3847 const ReturnInst *Ret = cast<ReturnInst>(I);
3848 const Function &F = *I->getParent()->getParent();
3849
3850 if (!FuncInfo.CanLowerReturn)
3851 return false;
3852
3853 if (F.isVarArg())
3854 return false;
3855
3856 if (TLI.supportSwiftError() &&
3857 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3858 return false;
3859
3860 if (TLI.supportSplitCSR(FuncInfo.MF))
3861 return false;
3862
3863 // Build a list of return value registers.
3864 SmallVector<Register, 4> RetRegs;
3865
3866 if (Ret->getNumOperands() > 0) {
3867 CallingConv::ID CC = F.getCallingConv();
3868 SmallVector<ISD::OutputArg, 4> Outs;
3869 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3870
3871 // Analyze operands of the call, assigning locations to each operand.
3872 SmallVector<CCValAssign, 16> ValLocs;
3873 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3874 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3875
3876 // Only handle a single return value for now.
3877 if (ValLocs.size() != 1)
3878 return false;
3879
3880 CCValAssign &VA = ValLocs[0];
3881 const Value *RV = Ret->getOperand(0);
3882
3883 // Don't bother handling odd stuff for now.
3884 if ((VA.getLocInfo() != CCValAssign::Full) &&
3885 (VA.getLocInfo() != CCValAssign::BCvt))
3886 return false;
3887
3888 // Only handle register returns for now.
3889 if (!VA.isRegLoc())
3890 return false;
3891
3892 Register Reg = getRegForValue(RV);
3893 if (!Reg)
3894 return false;
3895
3896 Register SrcReg = Reg + VA.getValNo();
3897 Register DestReg = VA.getLocReg();
3898 // Avoid a cross-class copy. This is very unlikely.
3899 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3900 return false;
3901
3902 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3903 if (!RVEVT.isSimple())
3904 return false;
3905
3906 // Vectors (of > 1 lane) in big endian need tricky handling.
3907 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3908 !Subtarget->isLittleEndian())
3909 return false;
3910
3911 MVT RVVT = RVEVT.getSimpleVT();
3912 if (RVVT == MVT::f128)
3913 return false;
3914
3915 MVT DestVT = VA.getValVT();
3916 // Special handling for extended integers.
3917 if (RVVT != DestVT) {
3918 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3919 return false;
3920
3921 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3922 return false;
3923
3924 bool IsZExt = Outs[0].Flags.isZExt();
3925 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3926 if (!SrcReg)
3927 return false;
3928 }
3929
3930 // "Callee" (i.e. value producer) zero extends pointers at function
3931 // boundary.
3932 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3933 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3934
3935 // Make the copy.
3936 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3937 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3938
3939 // Add register to return instruction.
3940 RetRegs.push_back(VA.getLocReg());
3941 }
3942
3943 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3944 TII.get(AArch64::RET_ReallyLR));
3945 for (Register RetReg : RetRegs)
3946 MIB.addReg(RetReg, RegState::Implicit);
3947 return true;
3948}
3949
3950bool AArch64FastISel::selectTrunc(const Instruction *I) {
3951 Type *DestTy = I->getType();
3952 Value *Op = I->getOperand(0);
3953 Type *SrcTy = Op->getType();
3954
3955 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3956 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3957 if (!SrcEVT.isSimple())
3958 return false;
3959 if (!DestEVT.isSimple())
3960 return false;
3961
3962 MVT SrcVT = SrcEVT.getSimpleVT();
3963 MVT DestVT = DestEVT.getSimpleVT();
3964
3965 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3966 SrcVT != MVT::i8)
3967 return false;
3968 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3969 DestVT != MVT::i1)
3970 return false;
3971
3972 Register SrcReg = getRegForValue(Op);
3973 if (!SrcReg)
3974 return false;
3975
3976 // If we're truncating from i64 to a smaller non-legal type then generate an
3977 // AND. Otherwise, we know the high bits are undefined and a truncate only
3978 // generates a COPY. We cannot also mark the source register as the result
3979 // register, because this can incorrectly transfer the kill flag onto the
3980 // source register.
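// For example, "trunc i64 %x to i8" becomes an extract of the sub_32
// sub-register followed by an AND with #0xff; truncating from i32 or a
// narrower type only needs the COPY emitted below.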
3981 Register ResultReg;
3982 if (SrcVT == MVT::i64) {
3983 uint64_t Mask = 0;
3984 switch (DestVT.SimpleTy) {
3985 default:
3986 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3987 return false;
3988 case MVT::i1:
3989 Mask = 0x1;
3990 break;
3991 case MVT::i8:
3992 Mask = 0xff;
3993 break;
3994 case MVT::i16:
3995 Mask = 0xffff;
3996 break;
3997 }
3998 // Issue an extract_subreg to get the lower 32-bits.
3999 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
4000 AArch64::sub_32);
4001 // Create the AND instruction which performs the actual truncation.
4002 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4003 assert(ResultReg && "Unexpected AND instruction emission failure.");
4004 } else {
4005 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4007 TII.get(TargetOpcode::COPY), ResultReg)
4008 .addReg(SrcReg);
4009 }
4010
4011 updateValueMap(I, ResultReg);
4012 return true;
4013}
4014
4015Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4016 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4017 DestVT == MVT::i64) &&
4018 "Unexpected value type.");
4019 // Handle i8 and i16 as i32.
4020 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4021 DestVT = MVT::i32;
4022
4023 if (IsZExt) {
4024 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4025 assert(ResultReg && "Unexpected AND instruction emission failure.");
4026 if (DestVT == MVT::i64) {
4027 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4028 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4029 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4030 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4031 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4032 .addReg(ResultReg)
4033 .addImm(AArch64::sub_32);
4034 ResultReg = Reg64;
4035 }
4036 return ResultReg;
4037 } else {
4038 if (DestVT == MVT::i64) {
4039 // FIXME: We're SExt i1 to i64.
4040 return Register();
4041 }
4042 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4043 0, 0);
4044 }
4045}
4046
4047Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4048 unsigned Opc;
4049 Register ZReg;
4050 switch (RetVT.SimpleTy) {
4051 default:
4052 return Register();
4053 case MVT::i8:
4054 case MVT::i16:
4055 case MVT::i32:
4056 RetVT = MVT::i32;
4057 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4058 case MVT::i64:
4059 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4060 }
4061
4062 const TargetRegisterClass *RC =
4063 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4064 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4065}
4066
4067Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4068 if (RetVT != MVT::i64)
4069 return Register();
4070
4071 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4072 Op0, Op1, AArch64::XZR);
4073}
4074
4075Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4076 if (RetVT != MVT::i64)
4077 return Register();
4078
4079 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4080 Op0, Op1, AArch64::XZR);
4081}
4082
4083Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4084 Register Op1Reg) {
4085 unsigned Opc = 0;
4086 bool NeedTrunc = false;
4087 uint64_t Mask = 0;
4088 switch (RetVT.SimpleTy) {
4089 default:
4090 return Register();
4091 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4092 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4093 case MVT::i32: Opc = AArch64::LSLVWr; break;
4094 case MVT::i64: Opc = AArch64::LSLVXr; break;
4095 }
4096
4097 const TargetRegisterClass *RC =
4098 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4099 if (NeedTrunc)
4100 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4101
4102 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4103 if (NeedTrunc)
4104 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4105 return ResultReg;
4106}
4107
4108Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4109 uint64_t Shift, bool IsZExt) {
4110 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4111 "Unexpected source/return type pair.");
4112 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4113 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4114 "Unexpected source value type.");
4115 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4116 RetVT == MVT::i64) && "Unexpected return value type.");
4117
4118 bool Is64Bit = (RetVT == MVT::i64);
4119 unsigned RegSize = Is64Bit ? 64 : 32;
4120 unsigned DstBits = RetVT.getSizeInBits();
4121 unsigned SrcBits = SrcVT.getSizeInBits();
4122 const TargetRegisterClass *RC =
4123 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4124
4125 // Just emit a copy for "zero" shifts.
4126 if (Shift == 0) {
4127 if (RetVT == SrcVT) {
4128 Register ResultReg = createResultReg(RC);
4129 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4130 TII.get(TargetOpcode::COPY), ResultReg)
4131 .addReg(Op0);
4132 return ResultReg;
4133 } else
4134 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4135 }
4136
4137 // Don't deal with undefined shifts.
4138 if (Shift >= DstBits)
4139 return Register();
4140
4141 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4142 // {S|U}BFM Wd, Wn, #r, #s
4143 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4144
4145 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4146 // %2 = shl i16 %1, 4
4147 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4148 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4149 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4150 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4151
4152 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4153 // %2 = shl i16 %1, 8
4154 // Wd<32+7-24,32-24> = Wn<7:0>
4155 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4156 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4157 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4158
4159 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4160 // %2 = shl i16 %1, 12
4161 // Wd<32+3-20,32-20> = Wn<3:0>
4162 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4163 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4164 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
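// Putting the rules above together: "%1 = zext i8 %a to i32; %2 = shl i32 %1, 4"
// folds into a single UBFM with ImmR = 32 - 4 = 28 and ImmS = 7, i.e. roughly
// "ubfiz w0, w0, #4, #8".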
4165
4166 unsigned ImmR = RegSize - Shift;
4167 // Limit the width to the length of the source type.
4168 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4169 static const unsigned OpcTable[2][2] = {
4170 {AArch64::SBFMWri, AArch64::SBFMXri},
4171 {AArch64::UBFMWri, AArch64::UBFMXri}
4172 };
4173 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4174 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4175 Register TmpReg = MRI.createVirtualRegister(RC);
4176 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4177 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4178 .addReg(Op0)
4179 .addImm(AArch64::sub_32);
4180 Op0 = TmpReg;
4181 }
4182 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4183}
4184
4185Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4186 Register Op1Reg) {
4187 unsigned Opc = 0;
4188 bool NeedTrunc = false;
4189 uint64_t Mask = 0;
4190 switch (RetVT.SimpleTy) {
4191 default:
4192 return Register();
4193 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4194 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4195 case MVT::i32: Opc = AArch64::LSRVWr; break;
4196 case MVT::i64: Opc = AArch64::LSRVXr; break;
4197 }
4198
4199 const TargetRegisterClass *RC =
4200 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4201 if (NeedTrunc) {
4202 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4203 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4204 }
4205 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4206 if (NeedTrunc)
4207 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4208 return ResultReg;
4209}
4210
4211Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4212 uint64_t Shift, bool IsZExt) {
4213 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4214 "Unexpected source/return type pair.");
4215 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4216 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4217 "Unexpected source value type.");
4218 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4219 RetVT == MVT::i64) && "Unexpected return value type.");
4220
4221 bool Is64Bit = (RetVT == MVT::i64);
4222 unsigned RegSize = Is64Bit ? 64 : 32;
4223 unsigned DstBits = RetVT.getSizeInBits();
4224 unsigned SrcBits = SrcVT.getSizeInBits();
4225 const TargetRegisterClass *RC =
4226 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4227
4228 // Just emit a copy for "zero" shifts.
4229 if (Shift == 0) {
4230 if (RetVT == SrcVT) {
4231 Register ResultReg = createResultReg(RC);
4232 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4233 TII.get(TargetOpcode::COPY), ResultReg)
4234 .addReg(Op0);
4235 return ResultReg;
4236 } else
4237 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4238 }
4239
4240 // Don't deal with undefined shifts.
4241 if (Shift >= DstBits)
4242 return Register();
4243
4244 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4245 // {S|U}BFM Wd, Wn, #r, #s
4246 // Wd<s-r:0> = Wn<s:r> when r <= s
4247
4248 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4249 // %2 = lshr i16 %1, 4
4250 // Wd<7-4:0> = Wn<7:4>
4251 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4252 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4253 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4254
4255 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4256 // %2 = lshr i16 %1, 8
4257 // Wd<7-7,0> = Wn<7:7>
4258 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4259 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4260 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4261
4262 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4263 // %2 = lshr i16 %1, 12
4264 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4265 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4266 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4267 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
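// Putting the rules above together: "%1 = zext i8 %a to i32; %2 = lshr i32 %1, 4"
// folds into a single UBFM with ImmR = 4 and ImmS = 7, i.e. roughly
// "ubfx w0, w0, #4, #4".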
4268
4269 if (Shift >= SrcBits && IsZExt)
4270 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4271
4272 // It is not possible to fold a sign-extend into the LShr instruction. In this
4273 // case emit a sign-extend.
4274 if (!IsZExt) {
4275 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4276 if (!Op0)
4277 return Register();
4278 SrcVT = RetVT;
4279 SrcBits = SrcVT.getSizeInBits();
4280 IsZExt = true;
4281 }
4282
4283 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4284 unsigned ImmS = SrcBits - 1;
4285 static const unsigned OpcTable[2][2] = {
4286 {AArch64::SBFMWri, AArch64::SBFMXri},
4287 {AArch64::UBFMWri, AArch64::UBFMXri}
4288 };
4289 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4290 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4291 Register TmpReg = MRI.createVirtualRegister(RC);
4292 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4293 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4294 .addReg(Op0)
4295 .addImm(AArch64::sub_32);
4296 Op0 = TmpReg;
4297 }
4298 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4299}
4300
4301Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4302 Register Op1Reg) {
4303 unsigned Opc = 0;
4304 bool NeedTrunc = false;
4305 uint64_t Mask = 0;
4306 switch (RetVT.SimpleTy) {
4307 default:
4308 return Register();
4309 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4310 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4311 case MVT::i32: Opc = AArch64::ASRVWr; break;
4312 case MVT::i64: Opc = AArch64::ASRVXr; break;
4313 }
4314
4315 const TargetRegisterClass *RC =
4316 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4317 if (NeedTrunc) {
4318 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4319 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4320 }
4321 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4322 if (NeedTrunc)
4323 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4324 return ResultReg;
4325}
4326
4327Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4328 uint64_t Shift, bool IsZExt) {
4329 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4330 "Unexpected source/return type pair.");
4331 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4332 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4333 "Unexpected source value type.");
4334 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4335 RetVT == MVT::i64) && "Unexpected return value type.");
4336
4337 bool Is64Bit = (RetVT == MVT::i64);
4338 unsigned RegSize = Is64Bit ? 64 : 32;
4339 unsigned DstBits = RetVT.getSizeInBits();
4340 unsigned SrcBits = SrcVT.getSizeInBits();
4341 const TargetRegisterClass *RC =
4342 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4343
4344 // Just emit a copy for "zero" shifts.
4345 if (Shift == 0) {
4346 if (RetVT == SrcVT) {
4347 Register ResultReg = createResultReg(RC);
4348 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4349 TII.get(TargetOpcode::COPY), ResultReg)
4350 .addReg(Op0);
4351 return ResultReg;
4352 } else
4353 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4354 }
4355
4356 // Don't deal with undefined shifts.
4357 if (Shift >= DstBits)
4358 return Register();
4359
4360 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4361 // {S|U}BFM Wd, Wn, #r, #s
4362 // Wd<s-r:0> = Wn<s:r> when r <= s
4363
4364 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4365 // %2 = ashr i16 %1, 4
4366 // Wd<7-4:0> = Wn<7:4>
4367 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4368 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4369 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4370
4371 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4372 // %2 = ashr i16 %1, 8
4373 // Wd<7-7,0> = Wn<7:7>
4374 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4375 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4376 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4377
4378 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4379 // %2 = ashr i16 %1, 12
4380 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4381 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4382 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4383 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
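// Putting the rules above together: "%1 = sext i8 %a to i32; %2 = ashr i32 %1, 4"
// folds into a single SBFM with ImmR = 4 and ImmS = 7, i.e. roughly
// "sbfx w0, w0, #4, #4".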
4384
4385 if (Shift >= SrcBits && IsZExt)
4386 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4387
4388 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4389 unsigned ImmS = SrcBits - 1;
4390 static const unsigned OpcTable[2][2] = {
4391 {AArch64::SBFMWri, AArch64::SBFMXri},
4392 {AArch64::UBFMWri, AArch64::UBFMXri}
4393 };
4394 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4395 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4396 Register TmpReg = MRI.createVirtualRegister(RC);
4397 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4398 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4399 .addReg(Op0)
4400 .addImm(AArch64::sub_32);
4401 Op0 = TmpReg;
4402 }
4403 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4404}
4405
4406Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4407 bool IsZExt) {
4408 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4409
4410 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4411 // DestVT are odd things, so test to make sure that they are both types we can
4412 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4413 // bail out to SelectionDAG.
4414 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4415 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4416 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4417 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4418 return Register();
4419
4420 unsigned Opc;
4421 unsigned Imm = 0;
4422
4423 switch (SrcVT.SimpleTy) {
4424 default:
4425 return Register();
4426 case MVT::i1:
4427 return emiti1Ext(SrcReg, DestVT, IsZExt);
4428 case MVT::i8:
4429 if (DestVT == MVT::i64)
4430 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4431 else
4432 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4433 Imm = 7;
4434 break;
4435 case MVT::i16:
4436 if (DestVT == MVT::i64)
4437 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4438 else
4439 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4440 Imm = 15;
4441 break;
4442 case MVT::i32:
4443 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4444 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4445 Imm = 31;
4446 break;
4447 }
4448
4449 // Handle i8 and i16 as i32.
4450 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4451 DestVT = MVT::i32;
4452 else if (DestVT == MVT::i64) {
4453 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4455 TII.get(AArch64::SUBREG_TO_REG), Src64)
4456 .addReg(SrcReg)
4457 .addImm(AArch64::sub_32);
4458 SrcReg = Src64;
4459 }
4460
4461 const TargetRegisterClass *RC =
4462 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4463 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4464}
4465
4466static bool isZExtLoad(const MachineInstr *LI) {
4467 switch (LI->getOpcode()) {
4468 default:
4469 return false;
4470 case AArch64::LDURBBi:
4471 case AArch64::LDURHHi:
4472 case AArch64::LDURWi:
4473 case AArch64::LDRBBui:
4474 case AArch64::LDRHHui:
4475 case AArch64::LDRWui:
4476 case AArch64::LDRBBroX:
4477 case AArch64::LDRHHroX:
4478 case AArch64::LDRWroX:
4479 case AArch64::LDRBBroW:
4480 case AArch64::LDRHHroW:
4481 case AArch64::LDRWroW:
4482 return true;
4483 }
4484}
4485
4486static bool isSExtLoad(const MachineInstr *LI) {
4487 switch (LI->getOpcode()) {
4488 default:
4489 return false;
4490 case AArch64::LDURSBWi:
4491 case AArch64::LDURSHWi:
4492 case AArch64::LDURSBXi:
4493 case AArch64::LDURSHXi:
4494 case AArch64::LDURSWi:
4495 case AArch64::LDRSBWui:
4496 case AArch64::LDRSHWui:
4497 case AArch64::LDRSBXui:
4498 case AArch64::LDRSHXui:
4499 case AArch64::LDRSWui:
4500 case AArch64::LDRSBWroX:
4501 case AArch64::LDRSHWroX:
4502 case AArch64::LDRSBXroX:
4503 case AArch64::LDRSHXroX:
4504 case AArch64::LDRSWroX:
4505 case AArch64::LDRSBWroW:
4506 case AArch64::LDRSHWroW:
4507 case AArch64::LDRSBXroW:
4508 case AArch64::LDRSHXroW:
4509 case AArch64::LDRSWroW:
4510 return true;
4511 }
4512}
4513
4514bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4515 MVT SrcVT) {
4516 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4517 if (!LI || !LI->hasOneUse())
4518 return false;
4519
4520 // Check if the load instruction has already been selected.
4521 Register Reg = lookUpRegForValue(LI);
4522 if (!Reg)
4523 return false;
4524
4525 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4526 if (!MI)
4527 return false;
4528
4529 // Check if the correct load instruction has been emitted - SelectionDAG might
4530 // have emitted a zero-extending load, but we need a sign-extending load.
4531 bool IsZExt = isa<ZExtInst>(I);
4532 const auto *LoadMI = MI;
4533 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4534 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4535 Register LoadReg = MI->getOperand(1).getReg();
4536 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4537 assert(LoadMI && "Expected valid instruction");
4538 }
4539 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4540 return false;
4541
4542 // Nothing to be done.
4543 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4544 updateValueMap(I, Reg);
4545 return true;
4546 }
4547
4548 if (IsZExt) {
4549 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4550 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4551 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4552 .addReg(Reg, getKillRegState(true))
4553 .addImm(AArch64::sub_32);
4554 Reg = Reg64;
4555 } else {
4556 assert((MI->getOpcode() == TargetOpcode::COPY &&
4557 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4558 "Expected copy instruction");
4559 Reg = MI->getOperand(1).getReg();
4560 MachineBasicBlock::iterator I(MI);
4561 removeDeadCode(I, std::next(I));
4562 }
4563 updateValueMap(I, Reg);
4564 return true;
4565}
4566
4567 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4568 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4569 "Unexpected integer extend instruction.");
4570 MVT RetVT;
4571 MVT SrcVT;
4572 if (!isTypeSupported(I->getType(), RetVT))
4573 return false;
4574
4575 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4576 return false;
4577
4578 // Try to optimize already sign-/zero-extended values from load instructions.
4579 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4580 return true;
4581
4582 Register SrcReg = getRegForValue(I->getOperand(0));
4583 if (!SrcReg)
4584 return false;
4585
4586 // Try to optimize already sign-/zero-extended values from function arguments.
4587 bool IsZExt = isa<ZExtInst>(I);
4588 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4589 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4590 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4591 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4593 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4594 .addReg(SrcReg)
4595 .addImm(AArch64::sub_32);
4596 SrcReg = ResultReg;
4597 }
4598
4599 updateValueMap(I, SrcReg);
4600 return true;
4601 }
4602 }
4603
4604 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4605 if (!ResultReg)
4606 return false;
4607
4608 updateValueMap(I, ResultReg);
4609 return true;
4610}
4611
4612bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4613 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4614 if (!DestEVT.isSimple())
4615 return false;
4616
4617 MVT DestVT = DestEVT.getSimpleVT();
4618 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4619 return false;
4620
4621 unsigned DivOpc;
4622 bool Is64bit = (DestVT == MVT::i64);
4623 switch (ISDOpcode) {
4624 default:
4625 return false;
4626 case ISD::SREM:
4627 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4628 break;
4629 case ISD::UREM:
4630 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4631 break;
4632 }
4633 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4634 Register Src0Reg = getRegForValue(I->getOperand(0));
4635 if (!Src0Reg)
4636 return false;
4637
4638 Register Src1Reg = getRegForValue(I->getOperand(1));
4639 if (!Src1Reg)
4640 return false;
4641
4642 const TargetRegisterClass *RC =
4643 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4644 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4645 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4646 // The remainder is computed as numerator - (quotient * denominator) using the
4647 // MSUB instruction.
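// E.g. for a 32-bit srem this emits roughly:
//   sdiv wQ, w0, w1
//   msub wR, wQ, w1, w0   (wR = w0 - wQ * w1)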
4648 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4649 updateValueMap(I, ResultReg);
4650 return true;
4651}
4652
4653bool AArch64FastISel::selectMul(const Instruction *I) {
4654 MVT VT;
4655 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4656 return false;
4657
4658 if (VT.isVector())
4659 return selectBinaryOp(I, ISD::MUL);
4660
4661 const Value *Src0 = I->getOperand(0);
4662 const Value *Src1 = I->getOperand(1);
4663 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4664 if (C->getValue().isPowerOf2())
4665 std::swap(Src0, Src1);
4666
4667 // Try to simplify to a shift instruction.
4668 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4669 if (C->getValue().isPowerOf2()) {
4670 uint64_t ShiftVal = C->getValue().logBase2();
4671 MVT SrcVT = VT;
4672 bool IsZExt = true;
4673 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4674 if (!isIntExtFree(ZExt)) {
4675 MVT VT;
4676 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4677 SrcVT = VT;
4678 IsZExt = true;
4679 Src0 = ZExt->getOperand(0);
4680 }
4681 }
4682 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4683 if (!isIntExtFree(SExt)) {
4684 MVT VT;
4685 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4686 SrcVT = VT;
4687 IsZExt = false;
4688 Src0 = SExt->getOperand(0);
4689 }
4690 }
4691 }
4692
4693 Register Src0Reg = getRegForValue(Src0);
4694 if (!Src0Reg)
4695 return false;
4696
4697 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4698
4699 if (ResultReg) {
4700 updateValueMap(I, ResultReg);
4701 return true;
4702 }
4703 }
4704
4705 Register Src0Reg = getRegForValue(I->getOperand(0));
4706 if (!Src0Reg)
4707 return false;
4708
4709 Register Src1Reg = getRegForValue(I->getOperand(1));
4710 if (!Src1Reg)
4711 return false;
4712
4713 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4714
4715 if (!ResultReg)
4716 return false;
4717
4718 updateValueMap(I, ResultReg);
4719 return true;
4720}
4721
4722bool AArch64FastISel::selectShift(const Instruction *I) {
4723 MVT RetVT;
4724 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4725 return false;
4726
4727 if (RetVT.isVector())
4728 return selectOperator(I, I->getOpcode());
4729
4730 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4731 Register ResultReg;
4732 uint64_t ShiftVal = C->getZExtValue();
4733 MVT SrcVT = RetVT;
4734 bool IsZExt = I->getOpcode() != Instruction::AShr;
4735 const Value *Op0 = I->getOperand(0);
4736 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4737 if (!isIntExtFree(ZExt)) {
4738 MVT TmpVT;
4739 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4740 SrcVT = TmpVT;
4741 IsZExt = true;
4742 Op0 = ZExt->getOperand(0);
4743 }
4744 }
4745 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4746 if (!isIntExtFree(SExt)) {
4747 MVT TmpVT;
4748 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4749 SrcVT = TmpVT;
4750 IsZExt = false;
4751 Op0 = SExt->getOperand(0);
4752 }
4753 }
4754 }
4755
4756 Register Op0Reg = getRegForValue(Op0);
4757 if (!Op0Reg)
4758 return false;
4759
4760 switch (I->getOpcode()) {
4761 default: llvm_unreachable("Unexpected instruction.");
4762 case Instruction::Shl:
4763 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4764 break;
4765 case Instruction::AShr:
4766 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4767 break;
4768 case Instruction::LShr:
4769 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4770 break;
4771 }
4772 if (!ResultReg)
4773 return false;
4774
4775 updateValueMap(I, ResultReg);
4776 return true;
4777 }
4778
4779 Register Op0Reg = getRegForValue(I->getOperand(0));
4780 if (!Op0Reg)
4781 return false;
4782
4783 Register Op1Reg = getRegForValue(I->getOperand(1));
4784 if (!Op1Reg)
4785 return false;
4786
4787 Register ResultReg;
4788 switch (I->getOpcode()) {
4789 default: llvm_unreachable("Unexpected instruction.");
4790 case Instruction::Shl:
4791 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4792 break;
4793 case Instruction::AShr:
4794 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4795 break;
4796 case Instruction::LShr:
4797 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4798 break;
4799 }
4800
4801 if (!ResultReg)
4802 return false;
4803
4804 updateValueMap(I, ResultReg);
4805 return true;
4806}
4807
4808bool AArch64FastISel::selectBitCast(const Instruction *I) {
4809 MVT RetVT, SrcVT;
4810
4811 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4812 return false;
4813 if (!isTypeLegal(I->getType(), RetVT))
4814 return false;
4815
4816 unsigned Opc;
4817 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4818 Opc = AArch64::FMOVWSr;
4819 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4820 Opc = AArch64::FMOVXDr;
4821 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4822 Opc = AArch64::FMOVSWr;
4823 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4824 Opc = AArch64::FMOVDXr;
4825 else
4826 return false;
4827
4828 const TargetRegisterClass *RC = nullptr;
4829 switch (RetVT.SimpleTy) {
4830 default: llvm_unreachable("Unexpected value type.");
4831 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4832 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4833 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4834 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4835 }
4836 Register Op0Reg = getRegForValue(I->getOperand(0));
4837 if (!Op0Reg)
4838 return false;
4839
4840 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4841 if (!ResultReg)
4842 return false;
4843
4844 updateValueMap(I, ResultReg);
4845 return true;
4846}
4847
4848bool AArch64FastISel::selectFRem(const Instruction *I) {
4849 MVT RetVT;
4850 if (!isTypeLegal(I->getType(), RetVT))
4851 return false;
4852
4853 RTLIB::LibcallImpl LCImpl =
4854 LibcallLowering->getLibcallImpl(RTLIB::getREM(RetVT));
4855 if (LCImpl == RTLIB::Unsupported)
4856 return false;
4857
4858 ArgListTy Args;
4859 Args.reserve(I->getNumOperands());
4860
4861 // Populate the argument list.
4862 for (auto &Arg : I->operands())
4863 Args.emplace_back(Arg);
4864
4865 CallLoweringInfo CLI;
4866 MCContext &Ctx = MF->getContext();
4867 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
4868 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
4869
4870 CLI.setCallee(DL, Ctx, CC, I->getType(), FuncName, std::move(Args));
4871 if (!lowerCallTo(CLI))
4872 return false;
4873 updateValueMap(I, CLI.ResultReg);
4874 return true;
4875}
4876
4877bool AArch64FastISel::selectSDiv(const Instruction *I) {
4878 MVT VT;
4879 if (!isTypeLegal(I->getType(), VT))
4880 return false;
4881
4882 if (!isa<ConstantInt>(I->getOperand(1)))
4883 return selectBinaryOp(I, ISD::SDIV);
4884
4885 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4886 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4887 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4888 return selectBinaryOp(I, ISD::SDIV);
4889
4890 unsigned Lg2 = C.countr_zero();
4891 Register Src0Reg = getRegForValue(I->getOperand(0));
4892 if (!Src0Reg)
4893 return false;
4894
4895 if (cast<BinaryOperator>(I)->isExact()) {
4896 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4897 if (!ResultReg)
4898 return false;
4899 updateValueMap(I, ResultReg);
4900 return true;
4901 }
4902
4903 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4904 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4905 if (!AddReg)
4906 return false;
4907
4908 // (Src0 < 0) ? Pow2 - 1 : 0;
4909 if (!emitICmp_ri(VT, Src0Reg, 0))
4910 return false;
4911
4912 unsigned SelectOpc;
4913 const TargetRegisterClass *RC;
4914 if (VT == MVT::i64) {
4915 SelectOpc = AArch64::CSELXr;
4916 RC = &AArch64::GPR64RegClass;
4917 } else {
4918 SelectOpc = AArch64::CSELWr;
4919 RC = &AArch64::GPR32RegClass;
4920 }
4921 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4922 AArch64CC::LT);
4923 if (!SelectReg)
4924 return false;
4925
4926 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4927 // negate the result.
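// E.g. "sdiv i32 %x, 8" becomes roughly:
//   add  w8, w0, #7
//   cmp  w0, #0
//   csel w8, w8, w0, lt
//   asr  w0, w8, #3
// with the last step replaced by "neg w0, w8, asr #3" for a divisor of -8.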
4928 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4929 Register ResultReg;
4930 if (C.isNegative())
4931 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4932 AArch64_AM::ASR, Lg2);
4933 else
4934 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4935
4936 if (!ResultReg)
4937 return false;
4938
4939 updateValueMap(I, ResultReg);
4940 return true;
4941}
4942
4943/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4944/// have to duplicate it for AArch64, because otherwise we would fail during the
4945/// sign-extend emission.
4946Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4947 Register IdxN = getRegForValue(Idx);
4948 if (!IdxN)
4949 // Unhandled operand. Halt "fast" selection and bail.
4950 return Register();
4951
4952 // If the index is smaller or larger than intptr_t, truncate or extend it.
4953 MVT PtrVT = TLI.getPointerTy(DL);
4954 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4955 if (IdxVT.bitsLT(PtrVT)) {
4956 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4957 } else if (IdxVT.bitsGT(PtrVT))
4958 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4959 return IdxN;
4960}
4961
4962/// This is mostly a copy of the existing FastISel GEP code, but we have to
4963/// duplicate it for AArch64, because otherwise we would bail out even for
4964/// simple cases. This is because the standard fastEmit functions don't cover
4965 /// MUL at all and ADD is lowered very inefficiently.
4966bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4967 if (Subtarget->isTargetILP32())
4968 return false;
4969
4970 Register N = getRegForValue(I->getOperand(0));
4971 if (!N)
4972 return false;
4973
4974 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4975 // into a single N = N + TotalOffset.
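// E.g. for "getelementptr {i32, i32}, ptr %p, i64 1, i32 1" both constant
// offsets (8 + 4 bytes) are accumulated in TotalOffs and emitted as one add,
// while a variable index falls through to the mul/add path below.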
4976 uint64_t TotalOffs = 0;
4977 MVT VT = TLI.getPointerTy(DL);
4978 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4979 GTI != E; ++GTI) {
4980 const Value *Idx = GTI.getOperand();
4981 if (auto *StTy = GTI.getStructTypeOrNull()) {
4982 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4983 // N = N + Offset
4984 if (Field)
4985 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4986 } else {
4987 // If this is a constant subscript, handle it quickly.
4988 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4989 if (CI->isZero())
4990 continue;
4991 // N = N + Offset
4992 TotalOffs += GTI.getSequentialElementStride(DL) *
4993 cast<ConstantInt>(CI)->getSExtValue();
4994 continue;
4995 }
4996 if (TotalOffs) {
4997 N = emitAdd_ri_(VT, N, TotalOffs);
4998 if (!N)
4999 return false;
5000 TotalOffs = 0;
5001 }
5002
5003 // N = N + Idx * ElementSize;
5004 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5005 Register IdxN = getRegForGEPIndex(Idx);
5006 if (!IdxN)
5007 return false;
5008
5009 if (ElementSize != 1) {
5010 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5011 if (!C)
5012 return false;
5013 IdxN = emitMul_rr(VT, IdxN, C);
5014 if (!IdxN)
5015 return false;
5016 }
5017 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5018 if (!N)
5019 return false;
5020 }
5021 }
5022 if (TotalOffs) {
5023 N = emitAdd_ri_(VT, N, TotalOffs);
5024 if (!N)
5025 return false;
5026 }
5027 updateValueMap(I, N);
5028 return true;
5029}
5030
5031bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5032 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5033 "cmpxchg survived AtomicExpand at optlevel > -O0");
5034
5035 auto *RetPairTy = cast<StructType>(I->getType());
5036 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5037 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5038 "cmpxchg has a non-i1 status result");
5039
5040 MVT VT;
5041 if (!isTypeLegal(RetTy, VT))
5042 return false;
5043
5044 const TargetRegisterClass *ResRC;
5045 unsigned Opc, CmpOpc;
5046 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5047 // extractvalue selection doesn't support that.
5048 if (VT == MVT::i32) {
5049 Opc = AArch64::CMP_SWAP_32;
5050 CmpOpc = AArch64::SUBSWrs;
5051 ResRC = &AArch64::GPR32RegClass;
5052 } else if (VT == MVT::i64) {
5053 Opc = AArch64::CMP_SWAP_64;
5054 CmpOpc = AArch64::SUBSXrs;
5055 ResRC = &AArch64::GPR64RegClass;
5056 } else {
5057 return false;
5058 }
5059
5060 const MCInstrDesc &II = TII.get(Opc);
5061
5062 Register AddrReg = getRegForValue(I->getPointerOperand());
5063 Register DesiredReg = getRegForValue(I->getCompareOperand());
5064 Register NewReg = getRegForValue(I->getNewValOperand());
5065
5066 if (!AddrReg || !DesiredReg || !NewReg)
5067 return false;
5068
5069 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5070 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5071 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5072
5073 const Register ResultReg1 = createResultReg(ResRC);
5074 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5075 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5076
5077 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
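// The sequence below first emits the CMP_SWAP pseudo (expanded after
// selection, typically into an exclusive load/store loop), then compares the
// returned old value against the expected one and materializes the i1
// success flag with CSINC.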
5078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5079 .addDef(ResultReg1)
5080 .addDef(ScratchReg)
5081 .addUse(AddrReg)
5082 .addUse(DesiredReg)
5083 .addUse(NewReg);
5084
5085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5086 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5087 .addUse(ResultReg1)
5088 .addUse(DesiredReg)
5089 .addImm(0);
5090
5091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5092 .addDef(ResultReg2)
5093 .addUse(AArch64::WZR)
5094 .addUse(AArch64::WZR)
5095 .addImm(AArch64CC::NE);
5096
5097 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5098 updateValueMap(I, ResultReg1, 2);
5099 return true;
5100}
5101
5102bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5103 if (TLI.fallBackToDAGISel(*I))
5104 return false;
5105 switch (I->getOpcode()) {
5106 default:
5107 break;
5108 case Instruction::Add:
5109 case Instruction::Sub:
5110 return selectAddSub(I);
5111 case Instruction::Mul:
5112 return selectMul(I);
5113 case Instruction::SDiv:
5114 return selectSDiv(I);
5115 case Instruction::SRem:
5116 if (!selectBinaryOp(I, ISD::SREM))
5117 return selectRem(I, ISD::SREM);
5118 return true;
5119 case Instruction::URem:
5120 if (!selectBinaryOp(I, ISD::UREM))
5121 return selectRem(I, ISD::UREM);
5122 return true;
5123 case Instruction::Shl:
5124 case Instruction::LShr:
5125 case Instruction::AShr:
5126 return selectShift(I);
5127 case Instruction::And:
5128 case Instruction::Or:
5129 case Instruction::Xor:
5130 return selectLogicalOp(I);
5131 case Instruction::Br:
5132 return selectBranch(I);
5133 case Instruction::IndirectBr:
5134 return selectIndirectBr(I);
5135 case Instruction::BitCast:
5136 if (!FastISel::selectBitCast(I))
5137 return selectBitCast(I);
5138 return true;
5139 case Instruction::FPToSI:
5140 if (!selectCast(I, ISD::FP_TO_SINT))
5141 return selectFPToInt(I, /*Signed=*/true);
5142 return true;
5143 case Instruction::FPToUI:
5144 return selectFPToInt(I, /*Signed=*/false);
5145 case Instruction::ZExt:
5146 case Instruction::SExt:
5147 return selectIntExt(I);
5148 case Instruction::Trunc:
5149 if (!selectCast(I, ISD::TRUNCATE))
5150 return selectTrunc(I);
5151 return true;
5152 case Instruction::FPExt:
5153 return selectFPExt(I);
5154 case Instruction::FPTrunc:
5155 return selectFPTrunc(I);
5156 case Instruction::SIToFP:
5157 if (!selectCast(I, ISD::SINT_TO_FP))
5158 return selectIntToFP(I, /*Signed=*/true);
5159 return true;
5160 case Instruction::UIToFP:
5161 return selectIntToFP(I, /*Signed=*/false);
5162 case Instruction::Load:
5163 return selectLoad(I);
5164 case Instruction::Store:
5165 return selectStore(I);
5166 case Instruction::FCmp:
5167 case Instruction::ICmp:
5168 return selectCmp(I);
5169 case Instruction::Select:
5170 return selectSelect(I);
5171 case Instruction::Ret:
5172 return selectRet(I);
5173 case Instruction::FRem:
5174 return selectFRem(I);
5175 case Instruction::GetElementPtr:
5176 return selectGetElementPtr(I);
5177 case Instruction::AtomicCmpXchg:
5178 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5179 }
5180
5181 // Fall back to target-independent instruction selection.
5182 return selectOperator(I, I->getOpcode());
5183}
5184
5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186 const TargetLibraryInfo *LibInfo,
5187 const LibcallLoweringInfo *LibcallLowering) {
5188
5189 SMEAttrs CallerAttrs =
5190 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5191 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5192 CallerAttrs.hasStreamingInterfaceOrBody() ||
5193 CallerAttrs.hasStreamingCompatibleInterface() ||
5194 CallerAttrs.hasAgnosticZAInterface())
5195 return nullptr;
5196 return new AArch64FastISel(FuncInfo, LibInfo, LibcallLowering);
5197}
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
const APFloat & getValueAPF() const
Definition Constants.h:325
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:332
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:329
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Tracks which library functions to use for a particular subtarget.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:100
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:264
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
LLVM_ABI Libcall getREM(EVT VT)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.