AArch64FastISel.cpp
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
20#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 enum BaseKind { RegBase, FrameIndexBase };
85
86 private:
87 BaseKind Kind = RegBase;
88 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
89 union {
90 unsigned Reg;
91 int FI;
92 } Base;
93 Register OffsetReg;
94 unsigned Shift = 0;
95 int64_t Offset = 0;
96 const GlobalValue *GV = nullptr;
97
98 public:
99 Address() { Base.Reg = 0; }
100
101 void setKind(BaseKind K) { Kind = K; }
102 BaseKind getKind() const { return Kind; }
103 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
104 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
105 bool isRegBase() const { return Kind == RegBase; }
106 bool isFIBase() const { return Kind == FrameIndexBase; }
107
108 void setReg(Register Reg) {
109 assert(isRegBase() && "Invalid base register access!");
110 Base.Reg = Reg.id();
111 }
112
113 Register getReg() const {
114 assert(isRegBase() && "Invalid base register access!");
115 return Base.Reg;
116 }
117
118 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
119
120 Register getOffsetReg() const { return OffsetReg; }
121
122 void setFI(unsigned FI) {
123 assert(isFIBase() && "Invalid base frame index access!");
124 Base.FI = FI;
125 }
126
127 unsigned getFI() const {
128 assert(isFIBase() && "Invalid base frame index access!");
129 return Base.FI;
130 }
131
132 void setOffset(int64_t O) { Offset = O; }
133 int64_t getOffset() { return Offset; }
134 void setShift(unsigned S) { Shift = S; }
135 unsigned getShift() { return Shift; }
136
137 void setGlobalValue(const GlobalValue *G) { GV = G; }
138 const GlobalValue *getGlobalValue() { return GV; }
139 };
140
141 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
142 /// make the right decision when generating code for different targets.
143 const AArch64Subtarget *Subtarget;
144 LLVMContext *Context;
145
146 bool fastLowerArguments() override;
147 bool fastLowerCall(CallLoweringInfo &CLI) override;
148 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
149
150private:
151 // Selection routines.
152 bool selectAddSub(const Instruction *I);
153 bool selectLogicalOp(const Instruction *I);
154 bool selectLoad(const Instruction *I);
155 bool selectStore(const Instruction *I);
156 bool selectBranch(const Instruction *I);
157 bool selectIndirectBr(const Instruction *I);
158 bool selectCmp(const Instruction *I);
159 bool selectSelect(const Instruction *I);
160 bool selectFPExt(const Instruction *I);
161 bool selectFPTrunc(const Instruction *I);
162 bool selectFPToInt(const Instruction *I, bool Signed);
163 bool selectIntToFP(const Instruction *I, bool Signed);
164 bool selectRem(const Instruction *I, unsigned ISDOpcode);
165 bool selectRet(const Instruction *I);
166 bool selectTrunc(const Instruction *I);
167 bool selectIntExt(const Instruction *I);
168 bool selectMul(const Instruction *I);
169 bool selectShift(const Instruction *I);
170 bool selectBitCast(const Instruction *I);
171 bool selectFRem(const Instruction *I);
172 bool selectSDiv(const Instruction *I);
173 bool selectGetElementPtr(const Instruction *I);
174 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
175
176 // Utility helper routines.
177 bool isTypeLegal(Type *Ty, MVT &VT);
178 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
179 bool isValueAvailable(const Value *V) const;
180 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
181 bool computeCallAddress(const Value *V, Address &Addr);
182 bool simplifyAddress(Address &Addr, MVT VT);
183 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
184 MachineMemOperand::Flags Flags,
185 unsigned ScaleFactor, MachineMemOperand *MMO);
186 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
187 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
188 MaybeAlign Alignment);
189 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
190 const Value *Cond);
191 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
192 bool optimizeSelect(const SelectInst *SI);
193 Register getRegForGEPIndex(const Value *Idx);
194
195 // Emit helper routines.
196 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
197 const Value *RHS, bool SetFlags = false,
198 bool WantResult = true, bool IsZExt = false);
199 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
200 Register RHSReg, bool SetFlags = false,
201 bool WantResult = true);
202 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
203 bool SetFlags = false, bool WantResult = true);
204 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
205 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
206 uint64_t ShiftImm, bool SetFlags = false,
207 bool WantResult = true);
208 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
209 Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
210 uint64_t ShiftImm, bool SetFlags = false,
211 bool WantResult = true);
212
213 // Emit functions.
214 bool emitCompareAndBranch(const BranchInst *BI);
215 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
216 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
218 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
219 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
220 MachineMemOperand *MMO = nullptr);
221 bool emitStore(MVT VT, Register SrcReg, Address Addr,
222 MachineMemOperand *MMO = nullptr);
223 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
224 MachineMemOperand *MMO = nullptr);
225 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
227 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
228 bool SetFlags = false, bool WantResult = true,
229 bool IsZExt = false);
230 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
231 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
232 bool SetFlags = false, bool WantResult = true,
233 bool IsZExt = false);
234 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
235 bool WantResult = true);
236 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
237 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
238 bool WantResult = true);
239 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
240 const Value *RHS);
241 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
242 uint64_t Imm);
243 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
244 Register RHSReg, uint64_t ShiftImm);
245 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
246 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
247 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
250 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
251 bool IsZExt = true);
252 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = false);
258
259 Register materializeInt(const ConstantInt *CI, MVT VT);
260 Register materializeFP(const ConstantFP *CFP, MVT VT);
261 Register materializeGV(const GlobalValue *GV);
262
263 // Call handling routines.
264private:
265 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
266 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
267 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
268 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
269
270public:
271 // Backend specific FastISel code.
272 Register fastMaterializeAlloca(const AllocaInst *AI) override;
273 Register fastMaterializeConstant(const Constant *C) override;
274 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
275
276 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
277 const TargetLibraryInfo *LibInfo,
278 const LibcallLoweringInfo *libcallLowering)
279 : FastISel(FuncInfo, LibInfo, libcallLowering,
280 /*SkipTargetIndependentISel=*/true) {
281 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
282 Context = &FuncInfo.Fn->getContext();
283 }
284
285 bool fastSelectInstruction(const Instruction *I) override;
286
287#include "AArch64GenFastISel.inc"
288};
289
290} // end anonymous namespace
291
292/// Check if the sign-/zero-extend will be a noop.
293static bool isIntExtFree(const Instruction *I) {
294 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
295 "Unexpected integer extend instruction.");
296 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
297 "Unexpected value type.");
298 bool IsZExt = isa<ZExtInst>(I);
299
300 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
301 if (LI->hasOneUse())
302 return true;
303
304 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
305 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
306 return true;
307
308 return false;
309}
310
311/// Determine the implicit scale factor that is applied by a memory
312/// operation for a given value type.
313static unsigned getImplicitScaleFactor(MVT VT) {
314 switch (VT.SimpleTy) {
315 default:
316 return 0; // invalid
317 case MVT::i1: // fall-through
318 case MVT::i8:
319 return 1;
320 case MVT::i16:
321 return 2;
322 case MVT::i32: // fall-through
323 case MVT::f32:
324 return 4;
325 case MVT::i64: // fall-through
326 case MVT::f64:
327 return 8;
328 }
329}
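// For illustration (assuming the standard A64 load/store encodings): the
// scale factor returned above is the implicit shift applied by the scaled,
// 12-bit unsigned-immediate forms. For example, an i32 access at byte offset
// 8 has ScaleFactor 4, so LDRWui encodes imm12 = 8 / 4 = 2; a negative or
// misaligned offset instead falls back to the unscaled LDUR* forms with a
// signed 9-bit byte offset (see simplifyAddress and emitLoad below).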
330
331CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
332 if (CC == CallingConv::GHC)
333 return CC_AArch64_GHC;
334 if (CC == CallingConv::CFGuard_Check)
335 return CC_AArch64_Win64_CFGuard_Check;
336 if (Subtarget->isTargetDarwin())
337 return CC_AArch64_DarwinPCS;
338 if (Subtarget->isTargetWindows())
339 return CC_AArch64_Win64PCS;
340 return CC_AArch64_AAPCS;
341}
342
343Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
344 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
345 "Alloca should always return a pointer.");
346
347 // Don't handle dynamic allocas.
348 auto SI = FuncInfo.StaticAllocaMap.find(AI);
349 if (SI == FuncInfo.StaticAllocaMap.end())
350 return Register();
351
352 if (SI != FuncInfo.StaticAllocaMap.end()) {
353 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
354 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
355 ResultReg)
356 .addFrameIndex(SI->second)
357 .addImm(0)
358 .addImm(0);
359 return ResultReg;
360 }
361
362 return Register();
363}
364
365Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
366 if (VT > MVT::i64)
367 return Register();
368
369 if (!CI->isZero())
370 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
371
372 // Create a copy from the zero register to materialize a "0" value.
373 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
374 : &AArch64::GPR32RegClass;
375 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
376 Register ResultReg = createResultReg(RC);
377 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
378 ResultReg).addReg(ZeroReg, getKillRegState(true));
379 return ResultReg;
380}
381
382Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
383 // Positive zero (+0.0) has to be materialized with a fmov from the zero
384 // register, because the immediate version of fmov cannot encode zero.
385 if (CFP->isNullValue())
386 return fastMaterializeFloatZero(CFP);
387
388 if (VT != MVT::f32 && VT != MVT::f64)
389 return Register();
390
391 const APFloat Val = CFP->getValueAPF();
392 bool Is64Bit = (VT == MVT::f64);
393 // This checks to see if we can use FMOV instructions to materialize
394 // a constant, otherwise we have to materialize via the constant pool.
395 int Imm =
396 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
397 if (Imm != -1) {
398 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
399 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
400 }
401
402 // For the large code model materialize the FP constant in code.
403 if (TM.getCodeModel() == CodeModel::Large) {
404 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
405 const TargetRegisterClass *RC = Is64Bit ?
406 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
407
408 Register TmpReg = createResultReg(RC);
409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
410 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
411
412 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
413 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
414 TII.get(TargetOpcode::COPY), ResultReg)
415 .addReg(TmpReg, getKillRegState(true));
416
417 return ResultReg;
418 }
419
420 // Materialize via constant pool. MachineConstantPool wants an explicit
421 // alignment.
422 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
423
424 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
425 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
427 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
428
429 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
430 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
432 .addReg(ADRPReg)
433 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
434 return ResultReg;
435}
436
437Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
438 // We can't handle thread-local variables quickly yet.
439 if (GV->isThreadLocal())
440 return Register();
441
442 // MachO still uses GOT for large code-model accesses, but ELF requires
443 // movz/movk sequences, which FastISel doesn't handle yet.
444 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
445 return Register();
446
447 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
448 return Register();
449
450 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
451
452 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
453 if (!DestEVT.isSimple())
454 return Register();
455
456 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
457 Register ResultReg;
458
459 if (OpFlags & AArch64II::MO_GOT) {
460 // ADRP + LDRX
461 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
462 ADRPReg)
463 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
464
465 unsigned LdrOpc;
466 if (Subtarget->isTargetILP32()) {
467 ResultReg = createResultReg(&AArch64::GPR32RegClass);
468 LdrOpc = AArch64::LDRWui;
469 } else {
470 ResultReg = createResultReg(&AArch64::GPR64RegClass);
471 LdrOpc = AArch64::LDRXui;
472 }
473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
474 ResultReg)
475 .addReg(ADRPReg)
476 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
477 AArch64II::MO_NC | OpFlags);
478 if (!Subtarget->isTargetILP32())
479 return ResultReg;
480
481 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
482 // so we must extend the result on ILP32.
483 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
485 TII.get(TargetOpcode::SUBREG_TO_REG))
486 .addDef(Result64)
487 .addImm(0)
488 .addReg(ResultReg, RegState::Kill)
489 .addImm(AArch64::sub_32);
490 return Result64;
491 } else {
492 // ADRP + ADDX
493 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
494 ADRPReg)
495 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
496
497 if (OpFlags & AArch64II::MO_TAGGED) {
498 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
499 // We do so by creating a MOVK that sets bits 48-63 of the register to
500 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
501 // the small code model so we can assume a binary size of <= 4GB, which
502 // makes the untagged PC relative offset positive. The binary must also be
503 // loaded into address range [0, 2^48). Both of these properties need to
504 // be ensured at runtime when using tagged addresses.
505 //
506 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
507 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
508 // are not exactly 1:1 with FastISel so we cannot easily abstract this
509 // out. At some point, it would be nice to find a way to not have this
510 // duplicate code.
511 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
512 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
513 DstReg)
514 .addReg(ADRPReg)
515 .addGlobalAddress(GV, /*Offset=*/0x100000000,
516 AArch64II::MO_PREL | AArch64II::MO_G3)
517 .addImm(48);
518 ADRPReg = DstReg;
519 }
520
521 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
523 ResultReg)
524 .addReg(ADRPReg)
525 .addGlobalAddress(GV, 0,
526 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
527 .addImm(0);
528 }
529 return ResultReg;
530}
531
532Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
533 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
534
535 // Only handle simple types.
536 if (!CEVT.isSimple())
537 return Register();
538 MVT VT = CEVT.getSimpleVT();
539 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
540 // 'null' pointers need somewhat special treatment.
541 if (isa<ConstantPointerNull>(C)) {
542 assert(VT == MVT::i64 && "Expected 64-bit pointers");
543 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
544 }
545
546 if (const auto *CI = dyn_cast<ConstantInt>(C))
547 return materializeInt(CI, VT);
548 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
549 return materializeFP(CFP, VT);
550 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
551 return materializeGV(GV);
552
553 return Register();
554}
555
556Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
557 assert(CFP->isNullValue() &&
558 "Floating-point constant is not a positive zero.");
559 MVT VT;
560 if (!isTypeLegal(CFP->getType(), VT))
561 return Register();
562
563 if (VT != MVT::f32 && VT != MVT::f64)
564 return Register();
565
566 bool Is64Bit = (VT == MVT::f64);
567 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
568 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
569 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
570}
571
572/// Check if the multiply is by a power-of-2 constant.
573static bool isMulPowOf2(const Value *I) {
574 if (const auto *MI = dyn_cast<MulOperator>(I)) {
575 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
576 if (C->getValue().isPowerOf2())
577 return true;
578 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
579 if (C->getValue().isPowerOf2())
580 return true;
581 }
582 return false;
583}
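// Example (illustrative): for IR along the lines of
//   %idx = mul i64 %i, 8
// feeding an address computation, isMulPowOf2 returns true, and the
// Instruction::Mul case in computeAddress below can fold the multiply into a
// register-offset addressing mode with LSL #3, provided the accessed type is
// 8 bytes wide so the shift matches the access size.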
584
585// Computes the address to get to an object.
586bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
587{
588 const User *U = nullptr;
589 unsigned Opcode = Instruction::UserOp1;
590 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
591 // Don't walk into other basic blocks unless the object is an alloca from
592 // another block, otherwise it may not have a virtual register assigned.
593 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
594 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
595 Opcode = I->getOpcode();
596 U = I;
597 }
598 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
599 Opcode = C->getOpcode();
600 U = C;
601 }
602
603 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
604 if (Ty->getAddressSpace() > 255)
605 // Fast instruction selection doesn't support the special
606 // address spaces.
607 return false;
608
609 switch (Opcode) {
610 default:
611 break;
612 case Instruction::BitCast:
613 // Look through bitcasts.
614 return computeAddress(U->getOperand(0), Addr, Ty);
615
616 case Instruction::IntToPtr:
617 // Look past no-op inttoptrs.
618 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
619 TLI.getPointerTy(DL))
620 return computeAddress(U->getOperand(0), Addr, Ty);
621 break;
622
623 case Instruction::PtrToInt:
624 // Look past no-op ptrtoints.
625 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
626 return computeAddress(U->getOperand(0), Addr, Ty);
627 break;
628
629 case Instruction::GetElementPtr: {
630 Address SavedAddr = Addr;
631 uint64_t TmpOffset = Addr.getOffset();
632
633 // Iterate through the GEP folding the constants into offsets where
634 // we can.
635 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
636 GTI != E; ++GTI) {
637 const Value *Op = GTI.getOperand();
638 if (StructType *STy = GTI.getStructTypeOrNull()) {
639 const StructLayout *SL = DL.getStructLayout(STy);
640 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
641 TmpOffset += SL->getElementOffset(Idx);
642 } else {
643 uint64_t S = GTI.getSequentialElementStride(DL);
644 while (true) {
645 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
646 // Constant-offset addressing.
647 TmpOffset += CI->getSExtValue() * S;
648 break;
649 }
650 if (canFoldAddIntoGEP(U, Op)) {
651 // A compatible add with a constant operand. Fold the constant.
652 ConstantInt *CI =
653 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
654 TmpOffset += CI->getSExtValue() * S;
655 // Iterate on the other operand.
656 Op = cast<AddOperator>(Op)->getOperand(0);
657 continue;
658 }
659 // Unsupported
660 goto unsupported_gep;
661 }
662 }
663 }
664
665 // Try to grab the base operand now.
666 Addr.setOffset(TmpOffset);
667 if (computeAddress(U->getOperand(0), Addr, Ty))
668 return true;
669
670 // We failed, restore everything and try the other options.
671 Addr = SavedAddr;
672
673 unsupported_gep:
674 break;
675 }
676 case Instruction::Alloca: {
677 const AllocaInst *AI = cast<AllocaInst>(Obj);
678 DenseMap<const AllocaInst *, int>::iterator SI =
679 FuncInfo.StaticAllocaMap.find(AI);
680 if (SI != FuncInfo.StaticAllocaMap.end()) {
681 Addr.setKind(Address::FrameIndexBase);
682 Addr.setFI(SI->second);
683 return true;
684 }
685 break;
686 }
687 case Instruction::Add: {
688 // Adds of constants are common and easy enough.
689 const Value *LHS = U->getOperand(0);
690 const Value *RHS = U->getOperand(1);
691
692 if (isa<ConstantInt>(LHS))
693 std::swap(LHS, RHS);
694
695 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
696 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
697 return computeAddress(LHS, Addr, Ty);
698 }
699
700 Address Backup = Addr;
701 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
702 return true;
703 Addr = Backup;
704
705 break;
706 }
707 case Instruction::Sub: {
708 // Subs of constants are common and easy enough.
709 const Value *LHS = U->getOperand(0);
710 const Value *RHS = U->getOperand(1);
711
712 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
713 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
714 return computeAddress(LHS, Addr, Ty);
715 }
716 break;
717 }
718 case Instruction::Shl: {
719 if (Addr.getOffsetReg())
720 break;
721
722 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
723 if (!CI)
724 break;
725
726 unsigned Val = CI->getZExtValue();
727 if (Val < 1 || Val > 3)
728 break;
729
730 uint64_t NumBytes = 0;
731 if (Ty && Ty->isSized()) {
732 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
733 NumBytes = NumBits / 8;
734 if (!isPowerOf2_64(NumBits))
735 NumBytes = 0;
736 }
737
738 if (NumBytes != (1ULL << Val))
739 break;
740
741 Addr.setShift(Val);
742 Addr.setExtendType(AArch64_AM::LSL);
743
744 const Value *Src = U->getOperand(0);
745 if (const auto *I = dyn_cast<Instruction>(Src)) {
746 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
747 // Fold the zext or sext when it won't become a noop.
748 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
749 if (!isIntExtFree(ZE) &&
750 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
751 Addr.setExtendType(AArch64_AM::UXTW);
752 Src = ZE->getOperand(0);
753 }
754 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
755 if (!isIntExtFree(SE) &&
756 SE->getOperand(0)->getType()->isIntegerTy(32)) {
757 Addr.setExtendType(AArch64_AM::SXTW);
758 Src = SE->getOperand(0);
759 }
760 }
761 }
762 }
763
764 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
765 if (AI->getOpcode() == Instruction::And) {
766 const Value *LHS = AI->getOperand(0);
767 const Value *RHS = AI->getOperand(1);
768
769 if (const auto *C = dyn_cast<ConstantInt>(LHS))
770 if (C->getValue() == 0xffffffff)
771 std::swap(LHS, RHS);
772
773 if (const auto *C = dyn_cast<ConstantInt>(RHS))
774 if (C->getValue() == 0xffffffff) {
775 Addr.setExtendType(AArch64_AM::UXTW);
776 Register Reg = getRegForValue(LHS);
777 if (!Reg)
778 return false;
779 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
780 Addr.setOffsetReg(Reg);
781 return true;
782 }
783 }
784
785 Register Reg = getRegForValue(Src);
786 if (!Reg)
787 return false;
788 Addr.setOffsetReg(Reg);
789 return true;
790 }
791 case Instruction::Mul: {
792 if (Addr.getOffsetReg())
793 break;
794
795 if (!isMulPowOf2(U))
796 break;
797
798 const Value *LHS = U->getOperand(0);
799 const Value *RHS = U->getOperand(1);
800
801 // Canonicalize power-of-2 value to the RHS.
802 if (const auto *C = dyn_cast<ConstantInt>(LHS))
803 if (C->getValue().isPowerOf2())
804 std::swap(LHS, RHS);
805
806 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
807 const auto *C = cast<ConstantInt>(RHS);
808 unsigned Val = C->getValue().logBase2();
809 if (Val < 1 || Val > 3)
810 break;
811
812 uint64_t NumBytes = 0;
813 if (Ty && Ty->isSized()) {
814 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
815 NumBytes = NumBits / 8;
816 if (!isPowerOf2_64(NumBits))
817 NumBytes = 0;
818 }
819
820 if (NumBytes != (1ULL << Val))
821 break;
822
823 Addr.setShift(Val);
824 Addr.setExtendType(AArch64_AM::LSL);
825
826 const Value *Src = LHS;
827 if (const auto *I = dyn_cast<Instruction>(Src)) {
828 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
829 // Fold the zext or sext when it won't become a noop.
830 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
831 if (!isIntExtFree(ZE) &&
832 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
833 Addr.setExtendType(AArch64_AM::UXTW);
834 Src = ZE->getOperand(0);
835 }
836 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
837 if (!isIntExtFree(SE) &&
838 SE->getOperand(0)->getType()->isIntegerTy(32)) {
839 Addr.setExtendType(AArch64_AM::SXTW);
840 Src = SE->getOperand(0);
841 }
842 }
843 }
844 }
845
846 Register Reg = getRegForValue(Src);
847 if (!Reg)
848 return false;
849 Addr.setOffsetReg(Reg);
850 return true;
851 }
852 case Instruction::And: {
853 if (Addr.getOffsetReg())
854 break;
855
856 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
857 break;
858
859 const Value *LHS = U->getOperand(0);
860 const Value *RHS = U->getOperand(1);
861
862 if (const auto *C = dyn_cast<ConstantInt>(LHS))
863 if (C->getValue() == 0xffffffff)
864 std::swap(LHS, RHS);
865
866 if (const auto *C = dyn_cast<ConstantInt>(RHS))
867 if (C->getValue() == 0xffffffff) {
868 Addr.setShift(0);
869 Addr.setExtendType(AArch64_AM::LSL);
870 Addr.setExtendType(AArch64_AM::UXTW);
871
872 Register Reg = getRegForValue(LHS);
873 if (!Reg)
874 return false;
875 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
876 Addr.setOffsetReg(Reg);
877 return true;
878 }
879 break;
880 }
881 case Instruction::SExt:
882 case Instruction::ZExt: {
883 if (!Addr.getReg() || Addr.getOffsetReg())
884 break;
885
886 const Value *Src = nullptr;
887 // Fold the zext or sext when it won't become a noop.
888 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
889 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
890 Addr.setExtendType(AArch64_AM::UXTW);
891 Src = ZE->getOperand(0);
892 }
893 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
894 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
895 Addr.setExtendType(AArch64_AM::SXTW);
896 Src = SE->getOperand(0);
897 }
898 }
899
900 if (!Src)
901 break;
902
903 Addr.setShift(0);
904 Register Reg = getRegForValue(Src);
905 if (!Reg)
906 return false;
907 Addr.setOffsetReg(Reg);
908 return true;
909 }
910 } // end switch
911
912 if (Addr.isRegBase() && !Addr.getReg()) {
913 Register Reg = getRegForValue(Obj);
914 if (!Reg)
915 return false;
916 Addr.setReg(Reg);
917 return true;
918 }
919
920 if (!Addr.getOffsetReg()) {
921 Register Reg = getRegForValue(Obj);
922 if (!Reg)
923 return false;
924 Addr.setOffsetReg(Reg);
925 return true;
926 }
927
928 return false;
929}
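// Example of what computeAddress produces (a sketch): for
//   %p = getelementptr { i32, i32 }, ptr %s, i64 0, i32 1
//   %v = load i32, ptr %p
// the GetElementPtr case folds the struct layout into Offset = 4 with %s as
// the register base; simplifyAddress/emitLoad can then encode this as LDRWui
// with a scaled immediate of 1.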
930
931bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
932 const User *U = nullptr;
933 unsigned Opcode = Instruction::UserOp1;
934 bool InMBB = true;
935
936 if (const auto *I = dyn_cast<Instruction>(V)) {
937 Opcode = I->getOpcode();
938 U = I;
939 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
940 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
941 Opcode = C->getOpcode();
942 U = C;
943 }
944
945 switch (Opcode) {
946 default: break;
947 case Instruction::BitCast:
948 // Look past bitcasts if its operand is in the same BB.
949 if (InMBB)
950 return computeCallAddress(U->getOperand(0), Addr);
951 break;
952 case Instruction::IntToPtr:
953 // Look past no-op inttoptrs if its operand is in the same BB.
954 if (InMBB &&
955 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
956 TLI.getPointerTy(DL))
957 return computeCallAddress(U->getOperand(0), Addr);
958 break;
959 case Instruction::PtrToInt:
960 // Look past no-op ptrtoints if its operand is in the same BB.
961 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
962 return computeCallAddress(U->getOperand(0), Addr);
963 break;
964 }
965
966 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
967 Addr.setGlobalValue(GV);
968 return true;
969 }
970
971 // If all else fails, try to materialize the value in a register.
972 if (!Addr.getGlobalValue()) {
973 Addr.setReg(getRegForValue(V));
974 return Addr.getReg().isValid();
975 }
976
977 return false;
978}
979
980bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
981 EVT evt = TLI.getValueType(DL, Ty, true);
982
983 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
984 return false;
985
986 // Only handle simple types.
987 if (evt == MVT::Other || !evt.isSimple())
988 return false;
989 VT = evt.getSimpleVT();
990
991 // This is a legal type, but it's not something we handle in fast-isel.
992 if (VT == MVT::f128)
993 return false;
994
995 // Handle all other legal types, i.e. a register that will directly hold this
996 // value.
997 return TLI.isTypeLegal(VT);
998}
999
1000/// Determine if the value type is supported by FastISel.
1001///
1002/// FastISel for AArch64 can handle more value types than are legal. This adds
1003/// simple value types such as i1, i8, and i16.
1004bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1005 if (Ty->isVectorTy() && !IsVectorAllowed)
1006 return false;
1007
1008 if (isTypeLegal(Ty, VT))
1009 return true;
1010
1011 // If this is a type that can be sign- or zero-extended to a basic operation,
1012 // go ahead and accept it now.
1013 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1014 return true;
1015
1016 return false;
1017}
1018
1019bool AArch64FastISel::isValueAvailable(const Value *V) const {
1020 if (!isa<Instruction>(V))
1021 return true;
1022
1023 const auto *I = cast<Instruction>(V);
1024 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1025}
1026
1027bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1028 if (Subtarget->isTargetILP32())
1029 return false;
1030
1031 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1032 if (!ScaleFactor)
1033 return false;
1034
1035 bool ImmediateOffsetNeedsLowering = false;
1036 bool RegisterOffsetNeedsLowering = false;
1037 int64_t Offset = Addr.getOffset();
1038 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1039 ImmediateOffsetNeedsLowering = true;
1040 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1041 !isUInt<12>(Offset / ScaleFactor))
1042 ImmediateOffsetNeedsLowering = true;
1043
1044 // Cannot encode an offset register and an immediate offset in the same
1045 // instruction. Fold the immediate offset into the load/store instruction and
1046 // emit an additional add to take care of the offset register.
1047 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1048 RegisterOffsetNeedsLowering = true;
1049
1050 // Cannot encode zero register as base.
1051 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1052 RegisterOffsetNeedsLowering = true;
1053
1054 // If this is a stack pointer and the offset needs to be simplified then put
1055 // the alloca address into a register, set the base type back to register and
1056 // continue. This should almost never happen.
1057 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1058 {
1059 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1060 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1061 ResultReg)
1062 .addFrameIndex(Addr.getFI())
1063 .addImm(0)
1064 .addImm(0);
1065 Addr.setKind(Address::RegBase);
1066 Addr.setReg(ResultReg);
1067 }
1068
1069 if (RegisterOffsetNeedsLowering) {
1070 Register ResultReg;
1071 if (Addr.getReg()) {
1072 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1073 Addr.getExtendType() == AArch64_AM::UXTW )
1074 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1075 Addr.getOffsetReg(), Addr.getExtendType(),
1076 Addr.getShift());
1077 else
1078 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1079 Addr.getOffsetReg(), AArch64_AM::LSL,
1080 Addr.getShift());
1081 } else {
1082 if (Addr.getExtendType() == AArch64_AM::UXTW)
1083 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1084 Addr.getShift(), /*IsZExt=*/true);
1085 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1086 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1087 Addr.getShift(), /*IsZExt=*/false);
1088 else
1089 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1090 Addr.getShift());
1091 }
1092 if (!ResultReg)
1093 return false;
1094
1095 Addr.setReg(ResultReg);
1096 Addr.setOffsetReg(0);
1097 Addr.setShift(0);
1098 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1099 }
1100
1101 // Since the offset is too large for the load/store instruction get the
1102 // reg+offset into a register.
1103 if (ImmediateOffsetNeedsLowering) {
1104 Register ResultReg;
1105 if (Addr.getReg())
1106 // Try to fold the immediate into the add instruction.
1107 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1108 else
1109 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1110
1111 if (!ResultReg)
1112 return false;
1113 Addr.setReg(ResultReg);
1114 Addr.setOffset(0);
1115 }
1116 return true;
1117}
1118
1119void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1120 const MachineInstrBuilder &MIB,
1121 MachineMemOperand::Flags Flags,
1122 unsigned ScaleFactor,
1123 MachineMemOperand *MMO) {
1124 int64_t Offset = Addr.getOffset() / ScaleFactor;
1125 // Frame base works a bit differently. Handle it separately.
1126 if (Addr.isFIBase()) {
1127 int FI = Addr.getFI();
1128 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1129 // and alignment should be based on the VT.
1130 MMO = FuncInfo.MF->getMachineMemOperand(
1131 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1132 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1133 // Now add the rest of the operands.
1134 MIB.addFrameIndex(FI).addImm(Offset);
1135 } else {
1136 assert(Addr.isRegBase() && "Unexpected address kind.");
1137 const MCInstrDesc &II = MIB->getDesc();
1138 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1139 Addr.setReg(
1140 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1141 Addr.setOffsetReg(
1142 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1143 if (Addr.getOffsetReg()) {
1144 assert(Addr.getOffset() == 0 && "Unexpected offset");
1145 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1146 Addr.getExtendType() == AArch64_AM::SXTX;
1147 MIB.addReg(Addr.getReg());
1148 MIB.addReg(Addr.getOffsetReg());
1149 MIB.addImm(IsSigned);
1150 MIB.addImm(Addr.getShift() != 0);
1151 } else
1152 MIB.addReg(Addr.getReg()).addImm(Offset);
1153 }
1154
1155 if (MMO)
1156 MIB.addMemOperand(MMO);
1157}
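// Sketch of the operand order this helper produces (assuming the usual
// ro/ui instruction forms):
//   register offset:  LDRWroX dst, base, offsetreg, <sign-extend?>, <shift?>
//   scaled immediate: LDRWui  dst, base, Offset / ScaleFactor
// The two boolean immediates added in the register-offset case select signed
// (SXTW/SXTX) versus unsigned extension of the offset register and whether
// the offset register is shifted by the access size.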
1158
1159Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1160 const Value *RHS, bool SetFlags,
1161 bool WantResult, bool IsZExt) {
1162 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1163 bool NeedExtend = false;
1164 switch (RetVT.SimpleTy) {
1165 default:
1166 return Register();
1167 case MVT::i1:
1168 NeedExtend = true;
1169 break;
1170 case MVT::i8:
1171 NeedExtend = true;
1172 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1173 break;
1174 case MVT::i16:
1175 NeedExtend = true;
1176 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1177 break;
1178 case MVT::i32: // fall-through
1179 case MVT::i64:
1180 break;
1181 }
1182 MVT SrcVT = RetVT;
1183 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1184
1185 // Canonicalize immediates to the RHS first.
1186 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1187 std::swap(LHS, RHS);
1188
1189 // Canonicalize mul by power of 2 to the RHS.
1190 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1191 if (isMulPowOf2(LHS))
1192 std::swap(LHS, RHS);
1193
1194 // Canonicalize shift immediate to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1197 if (isa<ConstantInt>(SI->getOperand(1)))
1198 if (SI->getOpcode() == Instruction::Shl ||
1199 SI->getOpcode() == Instruction::LShr ||
1200 SI->getOpcode() == Instruction::AShr )
1201 std::swap(LHS, RHS);
1202
1203 Register LHSReg = getRegForValue(LHS);
1204 if (!LHSReg)
1205 return Register();
1206
1207 if (NeedExtend)
1208 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1209
1210 Register ResultReg;
1211 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1212 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1213 if (C->isNegative())
1214 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1215 WantResult);
1216 else
1217 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1218 WantResult);
1219 } else if (const auto *C = dyn_cast<Constant>(RHS))
1220 if (C->isNullValue())
1221 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1222
1223 if (ResultReg)
1224 return ResultReg;
1225
1226 // Only extend the RHS within the instruction if there is a valid extend type.
1227 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1228 isValueAvailable(RHS)) {
1229 Register RHSReg = getRegForValue(RHS);
1230 if (!RHSReg)
1231 return Register();
1232 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1233 SetFlags, WantResult);
1234 }
1235
1236 // Check if the mul can be folded into the instruction.
1237 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1238 if (isMulPowOf2(RHS)) {
1239 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1240 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1241
1242 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1243 if (C->getValue().isPowerOf2())
1244 std::swap(MulLHS, MulRHS);
1245
1246 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1247 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1248 Register RHSReg = getRegForValue(MulLHS);
1249 if (!RHSReg)
1250 return Register();
1251 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1252 ShiftVal, SetFlags, WantResult);
1253 if (ResultReg)
1254 return ResultReg;
1255 }
1256 }
1257
1258 // Check if the shift can be folded into the instruction.
1259 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1260 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1261 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1262 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1263 switch (SI->getOpcode()) {
1264 default: break;
1265 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1266 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1267 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1268 }
1269 uint64_t ShiftVal = C->getZExtValue();
1270 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1271 Register RHSReg = getRegForValue(SI->getOperand(0));
1272 if (!RHSReg)
1273 return Register();
1274 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1275 ShiftVal, SetFlags, WantResult);
1276 if (ResultReg)
1277 return ResultReg;
1278 }
1279 }
1280 }
1281 }
1282
1283 Register RHSReg = getRegForValue(RHS);
1284 if (!RHSReg)
1285 return Register();
1286
1287 if (NeedExtend)
1288 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1289
1290 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1291}
1292
1293Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1294 Register RHSReg, bool SetFlags,
1295 bool WantResult) {
1296 assert(LHSReg && RHSReg && "Invalid register number.");
1297
1298 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1299 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1300 return Register();
1301
1302 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1303 return Register();
1304
1305 static const unsigned OpcTable[2][2][2] = {
1306 { { AArch64::SUBWrr, AArch64::SUBXrr },
1307 { AArch64::ADDWrr, AArch64::ADDXrr } },
1308 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1309 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1310 };
1311 bool Is64Bit = RetVT == MVT::i64;
1312 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1313 const TargetRegisterClass *RC =
1314 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1315 Register ResultReg;
1316 if (WantResult)
1317 ResultReg = createResultReg(RC);
1318 else
1319 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1320
1321 const MCInstrDesc &II = TII.get(Opc);
1322 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1323 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1324 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1325 .addReg(LHSReg)
1326 .addReg(RHSReg);
1327 return ResultReg;
1328}
1329
1330Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1331 uint64_t Imm, bool SetFlags,
1332 bool WantResult) {
1333 assert(LHSReg && "Invalid register number.");
1334
1335 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1336 return Register();
1337
1338 unsigned ShiftImm;
1339 if (isUInt<12>(Imm))
1340 ShiftImm = 0;
1341 else if ((Imm & 0xfff000) == Imm) {
1342 ShiftImm = 12;
1343 Imm >>= 12;
1344 } else
1345 return Register();
1346
1347 static const unsigned OpcTable[2][2][2] = {
1348 { { AArch64::SUBWri, AArch64::SUBXri },
1349 { AArch64::ADDWri, AArch64::ADDXri } },
1350 { { AArch64::SUBSWri, AArch64::SUBSXri },
1351 { AArch64::ADDSWri, AArch64::ADDSXri } }
1352 };
1353 bool Is64Bit = RetVT == MVT::i64;
1354 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1355 const TargetRegisterClass *RC;
1356 if (SetFlags)
1357 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1358 else
1359 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1360 Register ResultReg;
1361 if (WantResult)
1362 ResultReg = createResultReg(RC);
1363 else
1364 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1365
1366 const MCInstrDesc &II = TII.get(Opc);
1367 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1368 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1369 .addReg(LHSReg)
1370 .addImm(Imm)
1371 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1372 return ResultReg;
1373}
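// Worked example (illustrative): the ri forms take a 12-bit unsigned
// immediate, optionally shifted left by 12. Imm = 0x5000 is therefore emitted
// above as Imm = 0x5 with an LSL #12 shifter, while Imm = 0x1001 fits neither
// form, so the helper returns an invalid Register and the caller has to
// materialize the constant into a register first (see emitAdd_ri_).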
1374
1375Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1376 Register RHSReg,
1377 AArch64_AM::ShiftExtendType ShiftType,
1378 uint64_t ShiftImm, bool SetFlags,
1379 bool WantResult) {
1380 assert(LHSReg && RHSReg && "Invalid register number.");
1381 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1382 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1383
1384 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1385 return Register();
1386
1387 // Don't deal with undefined shifts.
1388 if (ShiftImm >= RetVT.getSizeInBits())
1389 return Register();
1390
1391 static const unsigned OpcTable[2][2][2] = {
1392 { { AArch64::SUBWrs, AArch64::SUBXrs },
1393 { AArch64::ADDWrs, AArch64::ADDXrs } },
1394 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1395 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1396 };
1397 bool Is64Bit = RetVT == MVT::i64;
1398 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1399 const TargetRegisterClass *RC =
1400 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1401 Register ResultReg;
1402 if (WantResult)
1403 ResultReg = createResultReg(RC);
1404 else
1405 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1406
1407 const MCInstrDesc &II = TII.get(Opc);
1408 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1409 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1411 .addReg(LHSReg)
1412 .addReg(RHSReg)
1413 .addImm(getShifterImm(ShiftType, ShiftImm));
1414 return ResultReg;
1415}
1416
1417Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1418 Register RHSReg,
1419 AArch64_AM::ShiftExtendType ExtType,
1420 uint64_t ShiftImm, bool SetFlags,
1421 bool WantResult) {
1422 assert(LHSReg && RHSReg && "Invalid register number.");
1423 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1424 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1425
1426 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1427 return Register();
1428
1429 if (ShiftImm >= 4)
1430 return Register();
1431
1432 static const unsigned OpcTable[2][2][2] = {
1433 { { AArch64::SUBWrx, AArch64::SUBXrx },
1434 { AArch64::ADDWrx, AArch64::ADDXrx } },
1435 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1436 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1437 };
1438 bool Is64Bit = RetVT == MVT::i64;
1439 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1440 const TargetRegisterClass *RC = nullptr;
1441 if (SetFlags)
1442 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1443 else
1444 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1445 Register ResultReg;
1446 if (WantResult)
1447 ResultReg = createResultReg(RC);
1448 else
1449 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1450
1451 const MCInstrDesc &II = TII.get(Opc);
1452 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1453 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1455 .addReg(LHSReg)
1456 .addReg(RHSReg)
1457 .addImm(getArithExtendImm(ExtType, ShiftImm));
1458 return ResultReg;
1459}
1460
1461bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1462 Type *Ty = LHS->getType();
1463 EVT EVT = TLI.getValueType(DL, Ty, true);
1464 if (!EVT.isSimple())
1465 return false;
1466 MVT VT = EVT.getSimpleVT();
1467
1468 switch (VT.SimpleTy) {
1469 default:
1470 return false;
1471 case MVT::i1:
1472 case MVT::i8:
1473 case MVT::i16:
1474 case MVT::i32:
1475 case MVT::i64:
1476 return emitICmp(VT, LHS, RHS, IsZExt);
1477 case MVT::f32:
1478 case MVT::f64:
1479 return emitFCmp(VT, LHS, RHS);
1480 }
1481}
1482
1483bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1484 bool IsZExt) {
1485 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1486 IsZExt)
1487 .isValid();
1488}
1489
1490bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1491 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1492 /*SetFlags=*/true, /*WantResult=*/false)
1493 .isValid();
1494}
1495
1496bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1497 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1498 return false;
1499
1500 // Check to see if the 2nd operand is a constant that we can encode directly
1501 // in the compare.
1502 bool UseImm = false;
1503 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1504 if (CFP->isZero() && !CFP->isNegative())
1505 UseImm = true;
1506
1507 Register LHSReg = getRegForValue(LHS);
1508 if (!LHSReg)
1509 return false;
1510
1511 if (UseImm) {
1512 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1514 .addReg(LHSReg);
1515 return true;
1516 }
1517
1518 Register RHSReg = getRegForValue(RHS);
1519 if (!RHSReg)
1520 return false;
1521
1522 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1524 .addReg(LHSReg)
1525 .addReg(RHSReg);
1526 return true;
1527}
1528
1529Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1530 bool SetFlags, bool WantResult, bool IsZExt) {
1531 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1532 IsZExt);
1533}
1534
1535/// This method is a wrapper to simplify add emission.
1536///
1537/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1538/// that fails, then try to materialize the immediate into a register and use
1539/// emitAddSub_rr instead.
1540Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1541 Register ResultReg;
1542 if (Imm < 0)
1543 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1544 else
1545 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1546
1547 if (ResultReg)
1548 return ResultReg;
1549
1550 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1551 if (!CReg)
1552 return Register();
1553
1554 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1555 return ResultReg;
1556}
1557
1558Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1559 bool SetFlags, bool WantResult, bool IsZExt) {
1560 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1561 IsZExt);
1562}
1563
1564Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1565 Register RHSReg, bool WantResult) {
1566 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1567 /*SetFlags=*/true, WantResult);
1568}
1569
1570Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1571 Register RHSReg,
1572 AArch64_AM::ShiftExtendType ShiftType,
1573 uint64_t ShiftImm, bool WantResult) {
1574 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1575 ShiftImm, /*SetFlags=*/true, WantResult);
1576}
1577
1578Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1579 const Value *LHS, const Value *RHS) {
1580 // Canonicalize immediates to the RHS first.
1581 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1582 std::swap(LHS, RHS);
1583
1584 // Canonicalize mul by power-of-2 to the RHS.
1585 if (LHS->hasOneUse() && isValueAvailable(LHS))
1586 if (isMulPowOf2(LHS))
1587 std::swap(LHS, RHS);
1588
1589 // Canonicalize shift immediate to the RHS.
1590 if (LHS->hasOneUse() && isValueAvailable(LHS))
1591 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1592 if (isa<ConstantInt>(SI->getOperand(1)))
1593 std::swap(LHS, RHS);
1594
1595 Register LHSReg = getRegForValue(LHS);
1596 if (!LHSReg)
1597 return Register();
1598
1599 Register ResultReg;
1600 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1601 uint64_t Imm = C->getZExtValue();
1602 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1603 }
1604 if (ResultReg)
1605 return ResultReg;
1606
1607 // Check if the mul can be folded into the instruction.
1608 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1609 if (isMulPowOf2(RHS)) {
1610 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1611 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1612
1613 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1614 if (C->getValue().isPowerOf2())
1615 std::swap(MulLHS, MulRHS);
1616
1617 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1618 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1619
1620 Register RHSReg = getRegForValue(MulLHS);
1621 if (!RHSReg)
1622 return Register();
1623 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1624 if (ResultReg)
1625 return ResultReg;
1626 }
1627 }
1628
1629 // Check if the shift can be folded into the instruction.
1630 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1631 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1632 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1633 uint64_t ShiftVal = C->getZExtValue();
1634 Register RHSReg = getRegForValue(SI->getOperand(0));
1635 if (!RHSReg)
1636 return Register();
1637 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1638 if (ResultReg)
1639 return ResultReg;
1640 }
1641 }
1642
1643 Register RHSReg = getRegForValue(RHS);
1644 if (!RHSReg)
1645 return Register();
1646
1647 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1648 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1649 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1650 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1651 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1652 }
1653 return ResultReg;
1654}
1655
1656Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1657 Register LHSReg, uint64_t Imm) {
1658 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1659 "ISD nodes are not consecutive!");
1660 static const unsigned OpcTable[3][2] = {
1661 { AArch64::ANDWri, AArch64::ANDXri },
1662 { AArch64::ORRWri, AArch64::ORRXri },
1663 { AArch64::EORWri, AArch64::EORXri }
1664 };
1665 const TargetRegisterClass *RC;
1666 unsigned Opc;
1667 unsigned RegSize;
1668 switch (RetVT.SimpleTy) {
1669 default:
1670 return Register();
1671 case MVT::i1:
1672 case MVT::i8:
1673 case MVT::i16:
1674 case MVT::i32: {
1675 unsigned Idx = ISDOpc - ISD::AND;
1676 Opc = OpcTable[Idx][0];
1677 RC = &AArch64::GPR32spRegClass;
1678 RegSize = 32;
1679 break;
1680 }
1681 case MVT::i64:
1682 Opc = OpcTable[ISDOpc - ISD::AND][1];
1683 RC = &AArch64::GPR64spRegClass;
1684 RegSize = 64;
1685 break;
1686 }
1687
1688 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1689 return Register();
1690
1691 Register ResultReg =
1692 fastEmitInst_ri(Opc, RC, LHSReg,
1693 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1694 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1695 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1696 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1697 }
1698 return ResultReg;
1699}
1700
1701Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1702 Register LHSReg, Register RHSReg,
1703 uint64_t ShiftImm) {
1704 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705 "ISD nodes are not consecutive!");
1706 static const unsigned OpcTable[3][2] = {
1707 { AArch64::ANDWrs, AArch64::ANDXrs },
1708 { AArch64::ORRWrs, AArch64::ORRXrs },
1709 { AArch64::EORWrs, AArch64::EORXrs }
1710 };
1711
1712 // Don't deal with undefined shifts.
1713 if (ShiftImm >= RetVT.getSizeInBits())
1714 return Register();
1715
1716 const TargetRegisterClass *RC;
1717 unsigned Opc;
1718 switch (RetVT.SimpleTy) {
1719 default:
1720 return Register();
1721 case MVT::i1:
1722 case MVT::i8:
1723 case MVT::i16:
1724 case MVT::i32:
1725 Opc = OpcTable[ISDOpc - ISD::AND][0];
1726 RC = &AArch64::GPR32RegClass;
1727 break;
1728 case MVT::i64:
1729 Opc = OpcTable[ISDOpc - ISD::AND][1];
1730 RC = &AArch64::GPR64RegClass;
1731 break;
1732 }
1733 Register ResultReg =
1734 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1735 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1739 }
1740 return ResultReg;
1741}
1742
1743Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1744 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1745}
1746
1747Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1748 bool WantZExt, MachineMemOperand *MMO) {
1749 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1750 return Register();
1751
1752 // Simplify this down to something we can handle.
1753 if (!simplifyAddress(Addr, VT))
1754 return Register();
1755
1756 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1757 if (!ScaleFactor)
1758 llvm_unreachable("Unexpected value type.");
1759
1760 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1761 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1762 bool UseScaled = true;
1763 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1764 UseScaled = false;
1765 ScaleFactor = 1;
1766 }
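// e.g. for an i32 load (ScaleFactor == 4), offset 8 stays scaled (LDRWui with
// imm 2), while offset -4 or 6 falls back to the unscaled LDURWi form, which
// takes the byte offset directly.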
1767
1768 static const unsigned GPOpcTable[2][8][4] = {
1769 // Sign-extend.
1770 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1771 AArch64::LDURXi },
1772 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1773 AArch64::LDURXi },
1774 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1775 AArch64::LDRXui },
1776 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1777 AArch64::LDRXui },
1778 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1779 AArch64::LDRXroX },
1780 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1781 AArch64::LDRXroX },
1782 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1783 AArch64::LDRXroW },
1784 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1785 AArch64::LDRXroW }
1786 },
1787 // Zero-extend.
1788 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1789 AArch64::LDURXi },
1790 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1791 AArch64::LDURXi },
1792 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1793 AArch64::LDRXui },
1794 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1795 AArch64::LDRXui },
1796 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1797 AArch64::LDRXroX },
1798 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1799 AArch64::LDRXroX },
1800 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1801 AArch64::LDRXroW },
1802 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1803 AArch64::LDRXroW }
1804 }
1805 };
1806
1807 static const unsigned FPOpcTable[4][2] = {
1808 { AArch64::LDURSi, AArch64::LDURDi },
1809 { AArch64::LDRSui, AArch64::LDRDui },
1810 { AArch64::LDRSroX, AArch64::LDRDroX },
1811 { AArch64::LDRSroW, AArch64::LDRDroW }
1812 };
1813
1814 unsigned Opc;
1815 const TargetRegisterClass *RC;
1816 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1817 Addr.getOffsetReg();
1818 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1819 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1820 Addr.getExtendType() == AArch64_AM::SXTW)
1821 Idx++;
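// GPOpcTable is indexed as [WantZExt][2 * Idx + IsRet64Bit][size]: row pairs
// cover unscaled (LDUR*), scaled (LDR*ui), X-register-offset (LDR*roX) and
// extended W-register-offset (LDR*roW) addressing; within a pair, the second
// row is the variant that produces a 64-bit result.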
1822
1823 bool IsRet64Bit = RetVT == MVT::i64;
1824 switch (VT.SimpleTy) {
1825 default:
1826 llvm_unreachable("Unexpected value type.");
1827 case MVT::i1: // Intentional fall-through.
1828 case MVT::i8:
1829 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1830 RC = (IsRet64Bit && !WantZExt) ?
1831 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1832 break;
1833 case MVT::i16:
1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1835 RC = (IsRet64Bit && !WantZExt) ?
1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 break;
1838 case MVT::i32:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i64:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1845 RC = &AArch64::GPR64RegClass;
1846 break;
1847 case MVT::f32:
1848 Opc = FPOpcTable[Idx][0];
1849 RC = &AArch64::FPR32RegClass;
1850 break;
1851 case MVT::f64:
1852 Opc = FPOpcTable[Idx][1];
1853 RC = &AArch64::FPR64RegClass;
1854 break;
1855 }
1856
1857 // Create the base instruction, then add the operands.
1858 Register ResultReg = createResultReg(RC);
1859 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1860 TII.get(Opc), ResultReg);
1861 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1862
1863 // Loading an i1 requires special handling.
1864 if (VT == MVT::i1) {
1865 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1866 assert(ANDReg && "Unexpected AND instruction emission failure.");
1867 ResultReg = ANDReg;
1868 }
1869
1870 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1871 // the 32bit reg to a 64bit reg.
1872 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1873 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1874 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1875 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1876 .addImm(0)
1877 .addReg(ResultReg, getKillRegState(true))
1878 .addImm(AArch64::sub_32);
1879 ResultReg = Reg64;
1880 }
1881 return ResultReg;
1882}
1883
1884bool AArch64FastISel::selectAddSub(const Instruction *I) {
1885 MVT VT;
1886 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1887 return false;
1888
1889 if (VT.isVector())
1890 return selectOperator(I, I->getOpcode());
1891
1892 Register ResultReg;
1893 switch (I->getOpcode()) {
1894 default:
1895 llvm_unreachable("Unexpected instruction.");
1896 case Instruction::Add:
1897 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1898 break;
1899 case Instruction::Sub:
1900 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1901 break;
1902 }
1903 if (!ResultReg)
1904 return false;
1905
1906 updateValueMap(I, ResultReg);
1907 return true;
1908}
1909
1910bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1911 MVT VT;
1912 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1913 return false;
1914
1915 if (VT.isVector())
1916 return selectOperator(I, I->getOpcode());
1917
1918 Register ResultReg;
1919 switch (I->getOpcode()) {
1920 default:
1921 llvm_unreachable("Unexpected instruction.");
1922 case Instruction::And:
1923 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1924 break;
1925 case Instruction::Or:
1926 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1927 break;
1928 case Instruction::Xor:
1929 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1930 break;
1931 }
1932 if (!ResultReg)
1933 return false;
1934
1935 updateValueMap(I, ResultReg);
1936 return true;
1937}
1938
1939bool AArch64FastISel::selectLoad(const Instruction *I) {
1940 MVT VT;
1941 // Verify we have a legal type before going any further. Currently, we handle
1942 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1943 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1944 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1945 cast<LoadInst>(I)->isAtomic())
1946 return false;
1947
1948 const Value *SV = I->getOperand(0);
1949 if (TLI.supportSwiftError()) {
1950 // Swifterror values can come from either a function parameter with
1951 // swifterror attribute or an alloca with swifterror attribute.
1952 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1953 if (Arg->hasSwiftErrorAttr())
1954 return false;
1955 }
1956
1957 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1958 if (Alloca->isSwiftError())
1959 return false;
1960 }
1961 }
1962
1963 // See if we can handle this address.
1964 Address Addr;
1965 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1966 return false;
1967
1968 // Fold the following sign-/zero-extend into the load instruction.
1969 bool WantZExt = true;
1970 MVT RetVT = VT;
1971 const Value *IntExtVal = nullptr;
1972 if (I->hasOneUse()) {
1973 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1974 if (isTypeSupported(ZE->getType(), RetVT))
1975 IntExtVal = ZE;
1976 else
1977 RetVT = VT;
1978 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1979 if (isTypeSupported(SE->getType(), RetVT))
1980 IntExtVal = SE;
1981 else
1982 RetVT = VT;
1983 WantZExt = false;
1984 }
1985 }
1986
1987 Register ResultReg =
1988 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1989 if (!ResultReg)
1990 return false;
1991
1992 // There are a few different cases we have to handle, because the load or the
1993 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1994 // SelectionDAG. There is also an ordering issue when both instructions are in
1995 // different basic blocks.
1996 // 1.) The load instruction is selected by FastISel, but the integer extend
1997 // not. This usually happens when the integer extend is in a different
1998 // basic block and SelectionDAG took over for that basic block.
1999 // 2.) The load instruction is selected before the integer extend. This only
2000 // happens when the integer extend is in a different basic block.
2001 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2002 // by FastISel. This happens if there are instructions between the load
2003 // and the integer extend that couldn't be selected by FastISel.
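// For example, "%v = load i32, ptr %p" followed in the same block by
// "%e = zext i32 %v to i64" is emitted as LDRWui plus SUBREG_TO_REG, and the
// zext itself is then satisfied by mapping it to that 64-bit result.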
2004 if (IntExtVal) {
2005 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2006 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2007 // it when it selects the integer extend.
2008 Register Reg = lookUpRegForValue(IntExtVal);
2009 auto *MI = MRI.getUniqueVRegDef(Reg);
2010 if (!MI) {
2011 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2012 if (WantZExt) {
2013 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2014 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2015 ResultReg = std::prev(I)->getOperand(0).getReg();
2016 removeDeadCode(I, std::next(I));
2017 } else
2018 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2019 AArch64::sub_32);
2020 }
2021 updateValueMap(I, ResultReg);
2022 return true;
2023 }
2024
2025 // The integer extend has already been emitted - delete all the instructions
2026 // that have been emitted by the integer extend lowering code and use the
2027 // result from the load instruction directly.
2028 while (MI) {
2029 Reg = 0;
2030 for (auto &Opnd : MI->uses()) {
2031 if (Opnd.isReg()) {
2032 Reg = Opnd.getReg();
2033 break;
2034 }
2035 }
2036 MachineBasicBlock::iterator I(MI);
2037 removeDeadCode(I, std::next(I));
2038 MI = nullptr;
2039 if (Reg)
2040 MI = MRI.getUniqueVRegDef(Reg);
2041 }
2042 updateValueMap(IntExtVal, ResultReg);
2043 return true;
2044 }
2045
2046 updateValueMap(I, ResultReg);
2047 return true;
2048}
2049
2050bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2051 Register AddrReg,
2052 MachineMemOperand *MMO) {
2053 unsigned Opc;
2054 switch (VT.SimpleTy) {
2055 default: return false;
2056 case MVT::i8: Opc = AArch64::STLRB; break;
2057 case MVT::i16: Opc = AArch64::STLRH; break;
2058 case MVT::i32: Opc = AArch64::STLRW; break;
2059 case MVT::i64: Opc = AArch64::STLRX; break;
2060 }
2061
2062 const MCInstrDesc &II = TII.get(Opc);
2063 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2064 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2066 .addReg(SrcReg)
2067 .addReg(AddrReg)
2068 .addMemOperand(MMO);
2069 return true;
2070}
2071
2072bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2073 MachineMemOperand *MMO) {
2074 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2075 return false;
2076
2077 // Simplify this down to something we can handle.
2078 if (!simplifyAddress(Addr, VT))
2079 return false;
2080
2081 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2082 if (!ScaleFactor)
2083 llvm_unreachable("Unexpected value type.");
2084
2085 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2086 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2087 bool UseScaled = true;
2088 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2089 UseScaled = false;
2090 ScaleFactor = 1;
2091 }
2092
2093 static const unsigned OpcTable[4][6] = {
2094 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2095 AArch64::STURSi, AArch64::STURDi },
2096 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2097 AArch64::STRSui, AArch64::STRDui },
2098 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2099 AArch64::STRSroX, AArch64::STRDroX },
2100 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2101 AArch64::STRSroW, AArch64::STRDroW }
2102 };
2103
2104 unsigned Opc;
2105 bool VTIsi1 = false;
2106 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2107 Addr.getOffsetReg();
2108 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2109 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2110 Addr.getExtendType() == AArch64_AM::SXTW)
2111 Idx++;
2112
2113 switch (VT.SimpleTy) {
2114 default: llvm_unreachable("Unexpected value type.");
2115 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2116 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2117 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2118 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2119 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2120 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2121 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2122 }
2123
2124 // Storing an i1 requires special handling.
2125 if (VTIsi1 && SrcReg != AArch64::WZR) {
2126 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2127 assert(ANDReg && "Unexpected AND instruction emission failure.");
2128 SrcReg = ANDReg;
2129 }
2130 // Create the base instruction, then add the operands.
2131 const MCInstrDesc &II = TII.get(Opc);
2132 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2133 MachineInstrBuilder MIB =
2134 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2135 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2136
2137 return true;
2138}
2139
2140bool AArch64FastISel::selectStore(const Instruction *I) {
2141 MVT VT;
2142 const Value *Op0 = I->getOperand(0);
2143 // Verify we have a legal type before going any further. Currently, we handle
2144 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2145 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2146 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2147 return false;
2148
2149 const Value *PtrV = I->getOperand(1);
2150 if (TLI.supportSwiftError()) {
2151 // Swifterror values can come from either a function parameter with
2152 // swifterror attribute or an alloca with swifterror attribute.
2153 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2154 if (Arg->hasSwiftErrorAttr())
2155 return false;
2156 }
2157
2158 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2159 if (Alloca->isSwiftError())
2160 return false;
2161 }
2162 }
2163
2164 // Get the value to be stored into a register. Use the zero register directly
2165 // when possible to avoid an unnecessary copy and a wasted register.
2166 Register SrcReg;
2167 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2168 if (CI->isZero())
2169 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2170 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2171 if (CF->isZero() && !CF->isNegative()) {
2172 VT = MVT::getIntegerVT(VT.getSizeInBits());
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 }
2175 }
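// e.g. "store i32 0, ptr %p" stores WZR directly, and "store double 0.0,
// ptr %p" is re-typed to i64 so XZR can be stored instead of materializing
// the FP zero in a register first.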
2176
2177 if (!SrcReg)
2178 SrcReg = getRegForValue(Op0);
2179
2180 if (!SrcReg)
2181 return false;
2182
2183 auto *SI = cast<StoreInst>(I);
2184
2185 // Try to emit a STLR for seq_cst/release.
2186 if (SI->isAtomic()) {
2187 AtomicOrdering Ord = SI->getOrdering();
2188 // The non-atomic instructions are sufficient for relaxed stores.
2189 if (isReleaseOrStronger(Ord)) {
2190 // The STLR addressing mode only supports a base reg; pass that directly.
2191 Register AddrReg = getRegForValue(PtrV);
2192 if (!AddrReg)
2193 return false;
2194 return emitStoreRelease(VT, SrcReg, AddrReg,
2195 createMachineMemOperandFor(I));
2196 }
2197 }
2198
2199 // See if we can handle this address.
2200 Address Addr;
2201 if (!computeAddress(PtrV, Addr, Op0->getType()))
2202 return false;
2203
2204 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205 return false;
2206 return true;
2207}
2208
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210 switch (Pred) {
2211 case CmpInst::FCMP_ONE:
2212 case CmpInst::FCMP_UEQ:
2213 default:
2214 // AL is our "false" for now. The other two need more compares.
2215 return AArch64CC::AL;
2216 case CmpInst::ICMP_EQ:
2217 case CmpInst::FCMP_OEQ:
2218 return AArch64CC::EQ;
2219 case CmpInst::ICMP_SGT:
2220 case CmpInst::FCMP_OGT:
2221 return AArch64CC::GT;
2222 case CmpInst::ICMP_SGE:
2223 case CmpInst::FCMP_OGE:
2224 return AArch64CC::GE;
2225 case CmpInst::ICMP_UGT:
2226 case CmpInst::FCMP_UGT:
2227 return AArch64CC::HI;
2228 case CmpInst::FCMP_OLT:
2229 return AArch64CC::MI;
2230 case CmpInst::ICMP_ULE:
2231 case CmpInst::FCMP_OLE:
2232 return AArch64CC::LS;
2233 case CmpInst::FCMP_ORD:
2234 return AArch64CC::VC;
2235 case CmpInst::FCMP_UNO:
2236 return AArch64CC::VS;
2237 case CmpInst::FCMP_UGE:
2238 return AArch64CC::PL;
2239 case CmpInst::ICMP_SLT:
2240 case CmpInst::FCMP_ULT:
2241 return AArch64CC::LT;
2242 case CmpInst::ICMP_SLE:
2243 case CmpInst::FCMP_ULE:
2244 return AArch64CC::LE;
2245 case CmpInst::FCMP_UNE:
2246 case CmpInst::ICMP_NE:
2247 return AArch64CC::NE;
2248 case CmpInst::ICMP_UGE:
2249 return AArch64CC::HS;
2250 case CmpInst::ICMP_ULT:
2251 return AArch64CC::LO;
2252 }
2253}
2254
2255/// Try to emit a combined compare-and-branch instruction.
2256bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258 // will not be produced, as they are conditional branch instructions that do
2259 // not set flags.
2260 if (FuncInfo.MF->getFunction().hasFnAttribute(
2261 Attribute::SpeculativeLoadHardening))
2262 return false;
2263
2264 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267
2268 const Value *LHS = CI->getOperand(0);
2269 const Value *RHS = CI->getOperand(1);
2270
2271 MVT VT;
2272 if (!isTypeSupported(LHS->getType(), VT))
2273 return false;
2274
2275 unsigned BW = VT.getSizeInBits();
2276 if (BW > 64)
2277 return false;
2278
2279 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2280 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2281
2282 // Try to take advantage of fallthrough opportunities.
2283 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284 std::swap(TBB, FBB);
2285 Predicate = CmpInst::getInversePredicate(Predicate);
2286 }
2287
2288 int TestBit = -1;
2289 bool IsCmpNE;
2290 switch (Predicate) {
2291 default:
2292 return false;
2293 case CmpInst::ICMP_EQ:
2294 case CmpInst::ICMP_NE:
2295 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296 std::swap(LHS, RHS);
2297
2298 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299 return false;
2300
2301 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303 const Value *AndLHS = AI->getOperand(0);
2304 const Value *AndRHS = AI->getOperand(1);
2305
2306 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307 if (C->getValue().isPowerOf2())
2308 std::swap(AndLHS, AndRHS);
2309
2310 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311 if (C->getValue().isPowerOf2()) {
2312 TestBit = C->getValue().logBase2();
2313 LHS = AndLHS;
2314 }
2315 }
2316
2317 if (VT == MVT::i1)
2318 TestBit = 0;
2319
2320 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321 break;
2322 case CmpInst::ICMP_SLT:
2323 case CmpInst::ICMP_SGE:
2324 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325 return false;
2326
2327 TestBit = BW - 1;
2328 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329 break;
2330 case CmpInst::ICMP_SGT:
2331 case CmpInst::ICMP_SLE:
2332 if (!isa<ConstantInt>(RHS))
2333 return false;
2334
2335 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336 return false;
2337
2338 TestBit = BW - 1;
2339 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340 break;
2341 } // end switch
2342
2343 static const unsigned OpcTable[2][2][2] = {
2344 { {AArch64::CBZW, AArch64::CBZX },
2345 {AArch64::CBNZW, AArch64::CBNZX} },
2346 { {AArch64::TBZW, AArch64::TBZX },
2347 {AArch64::TBNZW, AArch64::TBNZX} }
2348 };
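// Examples of the lowerings produced here (before any fallthrough swap):
//   icmp ne i64 %x, 0; br            -> CBNZX %x, TBB
//   icmp eq (and i32 %x, 8), 0; br   -> TBZW  %x, #3, TBB
//   icmp slt i32 %x, 0; br           -> TBNZW %x, #31, TBB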
2349
2350 bool IsBitTest = TestBit != -1;
2351 bool Is64Bit = BW == 64;
2352 if (TestBit < 32 && TestBit >= 0)
2353 Is64Bit = false;
2354
2355 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356 const MCInstrDesc &II = TII.get(Opc);
2357
2358 Register SrcReg = getRegForValue(LHS);
2359 if (!SrcReg)
2360 return false;
2361
2362 if (BW == 64 && !Is64Bit)
2363 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2364
2365 if ((BW < 32) && !IsBitTest)
2366 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2367
2368 // Emit the combined compare and branch instruction.
2369 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2370 MachineInstrBuilder MIB =
2371 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2372 .addReg(SrcReg);
2373 if (IsBitTest)
2374 MIB.addImm(TestBit);
2375 MIB.addMBB(TBB);
2376
2377 finishCondBranch(BI->getParent(), TBB, FBB);
2378 return true;
2379}
2380
2381bool AArch64FastISel::selectBranch(const Instruction *I) {
2382 const BranchInst *BI = cast<BranchInst>(I);
2383 if (BI->isUnconditional()) {
2384 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2385 fastEmitBranch(MSucc, BI->getDebugLoc());
2386 return true;
2387 }
2388
2389 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2390 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2391
2392 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2393 if (CI->hasOneUse() && isValueAvailable(CI)) {
2394 // Try to optimize or fold the cmp.
2395 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2396 switch (Predicate) {
2397 default:
2398 break;
2399 case CmpInst::FCMP_FALSE:
2400 fastEmitBranch(FBB, MIMD.getDL());
2401 return true;
2402 case CmpInst::FCMP_TRUE:
2403 fastEmitBranch(TBB, MIMD.getDL());
2404 return true;
2405 }
2406
2407 // Try to emit a combined compare-and-branch first.
2408 if (emitCompareAndBranch(BI))
2409 return true;
2410
2411 // Try to take advantage of fallthrough opportunities.
2412 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2413 std::swap(TBB, FBB);
2414 Predicate = CmpInst::getInversePredicate(Predicate);
2415 }
2416
2417 // Emit the cmp.
2418 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2419 return false;
2420
2421 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2422 // instruction.
2423 AArch64CC::CondCode CC = getCompareCC(Predicate);
2424 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2425 switch (Predicate) {
2426 default:
2427 break;
2428 case CmpInst::FCMP_UEQ:
2429 ExtraCC = AArch64CC::EQ;
2430 CC = AArch64CC::VS;
2431 break;
2432 case CmpInst::FCMP_ONE:
2433 ExtraCC = AArch64CC::MI;
2434 CC = AArch64CC::GT;
2435 break;
2436 }
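// e.g. a one-use "fcmp ueq" branch takes TBB on either EQ (equal) or VS
// (unordered), so two Bcc instructions targeting the same block are emitted.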
2437 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2438
2439 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2440 if (ExtraCC != AArch64CC::AL) {
2441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2442 .addImm(ExtraCC)
2443 .addMBB(TBB);
2444 }
2445
2446 // Emit the branch.
2447 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2448 .addImm(CC)
2449 .addMBB(TBB);
2450
2451 finishCondBranch(BI->getParent(), TBB, FBB);
2452 return true;
2453 }
2454 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2455 uint64_t Imm = CI->getZExtValue();
2456 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2457 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2458 .addMBB(Target);
2459
2460 // Obtain the branch probability and add the target to the successor list.
2461 if (FuncInfo.BPI) {
2462 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2463 BI->getParent(), Target->getBasicBlock());
2464 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2465 } else
2466 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2467 return true;
2468 } else {
2469 AArch64CC::CondCode CC = AArch64CC::NE;
2470 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2471 // Fake request the condition, otherwise the intrinsic might be completely
2472 // optimized away.
2473 Register CondReg = getRegForValue(BI->getCondition());
2474 if (!CondReg)
2475 return false;
2476
2477 // Emit the branch.
2478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2479 .addImm(CC)
2480 .addMBB(TBB);
2481
2482 finishCondBranch(BI->getParent(), TBB, FBB);
2483 return true;
2484 }
2485 }
2486
2487 Register CondReg = getRegForValue(BI->getCondition());
2488 if (!CondReg)
2489 return false;
2490
2491 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2492 unsigned Opcode = AArch64::TBNZW;
2493 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2494 std::swap(TBB, FBB);
2495 Opcode = AArch64::TBZW;
2496 }
2497
2498 const MCInstrDesc &II = TII.get(Opcode);
2499 Register ConstrainedCondReg
2500 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2501 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2502 .addReg(ConstrainedCondReg)
2503 .addImm(0)
2504 .addMBB(TBB);
2505
2506 finishCondBranch(BI->getParent(), TBB, FBB);
2507 return true;
2508}
2509
2510bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2511 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2512 Register AddrReg = getRegForValue(BI->getOperand(0));
2513 if (!AddrReg)
2514 return false;
2515
2516 // Authenticated indirectbr is not implemented yet.
2517 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2518 return false;
2519
2520 // Emit the indirect branch.
2521 const MCInstrDesc &II = TII.get(AArch64::BR);
2522 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2524
2525 // Make sure the CFG is up-to-date.
2526 for (const auto *Succ : BI->successors())
2527 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2528
2529 return true;
2530}
2531
2532bool AArch64FastISel::selectCmp(const Instruction *I) {
2533 const CmpInst *CI = cast<CmpInst>(I);
2534
2535 // Vectors of i1 are weird: bail out.
2536 if (CI->getType()->isVectorTy())
2537 return false;
2538
2539 // Try to optimize or fold the cmp.
2540 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2541 Register ResultReg;
2542 switch (Predicate) {
2543 default:
2544 break;
2545 case CmpInst::FCMP_FALSE:
2546 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2547 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2548 TII.get(TargetOpcode::COPY), ResultReg)
2549 .addReg(AArch64::WZR, getKillRegState(true));
2550 break;
2551 case CmpInst::FCMP_TRUE:
2552 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2553 break;
2554 }
2555
2556 if (ResultReg) {
2557 updateValueMap(I, ResultReg);
2558 return true;
2559 }
2560
2561 // Emit the cmp.
2562 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2563 return false;
2564
2565 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2566
2567 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2568 // condition codes are inverted, because they are used by CSINC.
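// CSINC Wd, WZR, WZR, cc yields 0 when cc holds and 1 otherwise, so a plain
// "cc is true" result is materialized below by passing the inverted code.
// FCMP_UEQ is handled as (EQ || unordered) and FCMP_ONE as (LT || GT) via the
// two chained CSINCs.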
2569 static unsigned CondCodeTable[2][2] = {
2570 { AArch64CC::NE, AArch64CC::VC },
2571 { AArch64CC::PL, AArch64CC::LE }
2572 };
2573 unsigned *CondCodes = nullptr;
2574 switch (Predicate) {
2575 default:
2576 break;
2577 case CmpInst::FCMP_UEQ:
2578 CondCodes = &CondCodeTable[0][0];
2579 break;
2580 case CmpInst::FCMP_ONE:
2581 CondCodes = &CondCodeTable[1][0];
2582 break;
2583 }
2584
2585 if (CondCodes) {
2586 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2587 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2588 TmpReg1)
2589 .addReg(AArch64::WZR, getKillRegState(true))
2590 .addReg(AArch64::WZR, getKillRegState(true))
2591 .addImm(CondCodes[0]);
2592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2593 ResultReg)
2594 .addReg(TmpReg1, getKillRegState(true))
2595 .addReg(AArch64::WZR, getKillRegState(true))
2596 .addImm(CondCodes[1]);
2597
2598 updateValueMap(I, ResultReg);
2599 return true;
2600 }
2601
2602 // Now set a register based on the comparison.
2603 AArch64CC::CondCode CC = getCompareCC(Predicate);
2604 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2605 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2606 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2607 ResultReg)
2608 .addReg(AArch64::WZR, getKillRegState(true))
2609 .addReg(AArch64::WZR, getKillRegState(true))
2610 .addImm(invertedCC);
2611
2612 updateValueMap(I, ResultReg);
2613 return true;
2614}
2615
2616/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2617/// value.
2618bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2619 if (!SI->getType()->isIntegerTy(1))
2620 return false;
2621
2622 const Value *Src1Val, *Src2Val;
2623 unsigned Opc = 0;
2624 bool NeedExtraOp = false;
2625 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2626 if (CI->isOne()) {
2627 Src1Val = SI->getCondition();
2628 Src2Val = SI->getFalseValue();
2629 Opc = AArch64::ORRWrr;
2630 } else {
2631 assert(CI->isZero());
2632 Src1Val = SI->getFalseValue();
2633 Src2Val = SI->getCondition();
2634 Opc = AArch64::BICWrr;
2635 }
2636 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2637 if (CI->isOne()) {
2638 Src1Val = SI->getCondition();
2639 Src2Val = SI->getTrueValue();
2640 Opc = AArch64::ORRWrr;
2641 NeedExtraOp = true;
2642 } else {
2643 assert(CI->isZero());
2644 Src1Val = SI->getCondition();
2645 Src2Val = SI->getTrueValue();
2646 Opc = AArch64::ANDWrr;
2647 }
2648 }
2649
2650 if (!Opc)
2651 return false;
2652
2653 Register Src1Reg = getRegForValue(Src1Val);
2654 if (!Src1Reg)
2655 return false;
2656
2657 Register Src2Reg = getRegForValue(Src2Val);
2658 if (!Src2Reg)
2659 return false;
2660
2661 if (NeedExtraOp)
2662 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2663
2664 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2665 Src2Reg);
2666 updateValueMap(SI, ResultReg);
2667 return true;
2668}
2669
2670bool AArch64FastISel::selectSelect(const Instruction *I) {
2671 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2672 MVT VT;
2673 if (!isTypeSupported(I->getType(), VT))
2674 return false;
2675
2676 unsigned Opc;
2677 const TargetRegisterClass *RC;
2678 switch (VT.SimpleTy) {
2679 default:
2680 return false;
2681 case MVT::i1:
2682 case MVT::i8:
2683 case MVT::i16:
2684 case MVT::i32:
2685 Opc = AArch64::CSELWr;
2686 RC = &AArch64::GPR32RegClass;
2687 break;
2688 case MVT::i64:
2689 Opc = AArch64::CSELXr;
2690 RC = &AArch64::GPR64RegClass;
2691 break;
2692 case MVT::f32:
2693 Opc = AArch64::FCSELSrrr;
2694 RC = &AArch64::FPR32RegClass;
2695 break;
2696 case MVT::f64:
2697 Opc = AArch64::FCSELDrrr;
2698 RC = &AArch64::FPR64RegClass;
2699 break;
2700 }
2701
2702 const SelectInst *SI = cast<SelectInst>(I);
2703 const Value *Cond = SI->getCondition();
2704 AArch64CC::CondCode CC = AArch64CC::NE;
2705 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2706
2707 if (optimizeSelect(SI))
2708 return true;
2709
2710 // Try to pickup the flags, so we don't have to emit another compare.
2711 if (foldXALUIntrinsic(CC, I, Cond)) {
2712 // Fake request the condition to force emission of the XALU intrinsic.
2713 Register CondReg = getRegForValue(Cond);
2714 if (!CondReg)
2715 return false;
2716 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2717 isValueAvailable(Cond)) {
2718 const auto *Cmp = cast<CmpInst>(Cond);
2719 // Try to optimize or fold the cmp.
2720 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2721 const Value *FoldSelect = nullptr;
2722 switch (Predicate) {
2723 default:
2724 break;
2725 case CmpInst::FCMP_FALSE:
2726 FoldSelect = SI->getFalseValue();
2727 break;
2728 case CmpInst::FCMP_TRUE:
2729 FoldSelect = SI->getTrueValue();
2730 break;
2731 }
2732
2733 if (FoldSelect) {
2734 Register SrcReg = getRegForValue(FoldSelect);
2735 if (!SrcReg)
2736 return false;
2737
2738 updateValueMap(I, SrcReg);
2739 return true;
2740 }
2741
2742 // Emit the cmp.
2743 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2744 return false;
2745
2746 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747 CC = getCompareCC(Predicate);
2748 switch (Predicate) {
2749 default:
2750 break;
2751 case CmpInst::FCMP_UEQ:
2752 ExtraCC = AArch64CC::EQ;
2753 CC = AArch64CC::VS;
2754 break;
2755 case CmpInst::FCMP_ONE:
2756 ExtraCC = AArch64CC::MI;
2757 CC = AArch64CC::GT;
2758 break;
2759 }
2760 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761 } else {
2762 Register CondReg = getRegForValue(Cond);
2763 if (!CondReg)
2764 return false;
2765
2766 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2767 CondReg = constrainOperandRegClass(II, CondReg, 1);
2768
2769 // Emit a TST instruction (ANDS wzr, reg, #imm).
2770 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2771 AArch64::WZR)
2772 .addReg(CondReg)
2773 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2774 }
2775
2776 Register Src1Reg = getRegForValue(SI->getTrueValue());
2777 Register Src2Reg = getRegForValue(SI->getFalseValue());
2778
2779 if (!Src1Reg || !Src2Reg)
2780 return false;
2781
2782 if (ExtraCC != AArch64CC::AL)
2783 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2784
2785 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2786 updateValueMap(I, ResultReg);
2787 return true;
2788}
2789
2790bool AArch64FastISel::selectFPExt(const Instruction *I) {
2791 Value *V = I->getOperand(0);
2792 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2793 return false;
2794
2795 Register Op = getRegForValue(V);
2796 if (Op == 0)
2797 return false;
2798
2799 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2800 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2801 ResultReg).addReg(Op);
2802 updateValueMap(I, ResultReg);
2803 return true;
2804}
2805
2806bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2807 Value *V = I->getOperand(0);
2808 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2809 return false;
2810
2811 Register Op = getRegForValue(V);
2812 if (Op == 0)
2813 return false;
2814
2815 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2816 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2817 ResultReg).addReg(Op);
2818 updateValueMap(I, ResultReg);
2819 return true;
2820}
2821
2822// FPToUI and FPToSI
2823bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2824 MVT DestVT;
2825 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2826 return false;
2827
2828 Register SrcReg = getRegForValue(I->getOperand(0));
2829 if (!SrcReg)
2830 return false;
2831
2832 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2833 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2834 return false;
2835
2836 unsigned Opc;
2837 if (SrcVT == MVT::f64) {
2838 if (Signed)
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2840 else
2841 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2842 } else {
2843 if (Signed)
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2845 else
2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2847 }
2848 Register ResultReg = createResultReg(
2849 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2850 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2851 .addReg(SrcReg);
2852 updateValueMap(I, ResultReg);
2853 return true;
2854}
2855
2856bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2857 MVT DestVT;
2858 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2859 return false;
2860 // Let regular ISEL handle FP16
2861 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2862 return false;
2863
2864 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2865 "Unexpected value type.");
2866
2867 Register SrcReg = getRegForValue(I->getOperand(0));
2868 if (!SrcReg)
2869 return false;
2870
2871 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2872
2873 // Handle sign-extension.
2874 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2875 SrcReg =
2876 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2877 if (!SrcReg)
2878 return false;
2879 }
2880
2881 unsigned Opc;
2882 if (SrcVT == MVT::i64) {
2883 if (Signed)
2884 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2885 else
2886 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2887 } else {
2888 if (Signed)
2889 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2890 else
2891 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2892 }
2893
2894 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2895 updateValueMap(I, ResultReg);
2896 return true;
2897}
2898
2899bool AArch64FastISel::fastLowerArguments() {
2900 if (!FuncInfo.CanLowerReturn)
2901 return false;
2902
2903 const Function *F = FuncInfo.Fn;
2904 if (F->isVarArg())
2905 return false;
2906
2907 CallingConv::ID CC = F->getCallingConv();
2908 if (CC != CallingConv::C && CC != CallingConv::Swift)
2909 return false;
2910
2911 if (Subtarget->hasCustomCallingConv())
2912 return false;
2913
2914 // Only handle simple cases of up to 8 GPR and FPR each.
2915 unsigned GPRCnt = 0;
2916 unsigned FPRCnt = 0;
2917 for (auto const &Arg : F->args()) {
2918 if (Arg.hasAttribute(Attribute::ByVal) ||
2919 Arg.hasAttribute(Attribute::InReg) ||
2920 Arg.hasAttribute(Attribute::StructRet) ||
2921 Arg.hasAttribute(Attribute::SwiftSelf) ||
2922 Arg.hasAttribute(Attribute::SwiftAsync) ||
2923 Arg.hasAttribute(Attribute::SwiftError) ||
2924 Arg.hasAttribute(Attribute::Nest))
2925 return false;
2926
2927 Type *ArgTy = Arg.getType();
2928 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2929 return false;
2930
2931 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2932 if (!ArgVT.isSimple())
2933 return false;
2934
2935 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2936 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2937 return false;
2938
2939 if (VT.isVector() &&
2940 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2941 return false;
2942
2943 if (VT >= MVT::i1 && VT <= MVT::i64)
2944 ++GPRCnt;
2945 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2946 VT.is128BitVector())
2947 ++FPRCnt;
2948 else
2949 return false;
2950
2951 if (GPRCnt > 8 || FPRCnt > 8)
2952 return false;
2953 }
2954
2955 static const MCPhysReg Registers[6][8] = {
2956 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2957 AArch64::W5, AArch64::W6, AArch64::W7 },
2958 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2959 AArch64::X5, AArch64::X6, AArch64::X7 },
2960 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2961 AArch64::H5, AArch64::H6, AArch64::H7 },
2962 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2963 AArch64::S5, AArch64::S6, AArch64::S7 },
2964 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2965 AArch64::D5, AArch64::D6, AArch64::D7 },
2966 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2967 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2968 };
2969
2970 unsigned GPRIdx = 0;
2971 unsigned FPRIdx = 0;
2972 for (auto const &Arg : F->args()) {
2973 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2974 unsigned SrcReg;
2975 const TargetRegisterClass *RC;
2976 if (VT >= MVT::i1 && VT <= MVT::i32) {
2977 SrcReg = Registers[0][GPRIdx++];
2978 RC = &AArch64::GPR32RegClass;
2979 VT = MVT::i32;
2980 } else if (VT == MVT::i64) {
2981 SrcReg = Registers[1][GPRIdx++];
2982 RC = &AArch64::GPR64RegClass;
2983 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2984 SrcReg = Registers[2][FPRIdx++];
2985 RC = &AArch64::FPR16RegClass;
2986 } else if (VT == MVT::f32) {
2987 SrcReg = Registers[3][FPRIdx++];
2988 RC = &AArch64::FPR32RegClass;
2989 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2990 SrcReg = Registers[4][FPRIdx++];
2991 RC = &AArch64::FPR64RegClass;
2992 } else if (VT.is128BitVector()) {
2993 SrcReg = Registers[5][FPRIdx++];
2994 RC = &AArch64::FPR128RegClass;
2995 } else
2996 llvm_unreachable("Unexpected value type.");
2997
2998 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2999 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3000 // Without this, EmitLiveInCopies may eliminate the livein if its only
3001 // use is a bitcast (which isn't turned into an instruction).
3002 Register ResultReg = createResultReg(RC);
3003 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3004 TII.get(TargetOpcode::COPY), ResultReg)
3005 .addReg(DstReg, getKillRegState(true));
3006 updateValueMap(&Arg, ResultReg);
3007 }
3008 return true;
3009}
3010
3011bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3012 SmallVectorImpl<MVT> &OutVTs,
3013 SmallVectorImpl<Type *> &OrigTys,
3014 unsigned &NumBytes) {
3015 CallingConv::ID CC = CLI.CallConv;
3016 SmallVector<CCValAssign, 16> ArgLocs;
3017 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3018 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3019 CCAssignFnForCall(CC));
3020
3021 // Get a count of how many bytes are to be pushed on the stack.
3022 NumBytes = CCInfo.getStackSize();
3023
3024 // Issue CALLSEQ_START
3025 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3026 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3027 .addImm(NumBytes).addImm(0);
3028
3029 // Process the args.
3030 for (CCValAssign &VA : ArgLocs) {
3031 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3032 MVT ArgVT = OutVTs[VA.getValNo()];
3033
3034 Register ArgReg = getRegForValue(ArgVal);
3035 if (!ArgReg)
3036 return false;
3037
3038 // Handle arg promotion: SExt, ZExt, AExt.
3039 switch (VA.getLocInfo()) {
3040 case CCValAssign::Full:
3041 break;
3042 case CCValAssign::SExt: {
3043 MVT DestVT = VA.getLocVT();
3044 MVT SrcVT = ArgVT;
3045 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3046 if (!ArgReg)
3047 return false;
3048 break;
3049 }
3050 case CCValAssign::AExt:
3051 // Intentional fall-through.
3052 case CCValAssign::ZExt: {
3053 MVT DestVT = VA.getLocVT();
3054 MVT SrcVT = ArgVT;
3055 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3056 if (!ArgReg)
3057 return false;
3058 break;
3059 }
3060 default:
3061 llvm_unreachable("Unknown arg promotion!");
3062 }
3063
3064 // Now copy/store arg to correct locations.
3065 if (VA.isRegLoc() && !VA.needsCustom()) {
3066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3067 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3068 CLI.OutRegs.push_back(VA.getLocReg());
3069 } else if (VA.needsCustom()) {
3070 // FIXME: Handle custom args.
3071 return false;
3072 } else {
3073 assert(VA.isMemLoc() && "Assuming store on stack.");
3074
3075 // Don't emit stores for undef values.
3076 if (isa<UndefValue>(ArgVal))
3077 continue;
3078
3079 // Need to store on the stack.
3080 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3081
3082 unsigned BEAlign = 0;
3083 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3084 BEAlign = 8 - ArgSize;
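// e.g. a 4-byte argument on a big-endian target is stored at slot offset + 4,
// i.e. into the high half of its 8-byte stack slot, where the callee expects it.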
3085
3086 Address Addr;
3087 Addr.setKind(Address::RegBase);
3088 Addr.setReg(AArch64::SP);
3089 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3090
3091 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3092 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3093 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3094 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3095
3096 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3097 return false;
3098 }
3099 }
3100 return true;
3101}
3102
3103bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3104 CallingConv::ID CC = CLI.CallConv;
3105
3106 // Issue CALLSEQ_END
3107 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3108 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3109 .addImm(NumBytes).addImm(0);
3110
3111 // Now the return values.
3112 SmallVector<CCValAssign, 16> RVLocs;
3113 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3114 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3115
3116 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3117 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3118 CCValAssign &VA = RVLocs[i];
3119 MVT CopyVT = VA.getValVT();
3120 Register CopyReg = ResultReg + i;
3121
3122 // TODO: Handle big-endian results
3123 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3124 return false;
3125
3126 // Copy result out of their specified physreg.
3127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3128 CopyReg)
3129 .addReg(VA.getLocReg());
3130 CLI.InRegs.push_back(VA.getLocReg());
3131 }
3132
3133 CLI.ResultReg = ResultReg;
3134 CLI.NumResultRegs = RVLocs.size();
3135
3136 return true;
3137}
3138
3139bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3140 CallingConv::ID CC = CLI.CallConv;
3141 bool IsTailCall = CLI.IsTailCall;
3142 bool IsVarArg = CLI.IsVarArg;
3143 const Value *Callee = CLI.Callee;
3144 MCSymbol *Symbol = CLI.Symbol;
3145
3146 if (!Callee && !Symbol)
3147 return false;
3148
3149 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3150 // a bti instruction following the call.
3151 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3152 !Subtarget->noBTIAtReturnTwice() &&
3153 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3154 return false;
3155
3156 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3157 if (CLI.CB && CLI.CB->isIndirectCall() &&
3158 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3159 return false;
3160
3161 // Allow SelectionDAG isel to handle tail calls.
3162 if (IsTailCall)
3163 return false;
3164
3165 // FIXME: we could and should support this, but for now correctness at -O0 is
3166 // more important.
3167 if (Subtarget->isTargetILP32())
3168 return false;
3169
3170 CodeModel::Model CM = TM.getCodeModel();
3171 // Only support the small-addressing and large code models.
3172 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3173 return false;
3174
3175 // FIXME: Add large code model support for ELF.
3176 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3177 return false;
3178
3179 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3180 // attribute. Check "RtLibUseGOT" instead.
3181 if (MF->getFunction().getParent()->getRtLibUseGOT())
3182 return false;
3183
3184 // Let SDISel handle vararg functions.
3185 if (IsVarArg)
3186 return false;
3187
3188 if (Subtarget->isWindowsArm64EC())
3189 return false;
3190
3191 for (auto Flag : CLI.OutFlags)
3192 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3193 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3194 return false;
3195
3196 // Set up the argument vectors.
3197 SmallVector<MVT, 16> OutVTs;
3198 SmallVector<Type *, 16> OrigTys;
3199 OutVTs.reserve(CLI.OutVals.size());
3200
3201 for (auto *Val : CLI.OutVals) {
3202 MVT VT;
3203 if (!isTypeLegal(Val->getType(), VT) &&
3204 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205 return false;
3206
3207 // We don't handle vector parameters yet.
3208 if (VT.isVector() || VT.getSizeInBits() > 64)
3209 return false;
3210
3211 OutVTs.push_back(VT);
3212 OrigTys.push_back(Val->getType());
3213 }
3214
3215 Address Addr;
3216 if (Callee && !computeCallAddress(Callee, Addr))
3217 return false;
3218
3219 // The weak function target may be zero; in that case we must use indirect
3220 // addressing via a stub on windows as it may be out of range for a
3221 // PC-relative jump.
3222 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3223 Addr.getGlobalValue()->hasExternalWeakLinkage())
3224 return false;
3225
3226 // Handle the arguments now that we've gotten them.
3227 unsigned NumBytes;
3228 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3229 return false;
3230
3231 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3232 if (RegInfo->isAnyArgRegReserved(*MF))
3233 RegInfo->emitReservedArgRegCallError(*MF);
3234
3235 // Issue the call.
3236 MachineInstrBuilder MIB;
3237 if (Subtarget->useSmallAddressing()) {
3238 const MCInstrDesc &II =
3239 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3240 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3241 if (Symbol)
3242 MIB.addSym(Symbol, 0);
3243 else if (Addr.getGlobalValue())
3244 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3245 else if (Addr.getReg()) {
3246 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3247 MIB.addReg(Reg);
3248 } else
3249 return false;
3250 } else {
3251 Register CallReg;
3252 if (Symbol) {
3253 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3254 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3255 ADRPReg)
3256 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3257
3258 CallReg = createResultReg(&AArch64::GPR64RegClass);
3259 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3260 TII.get(AArch64::LDRXui), CallReg)
3261 .addReg(ADRPReg)
3262 .addSym(Symbol,
3263 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3264 } else if (Addr.getGlobalValue())
3265 CallReg = materializeGV(Addr.getGlobalValue());
3266 else if (Addr.getReg())
3267 CallReg = Addr.getReg();
3268
3269 if (!CallReg)
3270 return false;
3271
3272 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3273 CallReg = constrainOperandRegClass(II, CallReg, 0);
3274 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3275 }
3276
3277 // Add implicit physical register uses to the call.
3278 for (auto Reg : CLI.OutRegs)
3279 MIB.addReg(Reg, RegState::Implicit);
3280
3281 // Add a register mask with the call-preserved registers.
3282 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3283 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3284
3285 CLI.Call = MIB;
3286
3287 // Finish off the call including any return values.
3288 return finishCall(CLI, NumBytes);
3289}
3290
3291bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3292 if (Alignment)
3293 return Len / Alignment->value() <= 4;
3294 else
3295 return Len < 32;
3296}
3297
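// Roughly: an 8-byte-aligned 16-byte memcpy becomes two i64 load/store pairs,
// and a 2-byte-aligned 6-byte copy becomes three i16 pairs; anything rejected
// by isMemCpySmall is left to the normal memcpy lowering instead.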
3298bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3299 uint64_t Len, MaybeAlign Alignment) {
3300 // Make sure we don't bloat code by inlining very large memcpy's.
3301 if (!isMemCpySmall(Len, Alignment))
3302 return false;
3303
3304 int64_t UnscaledOffset = 0;
3305 Address OrigDest = Dest;
3306 Address OrigSrc = Src;
3307
3308 while (Len) {
3309 MVT VT;
3310 if (!Alignment || *Alignment >= 8) {
3311 if (Len >= 8)
3312 VT = MVT::i64;
3313 else if (Len >= 4)
3314 VT = MVT::i32;
3315 else if (Len >= 2)
3316 VT = MVT::i16;
3317 else {
3318 VT = MVT::i8;
3319 }
3320 } else {
3321 assert(Alignment && "Alignment is set in this branch");
3322 // Bound based on alignment.
3323 if (Len >= 4 && *Alignment == 4)
3324 VT = MVT::i32;
3325 else if (Len >= 2 && *Alignment == 2)
3326 VT = MVT::i16;
3327 else {
3328 VT = MVT::i8;
3329 }
3330 }
3331
3332 Register ResultReg = emitLoad(VT, VT, Src);
3333 if (!ResultReg)
3334 return false;
3335
3336 if (!emitStore(VT, ResultReg, Dest))
3337 return false;
3338
3339 int64_t Size = VT.getSizeInBits() / 8;
3340 Len -= Size;
3341 UnscaledOffset += Size;
3342
3343 // We need to recompute the unscaled offset for each iteration.
3344 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3345 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3346 }
3347
3348 return true;
3349}
3350
3351/// Check if it is possible to fold the condition from the XALU intrinsic
3352/// into the user. The condition code will only be updated on success.
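/// For example, for
///   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %o = extractvalue { i32, i1 } %r, 1
///   br i1 %o, ...
/// the overflow bit is already in NZCV once the intrinsic has been selected,
/// so the branch (or select) can test VS directly instead of re-comparing.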
3353bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3354 const Instruction *I,
3355 const Value *Cond) {
3356 if (!isa<ExtractValueInst>(Cond))
3357 return false;
3358
3359 const auto *EV = cast<ExtractValueInst>(Cond);
3360 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3361 return false;
3362
3363 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3364 MVT RetVT;
3365 const Function *Callee = II->getCalledFunction();
3366 Type *RetTy =
3367 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3368 if (!isTypeLegal(RetTy, RetVT))
3369 return false;
3370
3371 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3372 return false;
3373
3374 const Value *LHS = II->getArgOperand(0);
3375 const Value *RHS = II->getArgOperand(1);
3376
3377 // Canonicalize immediate to the RHS.
3378 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3379 std::swap(LHS, RHS);
3380
3381 // Simplify multiplies.
3382 Intrinsic::ID IID = II->getIntrinsicID();
3383 switch (IID) {
3384 default:
3385 break;
3386 case Intrinsic::smul_with_overflow:
3387 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3388 if (C->getValue() == 2)
3389 IID = Intrinsic::sadd_with_overflow;
3390 break;
3391 case Intrinsic::umul_with_overflow:
3392 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3393 if (C->getValue() == 2)
3394 IID = Intrinsic::uadd_with_overflow;
3395 break;
3396 }
3397
3398 AArch64CC::CondCode TmpCC;
3399 switch (IID) {
3400 default:
3401 return false;
3402 case Intrinsic::sadd_with_overflow:
3403 case Intrinsic::ssub_with_overflow:
3404 TmpCC = AArch64CC::VS;
3405 break;
3406 case Intrinsic::uadd_with_overflow:
3407 TmpCC = AArch64CC::HS;
3408 break;
3409 case Intrinsic::usub_with_overflow:
3410 TmpCC = AArch64CC::LO;
3411 break;
3412 case Intrinsic::smul_with_overflow:
3413 case Intrinsic::umul_with_overflow:
3414 TmpCC = AArch64CC::NE;
3415 break;
3416 }
3417
3418 // Check if both instructions are in the same basic block.
3419 if (!isValueAvailable(II))
3420 return false;
3421
3422 // Make sure nothing is in the way
3423 BasicBlock::const_iterator Start(I);
3424 BasicBlock::const_iterator End(II);
3425 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3426 // We only expect extractvalue instructions between the intrinsic and the
3427 // instruction to be selected.
3428 if (!isa<ExtractValueInst>(Itr))
3429 return false;
3430
3431 // Check that the extractvalue operand comes from the intrinsic.
3432 const auto *EVI = cast<ExtractValueInst>(Itr);
3433 if (EVI->getAggregateOperand() != II)
3434 return false;
3435 }
3436
3437 CC = TmpCC;
3438 return true;
3439}
3440
3441bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3442 // FIXME: Handle more intrinsics.
3443 switch (II->getIntrinsicID()) {
3444 default: return false;
3445 case Intrinsic::frameaddress: {
3446 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3447 MFI.setFrameAddressIsTaken(true);
3448
3449 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3450 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3451 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3453 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3454 // Recursively load frame address
3455 // ldr x0, [fp]
3456 // ldr x0, [x0]
3457 // ldr x0, [x0]
3458 // ...
3459 Register DestReg;
3460 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3461 while (Depth--) {
3462 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3463 SrcReg, 0);
3464 assert(DestReg && "Unexpected LDR instruction emission failure.");
3465 SrcReg = DestReg;
3466 }
3467
3468 updateValueMap(II, SrcReg);
3469 return true;
3470 }
3471 case Intrinsic::sponentry: {
3472 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3473
3474 // SP = FP + Fixed Object + 16
3475 int FI = MFI.CreateFixedObject(4, 0, false);
3476 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3477 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3478 TII.get(AArch64::ADDXri), ResultReg)
3479 .addFrameIndex(FI)
3480 .addImm(0)
3481 .addImm(0);
3482
3483 updateValueMap(II, ResultReg);
3484 return true;
3485 }
3486 case Intrinsic::memcpy:
3487 case Intrinsic::memmove: {
3488 const auto *MTI = cast<MemTransferInst>(II);
3489 // Don't handle volatile.
3490 if (MTI->isVolatile())
3491 return false;
3492
3493 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3494 // we would emit dead code because we don't currently handle memmoves.
3495 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3496 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3497 // Small memcpy's are common enough that we want to do them without a call
3498 // if possible.
3499 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3500 MaybeAlign Alignment;
3501 if (MTI->getDestAlign() || MTI->getSourceAlign())
3502 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3503 MTI->getSourceAlign().valueOrOne());
3504 if (isMemCpySmall(Len, Alignment)) {
3505 Address Dest, Src;
3506 if (!computeAddress(MTI->getRawDest(), Dest) ||
3507 !computeAddress(MTI->getRawSource(), Src))
3508 return false;
3509 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3510 return true;
3511 }
3512 }
3513
3514 if (!MTI->getLength()->getType()->isIntegerTy(64))
3515 return false;
3516
3517 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3518 // Fast instruction selection doesn't support the special
3519 // address spaces.
3520 return false;
3521
3522 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3523 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3524 }
3525 case Intrinsic::memset: {
3526 const MemSetInst *MSI = cast<MemSetInst>(II);
3527 // Don't handle volatile.
3528 if (MSI->isVolatile())
3529 return false;
3530
3531 if (!MSI->getLength()->getType()->isIntegerTy(64))
3532 return false;
3533
3534 if (MSI->getDestAddressSpace() > 255)
3535 // Fast instruction selection doesn't support the special
3536 // address spaces.
3537 return false;
3538
3539 return lowerCallTo(II, "memset", II->arg_size() - 1);
3540 }
3541 case Intrinsic::sin:
3542 case Intrinsic::cos:
3543 case Intrinsic::tan:
3544 case Intrinsic::pow: {
3545 MVT RetVT;
3546 if (!isTypeLegal(II->getType(), RetVT))
3547 return false;
3548
3549 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3550 return false;
3551
3552 static const RTLIB::Libcall LibCallTable[4][2] = {
3553 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3554 {RTLIB::COS_F32, RTLIB::COS_F64},
3555 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3556 {RTLIB::POW_F32, RTLIB::POW_F64}};
3557 RTLIB::Libcall LC;
3558 bool Is64Bit = RetVT == MVT::f64;
3559 switch (II->getIntrinsicID()) {
3560 default:
3561 llvm_unreachable("Unexpected intrinsic.");
3562 case Intrinsic::sin:
3563 LC = LibCallTable[0][Is64Bit];
3564 break;
3565 case Intrinsic::cos:
3566 LC = LibCallTable[1][Is64Bit];
3567 break;
3568 case Intrinsic::tan:
3569 LC = LibCallTable[2][Is64Bit];
3570 break;
3571 case Intrinsic::pow:
3572 LC = LibCallTable[3][Is64Bit];
3573 break;
3574 }
3575
3576 ArgListTy Args;
3577 Args.reserve(II->arg_size());
3578
3579 // Populate the argument list.
3580 for (auto &Arg : II->args())
3581 Args.emplace_back(Arg);
3582
3583 CallLoweringInfo CLI;
3584 MCContext &Ctx = MF->getContext();
3585
3586 RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(LC);
3587 if (LCImpl == RTLIB::Unsupported)
3588 return false;
3589
3590 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
3591 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
3592 CLI.setCallee(DL, Ctx, CC, II->getType(), FuncName, std::move(Args));
3593 if (!lowerCallTo(CLI))
3594 return false;
3595 updateValueMap(II, CLI.ResultReg);
3596 return true;
3597 }
3598 case Intrinsic::fabs: {
3599 MVT VT;
3600 if (!isTypeLegal(II->getType(), VT))
3601 return false;
3602
3603 unsigned Opc;
3604 switch (VT.SimpleTy) {
3605 default:
3606 return false;
3607 case MVT::f32:
3608 Opc = AArch64::FABSSr;
3609 break;
3610 case MVT::f64:
3611 Opc = AArch64::FABSDr;
3612 break;
3613 }
3614 Register SrcReg = getRegForValue(II->getOperand(0));
3615 if (!SrcReg)
3616 return false;
3617 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3618 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3619 .addReg(SrcReg);
3620 updateValueMap(II, ResultReg);
3621 return true;
3622 }
3623 case Intrinsic::trap:
3624 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3625 .addImm(1);
3626 return true;
3627 case Intrinsic::debugtrap:
3628 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3629 .addImm(0xF000);
3630 return true;
3631
3632 case Intrinsic::sqrt: {
3633 Type *RetTy = II->getCalledFunction()->getReturnType();
3634
3635 MVT VT;
3636 if (!isTypeLegal(RetTy, VT))
3637 return false;
3638
3639 Register Op0Reg = getRegForValue(II->getOperand(0));
3640 if (!Op0Reg)
3641 return false;
3642
3643 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3644 if (!ResultReg)
3645 return false;
3646
3647 updateValueMap(II, ResultReg);
3648 return true;
3649 }
3650 case Intrinsic::sadd_with_overflow:
3651 case Intrinsic::uadd_with_overflow:
3652 case Intrinsic::ssub_with_overflow:
3653 case Intrinsic::usub_with_overflow:
3654 case Intrinsic::smul_with_overflow:
3655 case Intrinsic::umul_with_overflow: {
3656 // This implements the basic lowering of the xalu with overflow intrinsics.
3657 const Function *Callee = II->getCalledFunction();
3658 auto *Ty = cast<StructType>(Callee->getReturnType());
3659 Type *RetTy = Ty->getTypeAtIndex(0U);
3660
3661 MVT VT;
3662 if (!isTypeLegal(RetTy, VT))
3663 return false;
3664
3665 if (VT != MVT::i32 && VT != MVT::i64)
3666 return false;
3667
3668 const Value *LHS = II->getArgOperand(0);
3669 const Value *RHS = II->getArgOperand(1);
3670 // Canonicalize immediate to the RHS.
3671 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3672 std::swap(LHS, RHS);
3673
3674 // Simplify multiplies.
3675 Intrinsic::ID IID = II->getIntrinsicID();
3676 switch (IID) {
3677 default:
3678 break;
3679 case Intrinsic::smul_with_overflow:
3680 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3681 if (C->getValue() == 2) {
3682 IID = Intrinsic::sadd_with_overflow;
3683 RHS = LHS;
3684 }
3685 break;
3686 case Intrinsic::umul_with_overflow:
3687 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3688 if (C->getValue() == 2) {
3689 IID = Intrinsic::uadd_with_overflow;
3690 RHS = LHS;
3691 }
3692 break;
3693 }
3694
3695     Register ResultReg1, ResultReg2, MulReg;
3696     AArch64CC::CondCode CC = AArch64CC::AL;
3697 switch (IID) {
3698 default: llvm_unreachable("Unexpected intrinsic!");
3699 case Intrinsic::sadd_with_overflow:
3700 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::VS;
3702 break;
3703 case Intrinsic::uadd_with_overflow:
3704 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3705 CC = AArch64CC::HS;
3706 break;
3707 case Intrinsic::ssub_with_overflow:
3708 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3709 CC = AArch64CC::VS;
3710 break;
3711 case Intrinsic::usub_with_overflow:
3712 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3713 CC = AArch64CC::LO;
3714 break;
3715 case Intrinsic::smul_with_overflow: {
3716 CC = AArch64CC::NE;
3717 Register LHSReg = getRegForValue(LHS);
3718 if (!LHSReg)
3719 return false;
3720
3721 Register RHSReg = getRegForValue(RHS);
3722 if (!RHSReg)
3723 return false;
3724
3725 if (VT == MVT::i32) {
3726 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3727 Register MulSubReg =
3728 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3729 // cmp xreg, wreg, sxtw
3730 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3731 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3732 /*WantResult=*/false);
3733 MulReg = MulSubReg;
3734 } else {
3735 assert(VT == MVT::i64 && "Unexpected value type.");
3736 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3737 // reused in the next instruction.
3738 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3739 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3740 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3741 /*WantResult=*/false);
3742 }
3743 break;
3744 }
3745 case Intrinsic::umul_with_overflow: {
3746 CC = AArch64CC::NE;
3747 Register LHSReg = getRegForValue(LHS);
3748 if (!LHSReg)
3749 return false;
3750
3751 Register RHSReg = getRegForValue(RHS);
3752 if (!RHSReg)
3753 return false;
3754
3755 if (VT == MVT::i32) {
3756 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3757 // tst xreg, #0xffffffff00000000
3758 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3759 TII.get(AArch64::ANDSXri), AArch64::XZR)
3760 .addReg(MulReg)
3761 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3762 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3763 } else {
3764 assert(VT == MVT::i64 && "Unexpected value type.");
3765 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3766 // reused in the next instruction.
3767 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3768 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3769 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3770 }
3771 break;
3772 }
3773 }
3774
3775 if (MulReg) {
3776 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3777 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3778 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3779 }
3780
3781 if (!ResultReg1)
3782 return false;
3783
3784 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3785 AArch64::WZR, AArch64::WZR,
3786 getInvertedCondCode(CC));
3787 (void)ResultReg2;
3788 assert((ResultReg1 + 1) == ResultReg2 &&
3789 "Nonconsecutive result registers.");
3790 updateValueMap(II, ResultReg1, 2);
3791 return true;
3792 }
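// A standalone sketch (not part of this file, names illustrative) of the
// 32-bit multiply overflow checks emitted above: signed overflow iff the
// 64-bit SMULL result differs from its sign-extended low half (the
// "cmp xreg, wreg, sxtw"); unsigned overflow iff the high half of the UMULL
// result is non-zero (the "tst xreg, #0xffffffff00000000"). The 64-bit cases
// instead compare SMULH against MUL >> 63, respectively test UMULH for zero.
#include <cstdint>
bool smul32Overflows(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) * B;            // SMULL
  return Wide != int64_t(int32_t(Wide));    // differs from sxtw of the low half
}
bool umul32Overflows(uint32_t A, uint32_t B) {
  uint64_t Wide = uint64_t(A) * B;          // UMULL
  return (Wide >> 32) != 0;                 // high half non-zero
}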
3793 case Intrinsic::aarch64_crc32b:
3794 case Intrinsic::aarch64_crc32h:
3795 case Intrinsic::aarch64_crc32w:
3796 case Intrinsic::aarch64_crc32x:
3797 case Intrinsic::aarch64_crc32cb:
3798 case Intrinsic::aarch64_crc32ch:
3799 case Intrinsic::aarch64_crc32cw:
3800 case Intrinsic::aarch64_crc32cx: {
3801 if (!Subtarget->hasCRC())
3802 return false;
3803
3804 unsigned Opc;
3805 switch (II->getIntrinsicID()) {
3806 default:
3807 llvm_unreachable("Unexpected intrinsic!");
3808 case Intrinsic::aarch64_crc32b:
3809 Opc = AArch64::CRC32Brr;
3810 break;
3811 case Intrinsic::aarch64_crc32h:
3812 Opc = AArch64::CRC32Hrr;
3813 break;
3814 case Intrinsic::aarch64_crc32w:
3815 Opc = AArch64::CRC32Wrr;
3816 break;
3817 case Intrinsic::aarch64_crc32x:
3818 Opc = AArch64::CRC32Xrr;
3819 break;
3820 case Intrinsic::aarch64_crc32cb:
3821 Opc = AArch64::CRC32CBrr;
3822 break;
3823 case Intrinsic::aarch64_crc32ch:
3824 Opc = AArch64::CRC32CHrr;
3825 break;
3826 case Intrinsic::aarch64_crc32cw:
3827 Opc = AArch64::CRC32CWrr;
3828 break;
3829 case Intrinsic::aarch64_crc32cx:
3830 Opc = AArch64::CRC32CXrr;
3831 break;
3832 }
3833
3834 Register LHSReg = getRegForValue(II->getArgOperand(0));
3835 Register RHSReg = getRegForValue(II->getArgOperand(1));
3836 if (!LHSReg || !RHSReg)
3837 return false;
3838
3839 Register ResultReg =
3840 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3841 updateValueMap(II, ResultReg);
3842 return true;
3843 }
3844 }
3845 return false;
3846}
3847
3848bool AArch64FastISel::selectRet(const Instruction *I) {
3849 const ReturnInst *Ret = cast<ReturnInst>(I);
3850 const Function &F = *I->getParent()->getParent();
3851
3852 if (!FuncInfo.CanLowerReturn)
3853 return false;
3854
3855 if (F.isVarArg())
3856 return false;
3857
3858 if (TLI.supportSwiftError() &&
3859 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3860 return false;
3861
3862 if (TLI.supportSplitCSR(FuncInfo.MF))
3863 return false;
3864
3865 // Build a list of return value registers.
3866   SmallVector<Register, 4> RetRegs;
3867
3868 if (Ret->getNumOperands() > 0) {
3869     CallingConv::ID CC = F.getCallingConv();
3870     SmallVector<ISD::OutputArg, 4> Outs;
3871 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3872
3873     // Analyze operands of the call, assigning locations to each operand.
3874     SmallVector<CCValAssign, 16> ValLocs;
3875 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3876 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3877
3878 // Only handle a single return value for now.
3879 if (ValLocs.size() != 1)
3880 return false;
3881
3882 CCValAssign &VA = ValLocs[0];
3883 const Value *RV = Ret->getOperand(0);
3884
3885 // Don't bother handling odd stuff for now.
3886 if ((VA.getLocInfo() != CCValAssign::Full) &&
3887 (VA.getLocInfo() != CCValAssign::BCvt))
3888 return false;
3889
3890 // Only handle register returns for now.
3891 if (!VA.isRegLoc())
3892 return false;
3893
3894 Register Reg = getRegForValue(RV);
3895 if (!Reg)
3896 return false;
3897
3898 Register SrcReg = Reg + VA.getValNo();
3899 Register DestReg = VA.getLocReg();
3900 // Avoid a cross-class copy. This is very unlikely.
3901 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3902 return false;
3903
3904 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3905 if (!RVEVT.isSimple())
3906 return false;
3907
3908 // Vectors (of > 1 lane) in big endian need tricky handling.
3909 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3910 !Subtarget->isLittleEndian())
3911 return false;
3912
3913 MVT RVVT = RVEVT.getSimpleVT();
3914 if (RVVT == MVT::f128)
3915 return false;
3916
3917 MVT DestVT = VA.getValVT();
3918 // Special handling for extended integers.
3919 if (RVVT != DestVT) {
3920 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3921 return false;
3922
3923 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3924 return false;
3925
3926 bool IsZExt = Outs[0].Flags.isZExt();
3927 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3928 if (!SrcReg)
3929 return false;
3930 }
3931
3932 // "Callee" (i.e. value producer) zero extends pointers at function
3933 // boundary.
3934 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3935 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3936
3937 // Make the copy.
3938 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3939 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3940
3941 // Add register to return instruction.
3942 RetRegs.push_back(VA.getLocReg());
3943 }
3944
3945 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3946 TII.get(AArch64::RET_ReallyLR));
3947 for (Register RetReg : RetRegs)
3948 MIB.addReg(RetReg, RegState::Implicit);
3949 return true;
3950}
3951
3952bool AArch64FastISel::selectTrunc(const Instruction *I) {
3953 Type *DestTy = I->getType();
3954 Value *Op = I->getOperand(0);
3955 Type *SrcTy = Op->getType();
3956
3957 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3958 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3959 if (!SrcEVT.isSimple())
3960 return false;
3961 if (!DestEVT.isSimple())
3962 return false;
3963
3964 MVT SrcVT = SrcEVT.getSimpleVT();
3965 MVT DestVT = DestEVT.getSimpleVT();
3966
3967 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3968 SrcVT != MVT::i8)
3969 return false;
3970 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3971 DestVT != MVT::i1)
3972 return false;
3973
3974 Register SrcReg = getRegForValue(Op);
3975 if (!SrcReg)
3976 return false;
3977
3978 // If we're truncating from i64 to a smaller non-legal type then generate an
3979 // AND. Otherwise, we know the high bits are undefined and a truncate only
3980 // generates a COPY. We cannot also mark the source register as the result
3981 // register, because this can incorrectly transfer the kill flag onto the
3982 // source register.
3983 Register ResultReg;
3984 if (SrcVT == MVT::i64) {
3985 uint64_t Mask = 0;
3986 switch (DestVT.SimpleTy) {
3987 default:
3988 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3989 return false;
3990 case MVT::i1:
3991 Mask = 0x1;
3992 break;
3993 case MVT::i8:
3994 Mask = 0xff;
3995 break;
3996 case MVT::i16:
3997 Mask = 0xffff;
3998 break;
3999 }
4000 // Issue an extract_subreg to get the lower 32-bits.
4001 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
4002 AArch64::sub_32);
4003 // Create the AND instruction which performs the actual truncation.
4004 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4005 assert(ResultReg && "Unexpected AND instruction emission failure.");
4006 } else {
4007 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4008 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4009 TII.get(TargetOpcode::COPY), ResultReg)
4010 .addReg(SrcReg);
4011 }
4012
4013 updateValueMap(I, ResultReg);
4014 return true;
4015}
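// A standalone sketch (names illustrative) of the i64 path above: grab the
// low 32-bit subregister, then apply the AND mask that performs the actual
// truncation.
#include <cstdint>
uint32_t truncI64ToI8(uint64_t X) {
  uint32_t Lo = uint32_t(X);   // fastEmitInst_extractsubreg(..., sub_32)
  return Lo & 0xff;            // emitAnd_ri(MVT::i32, Reg32, 0xff)
}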
4016
4017Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4018 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4019 DestVT == MVT::i64) &&
4020 "Unexpected value type.");
4021 // Handle i8 and i16 as i32.
4022 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4023 DestVT = MVT::i32;
4024
4025 if (IsZExt) {
4026 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4027 assert(ResultReg && "Unexpected AND instruction emission failure.");
4028 if (DestVT == MVT::i64) {
4029 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4030 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4031 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4032 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4033 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4034 .addImm(0)
4035 .addReg(ResultReg)
4036 .addImm(AArch64::sub_32);
4037 ResultReg = Reg64;
4038 }
4039 return ResultReg;
4040 } else {
4041 if (DestVT == MVT::i64) {
4042 // FIXME: We're SExt i1 to i64.
4043 return Register();
4044 }
4045 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4046 0, 0);
4047 }
4048}
4049
4050Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4051 unsigned Opc;
4052 Register ZReg;
4053 switch (RetVT.SimpleTy) {
4054 default:
4055 return Register();
4056 case MVT::i8:
4057 case MVT::i16:
4058 case MVT::i32:
4059 RetVT = MVT::i32;
4060 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4061 case MVT::i64:
4062 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4063 }
4064
4065 const TargetRegisterClass *RC =
4066 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4067 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4068}
4069
4070Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4071 if (RetVT != MVT::i64)
4072 return Register();
4073
4074 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4075 Op0, Op1, AArch64::XZR);
4076}
4077
4078Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4079 if (RetVT != MVT::i64)
4080 return Register();
4081
4082 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4083 Op0, Op1, AArch64::XZR);
4084}
4085
4086Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4087 Register Op1Reg) {
4088 unsigned Opc = 0;
4089 bool NeedTrunc = false;
4090 uint64_t Mask = 0;
4091 switch (RetVT.SimpleTy) {
4092 default:
4093 return Register();
4094 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4095 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4096 case MVT::i32: Opc = AArch64::LSLVWr; break;
4097 case MVT::i64: Opc = AArch64::LSLVXr; break;
4098 }
4099
4100 const TargetRegisterClass *RC =
4101 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4102 if (NeedTrunc)
4103 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4104
4105 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4106 if (NeedTrunc)
4107 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4108 return ResultReg;
4109}
4110
4111Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4112 uint64_t Shift, bool IsZExt) {
4113 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4114 "Unexpected source/return type pair.");
4115 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4116 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4117 "Unexpected source value type.");
4118 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4119 RetVT == MVT::i64) && "Unexpected return value type.");
4120
4121 bool Is64Bit = (RetVT == MVT::i64);
4122 unsigned RegSize = Is64Bit ? 64 : 32;
4123 unsigned DstBits = RetVT.getSizeInBits();
4124 unsigned SrcBits = SrcVT.getSizeInBits();
4125 const TargetRegisterClass *RC =
4126 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4127
4128 // Just emit a copy for "zero" shifts.
4129 if (Shift == 0) {
4130 if (RetVT == SrcVT) {
4131 Register ResultReg = createResultReg(RC);
4132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4133 TII.get(TargetOpcode::COPY), ResultReg)
4134 .addReg(Op0);
4135 return ResultReg;
4136 } else
4137 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4138 }
4139
4140 // Don't deal with undefined shifts.
4141 if (Shift >= DstBits)
4142 return Register();
4143
4144 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4145 // {S|U}BFM Wd, Wn, #r, #s
4146 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4147
4148 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4149 // %2 = shl i16 %1, 4
4150 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4151 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4152 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4153 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4154
4155 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4156 // %2 = shl i16 %1, 8
4157 // Wd<32+7-24,32-24> = Wn<7:0>
4158 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4159 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4160 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4161
4162 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4163 // %2 = shl i16 %1, 12
4164 // Wd<32+3-20,32-20> = Wn<3:0>
4165 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4166 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4167 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4168
4169 unsigned ImmR = RegSize - Shift;
4170 // Limit the width to the length of the source type.
4171 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4172 static const unsigned OpcTable[2][2] = {
4173 {AArch64::SBFMWri, AArch64::SBFMXri},
4174 {AArch64::UBFMWri, AArch64::UBFMXri}
4175 };
4176 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4177 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4178 Register TmpReg = MRI.createVirtualRegister(RC);
4179 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4180 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4181 .addImm(0)
4182 .addReg(Op0)
4183 .addImm(AArch64::sub_32);
4184 Op0 = TmpReg;
4185 }
4186 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4187}
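// A standalone sketch (names illustrative) of the UBFM form produced above
// when ImmS < ImmR (the UBFIZ alias): the low ImmS+1 bits of the source are
// placed at bit position RegSize - ImmR. For example, shl(zext i8 0xAB to
// i32), 4 uses ImmR = 32 - 4 = 28 and ImmS = min(7, 31 - 4) = 7, and
// ubfmW(0xAB, 28, 7) == 0xAB0 == 0xAB << 4.
#include <cassert>
#include <cstdint>
uint32_t ubfmW(uint32_t Wn, unsigned ImmR, unsigned ImmS) {
  assert(ImmS < ImmR && ImmR < 32 && "sketch only covers the UBFIZ form");
  uint32_t Field = Wn & ((1u << (ImmS + 1)) - 1);
  return Field << (32 - ImmR);
}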
4188
4189Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4190 Register Op1Reg) {
4191 unsigned Opc = 0;
4192 bool NeedTrunc = false;
4193 uint64_t Mask = 0;
4194 switch (RetVT.SimpleTy) {
4195 default:
4196 return Register();
4197 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4198 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4199 case MVT::i32: Opc = AArch64::LSRVWr; break;
4200 case MVT::i64: Opc = AArch64::LSRVXr; break;
4201 }
4202
4203 const TargetRegisterClass *RC =
4204 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4205 if (NeedTrunc) {
4206 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4207 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4208 }
4209 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4210 if (NeedTrunc)
4211 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4212 return ResultReg;
4213}
4214
4215Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4216 uint64_t Shift, bool IsZExt) {
4217 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4218 "Unexpected source/return type pair.");
4219 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4220 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4221 "Unexpected source value type.");
4222 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4223 RetVT == MVT::i64) && "Unexpected return value type.");
4224
4225 bool Is64Bit = (RetVT == MVT::i64);
4226 unsigned RegSize = Is64Bit ? 64 : 32;
4227 unsigned DstBits = RetVT.getSizeInBits();
4228 unsigned SrcBits = SrcVT.getSizeInBits();
4229 const TargetRegisterClass *RC =
4230 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4231
4232 // Just emit a copy for "zero" shifts.
4233 if (Shift == 0) {
4234 if (RetVT == SrcVT) {
4235 Register ResultReg = createResultReg(RC);
4236 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4237 TII.get(TargetOpcode::COPY), ResultReg)
4238 .addReg(Op0);
4239 return ResultReg;
4240 } else
4241 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4242 }
4243
4244 // Don't deal with undefined shifts.
4245 if (Shift >= DstBits)
4246 return Register();
4247
4248 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4249 // {S|U}BFM Wd, Wn, #r, #s
4250 // Wd<s-r:0> = Wn<s:r> when r <= s
4251
4252 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4253 // %2 = lshr i16 %1, 4
4254 // Wd<7-4:0> = Wn<7:4>
4255 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4257 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4258
4259 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4260 // %2 = lshr i16 %1, 8
4261 // Wd<7-7,0> = Wn<7:7>
4262 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4264 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4265
4266 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4267 // %2 = lshr i16 %1, 12
4268 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4269 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4270 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4271 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4272
4273 if (Shift >= SrcBits && IsZExt)
4274 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4275
4276 // It is not possible to fold a sign-extend into the LShr instruction. In this
4277 // case emit a sign-extend.
4278 if (!IsZExt) {
4279 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4280 if (!Op0)
4281 return Register();
4282 SrcVT = RetVT;
4283 SrcBits = SrcVT.getSizeInBits();
4284 IsZExt = true;
4285 }
4286
4287 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4288 unsigned ImmS = SrcBits - 1;
4289 static const unsigned OpcTable[2][2] = {
4290 {AArch64::SBFMWri, AArch64::SBFMXri},
4291 {AArch64::UBFMWri, AArch64::UBFMXri}
4292 };
4293 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4294 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4295 Register TmpReg = MRI.createVirtualRegister(RC);
4296 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4297 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4298 .addImm(0)
4299 .addReg(Op0)
4300 .addImm(AArch64::sub_32);
4301 Op0 = TmpReg;
4302 }
4303 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4304}
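// A standalone sketch (names illustrative) of the UBFM form produced above
// when ImmR <= ImmS (the UBFX alias): bits [ImmS:ImmR] of the source land at
// bit 0. For example, lshr(zext i8 0xAB to i32), 4 uses ImmR = 4, ImmS = 7,
// and ubfxW(0xAB, 4, 7) == 0xA == 0xAB >> 4.
#include <cassert>
#include <cstdint>
uint32_t ubfxW(uint32_t Wn, unsigned ImmR, unsigned ImmS) {
  assert(ImmR <= ImmS && ImmS < 32 && "sketch only covers the UBFX form");
  unsigned Width = ImmS - ImmR + 1;
  uint32_t Mask = Width == 32 ? ~0u : (1u << Width) - 1;
  return (Wn >> ImmR) & Mask;
}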
4305
4306Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4307 Register Op1Reg) {
4308 unsigned Opc = 0;
4309 bool NeedTrunc = false;
4310 uint64_t Mask = 0;
4311 switch (RetVT.SimpleTy) {
4312 default:
4313 return Register();
4314 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4315 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4316 case MVT::i32: Opc = AArch64::ASRVWr; break;
4317 case MVT::i64: Opc = AArch64::ASRVXr; break;
4318 }
4319
4320 const TargetRegisterClass *RC =
4321 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4322 if (NeedTrunc) {
4323 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4324 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4325 }
4326 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4327 if (NeedTrunc)
4328 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4329 return ResultReg;
4330}
4331
4332Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4333 uint64_t Shift, bool IsZExt) {
4334 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4335 "Unexpected source/return type pair.");
4336 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4337 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4338 "Unexpected source value type.");
4339 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4340 RetVT == MVT::i64) && "Unexpected return value type.");
4341
4342 bool Is64Bit = (RetVT == MVT::i64);
4343 unsigned RegSize = Is64Bit ? 64 : 32;
4344 unsigned DstBits = RetVT.getSizeInBits();
4345 unsigned SrcBits = SrcVT.getSizeInBits();
4346 const TargetRegisterClass *RC =
4347 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4348
4349 // Just emit a copy for "zero" shifts.
4350 if (Shift == 0) {
4351 if (RetVT == SrcVT) {
4352 Register ResultReg = createResultReg(RC);
4353 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4354 TII.get(TargetOpcode::COPY), ResultReg)
4355 .addReg(Op0);
4356 return ResultReg;
4357 } else
4358 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4359 }
4360
4361 // Don't deal with undefined shifts.
4362 if (Shift >= DstBits)
4363 return Register();
4364
4365 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4366 // {S|U}BFM Wd, Wn, #r, #s
4367 // Wd<s-r:0> = Wn<s:r> when r <= s
4368
4369 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4370 // %2 = ashr i16 %1, 4
4371 // Wd<7-4:0> = Wn<7:4>
4372 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4373 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4374 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4375
4376 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4377 // %2 = ashr i16 %1, 8
4378 // Wd<7-7,0> = Wn<7:7>
4379 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4380 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4381 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4382
4383 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4384 // %2 = ashr i16 %1, 12
4385 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4386 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4387 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4388 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4389
4390 if (Shift >= SrcBits && IsZExt)
4391 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4392
4393 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4394 unsigned ImmS = SrcBits - 1;
4395 static const unsigned OpcTable[2][2] = {
4396 {AArch64::SBFMWri, AArch64::SBFMXri},
4397 {AArch64::UBFMWri, AArch64::UBFMXri}
4398 };
4399 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4400 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4401 Register TmpReg = MRI.createVirtualRegister(RC);
4402 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4403 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4404 .addImm(0)
4405 .addReg(Op0)
4406 .addImm(AArch64::sub_32);
4407 Op0 = TmpReg;
4408 }
4409 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4410}
4411
4412Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4413 bool IsZExt) {
4414 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4415
4416 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4417 // DestVT are odd things, so test to make sure that they are both types we can
4418 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4419 // bail out to SelectionDAG.
4420 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4421 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4422 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4423 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4424 return Register();
4425
4426 unsigned Opc;
4427 unsigned Imm = 0;
4428
4429 switch (SrcVT.SimpleTy) {
4430 default:
4431 return Register();
4432 case MVT::i1:
4433 return emiti1Ext(SrcReg, DestVT, IsZExt);
4434 case MVT::i8:
4435 if (DestVT == MVT::i64)
4436 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4437 else
4438 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4439 Imm = 7;
4440 break;
4441 case MVT::i16:
4442 if (DestVT == MVT::i64)
4443 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4444 else
4445 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4446 Imm = 15;
4447 break;
4448 case MVT::i32:
4449 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4450 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4451 Imm = 31;
4452 break;
4453 }
4454
4455 // Handle i8 and i16 as i32.
4456 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4457 DestVT = MVT::i32;
4458 else if (DestVT == MVT::i64) {
4459 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4461 TII.get(AArch64::SUBREG_TO_REG), Src64)
4462 .addImm(0)
4463 .addReg(SrcReg)
4464 .addImm(AArch64::sub_32);
4465 SrcReg = Src64;
4466 }
4467
4468 const TargetRegisterClass *RC =
4469 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4470 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4471}
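// A standalone sketch (names illustrative, assumes the usual arithmetic right
// shift for signed types) of what the single {U,S}BFM emitted above computes
// for the i8/i16/i32 sources handled here (ImmR = 0, ImmS = SrcBits - 1).
#include <cstdint>
uint64_t zextNarrow(uint64_t Xn, unsigned SrcBits) {  // SrcBits in {8, 16, 32}
  return Xn & ((1ull << SrcBits) - 1);                // UBFM ..., #0, #SrcBits-1
}
int64_t sextNarrow(uint64_t Xn, unsigned SrcBits) {
  unsigned Sh = 64 - SrcBits;
  return int64_t(Xn << Sh) >> Sh;                     // SBFM ..., #0, #SrcBits-1
}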
4472
4473static bool isZExtLoad(const MachineInstr *LI) {
4474 switch (LI->getOpcode()) {
4475 default:
4476 return false;
4477 case AArch64::LDURBBi:
4478 case AArch64::LDURHHi:
4479 case AArch64::LDURWi:
4480 case AArch64::LDRBBui:
4481 case AArch64::LDRHHui:
4482 case AArch64::LDRWui:
4483 case AArch64::LDRBBroX:
4484 case AArch64::LDRHHroX:
4485 case AArch64::LDRWroX:
4486 case AArch64::LDRBBroW:
4487 case AArch64::LDRHHroW:
4488 case AArch64::LDRWroW:
4489 return true;
4490 }
4491}
4492
4493static bool isSExtLoad(const MachineInstr *LI) {
4494 switch (LI->getOpcode()) {
4495 default:
4496 return false;
4497 case AArch64::LDURSBWi:
4498 case AArch64::LDURSHWi:
4499 case AArch64::LDURSBXi:
4500 case AArch64::LDURSHXi:
4501 case AArch64::LDURSWi:
4502 case AArch64::LDRSBWui:
4503 case AArch64::LDRSHWui:
4504 case AArch64::LDRSBXui:
4505 case AArch64::LDRSHXui:
4506 case AArch64::LDRSWui:
4507 case AArch64::LDRSBWroX:
4508 case AArch64::LDRSHWroX:
4509 case AArch64::LDRSBXroX:
4510 case AArch64::LDRSHXroX:
4511 case AArch64::LDRSWroX:
4512 case AArch64::LDRSBWroW:
4513 case AArch64::LDRSHWroW:
4514 case AArch64::LDRSBXroW:
4515 case AArch64::LDRSHXroW:
4516 case AArch64::LDRSWroW:
4517 return true;
4518 }
4519}
4520
4521bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4522 MVT SrcVT) {
4523 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4524 if (!LI || !LI->hasOneUse())
4525 return false;
4526
4527 // Check if the load instruction has already been selected.
4528 Register Reg = lookUpRegForValue(LI);
4529 if (!Reg)
4530 return false;
4531
4532 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4533 if (!MI)
4534 return false;
4535
4536 // Check if the correct load instruction has been emitted - SelectionDAG might
4537 // have emitted a zero-extending load, but we need a sign-extending load.
4538 bool IsZExt = isa<ZExtInst>(I);
4539 const auto *LoadMI = MI;
4540 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4541 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4542 Register LoadReg = MI->getOperand(1).getReg();
4543 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4544 assert(LoadMI && "Expected valid instruction");
4545 }
4546 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4547 return false;
4548
4549 // Nothing to be done.
4550 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4551 updateValueMap(I, Reg);
4552 return true;
4553 }
4554
4555 if (IsZExt) {
4556 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4557 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4558 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4559 .addImm(0)
4560 .addReg(Reg, getKillRegState(true))
4561 .addImm(AArch64::sub_32);
4562 Reg = Reg64;
4563 } else {
4564 assert((MI->getOpcode() == TargetOpcode::COPY &&
4565 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4566 "Expected copy instruction");
4567     Reg = MI->getOperand(1).getReg();
4568     MachineBasicBlock::iterator I(MI);
4569 removeDeadCode(I, std::next(I));
4570 }
4571 updateValueMap(I, Reg);
4572 return true;
4573}
4574
4575 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4576   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4577 "Unexpected integer extend instruction.");
4578 MVT RetVT;
4579 MVT SrcVT;
4580 if (!isTypeSupported(I->getType(), RetVT))
4581 return false;
4582
4583 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4584 return false;
4585
4586 // Try to optimize already sign-/zero-extended values from load instructions.
4587 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4588 return true;
4589
4590 Register SrcReg = getRegForValue(I->getOperand(0));
4591 if (!SrcReg)
4592 return false;
4593
4594 // Try to optimize already sign-/zero-extended values from function arguments.
4595 bool IsZExt = isa<ZExtInst>(I);
4596 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4597 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4598 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4599 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4600 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4601 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4602 .addImm(0)
4603 .addReg(SrcReg)
4604 .addImm(AArch64::sub_32);
4605 SrcReg = ResultReg;
4606 }
4607
4608 updateValueMap(I, SrcReg);
4609 return true;
4610 }
4611 }
4612
4613 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4614 if (!ResultReg)
4615 return false;
4616
4617 updateValueMap(I, ResultReg);
4618 return true;
4619}
4620
4621bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4622 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4623 if (!DestEVT.isSimple())
4624 return false;
4625
4626 MVT DestVT = DestEVT.getSimpleVT();
4627 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4628 return false;
4629
4630 unsigned DivOpc;
4631 bool Is64bit = (DestVT == MVT::i64);
4632 switch (ISDOpcode) {
4633 default:
4634 return false;
4635 case ISD::SREM:
4636 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4637 break;
4638 case ISD::UREM:
4639 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4640 break;
4641 }
4642 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4643 Register Src0Reg = getRegForValue(I->getOperand(0));
4644 if (!Src0Reg)
4645 return false;
4646
4647 Register Src1Reg = getRegForValue(I->getOperand(1));
4648 if (!Src1Reg)
4649 return false;
4650
4651 const TargetRegisterClass *RC =
4652 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4653 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4654 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4655 // The remainder is computed as numerator - (quotient * denominator) using the
4656 // MSUB instruction.
4657 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4658 updateValueMap(I, ResultReg);
4659 return true;
4660}
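// A standalone sketch (names illustrative, assumes D != 0 and no
// INT64_MIN / -1 case) of the SDIV + MSUB sequence emitted above.
#include <cstdint>
int64_t sremViaMSub(int64_t N, int64_t D) {
  int64_t Q = N / D;   // SDIVXr
  return N - Q * D;    // MSUBXrrr folds the multiply and subtract
}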
4661
4662bool AArch64FastISel::selectMul(const Instruction *I) {
4663 MVT VT;
4664 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4665 return false;
4666
4667 if (VT.isVector())
4668 return selectBinaryOp(I, ISD::MUL);
4669
4670 const Value *Src0 = I->getOperand(0);
4671 const Value *Src1 = I->getOperand(1);
4672 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4673 if (C->getValue().isPowerOf2())
4674 std::swap(Src0, Src1);
4675
4676 // Try to simplify to a shift instruction.
4677 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4678 if (C->getValue().isPowerOf2()) {
4679 uint64_t ShiftVal = C->getValue().logBase2();
4680 MVT SrcVT = VT;
4681 bool IsZExt = true;
4682 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4683 if (!isIntExtFree(ZExt)) {
4684 MVT VT;
4685 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4686 SrcVT = VT;
4687 IsZExt = true;
4688 Src0 = ZExt->getOperand(0);
4689 }
4690 }
4691 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4692 if (!isIntExtFree(SExt)) {
4693 MVT VT;
4694 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4695 SrcVT = VT;
4696 IsZExt = false;
4697 Src0 = SExt->getOperand(0);
4698 }
4699 }
4700 }
4701
4702 Register Src0Reg = getRegForValue(Src0);
4703 if (!Src0Reg)
4704 return false;
4705
4706 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4707
4708 if (ResultReg) {
4709 updateValueMap(I, ResultReg);
4710 return true;
4711 }
4712 }
4713
4714 Register Src0Reg = getRegForValue(I->getOperand(0));
4715 if (!Src0Reg)
4716 return false;
4717
4718 Register Src1Reg = getRegForValue(I->getOperand(1));
4719 if (!Src1Reg)
4720 return false;
4721
4722 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4723
4724 if (!ResultReg)
4725 return false;
4726
4727 updateValueMap(I, ResultReg);
4728 return true;
4729}
4730
4731bool AArch64FastISel::selectShift(const Instruction *I) {
4732 MVT RetVT;
4733 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4734 return false;
4735
4736 if (RetVT.isVector())
4737 return selectOperator(I, I->getOpcode());
4738
4739 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4740 Register ResultReg;
4741 uint64_t ShiftVal = C->getZExtValue();
4742 MVT SrcVT = RetVT;
4743 bool IsZExt = I->getOpcode() != Instruction::AShr;
4744 const Value *Op0 = I->getOperand(0);
4745 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4746 if (!isIntExtFree(ZExt)) {
4747 MVT TmpVT;
4748 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4749 SrcVT = TmpVT;
4750 IsZExt = true;
4751 Op0 = ZExt->getOperand(0);
4752 }
4753 }
4754 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4755 if (!isIntExtFree(SExt)) {
4756 MVT TmpVT;
4757 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4758 SrcVT = TmpVT;
4759 IsZExt = false;
4760 Op0 = SExt->getOperand(0);
4761 }
4762 }
4763 }
4764
4765 Register Op0Reg = getRegForValue(Op0);
4766 if (!Op0Reg)
4767 return false;
4768
4769 switch (I->getOpcode()) {
4770 default: llvm_unreachable("Unexpected instruction.");
4771 case Instruction::Shl:
4772 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4773 break;
4774 case Instruction::AShr:
4775 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4776 break;
4777 case Instruction::LShr:
4778 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4779 break;
4780 }
4781 if (!ResultReg)
4782 return false;
4783
4784 updateValueMap(I, ResultReg);
4785 return true;
4786 }
4787
4788 Register Op0Reg = getRegForValue(I->getOperand(0));
4789 if (!Op0Reg)
4790 return false;
4791
4792 Register Op1Reg = getRegForValue(I->getOperand(1));
4793 if (!Op1Reg)
4794 return false;
4795
4796 Register ResultReg;
4797 switch (I->getOpcode()) {
4798 default: llvm_unreachable("Unexpected instruction.");
4799 case Instruction::Shl:
4800 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4801 break;
4802 case Instruction::AShr:
4803 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4804 break;
4805 case Instruction::LShr:
4806 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4807 break;
4808 }
4809
4810 if (!ResultReg)
4811 return false;
4812
4813 updateValueMap(I, ResultReg);
4814 return true;
4815}
4816
4817bool AArch64FastISel::selectBitCast(const Instruction *I) {
4818 MVT RetVT, SrcVT;
4819
4820 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4821 return false;
4822 if (!isTypeLegal(I->getType(), RetVT))
4823 return false;
4824
4825 unsigned Opc;
4826 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4827 Opc = AArch64::FMOVWSr;
4828 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4829 Opc = AArch64::FMOVXDr;
4830 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4831 Opc = AArch64::FMOVSWr;
4832 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4833 Opc = AArch64::FMOVDXr;
4834 else
4835 return false;
4836
4837 const TargetRegisterClass *RC = nullptr;
4838 switch (RetVT.SimpleTy) {
4839 default: llvm_unreachable("Unexpected value type.");
4840 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4841 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4842 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4843 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4844 }
4845 Register Op0Reg = getRegForValue(I->getOperand(0));
4846 if (!Op0Reg)
4847 return false;
4848
4849 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4850 if (!ResultReg)
4851 return false;
4852
4853 updateValueMap(I, ResultReg);
4854 return true;
4855}
4856
4857bool AArch64FastISel::selectFRem(const Instruction *I) {
4858 MVT RetVT;
4859 if (!isTypeLegal(I->getType(), RetVT))
4860 return false;
4861
4862 RTLIB::LibcallImpl LCImpl =
4863 LibcallLowering->getLibcallImpl(RTLIB::getREM(RetVT));
4864 if (LCImpl == RTLIB::Unsupported)
4865 return false;
4866
4867 ArgListTy Args;
4868 Args.reserve(I->getNumOperands());
4869
4870 // Populate the argument list.
4871 for (auto &Arg : I->operands())
4872 Args.emplace_back(Arg);
4873
4874 CallLoweringInfo CLI;
4875 MCContext &Ctx = MF->getContext();
4876 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
4877 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
4878
4879 CLI.setCallee(DL, Ctx, CC, I->getType(), FuncName, std::move(Args));
4880 if (!lowerCallTo(CLI))
4881 return false;
4882 updateValueMap(I, CLI.ResultReg);
4883 return true;
4884}
4885
4886bool AArch64FastISel::selectSDiv(const Instruction *I) {
4887 MVT VT;
4888 if (!isTypeLegal(I->getType(), VT))
4889 return false;
4890
4891 if (!isa<ConstantInt>(I->getOperand(1)))
4892 return selectBinaryOp(I, ISD::SDIV);
4893
4894 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4895 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4896 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4897 return selectBinaryOp(I, ISD::SDIV);
4898
4899 unsigned Lg2 = C.countr_zero();
4900 Register Src0Reg = getRegForValue(I->getOperand(0));
4901 if (!Src0Reg)
4902 return false;
4903
4904 if (cast<BinaryOperator>(I)->isExact()) {
4905 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4906 if (!ResultReg)
4907 return false;
4908 updateValueMap(I, ResultReg);
4909 return true;
4910 }
4911
4912 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4913 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4914 if (!AddReg)
4915 return false;
4916
4917 // (Src0 < 0) ? Pow2 - 1 : 0;
4918 if (!emitICmp_ri(VT, Src0Reg, 0))
4919 return false;
4920
4921 unsigned SelectOpc;
4922 const TargetRegisterClass *RC;
4923 if (VT == MVT::i64) {
4924 SelectOpc = AArch64::CSELXr;
4925 RC = &AArch64::GPR64RegClass;
4926 } else {
4927 SelectOpc = AArch64::CSELWr;
4928 RC = &AArch64::GPR32RegClass;
4929 }
4930   Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4931                                         AArch64CC::LT);
4932 if (!SelectReg)
4933 return false;
4934
4935 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4936 // negate the result.
4937 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4938 Register ResultReg;
4939 if (C.isNegative())
4940 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4941 AArch64_AM::ASR, Lg2);
4942 else
4943 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4944
4945 if (!ResultReg)
4946 return false;
4947
4948 updateValueMap(I, ResultReg);
4949 return true;
4950}
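// A standalone sketch (names illustrative, assumes an arithmetic right shift
// for signed types) of the power-of-two path above: bias negative dividends
// by 2^K - 1 via ADD + CSEL, then arithmetic-shift right; a negative divisor
// additionally negates the result (the SUB-with-ASR-shift form).
#include <cstdint>
int32_t sdivPow2(int32_t X, unsigned K) {
  int32_t Bias = int32_t((1u << K) - 1);   // the emitAdd_ri_ immediate
  int32_t Adj = X < 0 ? X + Bias : X;      // CSELWr keyed on X < 0
  return Adj >> K;                         // emitASR_ri by K
}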
4951
4952/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4953/// have to duplicate it for AArch64, because otherwise we would fail during the
4954/// sign-extend emission.
4955Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4956 Register IdxN = getRegForValue(Idx);
4957 if (!IdxN)
4958 // Unhandled operand. Halt "fast" selection and bail.
4959 return Register();
4960
4961 // If the index is smaller or larger than intptr_t, truncate or extend it.
4962 MVT PtrVT = TLI.getPointerTy(DL);
4963 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4964 if (IdxVT.bitsLT(PtrVT)) {
4965 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4966 } else if (IdxVT.bitsGT(PtrVT))
4967 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4968 return IdxN;
4969}
4970
4971/// This is mostly a copy of the existing FastISel GEP code, but we have to
4972/// duplicate it for AArch64, because otherwise we would bail out even for
4973/// simple cases. This is because the standard fastEmit functions don't cover
4974 /// MUL at all and ADD is lowered very inefficiently.
4975bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4976 if (Subtarget->isTargetILP32())
4977 return false;
4978
4979 Register N = getRegForValue(I->getOperand(0));
4980 if (!N)
4981 return false;
4982
4983 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4984 // into a single N = N + TotalOffset.
4985 uint64_t TotalOffs = 0;
4986   MVT VT = TLI.getPointerTy(DL);
4987   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4988 GTI != E; ++GTI) {
4989 const Value *Idx = GTI.getOperand();
4990 if (auto *StTy = GTI.getStructTypeOrNull()) {
4991 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4992 // N = N + Offset
4993 if (Field)
4994 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4995 } else {
4996 // If this is a constant subscript, handle it quickly.
4997 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4998 if (CI->isZero())
4999 continue;
5000 // N = N + Offset
5001 TotalOffs += GTI.getSequentialElementStride(DL) *
5002 cast<ConstantInt>(CI)->getSExtValue();
5003 continue;
5004 }
5005 if (TotalOffs) {
5006 N = emitAdd_ri_(VT, N, TotalOffs);
5007 if (!N)
5008 return false;
5009 TotalOffs = 0;
5010 }
5011
5012 // N = N + Idx * ElementSize;
5013 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5014 Register IdxN = getRegForGEPIndex(Idx);
5015 if (!IdxN)
5016 return false;
5017
5018 if (ElementSize != 1) {
5019 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5020 if (!C)
5021 return false;
5022 IdxN = emitMul_rr(VT, IdxN, C);
5023 if (!IdxN)
5024 return false;
5025 }
5026 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5027 if (!N)
5028 return false;
5029 }
5030 }
5031 if (TotalOffs) {
5032 N = emitAdd_ri_(VT, N, TotalOffs);
5033 if (!N)
5034 return false;
5035 }
5036 updateValueMap(I, N);
5037 return true;
5038}
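// A standalone sketch (names illustrative) of the address arithmetic above:
// constant struct/array offsets are coalesced into one running total, and
// each variable subscript contributes Idx * ElementSize.
#include <cstdint>
uint64_t gepAddress(uint64_t Base, uint64_t TotalOffs, int64_t Idx,
                    uint64_t ElementSize) {
  return Base + TotalOffs + uint64_t(Idx) * ElementSize; // MUL/ADD sequence
}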
5039
5040bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5041 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5042 "cmpxchg survived AtomicExpand at optlevel > -O0");
5043
5044 auto *RetPairTy = cast<StructType>(I->getType());
5045 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5046 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5047 "cmpxchg has a non-i1 status result");
5048
5049 MVT VT;
5050 if (!isTypeLegal(RetTy, VT))
5051 return false;
5052
5053 const TargetRegisterClass *ResRC;
5054 unsigned Opc, CmpOpc;
5055 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5056 // extractvalue selection doesn't support that.
5057 if (VT == MVT::i32) {
5058 Opc = AArch64::CMP_SWAP_32;
5059 CmpOpc = AArch64::SUBSWrs;
5060 ResRC = &AArch64::GPR32RegClass;
5061 } else if (VT == MVT::i64) {
5062 Opc = AArch64::CMP_SWAP_64;
5063 CmpOpc = AArch64::SUBSXrs;
5064 ResRC = &AArch64::GPR64RegClass;
5065 } else {
5066 return false;
5067 }
5068
5069 const MCInstrDesc &II = TII.get(Opc);
5070
5071 Register AddrReg = getRegForValue(I->getPointerOperand());
5072 Register DesiredReg = getRegForValue(I->getCompareOperand());
5073 Register NewReg = getRegForValue(I->getNewValOperand());
5074
5075 if (!AddrReg || !DesiredReg || !NewReg)
5076 return false;
5077
5078 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5079 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5080 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5081
5082 const Register ResultReg1 = createResultReg(ResRC);
5083 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5084 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5085
5086 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5088 .addDef(ResultReg1)
5089 .addDef(ScratchReg)
5090 .addUse(AddrReg)
5091 .addUse(DesiredReg)
5092 .addUse(NewReg);
5093
5094 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5095 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5096 .addUse(ResultReg1)
5097 .addUse(DesiredReg)
5098 .addImm(0);
5099
5100 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5101 .addDef(ResultReg2)
5102 .addUse(AArch64::WZR)
5103       .addUse(AArch64::WZR)
5104       .addImm(AArch64CC::NE);
5105
5106 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5107 updateValueMap(I, ResultReg1, 2);
5108 return true;
5109}
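// A standalone sketch (names illustrative) of how the two results above are
// produced: CMP_SWAP yields the old memory value, SUBS compares it with the
// expected value, and CSINC with condition NE materializes 1 only on success.
#include <cstdint>
#include <utility>
std::pair<uint64_t, bool> cmpxchgResults(uint64_t OldValue, uint64_t Desired) {
  bool Success = (OldValue == Desired);   // SUBS ...; CSINC Wd, WZR, WZR, NE
  return {OldValue, Success};
}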
5110
5111bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5112 if (TLI.fallBackToDAGISel(*I))
5113 return false;
5114 switch (I->getOpcode()) {
5115 default:
5116 break;
5117 case Instruction::Add:
5118 case Instruction::Sub:
5119 return selectAddSub(I);
5120 case Instruction::Mul:
5121 return selectMul(I);
5122 case Instruction::SDiv:
5123 return selectSDiv(I);
5124 case Instruction::SRem:
5125 if (!selectBinaryOp(I, ISD::SREM))
5126 return selectRem(I, ISD::SREM);
5127 return true;
5128 case Instruction::URem:
5129 if (!selectBinaryOp(I, ISD::UREM))
5130 return selectRem(I, ISD::UREM);
5131 return true;
5132 case Instruction::Shl:
5133 case Instruction::LShr:
5134 case Instruction::AShr:
5135 return selectShift(I);
5136 case Instruction::And:
5137 case Instruction::Or:
5138 case Instruction::Xor:
5139 return selectLogicalOp(I);
5140 case Instruction::Br:
5141 return selectBranch(I);
5142 case Instruction::IndirectBr:
5143 return selectIndirectBr(I);
5144   case Instruction::BitCast:
5145     if (!selectCast(I, ISD::BITCAST))
5146 return selectBitCast(I);
5147 return true;
5148 case Instruction::FPToSI:
5149 if (!selectCast(I, ISD::FP_TO_SINT))
5150 return selectFPToInt(I, /*Signed=*/true);
5151 return true;
5152 case Instruction::FPToUI:
5153 return selectFPToInt(I, /*Signed=*/false);
5154 case Instruction::ZExt:
5155 case Instruction::SExt:
5156 return selectIntExt(I);
5157 case Instruction::Trunc:
5158 if (!selectCast(I, ISD::TRUNCATE))
5159 return selectTrunc(I);
5160 return true;
5161 case Instruction::FPExt:
5162 return selectFPExt(I);
5163 case Instruction::FPTrunc:
5164 return selectFPTrunc(I);
5165 case Instruction::SIToFP:
5166 if (!selectCast(I, ISD::SINT_TO_FP))
5167 return selectIntToFP(I, /*Signed=*/true);
5168 return true;
5169 case Instruction::UIToFP:
5170 return selectIntToFP(I, /*Signed=*/false);
5171 case Instruction::Load:
5172 return selectLoad(I);
5173 case Instruction::Store:
5174 return selectStore(I);
5175 case Instruction::FCmp:
5176 case Instruction::ICmp:
5177 return selectCmp(I);
5178 case Instruction::Select:
5179 return selectSelect(I);
5180 case Instruction::Ret:
5181 return selectRet(I);
5182 case Instruction::FRem:
5183 return selectFRem(I);
5184 case Instruction::GetElementPtr:
5185 return selectGetElementPtr(I);
5186 case Instruction::AtomicCmpXchg:
5187 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5188 }
5189
5190 // fall-back to target-independent instruction selection.
5191 return selectOperator(I, I->getOpcode());
5192}
5193
5194 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5195 const TargetLibraryInfo *LibInfo,
5196 const LibcallLoweringInfo *LibcallLowering) {
5197
5198 SMEAttrs CallerAttrs =
5199 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5200 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5201 CallerAttrs.hasStreamingInterfaceOrBody() ||
5202 CallerAttrs.hasStreamingCompatibleInterface() ||
5203 CallerAttrs.hasAgnosticZAInterface())
5204 return nullptr;
5205 return new AArch64FastISel(FuncInfo, LibInfo, LibcallLowering);
5206}
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
const APFloat & getValueAPF() const
Definition Constants.h:325
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:332
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:329
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Tracks which library functions to use for a particular subtarget.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:100
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:264
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:879
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:703
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:925
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:855
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
LLVM_ABI Libcall getREM(EVT VT)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned getKillRegState(bool B)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.