AArch64FastISel.cpp
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
20#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
42#include "llvm/IR/Argument.h"
43#include "llvm/IR/Attributes.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/CallingConv.h"
46#include "llvm/IR/Constant.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/Function.h"
52#include "llvm/IR/GlobalValue.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/IntrinsicsAArch64.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/Operator.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/MC/MCInstrDesc.h"
65#include "llvm/MC/MCSymbol.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <utility>
77
78using namespace llvm;
79
80namespace {
81
82class AArch64FastISel final : public FastISel {
83 class Address {
84 public:
85 enum BaseKind { RegBase, FrameIndexBase };
86
87 private:
88 BaseKind Kind = RegBase;
89 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90 union {
91 unsigned Reg;
92 int FI;
93 } Base;
94 Register OffsetReg;
95 unsigned Shift = 0;
96 int64_t Offset = 0;
97 const GlobalValue *GV = nullptr;
98
99 public:
100 Address() { Base.Reg = 0; }
101
102 void setKind(BaseKind K) { Kind = K; }
103 BaseKind getKind() const { return Kind; }
104 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106 bool isRegBase() const { return Kind == RegBase; }
107 bool isFIBase() const { return Kind == FrameIndexBase; }
108
109 void setReg(Register Reg) {
110 assert(isRegBase() && "Invalid base register access!");
111 Base.Reg = Reg.id();
112 }
113
114 Register getReg() const {
115 assert(isRegBase() && "Invalid base register access!");
116 return Base.Reg;
117 }
118
119 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
120
121 Register getOffsetReg() const { return OffsetReg; }
122
123 void setFI(unsigned FI) {
124 assert(isFIBase() && "Invalid base frame index access!");
125 Base.FI = FI;
126 }
127
128 unsigned getFI() const {
129 assert(isFIBase() && "Invalid base frame index access!");
130 return Base.FI;
131 }
132
133 void setOffset(int64_t O) { Offset = O; }
134 int64_t getOffset() { return Offset; }
135 void setShift(unsigned S) { Shift = S; }
136 unsigned getShift() { return Shift; }
137
138 void setGlobalValue(const GlobalValue *G) { GV = G; }
139 const GlobalValue *getGlobalValue() { return GV; }
140 };
141
142 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
143 /// make the right decision when generating code for different targets.
144 const AArch64Subtarget *Subtarget;
145 LLVMContext *Context;
146
147 bool fastLowerArguments() override;
148 bool fastLowerCall(CallLoweringInfo &CLI) override;
149 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
150
151private:
152 // Selection routines.
153 bool selectAddSub(const Instruction *I);
154 bool selectLogicalOp(const Instruction *I);
155 bool selectLoad(const Instruction *I);
156 bool selectStore(const Instruction *I);
157 bool selectBranch(const Instruction *I);
158 bool selectIndirectBr(const Instruction *I);
159 bool selectCmp(const Instruction *I);
160 bool selectSelect(const Instruction *I);
161 bool selectFPExt(const Instruction *I);
162 bool selectFPTrunc(const Instruction *I);
163 bool selectFPToInt(const Instruction *I, bool Signed);
164 bool selectIntToFP(const Instruction *I, bool Signed);
165 bool selectRem(const Instruction *I, unsigned ISDOpcode);
166 bool selectRet(const Instruction *I);
167 bool selectTrunc(const Instruction *I);
168 bool selectIntExt(const Instruction *I);
169 bool selectMul(const Instruction *I);
170 bool selectShift(const Instruction *I);
171 bool selectBitCast(const Instruction *I);
172 bool selectFRem(const Instruction *I);
173 bool selectSDiv(const Instruction *I);
174 bool selectGetElementPtr(const Instruction *I);
175 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
176
177 // Utility helper routines.
178 bool isTypeLegal(Type *Ty, MVT &VT);
179 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
180 bool isValueAvailable(const Value *V) const;
181 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
182 bool computeCallAddress(const Value *V, Address &Addr);
183 bool simplifyAddress(Address &Addr, MVT VT);
184 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
185 MachineMemOperand::Flags Flags,
186 unsigned ScaleFactor, MachineMemOperand *MMO);
187 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
188 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
189 MaybeAlign Alignment);
190 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
191 const Value *Cond);
192 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
193 bool optimizeSelect(const SelectInst *SI);
194 Register getRegForGEPIndex(const Value *Idx);
195
196 // Emit helper routines.
197 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
198 const Value *RHS, bool SetFlags = false,
199 bool WantResult = true, bool IsZExt = false);
200 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
201 Register RHSReg, bool SetFlags = false,
202 bool WantResult = true);
203 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
204 bool SetFlags = false, bool WantResult = true);
205 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
206 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
207 uint64_t ShiftImm, bool SetFlags = false,
208 bool WantResult = true);
209 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
210 Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
211 uint64_t ShiftImm, bool SetFlags = false,
212 bool WantResult = true);
213
214 // Emit functions.
215 bool emitCompareAndBranch(const BranchInst *BI);
216 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
218 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
219 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
220 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
221 MachineMemOperand *MMO = nullptr);
222 bool emitStore(MVT VT, Register SrcReg, Address Addr,
223 MachineMemOperand *MMO = nullptr);
224 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
225 MachineMemOperand *MMO = nullptr);
226 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
227 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
228 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
229 bool SetFlags = false, bool WantResult = true,
230 bool IsZExt = false);
231 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
232 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
233 bool SetFlags = false, bool WantResult = true,
234 bool IsZExt = false);
235 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
236 bool WantResult = true);
237 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
238 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
239 bool WantResult = true);
240 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
241 const Value *RHS);
242 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
243 uint64_t Imm);
244 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
245 Register RHSReg, uint64_t ShiftImm);
246 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
247 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
250 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
251 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
252 bool IsZExt = true);
253 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
254 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
255 bool IsZExt = true);
256 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
257 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
258 bool IsZExt = false);
259
260 Register materializeInt(const ConstantInt *CI, MVT VT);
261 Register materializeFP(const ConstantFP *CFP, MVT VT);
262 Register materializeGV(const GlobalValue *GV);
263
264 // Call handling routines.
265private:
266 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
267 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
268 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
269 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
270
271public:
272 // Backend specific FastISel code.
273 Register fastMaterializeAlloca(const AllocaInst *AI) override;
274 Register fastMaterializeConstant(const Constant *C) override;
275 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
276
277 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
278 const TargetLibraryInfo *LibInfo,
279 const LibcallLoweringInfo *libcallLowering)
280 : FastISel(FuncInfo, LibInfo, libcallLowering,
281 /*SkipTargetIndependentISel=*/true) {
282 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
283 Context = &FuncInfo.Fn->getContext();
284 }
285
286 bool fastSelectInstruction(const Instruction *I) override;
287
288#include "AArch64GenFastISel.inc"
289};
290
291} // end anonymous namespace
292
293/// Check if the sign-/zero-extend will be a noop.
294static bool isIntExtFree(const Instruction *I) {
296 "Unexpected integer extend instruction.");
297 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
298 "Unexpected value type.");
299 bool IsZExt = isa<ZExtInst>(I);
300
301 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
302 if (LI->hasOneUse())
303 return true;
304
305 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
306 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
307 return true;
308
309 return false;
310}
311
312/// Determine the implicit scale factor that is applied by a memory
313/// operation for a given value type.
314static unsigned getImplicitScaleFactor(MVT VT) {
315 switch (VT.SimpleTy) {
316 default:
317 return 0; // invalid
318 case MVT::i1: // fall-through
319 case MVT::i8:
320 return 1;
321 case MVT::i16:
322 return 2;
323 case MVT::i32: // fall-through
324 case MVT::f32:
325 return 4;
326 case MVT::i64: // fall-through
327 case MVT::f64:
328 return 8;
329 }
330}
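// The scale factor above matches the access size used by the scaled, unsigned
// 12-bit immediate forms of LDR/STR: the encoded immediate is the byte offset
// divided by this factor. For example, an i32 load at byte offset 16 can use
// LDRWui with an encoded offset of 16 / 4 = 4, whereas an unaligned offset
// such as 3 has to fall back to the unscaled LDUR form.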
331
332CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
333 if (CC == CallingConv::GHC)
334 return CC_AArch64_GHC;
335 if (CC == CallingConv::CFGuard_Check)
336 return CC_AArch64_Win64_CFGuard_Check;
337 if (Subtarget->isTargetDarwin())
338 return CC_AArch64_DarwinPCS;
339 if (Subtarget->isTargetWindows())
340 return CC_AArch64_Win64PCS;
341 return CC_AArch64_AAPCS;
342}
343
344Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
345 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
346 "Alloca should always return a pointer.");
347
348 // Don't handle dynamic allocas.
349 auto SI = FuncInfo.StaticAllocaMap.find(AI);
350 if (SI == FuncInfo.StaticAllocaMap.end())
351 return Register();
352
353 if (SI != FuncInfo.StaticAllocaMap.end()) {
354 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
355 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
356 ResultReg)
357 .addFrameIndex(SI->second)
358 .addImm(0)
359 .addImm(0);
360 return ResultReg;
361 }
362
363 return Register();
364}
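// Note that the ADDXri above takes a frame-index operand rather than a
// register; frame-index elimination later rewrites it into an
// "add xN, sp/fp, #offset" once the final stack layout is known, so the
// alloca's address ends up in an ordinary GPR64.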
365
366Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
367 if (VT > MVT::i64)
368 return Register();
369
370 if (!CI->isZero())
371 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
372
373 // Create a copy from the zero register to materialize a "0" value.
374 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
375 : &AArch64::GPR32RegClass;
376 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
377 Register ResultReg = createResultReg(RC);
378 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
379 ResultReg).addReg(ZeroReg, getKillRegState(true));
380 return ResultReg;
381}
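// Zero is cheapest as a plain COPY from WZR/XZR. Any other constant is handed
// to the tablegen-generated fastEmit_i, which (roughly) selects a MOVi32imm /
// MOVi64imm pseudo that is expanded into MOVZ/MOVN/MOVK (or ORR) sequences
// later in the pipeline.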
382
383Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
384 // Positive zero (+0.0) has to be materialized with a fmov from the zero
385 // register, because the immediate version of fmov cannot encode zero.
386 if (CFP->isNullValue())
387 return fastMaterializeFloatZero(CFP);
388
389 if (VT != MVT::f32 && VT != MVT::f64)
390 return Register();
391
392 const APFloat Val = CFP->getValueAPF();
393 bool Is64Bit = (VT == MVT::f64);
394 // This checks to see if we can use FMOV instructions to materialize
395 // a constant, otherwise we have to materialize via the constant pool.
396 int Imm =
397 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
398 if (Imm != -1) {
399 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
400 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
401 }
402
403 // For the large code model materialize the FP constant in code.
404 if (TM.getCodeModel() == CodeModel::Large) {
405 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
406 const TargetRegisterClass *RC = Is64Bit ?
407 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
408
409 Register TmpReg = createResultReg(RC);
410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
411 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
412
413 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
414 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
415 TII.get(TargetOpcode::COPY), ResultReg)
416 .addReg(TmpReg, getKillRegState(true));
417
418 return ResultReg;
419 }
420
421 // Materialize via constant pool. MachineConstantPool wants an explicit
422 // alignment.
423 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
424
425 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
426 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
427 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
428 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
429
430 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
431 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
432 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
433 .addReg(ADRPReg)
434 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
435 return ResultReg;
436}
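// FMOV (immediate) can only encode values of the form +/-(1 + m/16) * 2^e
// with m in [0, 15] and e in [-3, 4], so constants such as 2.0, -0.5 or 1.25
// are emitted inline, while e.g. 0.1 takes the constant-pool path
// (ADRP + LDR) or, under the large code model, a GPR materialization followed
// by a GPR-to-FPR copy.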
437
438Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
439 // We can't handle thread-local variables quickly yet.
440 if (GV->isThreadLocal())
441 return Register();
442
443 // MachO still uses GOT for large code-model accesses, but ELF requires
444 // movz/movk sequences, which FastISel doesn't handle yet.
445 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
446 return Register();
447
448 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
449 return Register();
450
451 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
452
453 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
454 if (!DestEVT.isSimple())
455 return Register();
456
457 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
458 Register ResultReg;
459
460 if (OpFlags & AArch64II::MO_GOT) {
461 // ADRP + LDRX
462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
463 ADRPReg)
464 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
465
466 unsigned LdrOpc;
467 if (Subtarget->isTargetILP32()) {
468 ResultReg = createResultReg(&AArch64::GPR32RegClass);
469 LdrOpc = AArch64::LDRWui;
470 } else {
471 ResultReg = createResultReg(&AArch64::GPR64RegClass);
472 LdrOpc = AArch64::LDRXui;
473 }
474 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
475 ResultReg)
476 .addReg(ADRPReg)
477 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
478 AArch64II::MO_NC | OpFlags);
479 if (!Subtarget->isTargetILP32())
480 return ResultReg;
481
482 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
483 // so we must extend the result on ILP32.
484 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
486 TII.get(TargetOpcode::SUBREG_TO_REG))
487 .addDef(Result64)
488 .addImm(0)
489 .addReg(ResultReg, RegState::Kill)
490 .addImm(AArch64::sub_32);
491 return Result64;
492 } else {
493 // ADRP + ADDX
494 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
495 ADRPReg)
496 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
497
498 if (OpFlags & AArch64II::MO_TAGGED) {
499 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
500 // We do so by creating a MOVK that sets bits 48-63 of the register to
501 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
502 // the small code model so we can assume a binary size of <= 4GB, which
503 // makes the untagged PC relative offset positive. The binary must also be
504 // loaded into address range [0, 2^48). Both of these properties need to
505 // be ensured at runtime when using tagged addresses.
506 //
507 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
508 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
509 // are not exactly 1:1 with FastISel so we cannot easily abstract this
510 // out. At some point, it would be nice to find a way to not have this
511 // duplicate code.
512 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
514 DstReg)
515 .addReg(ADRPReg)
516 .addGlobalAddress(GV, /*Offset=*/0x100000000,
517 AArch64II::MO_PREL | AArch64II::MO_G3)
518 .addImm(48);
519 ADRPReg = DstReg;
520 }
521
522 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
524 ResultReg)
525 .addReg(ADRPReg)
526 .addGlobalAddress(GV, 0,
527 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
528 .addImm(0);
529 }
530 return ResultReg;
531}
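// Roughly, the two paths above correspond to:
//   GOT access:     adrp xN, :got:sym   ;  ldr xN, [xN, :got_lo12:sym]
//   direct access:  adrp xN, sym        ;  add xN, xN, :lo12:sym
// with the extra MOVK only inserted for MO_TAGGED globals.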
532
533Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
534 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
535
536 // Only handle simple types.
537 if (!CEVT.isSimple())
538 return Register();
539 MVT VT = CEVT.getSimpleVT();
540 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
541 // 'null' pointers need to have a somewhat special treatment.
542 if (isa<ConstantPointerNull>(C)) {
543 assert(VT == MVT::i64 && "Expected 64-bit pointers");
544 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
545 }
546
547 if (const auto *CI = dyn_cast<ConstantInt>(C))
548 return materializeInt(CI, VT);
549 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
550 return materializeFP(CFP, VT);
551 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
552 return materializeGV(GV);
553
554 return Register();
555}
556
557Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
558 assert(CFP->isNullValue() &&
559 "Floating-point constant is not a positive zero.");
560 MVT VT;
561 if (!isTypeLegal(CFP->getType(), VT))
562 return Register();
563
564 if (VT != MVT::f32 && VT != MVT::f64)
565 return Register();
566
567 bool Is64Bit = (VT == MVT::f64);
568 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
569 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
570 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
571}
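// I.e. +0.0 is materialized as "fmov s0, wzr" / "fmov d0, xzr", which avoids
// a constant-pool load; negative zero does not take this path because its
// sign bit is set.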
572
573/// Check if the multiply is by a power-of-2 constant.
574static bool isMulPowOf2(const Value *I) {
575 if (const auto *MI = dyn_cast<MulOperator>(I)) {
576 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
577 if (C->getValue().isPowerOf2())
578 return true;
579 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
580 if (C->getValue().isPowerOf2())
581 return true;
582 }
583 return false;
584}
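// Used by the addressing and add/sub folding code below: a multiply such as
// "mul i64 %idx, 8" behaves like "%idx << 3" and can therefore be folded into
// a scaled register offset or a shifted operand.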
585
586// Computes the address to get to an object.
587bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
588{
589 const User *U = nullptr;
590 unsigned Opcode = Instruction::UserOp1;
591 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
592 // Don't walk into other basic blocks unless the object is an alloca from
593 // another block, otherwise it may not have a virtual register assigned.
594 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
595 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
596 Opcode = I->getOpcode();
597 U = I;
598 }
599 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
600 Opcode = C->getOpcode();
601 U = C;
602 }
603
604 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
605 if (Ty->getAddressSpace() > 255)
606 // Fast instruction selection doesn't support the special
607 // address spaces.
608 return false;
609
610 switch (Opcode) {
611 default:
612 break;
613 case Instruction::BitCast:
614 // Look through bitcasts.
615 return computeAddress(U->getOperand(0), Addr, Ty);
616
617 case Instruction::IntToPtr:
618 // Look past no-op inttoptrs.
619 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
620 TLI.getPointerTy(DL))
621 return computeAddress(U->getOperand(0), Addr, Ty);
622 break;
623
624 case Instruction::PtrToInt:
625 // Look past no-op ptrtoints.
626 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
627 return computeAddress(U->getOperand(0), Addr, Ty);
628 break;
629
630 case Instruction::GetElementPtr: {
631 Address SavedAddr = Addr;
632 uint64_t TmpOffset = Addr.getOffset();
633
634 // Iterate through the GEP folding the constants into offsets where
635 // we can.
636 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
637 GTI != E; ++GTI) {
638 const Value *Op = GTI.getOperand();
639 if (StructType *STy = GTI.getStructTypeOrNull()) {
640 const StructLayout *SL = DL.getStructLayout(STy);
641 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
642 TmpOffset += SL->getElementOffset(Idx);
643 } else {
644 uint64_t S = GTI.getSequentialElementStride(DL);
645 while (true) {
646 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
647 // Constant-offset addressing.
648 TmpOffset += CI->getSExtValue() * S;
649 break;
650 }
651 if (canFoldAddIntoGEP(U, Op)) {
652 // A compatible add with a constant operand. Fold the constant.
653 ConstantInt *CI =
654 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
655 TmpOffset += CI->getSExtValue() * S;
656 // Iterate on the other operand.
657 Op = cast<AddOperator>(Op)->getOperand(0);
658 continue;
659 }
660 // Unsupported
661 goto unsupported_gep;
662 }
663 }
664 }
665
666 // Try to grab the base operand now.
667 Addr.setOffset(TmpOffset);
668 if (computeAddress(U->getOperand(0), Addr, Ty))
669 return true;
670
671 // We failed, restore everything and try the other options.
672 Addr = SavedAddr;
673
674 unsupported_gep:
675 break;
676 }
677 case Instruction::Alloca: {
678 const AllocaInst *AI = cast<AllocaInst>(Obj);
679 DenseMap<const AllocaInst *, int>::iterator SI =
680 FuncInfo.StaticAllocaMap.find(AI);
681 if (SI != FuncInfo.StaticAllocaMap.end()) {
682 Addr.setKind(Address::FrameIndexBase);
683 Addr.setFI(SI->second);
684 return true;
685 }
686 break;
687 }
688 case Instruction::Add: {
689 // Adds of constants are common and easy enough.
690 const Value *LHS = U->getOperand(0);
691 const Value *RHS = U->getOperand(1);
692
693 if (isa<ConstantInt>(LHS))
694 std::swap(LHS, RHS);
695
696 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
697 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
698 return computeAddress(LHS, Addr, Ty);
699 }
700
701 Address Backup = Addr;
702 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
703 return true;
704 Addr = Backup;
705
706 break;
707 }
708 case Instruction::Sub: {
709 // Subs of constants are common and easy enough.
710 const Value *LHS = U->getOperand(0);
711 const Value *RHS = U->getOperand(1);
712
713 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
714 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
715 return computeAddress(LHS, Addr, Ty);
716 }
717 break;
718 }
719 case Instruction::Shl: {
720 if (Addr.getOffsetReg())
721 break;
722
723 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
724 if (!CI)
725 break;
726
727 unsigned Val = CI->getZExtValue();
728 if (Val < 1 || Val > 3)
729 break;
730
731 uint64_t NumBytes = 0;
732 if (Ty && Ty->isSized()) {
733 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
734 NumBytes = NumBits / 8;
735 if (!isPowerOf2_64(NumBits))
736 NumBytes = 0;
737 }
738
739 if (NumBytes != (1ULL << Val))
740 break;
741
742 Addr.setShift(Val);
743 Addr.setExtendType(AArch64_AM::LSL);
744
745 const Value *Src = U->getOperand(0);
746 if (const auto *I = dyn_cast<Instruction>(Src)) {
747 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
748 // Fold the zext or sext when it won't become a noop.
749 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
750 if (!isIntExtFree(ZE) &&
751 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
752 Addr.setExtendType(AArch64_AM::UXTW);
753 Src = ZE->getOperand(0);
754 }
755 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
756 if (!isIntExtFree(SE) &&
757 SE->getOperand(0)->getType()->isIntegerTy(32)) {
758 Addr.setExtendType(AArch64_AM::SXTW);
759 Src = SE->getOperand(0);
760 }
761 }
762 }
763 }
764
765 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
766 if (AI->getOpcode() == Instruction::And) {
767 const Value *LHS = AI->getOperand(0);
768 const Value *RHS = AI->getOperand(1);
769
770 if (const auto *C = dyn_cast<ConstantInt>(LHS))
771 if (C->getValue() == 0xffffffff)
772 std::swap(LHS, RHS);
773
774 if (const auto *C = dyn_cast<ConstantInt>(RHS))
775 if (C->getValue() == 0xffffffff) {
776 Addr.setExtendType(AArch64_AM::UXTW);
777 Register Reg = getRegForValue(LHS);
778 if (!Reg)
779 return false;
780 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
781 Addr.setOffsetReg(Reg);
782 return true;
783 }
784 }
785
786 Register Reg = getRegForValue(Src);
787 if (!Reg)
788 return false;
789 Addr.setOffsetReg(Reg);
790 return true;
791 }
792 case Instruction::Mul: {
793 if (Addr.getOffsetReg())
794 break;
795
796 if (!isMulPowOf2(U))
797 break;
798
799 const Value *LHS = U->getOperand(0);
800 const Value *RHS = U->getOperand(1);
801
802 // Canonicalize power-of-2 value to the RHS.
803 if (const auto *C = dyn_cast<ConstantInt>(LHS))
804 if (C->getValue().isPowerOf2())
805 std::swap(LHS, RHS);
806
807 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
808 const auto *C = cast<ConstantInt>(RHS);
809 unsigned Val = C->getValue().logBase2();
810 if (Val < 1 || Val > 3)
811 break;
812
813 uint64_t NumBytes = 0;
814 if (Ty && Ty->isSized()) {
815 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
816 NumBytes = NumBits / 8;
817 if (!isPowerOf2_64(NumBits))
818 NumBytes = 0;
819 }
820
821 if (NumBytes != (1ULL << Val))
822 break;
823
824 Addr.setShift(Val);
825 Addr.setExtendType(AArch64_AM::LSL);
826
827 const Value *Src = LHS;
828 if (const auto *I = dyn_cast<Instruction>(Src)) {
829 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
830 // Fold the zext or sext when it won't become a noop.
831 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
832 if (!isIntExtFree(ZE) &&
833 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
834 Addr.setExtendType(AArch64_AM::UXTW);
835 Src = ZE->getOperand(0);
836 }
837 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
838 if (!isIntExtFree(SE) &&
839 SE->getOperand(0)->getType()->isIntegerTy(32)) {
840 Addr.setExtendType(AArch64_AM::SXTW);
841 Src = SE->getOperand(0);
842 }
843 }
844 }
845 }
846
847 Register Reg = getRegForValue(Src);
848 if (!Reg)
849 return false;
850 Addr.setOffsetReg(Reg);
851 return true;
852 }
853 case Instruction::And: {
854 if (Addr.getOffsetReg())
855 break;
856
857 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
858 break;
859
860 const Value *LHS = U->getOperand(0);
861 const Value *RHS = U->getOperand(1);
862
863 if (const auto *C = dyn_cast<ConstantInt>(LHS))
864 if (C->getValue() == 0xffffffff)
865 std::swap(LHS, RHS);
866
867 if (const auto *C = dyn_cast<ConstantInt>(RHS))
868 if (C->getValue() == 0xffffffff) {
869 Addr.setShift(0);
870 Addr.setExtendType(AArch64_AM::LSL);
871 Addr.setExtendType(AArch64_AM::UXTW);
872
873 Register Reg = getRegForValue(LHS);
874 if (!Reg)
875 return false;
876 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
877 Addr.setOffsetReg(Reg);
878 return true;
879 }
880 break;
881 }
882 case Instruction::SExt:
883 case Instruction::ZExt: {
884 if (!Addr.getReg() || Addr.getOffsetReg())
885 break;
886
887 const Value *Src = nullptr;
888 // Fold the zext or sext when it won't become a noop.
889 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
890 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
891 Addr.setExtendType(AArch64_AM::UXTW);
892 Src = ZE->getOperand(0);
893 }
894 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
895 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
896 Addr.setExtendType(AArch64_AM::SXTW);
897 Src = SE->getOperand(0);
898 }
899 }
900
901 if (!Src)
902 break;
903
904 Addr.setShift(0);
905 Register Reg = getRegForValue(Src);
906 if (!Reg)
907 return false;
908 Addr.setOffsetReg(Reg);
909 return true;
910 }
911 } // end switch
912
913 if (Addr.isRegBase() && !Addr.getReg()) {
914 Register Reg = getRegForValue(Obj);
915 if (!Reg)
916 return false;
917 Addr.setReg(Reg);
918 return true;
919 }
920
921 if (!Addr.getOffsetReg()) {
922 Register Reg = getRegForValue(Obj);
923 if (!Reg)
924 return false;
925 Addr.setOffsetReg(Reg);
926 return true;
927 }
928
929 return false;
930}
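// On success, Addr describes one of the addressing forms that the load/store
// emitters below can use, roughly: [fi + imm], [reg + imm],
// [reg, reg, lsl #shift] or [reg, wreg, (s|u)xtw #shift]; simplifyAddress()
// still has to prove that the immediate and shift actually fit the chosen
// instruction.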
931
932bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
933 const User *U = nullptr;
934 unsigned Opcode = Instruction::UserOp1;
935 bool InMBB = true;
936
937 if (const auto *I = dyn_cast<Instruction>(V)) {
938 Opcode = I->getOpcode();
939 U = I;
940 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
941 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
942 Opcode = C->getOpcode();
943 U = C;
944 }
945
946 switch (Opcode) {
947 default: break;
948 case Instruction::BitCast:
949 // Look past bitcasts if its operand is in the same BB.
950 if (InMBB)
951 return computeCallAddress(U->getOperand(0), Addr);
952 break;
953 case Instruction::IntToPtr:
954 // Look past no-op inttoptrs if its operand is in the same BB.
955 if (InMBB &&
956 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
957 TLI.getPointerTy(DL))
958 return computeCallAddress(U->getOperand(0), Addr);
959 break;
960 case Instruction::PtrToInt:
961 // Look past no-op ptrtoints if its operand is in the same BB.
962 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
963 return computeCallAddress(U->getOperand(0), Addr);
964 break;
965 }
966
967 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
968 Addr.setGlobalValue(GV);
969 return true;
970 }
971
972 // If all else fails, try to materialize the value in a register.
973 if (!Addr.getGlobalValue()) {
974 Addr.setReg(getRegForValue(V));
975 return Addr.getReg().isValid();
976 }
977
978 return false;
979}
980
981bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
982 EVT evt = TLI.getValueType(DL, Ty, true);
983
984 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
985 return false;
986
987 // Only handle simple types.
988 if (evt == MVT::Other || !evt.isSimple())
989 return false;
990 VT = evt.getSimpleVT();
991
992 // This is a legal type, but it's not something we handle in fast-isel.
993 if (VT == MVT::f128)
994 return false;
995
996 // Handle all other legal types, i.e. a register that will directly hold this
997 // value.
998 return TLI.isTypeLegal(VT);
999}
1000
1001/// Determine if the value type is supported by FastISel.
1002///
1003/// FastISel for AArch64 can handle more value types than are legal. This adds
1004/// simple value types such as i1, i8, and i16.
1005bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1006 if (Ty->isVectorTy() && !IsVectorAllowed)
1007 return false;
1008
1009 if (isTypeLegal(Ty, VT))
1010 return true;
1011
1012 // If this is a type that can be sign- or zero-extended to a basic operation,
1013 // go ahead and accept it now.
1014 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1015 return true;
1016
1017 return false;
1018}
1019
1020bool AArch64FastISel::isValueAvailable(const Value *V) const {
1021 if (!isa<Instruction>(V))
1022 return true;
1023
1024 const auto *I = cast<Instruction>(V);
1025 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1026}
1027
1028bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1029 if (Subtarget->isTargetILP32())
1030 return false;
1031
1032 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1033 if (!ScaleFactor)
1034 return false;
1035
1036 bool ImmediateOffsetNeedsLowering = false;
1037 bool RegisterOffsetNeedsLowering = false;
1038 int64_t Offset = Addr.getOffset();
1039 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1040 ImmediateOffsetNeedsLowering = true;
1041 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1042 !isUInt<12>(Offset / ScaleFactor))
1043 ImmediateOffsetNeedsLowering = true;
1044
1045 // Cannot encode an offset register and an immediate offset in the same
1046 // instruction. Fold the immediate offset into the load/store instruction and
1047 // emit an additional add to take care of the offset register.
1048 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1049 RegisterOffsetNeedsLowering = true;
1050
1051 // Cannot encode zero register as base.
1052 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // If this is a stack pointer and the offset needs to be simplified then put
1056 // the alloca address into a register, set the base type back to register and
1057 // continue. This should almost never happen.
1058 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1059 {
1060 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1061 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1062 ResultReg)
1063 .addFrameIndex(Addr.getFI())
1064 .addImm(0)
1065 .addImm(0);
1066 Addr.setKind(Address::RegBase);
1067 Addr.setReg(ResultReg);
1068 }
1069
1070 if (RegisterOffsetNeedsLowering) {
1071 Register ResultReg;
1072 if (Addr.getReg()) {
1073 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1074 Addr.getExtendType() == AArch64_AM::UXTW )
1075 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1076 Addr.getOffsetReg(), Addr.getExtendType(),
1077 Addr.getShift());
1078 else
1079 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), AArch64_AM::LSL,
1081 Addr.getShift());
1082 } else {
1083 if (Addr.getExtendType() == AArch64_AM::UXTW)
1084 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1085 Addr.getShift(), /*IsZExt=*/true);
1086 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1087 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1088 Addr.getShift(), /*IsZExt=*/false);
1089 else
1090 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1091 Addr.getShift());
1092 }
1093 if (!ResultReg)
1094 return false;
1095
1096 Addr.setReg(ResultReg);
1097 Addr.setOffsetReg(0);
1098 Addr.setShift(0);
1099 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1100 }
1101
1102 // Since the offset is too large for the load/store instruction get the
1103 // reg+offset into a register.
1104 if (ImmediateOffsetNeedsLowering) {
1105 Register ResultReg;
1106 if (Addr.getReg())
1107 // Try to fold the immediate into the add instruction.
1108 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1109 else
1110 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1111
1112 if (!ResultReg)
1113 return false;
1114 Addr.setReg(ResultReg);
1115 Addr.setOffset(0);
1116 }
1117 return true;
1118}
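// The two immediate forms this aims for are the unscaled, signed 9-bit offset
// (LDUR/STUR, byte offsets -256..255) and the scaled, unsigned 12-bit offset
// (LDR/STR, 0..4095 * access size). For example, an i64 load at offset -8
// becomes "ldur x0, [x1, #-8]", offset 32760 still fits the scaled form as
// "ldr x0, [x1, #32760]", and anything larger is first folded into the base
// register with an extra ADD.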
1119
1120void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1121 const MachineInstrBuilder &MIB,
1122 MachineMemOperand::Flags Flags,
1123 unsigned ScaleFactor,
1124 MachineMemOperand *MMO) {
1125 int64_t Offset = Addr.getOffset() / ScaleFactor;
1126 // Frame base works a bit differently. Handle it separately.
1127 if (Addr.isFIBase()) {
1128 int FI = Addr.getFI();
1129 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1130 // and alignment should be based on the VT.
1131 MMO = FuncInfo.MF->getMachineMemOperand(
1132 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1133 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1134 // Now add the rest of the operands.
1135 MIB.addFrameIndex(FI).addImm(Offset);
1136 } else {
1137 assert(Addr.isRegBase() && "Unexpected address kind.");
1138 const MCInstrDesc &II = MIB->getDesc();
1139 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1140 Addr.setReg(
1141 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1142 Addr.setOffsetReg(
1143 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1144 if (Addr.getOffsetReg()) {
1145 assert(Addr.getOffset() == 0 && "Unexpected offset");
1146 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1147 Addr.getExtendType() == AArch64_AM::SXTX;
1148 MIB.addReg(Addr.getReg());
1149 MIB.addReg(Addr.getOffsetReg());
1150 MIB.addImm(IsSigned);
1151 MIB.addImm(Addr.getShift() != 0);
1152 } else
1153 MIB.addReg(Addr.getReg()).addImm(Offset);
1154 }
1155
1156 if (MMO)
1157 MIB.addMemOperand(MMO);
1158}
1159
1160Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1161 const Value *RHS, bool SetFlags,
1162 bool WantResult, bool IsZExt) {
1163 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1164 bool NeedExtend = false;
1165 switch (RetVT.SimpleTy) {
1166 default:
1167 return Register();
1168 case MVT::i1:
1169 NeedExtend = true;
1170 break;
1171 case MVT::i8:
1172 NeedExtend = true;
1173 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1174 break;
1175 case MVT::i16:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1178 break;
1179 case MVT::i32: // fall-through
1180 case MVT::i64:
1181 break;
1182 }
1183 MVT SrcVT = RetVT;
1184 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1185
1186 // Canonicalize immediates to the RHS first.
1187 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1188 std::swap(LHS, RHS);
1189
1190 // Canonicalize mul by power of 2 to the RHS.
1191 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1192 if (isMulPowOf2(LHS))
1193 std::swap(LHS, RHS);
1194
1195 // Canonicalize shift immediate to the RHS.
1196 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1198 if (isa<ConstantInt>(SI->getOperand(1)))
1199 if (SI->getOpcode() == Instruction::Shl ||
1200 SI->getOpcode() == Instruction::LShr ||
1201 SI->getOpcode() == Instruction::AShr )
1202 std::swap(LHS, RHS);
1203
1204 Register LHSReg = getRegForValue(LHS);
1205 if (!LHSReg)
1206 return Register();
1207
1208 if (NeedExtend)
1209 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1210
1211 Register ResultReg;
1212 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1213 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1214 if (C->isNegative())
1215 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1216 WantResult);
1217 else
1218 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1219 WantResult);
1220 } else if (const auto *C = dyn_cast<Constant>(RHS))
1221 if (C->isNullValue())
1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1223
1224 if (ResultReg)
1225 return ResultReg;
1226
1227 // Only extend the RHS within the instruction if there is a valid extend type.
1228 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1229 isValueAvailable(RHS)) {
1230 Register RHSReg = getRegForValue(RHS);
1231 if (!RHSReg)
1232 return Register();
1233 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1234 SetFlags, WantResult);
1235 }
1236
1237 // Check if the mul can be folded into the instruction.
1238 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1239 if (isMulPowOf2(RHS)) {
1240 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1241 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1242
1243 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1244 if (C->getValue().isPowerOf2())
1245 std::swap(MulLHS, MulRHS);
1246
1247 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1248 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1249 Register RHSReg = getRegForValue(MulLHS);
1250 if (!RHSReg)
1251 return Register();
1252 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1253 ShiftVal, SetFlags, WantResult);
1254 if (ResultReg)
1255 return ResultReg;
1256 }
1257 }
1258
1259 // Check if the shift can be folded into the instruction.
1260 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1261 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1262 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1263 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1264 switch (SI->getOpcode()) {
1265 default: break;
1266 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1267 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1268 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1269 }
1270 uint64_t ShiftVal = C->getZExtValue();
1271 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1272 Register RHSReg = getRegForValue(SI->getOperand(0));
1273 if (!RHSReg)
1274 return Register();
1275 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1276 ShiftVal, SetFlags, WantResult);
1277 if (ResultReg)
1278 return ResultReg;
1279 }
1280 }
1281 }
1282 }
1283
1284 Register RHSReg = getRegForValue(RHS);
1285 if (!RHSReg)
1286 return Register();
1287
1288 if (NeedExtend)
1289 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1290
1291 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1292}
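// The folds above let IR such as "add i64 %a, (shl i64 %b, 3)" or
// "add i64 %a, (mul i64 %b, 8)" select to a single
// "add x0, x1, x2, lsl #3", and zero/sign-extended 32-bit operands select to
// the extended-register form, e.g. "add x0, x1, w2, uxtw".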
1293
1294Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1295 Register RHSReg, bool SetFlags,
1296 bool WantResult) {
1297 assert(LHSReg && RHSReg && "Invalid register number.");
1298
1299 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1300 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1301 return Register();
1302
1303 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1304 return Register();
1305
1306 static const unsigned OpcTable[2][2][2] = {
1307 { { AArch64::SUBWrr, AArch64::SUBXrr },
1308 { AArch64::ADDWrr, AArch64::ADDXrr } },
1309 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1310 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1311 };
1312 bool Is64Bit = RetVT == MVT::i64;
1313 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1314 const TargetRegisterClass *RC =
1315 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1316 Register ResultReg;
1317 if (WantResult)
1318 ResultReg = createResultReg(RC);
1319 else
1320 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1321
1322 const MCInstrDesc &II = TII.get(Opc);
1323 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1324 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1325 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1326 .addReg(LHSReg)
1327 .addReg(RHSReg);
1328 return ResultReg;
1329}
1330
1331Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1332 uint64_t Imm, bool SetFlags,
1333 bool WantResult) {
1334 assert(LHSReg && "Invalid register number.");
1335
1336 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1337 return Register();
1338
1339 unsigned ShiftImm;
1340 if (isUInt<12>(Imm))
1341 ShiftImm = 0;
1342 else if ((Imm & 0xfff000) == Imm) {
1343 ShiftImm = 12;
1344 Imm >>= 12;
1345 } else
1346 return Register();
1347
1348 static const unsigned OpcTable[2][2][2] = {
1349 { { AArch64::SUBWri, AArch64::SUBXri },
1350 { AArch64::ADDWri, AArch64::ADDXri } },
1351 { { AArch64::SUBSWri, AArch64::SUBSXri },
1352 { AArch64::ADDSWri, AArch64::ADDSXri } }
1353 };
1354 bool Is64Bit = RetVT == MVT::i64;
1355 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1356 const TargetRegisterClass *RC;
1357 if (SetFlags)
1358 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1359 else
1360 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1361 Register ResultReg;
1362 if (WantResult)
1363 ResultReg = createResultReg(RC);
1364 else
1365 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1366
1367 const MCInstrDesc &II = TII.get(Opc);
1368 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1370 .addReg(LHSReg)
1371 .addImm(Imm)
1372 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1373 return ResultReg;
1374}
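// ADD/SUB (immediate) only accepts a 12-bit value, optionally shifted left by
// 12. For example 0xfff and 0x123000 are encodable (the latter as 0x123 with
// LSL #12), while 0x123456 is not, and the caller then has to materialize the
// constant into a register instead.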
1375
1376Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1377 Register RHSReg,
1378 AArch64_AM::ShiftExtendType ShiftType,
1379 uint64_t ShiftImm, bool SetFlags,
1380 bool WantResult) {
1381 assert(LHSReg && RHSReg && "Invalid register number.");
1382 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1383 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1384
1385 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1386 return Register();
1387
1388 // Don't deal with undefined shifts.
1389 if (ShiftImm >= RetVT.getSizeInBits())
1390 return Register();
1391
1392 static const unsigned OpcTable[2][2][2] = {
1393 { { AArch64::SUBWrs, AArch64::SUBXrs },
1394 { AArch64::ADDWrs, AArch64::ADDXrs } },
1395 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1396 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1397 };
1398 bool Is64Bit = RetVT == MVT::i64;
1399 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1400 const TargetRegisterClass *RC =
1401 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1402 Register ResultReg;
1403 if (WantResult)
1404 ResultReg = createResultReg(RC);
1405 else
1406 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1407
1408 const MCInstrDesc &II = TII.get(Opc);
1409 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1410 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1412 .addReg(LHSReg)
1413 .addReg(RHSReg)
1414 .addImm(getShifterImm(ShiftType, ShiftImm));
1415 return ResultReg;
1416}
1417
1418Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1419 Register RHSReg,
1420 AArch64_AM::ShiftExtendType ExtType,
1421 uint64_t ShiftImm, bool SetFlags,
1422 bool WantResult) {
1423 assert(LHSReg && RHSReg && "Invalid register number.");
1424 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1425 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1426
1427 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1428 return Register();
1429
1430 if (ShiftImm >= 4)
1431 return Register();
1432
1433 static const unsigned OpcTable[2][2][2] = {
1434 { { AArch64::SUBWrx, AArch64::SUBXrx },
1435 { AArch64::ADDWrx, AArch64::ADDXrx } },
1436 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1437 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1438 };
1439 bool Is64Bit = RetVT == MVT::i64;
1440 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1441 const TargetRegisterClass *RC = nullptr;
1442 if (SetFlags)
1443 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1444 else
1445 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1446 Register ResultReg;
1447 if (WantResult)
1448 ResultReg = createResultReg(RC);
1449 else
1450 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1451
1452 const MCInstrDesc &II = TII.get(Opc);
1453 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1454 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1456 .addReg(LHSReg)
1457 .addReg(RHSReg)
1458 .addImm(getArithExtendImm(ExtType, ShiftImm));
1459 return ResultReg;
1460}
1461
1462bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1463 Type *Ty = LHS->getType();
1464 EVT EVT = TLI.getValueType(DL, Ty, true);
1465 if (!EVT.isSimple())
1466 return false;
1467 MVT VT = EVT.getSimpleVT();
1468
1469 switch (VT.SimpleTy) {
1470 default:
1471 return false;
1472 case MVT::i1:
1473 case MVT::i8:
1474 case MVT::i16:
1475 case MVT::i32:
1476 case MVT::i64:
1477 return emitICmp(VT, LHS, RHS, IsZExt);
1478 case MVT::f32:
1479 case MVT::f64:
1480 return emitFCmp(VT, LHS, RHS);
1481 }
1482}
1483
1484bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1485 bool IsZExt) {
1486 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1487 IsZExt)
1488 .isValid();
1489}
1490
1491bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1492 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1493 /*SetFlags=*/true, /*WantResult=*/false)
1494 .isValid();
1495}
1496
1497bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1498 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1499 return false;
1500
1501 // Check to see if the 2nd operand is a constant that we can encode directly
1502 // in the compare.
1503 bool UseImm = false;
1504 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1505 if (CFP->isZero() && !CFP->isNegative())
1506 UseImm = true;
1507
1508 Register LHSReg = getRegForValue(LHS);
1509 if (!LHSReg)
1510 return false;
1511
1512 if (UseImm) {
1513 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1514 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1515 .addReg(LHSReg);
1516 return true;
1517 }
1518
1519 Register RHSReg = getRegForValue(RHS);
1520 if (!RHSReg)
1521 return false;
1522
1523 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1525 .addReg(LHSReg)
1526 .addReg(RHSReg);
1527 return true;
1528}
1529
1530Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1531 bool SetFlags, bool WantResult, bool IsZExt) {
1532 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1533 IsZExt);
1534}
1535
1536/// This method is a wrapper to simplify add emission.
1537///
1538/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1539/// that fails, then try to materialize the immediate into a register and use
1540/// emitAddSub_rr instead.
1541Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1542 Register ResultReg;
1543 if (Imm < 0)
1544 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1545 else
1546 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1547
1548 if (ResultReg)
1549 return ResultReg;
1550
1551 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1552 if (!CReg)
1553 return Register();
1554
1555 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1556 return ResultReg;
1557}
1558
1559Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1560 bool SetFlags, bool WantResult, bool IsZExt) {
1561 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1562 IsZExt);
1563}
1564
1565Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1566 Register RHSReg, bool WantResult) {
1567 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1568 /*SetFlags=*/true, WantResult);
1569}
1570
1571Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1572 Register RHSReg,
1573 AArch64_AM::ShiftExtendType ShiftType,
1574 uint64_t ShiftImm, bool WantResult) {
1575 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1576 ShiftImm, /*SetFlags=*/true, WantResult);
1577}
1578
1579Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1580 const Value *LHS, const Value *RHS) {
1581 // Canonicalize immediates to the RHS first.
1582 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1583 std::swap(LHS, RHS);
1584
1585 // Canonicalize mul by power-of-2 to the RHS.
1586 if (LHS->hasOneUse() && isValueAvailable(LHS))
1587 if (isMulPowOf2(LHS))
1588 std::swap(LHS, RHS);
1589
1590 // Canonicalize shift immediate to the RHS.
1591 if (LHS->hasOneUse() && isValueAvailable(LHS))
1592 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1593 if (isa<ConstantInt>(SI->getOperand(1)))
1594 std::swap(LHS, RHS);
1595
1596 Register LHSReg = getRegForValue(LHS);
1597 if (!LHSReg)
1598 return Register();
1599
1600 Register ResultReg;
1601 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1602 uint64_t Imm = C->getZExtValue();
1603 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1604 }
1605 if (ResultReg)
1606 return ResultReg;
1607
1608 // Check if the mul can be folded into the instruction.
1609 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1610 if (isMulPowOf2(RHS)) {
1611 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1612 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1613
1614 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1615 if (C->getValue().isPowerOf2())
1616 std::swap(MulLHS, MulRHS);
1617
1618 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1619 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1620
1621 Register RHSReg = getRegForValue(MulLHS);
1622 if (!RHSReg)
1623 return Register();
1624 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1625 if (ResultReg)
1626 return ResultReg;
1627 }
1628 }
1629
1630 // Check if the shift can be folded into the instruction.
1631 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1632 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1633 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1634 uint64_t ShiftVal = C->getZExtValue();
1635 Register RHSReg = getRegForValue(SI->getOperand(0));
1636 if (!RHSReg)
1637 return Register();
1638 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1639 if (ResultReg)
1640 return ResultReg;
1641 }
1642 }
1643
1644 Register RHSReg = getRegForValue(RHS);
1645 if (!RHSReg)
1646 return Register();
1647
1648 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1649 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1650 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1651 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1652 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1653 }
1654 return ResultReg;
1655}
1656
1657Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1658 Register LHSReg, uint64_t Imm) {
1659 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1660 "ISD nodes are not consecutive!");
1661 static const unsigned OpcTable[3][2] = {
1662 { AArch64::ANDWri, AArch64::ANDXri },
1663 { AArch64::ORRWri, AArch64::ORRXri },
1664 { AArch64::EORWri, AArch64::EORXri }
1665 };
1666 const TargetRegisterClass *RC;
1667 unsigned Opc;
1668 unsigned RegSize;
1669 switch (RetVT.SimpleTy) {
1670 default:
1671 return Register();
1672 case MVT::i1:
1673 case MVT::i8:
1674 case MVT::i16:
1675 case MVT::i32: {
1676 unsigned Idx = ISDOpc - ISD::AND;
1677 Opc = OpcTable[Idx][0];
1678 RC = &AArch64::GPR32spRegClass;
1679 RegSize = 32;
1680 break;
1681 }
1682 case MVT::i64:
1683 Opc = OpcTable[ISDOpc - ISD::AND][1];
1684 RC = &AArch64::GPR64spRegClass;
1685 RegSize = 64;
1686 break;
1687 }
1688
1689 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1690 return Register();
1691
1692 Register ResultReg =
1693 fastEmitInst_ri(Opc, RC, LHSReg,
1694 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1695 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1696 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1697 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1698 }
1699 return ResultReg;
1700}
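// AND/ORR/EOR (immediate) only accept "logical immediates": bitmask patterns
// made of a rotated run of ones replicated across the register. For example
// 0xff, 0x7fffffff and 0x00ff00ff00ff00ff are encodable, whereas an arbitrary
// value like 0x12345678 is not, in which case this helper returns an invalid
// Register and the caller materializes the constant instead.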
1701
1702Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1703 Register LHSReg, Register RHSReg,
1704 uint64_t ShiftImm) {
1705 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1706 "ISD nodes are not consecutive!");
1707 static const unsigned OpcTable[3][2] = {
1708 { AArch64::ANDWrs, AArch64::ANDXrs },
1709 { AArch64::ORRWrs, AArch64::ORRXrs },
1710 { AArch64::EORWrs, AArch64::EORXrs }
1711 };
1712
1713 // Don't deal with undefined shifts.
1714 if (ShiftImm >= RetVT.getSizeInBits())
1715 return Register();
1716
1717 const TargetRegisterClass *RC;
1718 unsigned Opc;
1719 switch (RetVT.SimpleTy) {
1720 default:
1721 return Register();
1722 case MVT::i1:
1723 case MVT::i8:
1724 case MVT::i16:
1725 case MVT::i32:
1726 Opc = OpcTable[ISDOpc - ISD::AND][0];
1727 RC = &AArch64::GPR32RegClass;
1728 break;
1729 case MVT::i64:
1730 Opc = OpcTable[ISDOpc - ISD::AND][1];
1731 RC = &AArch64::GPR64RegClass;
1732 break;
1733 }
1734 Register ResultReg =
1735       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1736                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1737 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1738 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1739 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1740 }
1741 return ResultReg;
1742}
1743
1744Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1745 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1746}
1747
1748Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749 bool WantZExt, MachineMemOperand *MMO) {
1750 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751 return Register();
1752
1753 // Simplify this down to something we can handle.
1754 if (!simplifyAddress(Addr, VT))
1755 return Register();
1756
1757 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758 if (!ScaleFactor)
1759 llvm_unreachable("Unexpected value type.");
1760
1761 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763 bool UseScaled = true;
1764 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765 UseScaled = false;
1766 ScaleFactor = 1;
1767 }
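  // For example, an i32 load at byte offset 8 can use the scaled form
  // (immediate 8 / 4 = 2), whereas offset -4 or a misaligned offset must use
  // the unscaled LDUR form with the raw byte offset.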
1768
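  // The opcode below is looked up as GPOpcTable[WantZExt][2 * Idx + IsRet64Bit]
  // with one column per access size (i8, i16, i32, i64); Idx selects the
  // addressing form: unscaled immediate, scaled immediate, X-register offset,
  // or extended W-register offset.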
1769 static const unsigned GPOpcTable[2][8][4] = {
1770 // Sign-extend.
1771 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1772 AArch64::LDURXi },
1773 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1774 AArch64::LDURXi },
1775 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1776 AArch64::LDRXui },
1777 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1778 AArch64::LDRXui },
1779 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780 AArch64::LDRXroX },
1781 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782 AArch64::LDRXroX },
1783 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784 AArch64::LDRXroW },
1785 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786 AArch64::LDRXroW }
1787 },
1788 // Zero-extend.
1789 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790 AArch64::LDURXi },
1791 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1792 AArch64::LDURXi },
1793 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794 AArch64::LDRXui },
1795 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1796 AArch64::LDRXui },
1797 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798 AArch64::LDRXroX },
1799 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1800 AArch64::LDRXroX },
1801 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802 AArch64::LDRXroW },
1803 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1804 AArch64::LDRXroW }
1805 }
1806 };
1807
1808 static const unsigned FPOpcTable[4][2] = {
1809 { AArch64::LDURSi, AArch64::LDURDi },
1810 { AArch64::LDRSui, AArch64::LDRDui },
1811 { AArch64::LDRSroX, AArch64::LDRDroX },
1812 { AArch64::LDRSroW, AArch64::LDRDroW }
1813 };
1814
1815 unsigned Opc;
1816 const TargetRegisterClass *RC;
1817 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818 Addr.getOffsetReg();
1819 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821 Addr.getExtendType() == AArch64_AM::SXTW)
1822 Idx++;
1823
1824 bool IsRet64Bit = RetVT == MVT::i64;
1825 switch (VT.SimpleTy) {
1826 default:
1827 llvm_unreachable("Unexpected value type.");
1828 case MVT::i1: // Intentional fall-through.
1829 case MVT::i8:
1830 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831 RC = (IsRet64Bit && !WantZExt) ?
1832 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833 break;
1834 case MVT::i16:
1835 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836 RC = (IsRet64Bit && !WantZExt) ?
1837 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838 break;
1839 case MVT::i32:
1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841 RC = (IsRet64Bit && !WantZExt) ?
1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843 break;
1844 case MVT::i64:
1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846 RC = &AArch64::GPR64RegClass;
1847 break;
1848 case MVT::f32:
1849 Opc = FPOpcTable[Idx][0];
1850 RC = &AArch64::FPR32RegClass;
1851 break;
1852 case MVT::f64:
1853 Opc = FPOpcTable[Idx][1];
1854 RC = &AArch64::FPR64RegClass;
1855 break;
1856 }
1857
1858 // Create the base instruction, then add the operands.
1859 Register ResultReg = createResultReg(RC);
1860 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1861 TII.get(Opc), ResultReg);
1862 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863
1864 // Loading an i1 requires special handling.
1865 if (VT == MVT::i1) {
1866 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1867 assert(ANDReg && "Unexpected AND instruction emission failure.");
1868 ResultReg = ANDReg;
1869 }
1870
1871 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1872 // the 32bit reg to a 64bit reg.
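  // For example, a zero-extending i8 load used as an i64 value is emitted as
  // a 32-bit LDRBBui followed by a SUBREG_TO_REG into the sub_32 lane of a
  // fresh 64-bit register.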
1873 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1876 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877 .addImm(0)
1878 .addReg(ResultReg, getKillRegState(true))
1879 .addImm(AArch64::sub_32);
1880 ResultReg = Reg64;
1881 }
1882 return ResultReg;
1883}
1884
1885bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886 MVT VT;
1887 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888 return false;
1889
1890 if (VT.isVector())
1891 return selectOperator(I, I->getOpcode());
1892
1893 Register ResultReg;
1894 switch (I->getOpcode()) {
1895 default:
1896 llvm_unreachable("Unexpected instruction.");
1897 case Instruction::Add:
1898 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899 break;
1900 case Instruction::Sub:
1901 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902 break;
1903 }
1904 if (!ResultReg)
1905 return false;
1906
1907 updateValueMap(I, ResultReg);
1908 return true;
1909}
1910
1911bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912 MVT VT;
1913 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914 return false;
1915
1916 if (VT.isVector())
1917 return selectOperator(I, I->getOpcode());
1918
1919 Register ResultReg;
1920 switch (I->getOpcode()) {
1921 default:
1922 llvm_unreachable("Unexpected instruction.");
1923 case Instruction::And:
1924 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925 break;
1926 case Instruction::Or:
1927 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 case Instruction::Xor:
1930 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1932 }
1933 if (!ResultReg)
1934 return false;
1935
1936 updateValueMap(I, ResultReg);
1937 return true;
1938}
1939
1940bool AArch64FastISel::selectLoad(const Instruction *I) {
1941 MVT VT;
1942 // Verify we have a legal type before going any further. Currently, we handle
1943 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946 cast<LoadInst>(I)->isAtomic())
1947 return false;
1948
1949 const Value *SV = I->getOperand(0);
1950 if (TLI.supportSwiftError()) {
1951 // Swifterror values can come from either a function parameter with
1952 // swifterror attribute or an alloca with swifterror attribute.
1953 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954 if (Arg->hasSwiftErrorAttr())
1955 return false;
1956 }
1957
1958 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959 if (Alloca->isSwiftError())
1960 return false;
1961 }
1962 }
1963
1964 // See if we can handle this address.
1965 Address Addr;
1966 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967 return false;
1968
1969 // Fold the following sign-/zero-extend into the load instruction.
1970 bool WantZExt = true;
1971 MVT RetVT = VT;
1972 const Value *IntExtVal = nullptr;
1973 if (I->hasOneUse()) {
1974 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975 if (isTypeSupported(ZE->getType(), RetVT))
1976 IntExtVal = ZE;
1977 else
1978 RetVT = VT;
1979 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980 if (isTypeSupported(SE->getType(), RetVT))
1981 IntExtVal = SE;
1982 else
1983 RetVT = VT;
1984 WantZExt = false;
1985 }
1986 }
1987
1988 Register ResultReg =
1989 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990 if (!ResultReg)
1991 return false;
1992
1993 // There are a few different cases we have to handle, because the load or the
1994 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1995 // SelectionDAG. There is also an ordering issue when both instructions are in
1996 // different basic blocks.
1997 // 1.) The load instruction is selected by FastISel, but the integer extend
1998 // not. This usually happens when the integer extend is in a different
1999 // basic block and SelectionDAG took over for that basic block.
2000 // 2.) The load instruction is selected before the integer extend. This only
2001 // happens when the integer extend is in a different basic block.
2002 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003 // by FastISel. This happens if there are instructions between the load
2004 // and the integer extend that couldn't be selected by FastISel.
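  // A typical case handled here is:
  //   %val = load i16, ptr %p
  //   %ext = sext i16 %val to i64
  // where the extension was folded into the load above (LDRSHXui) and any
  // code already emitted for the extension itself is deleted below.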
2005 if (IntExtVal) {
2006 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008 // it when it selects the integer extend.
2009 Register Reg = lookUpRegForValue(IntExtVal);
2010 auto *MI = MRI.getUniqueVRegDef(Reg);
2011 if (!MI) {
2012 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013 if (WantZExt) {
2014 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016 ResultReg = std::prev(I)->getOperand(0).getReg();
2017 removeDeadCode(I, std::next(I));
2018 } else
2019 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020 AArch64::sub_32);
2021 }
2022 updateValueMap(I, ResultReg);
2023 return true;
2024 }
2025
2026 // The integer extend has already been emitted - delete all the instructions
2027 // that have been emitted by the integer extend lowering code and use the
2028 // result from the load instruction directly.
2029 while (MI) {
2030 Reg = 0;
2031 for (auto &Opnd : MI->uses()) {
2032 if (Opnd.isReg()) {
2033 Reg = Opnd.getReg();
2034 break;
2035 }
2036 }
2037       MachineBasicBlock::iterator I(MI);
2038       removeDeadCode(I, std::next(I));
2039 MI = nullptr;
2040 if (Reg)
2041 MI = MRI.getUniqueVRegDef(Reg);
2042 }
2043 updateValueMap(IntExtVal, ResultReg);
2044 return true;
2045 }
2046
2047 updateValueMap(I, ResultReg);
2048 return true;
2049}
2050
2051bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2052 Register AddrReg,
2053 MachineMemOperand *MMO) {
2054 unsigned Opc;
2055 switch (VT.SimpleTy) {
2056 default: return false;
2057 case MVT::i8: Opc = AArch64::STLRB; break;
2058 case MVT::i16: Opc = AArch64::STLRH; break;
2059 case MVT::i32: Opc = AArch64::STLRW; break;
2060 case MVT::i64: Opc = AArch64::STLRX; break;
2061 }
2062
2063 const MCInstrDesc &II = TII.get(Opc);
2064 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2065 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2067 .addReg(SrcReg)
2068 .addReg(AddrReg)
2069 .addMemOperand(MMO);
2070 return true;
2071}
2072
2073bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2074 MachineMemOperand *MMO) {
2075 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2076 return false;
2077
2078 // Simplify this down to something we can handle.
2079 if (!simplifyAddress(Addr, VT))
2080 return false;
2081
2082 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2083 if (!ScaleFactor)
2084 llvm_unreachable("Unexpected value type.");
2085
2086 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2087 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2088 bool UseScaled = true;
2089 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2090 UseScaled = false;
2091 ScaleFactor = 1;
2092 }
2093
2094 static const unsigned OpcTable[4][6] = {
2095 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2096 AArch64::STURSi, AArch64::STURDi },
2097 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2098 AArch64::STRSui, AArch64::STRDui },
2099 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2100 AArch64::STRSroX, AArch64::STRDroX },
2101 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2102 AArch64::STRSroW, AArch64::STRDroW }
2103 };
2104
2105 unsigned Opc;
2106 bool VTIsi1 = false;
2107 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2108 Addr.getOffsetReg();
2109 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2110 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2111 Addr.getExtendType() == AArch64_AM::SXTW)
2112 Idx++;
2113
2114 switch (VT.SimpleTy) {
2115 default: llvm_unreachable("Unexpected value type.");
2116 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2117 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2118 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2119 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2120 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2121 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2122 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2123 }
2124
2125 // Storing an i1 requires special handling.
2126 if (VTIsi1 && SrcReg != AArch64::WZR) {
2127 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2128 assert(ANDReg && "Unexpected AND instruction emission failure.");
2129 SrcReg = ANDReg;
2130 }
2131 // Create the base instruction, then add the operands.
2132 const MCInstrDesc &II = TII.get(Opc);
2133 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2134 MachineInstrBuilder MIB =
2135 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2136 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2137
2138 return true;
2139}
2140
2141bool AArch64FastISel::selectStore(const Instruction *I) {
2142 MVT VT;
2143 const Value *Op0 = I->getOperand(0);
2144 // Verify we have a legal type before going any further. Currently, we handle
2145 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2146 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2147 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2148 return false;
2149
2150 const Value *PtrV = I->getOperand(1);
2151 if (TLI.supportSwiftError()) {
2152 // Swifterror values can come from either a function parameter with
2153 // swifterror attribute or an alloca with swifterror attribute.
2154 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2155 if (Arg->hasSwiftErrorAttr())
2156 return false;
2157 }
2158
2159 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2160 if (Alloca->isSwiftError())
2161 return false;
2162 }
2163 }
2164
2165 // Get the value to be stored into a register. Use the zero register directly
2166 // when possible to avoid an unnecessary copy and a wasted register.
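  // For example, "store i32 0, ptr %p" is emitted as a store of WZR and
  // "store double 0.0, ptr %p" as a store of XZR, with no instruction needed
  // to materialize the zero.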
2167 Register SrcReg;
2168 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2169 if (CI->isZero())
2170 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2171 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2172     if (CF->isZero() && !CF->isNegative()) {
2173       VT = MVT::getIntegerVT(VT.getSizeInBits());
2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175 }
2176 }
2177
2178 if (!SrcReg)
2179 SrcReg = getRegForValue(Op0);
2180
2181 if (!SrcReg)
2182 return false;
2183
2184 auto *SI = cast<StoreInst>(I);
2185
2186 // Try to emit a STLR for seq_cst/release.
2187 if (SI->isAtomic()) {
2188 AtomicOrdering Ord = SI->getOrdering();
2189 // The non-atomic instructions are sufficient for relaxed stores.
2190 if (isReleaseOrStronger(Ord)) {
2191 // The STLR addressing mode only supports a base reg; pass that directly.
2192 Register AddrReg = getRegForValue(PtrV);
2193 if (!AddrReg)
2194 return false;
2195 return emitStoreRelease(VT, SrcReg, AddrReg,
2196 createMachineMemOperandFor(I));
2197 }
2198 }
2199
2200 // See if we can handle this address.
2201 Address Addr;
2202 if (!computeAddress(PtrV, Addr, Op0->getType()))
2203 return false;
2204
2205 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2206 return false;
2207 return true;
2208}
2209
2210 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2211   switch (Pred) {
2212 case CmpInst::FCMP_ONE:
2213 case CmpInst::FCMP_UEQ:
2214 default:
2215 // AL is our "false" for now. The other two need more compares.
2216 return AArch64CC::AL;
2217 case CmpInst::ICMP_EQ:
2218 case CmpInst::FCMP_OEQ:
2219 return AArch64CC::EQ;
2220 case CmpInst::ICMP_SGT:
2221 case CmpInst::FCMP_OGT:
2222 return AArch64CC::GT;
2223 case CmpInst::ICMP_SGE:
2224 case CmpInst::FCMP_OGE:
2225 return AArch64CC::GE;
2226 case CmpInst::ICMP_UGT:
2227 case CmpInst::FCMP_UGT:
2228 return AArch64CC::HI;
2229 case CmpInst::FCMP_OLT:
2230 return AArch64CC::MI;
2231 case CmpInst::ICMP_ULE:
2232 case CmpInst::FCMP_OLE:
2233 return AArch64CC::LS;
2234 case CmpInst::FCMP_ORD:
2235 return AArch64CC::VC;
2236 case CmpInst::FCMP_UNO:
2237 return AArch64CC::VS;
2238 case CmpInst::FCMP_UGE:
2239 return AArch64CC::PL;
2240 case CmpInst::ICMP_SLT:
2241 case CmpInst::FCMP_ULT:
2242 return AArch64CC::LT;
2243 case CmpInst::ICMP_SLE:
2244 case CmpInst::FCMP_ULE:
2245 return AArch64CC::LE;
2246 case CmpInst::FCMP_UNE:
2247 case CmpInst::ICMP_NE:
2248 return AArch64CC::NE;
2249 case CmpInst::ICMP_UGE:
2250 return AArch64CC::HS;
2251 case CmpInst::ICMP_ULT:
2252 return AArch64CC::LO;
2253 }
2254}
2255
2256/// Try to emit a combined compare-and-branch instruction.
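/// For example
///   %m = and i32 %x, 8
///   %c = icmp ne i32 %m, 0
///   br i1 %c, label %t, label %f
/// is lowered to a single "TBNZ w0, #3, %t" instead of an AND, a CMP and a
/// conditional branch.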
2257bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2258 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2259 // will not be produced, as they are conditional branch instructions that do
2260 // not set flags.
2261 if (FuncInfo.MF->getFunction().hasFnAttribute(
2262 Attribute::SpeculativeLoadHardening))
2263 return false;
2264
2265 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2266 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2267 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2268
2269 const Value *LHS = CI->getOperand(0);
2270 const Value *RHS = CI->getOperand(1);
2271
2272 MVT VT;
2273 if (!isTypeSupported(LHS->getType(), VT))
2274 return false;
2275
2276 unsigned BW = VT.getSizeInBits();
2277 if (BW > 64)
2278 return false;
2279
2280 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2281 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2282
2283 // Try to take advantage of fallthrough opportunities.
2284 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2285 std::swap(TBB, FBB);
2286     Predicate = CmpInst::getInversePredicate(Predicate);
2287   }
2288
2289 int TestBit = -1;
2290 bool IsCmpNE;
2291 switch (Predicate) {
2292 default:
2293 return false;
2294 case CmpInst::ICMP_EQ:
2295 case CmpInst::ICMP_NE:
2296 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2297 std::swap(LHS, RHS);
2298
2299 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2300 return false;
2301
2302 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2303 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2304 const Value *AndLHS = AI->getOperand(0);
2305 const Value *AndRHS = AI->getOperand(1);
2306
2307 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2308 if (C->getValue().isPowerOf2())
2309 std::swap(AndLHS, AndRHS);
2310
2311 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2312 if (C->getValue().isPowerOf2()) {
2313 TestBit = C->getValue().logBase2();
2314 LHS = AndLHS;
2315 }
2316 }
2317
2318 if (VT == MVT::i1)
2319 TestBit = 0;
2320
2321 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2322 break;
2323 case CmpInst::ICMP_SLT:
2324 case CmpInst::ICMP_SGE:
2325 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2326 return false;
2327
2328 TestBit = BW - 1;
2329 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2330 break;
2331 case CmpInst::ICMP_SGT:
2332 case CmpInst::ICMP_SLE:
2333 if (!isa<ConstantInt>(RHS))
2334 return false;
2335
2336 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2337 return false;
2338
2339 TestBit = BW - 1;
2340 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2341 break;
2342 } // end switch
2343
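  // Indexed as OpcTable[IsBitTest][IsCmpNE][Is64Bit]: CBZ/CBNZ compare the
  // whole register against zero, TBZ/TBNZ test a single bit, each with a W
  // and an X variant.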
2344 static const unsigned OpcTable[2][2][2] = {
2345 { {AArch64::CBZW, AArch64::CBZX },
2346 {AArch64::CBNZW, AArch64::CBNZX} },
2347 { {AArch64::TBZW, AArch64::TBZX },
2348 {AArch64::TBNZW, AArch64::TBNZX} }
2349 };
2350
2351 bool IsBitTest = TestBit != -1;
2352 bool Is64Bit = BW == 64;
2353 if (TestBit < 32 && TestBit >= 0)
2354 Is64Bit = false;
2355
2356 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2357 const MCInstrDesc &II = TII.get(Opc);
2358
2359 Register SrcReg = getRegForValue(LHS);
2360 if (!SrcReg)
2361 return false;
2362
2363 if (BW == 64 && !Is64Bit)
2364 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2365
2366 if ((BW < 32) && !IsBitTest)
2367 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2368
2369 // Emit the combined compare and branch instruction.
2370 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2371 MachineInstrBuilder MIB =
2372 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2373 .addReg(SrcReg);
2374 if (IsBitTest)
2375 MIB.addImm(TestBit);
2376 MIB.addMBB(TBB);
2377
2378 finishCondBranch(BI->getParent(), TBB, FBB);
2379 return true;
2380}
2381
2382bool AArch64FastISel::selectBranch(const Instruction *I) {
2383 const BranchInst *BI = cast<BranchInst>(I);
2384 if (BI->isUnconditional()) {
2385 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2386 fastEmitBranch(MSucc, BI->getDebugLoc());
2387 return true;
2388 }
2389
2390 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2391 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2392
2393 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2394 if (CI->hasOneUse() && isValueAvailable(CI)) {
2395 // Try to optimize or fold the cmp.
2396 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2397 switch (Predicate) {
2398 default:
2399 break;
2400       case CmpInst::FCMP_FALSE:
2401         fastEmitBranch(FBB, MIMD.getDL());
2402 return true;
2403 case CmpInst::FCMP_TRUE:
2404 fastEmitBranch(TBB, MIMD.getDL());
2405 return true;
2406 }
2407
2408 // Try to emit a combined compare-and-branch first.
2409 if (emitCompareAndBranch(BI))
2410 return true;
2411
2412 // Try to take advantage of fallthrough opportunities.
2413 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2414 std::swap(TBB, FBB);
2415         Predicate = CmpInst::getInversePredicate(Predicate);
2416       }
2417
2418 // Emit the cmp.
2419 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2420 return false;
2421
2422 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2423 // instruction.
2424 AArch64CC::CondCode CC = getCompareCC(Predicate);
2425       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2426       switch (Predicate) {
2427 default:
2428 break;
2429 case CmpInst::FCMP_UEQ:
2430 ExtraCC = AArch64CC::EQ;
2431 CC = AArch64CC::VS;
2432 break;
2433 case CmpInst::FCMP_ONE:
2434 ExtraCC = AArch64CC::MI;
2435 CC = AArch64CC::GT;
2436 break;
2437 }
2438 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2439
2440 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2441 if (ExtraCC != AArch64CC::AL) {
2442 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2443 .addImm(ExtraCC)
2444 .addMBB(TBB);
2445 }
2446
2447 // Emit the branch.
2448 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2449 .addImm(CC)
2450 .addMBB(TBB);
2451
2452 finishCondBranch(BI->getParent(), TBB, FBB);
2453 return true;
2454 }
2455 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2456 uint64_t Imm = CI->getZExtValue();
2457 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2458 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2459 .addMBB(Target);
2460
2461 // Obtain the branch probability and add the target to the successor list.
2462 if (FuncInfo.BPI) {
2463 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2464 BI->getParent(), Target->getBasicBlock());
2465 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2466 } else
2467 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2468 return true;
2469 } else {
2470     AArch64CC::CondCode CC = AArch64CC::NE;
2471     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2472 // Fake request the condition, otherwise the intrinsic might be completely
2473 // optimized away.
2474 Register CondReg = getRegForValue(BI->getCondition());
2475 if (!CondReg)
2476 return false;
2477
2478 // Emit the branch.
2479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2480 .addImm(CC)
2481 .addMBB(TBB);
2482
2483 finishCondBranch(BI->getParent(), TBB, FBB);
2484 return true;
2485 }
2486 }
2487
2488 Register CondReg = getRegForValue(BI->getCondition());
2489 if (!CondReg)
2490 return false;
2491
2492 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2493 unsigned Opcode = AArch64::TBNZW;
2494 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2495 std::swap(TBB, FBB);
2496 Opcode = AArch64::TBZW;
2497 }
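  // So a plain "br i1 %c, ..." tests bit 0 of the i32 register holding %c,
  // e.g. "TBNZ w0, #0, %taken" (or TBZ with the targets swapped when the true
  // successor is the fallthrough block).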
2498
2499 const MCInstrDesc &II = TII.get(Opcode);
2500 Register ConstrainedCondReg
2501 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2502 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2503 .addReg(ConstrainedCondReg)
2504 .addImm(0)
2505 .addMBB(TBB);
2506
2507 finishCondBranch(BI->getParent(), TBB, FBB);
2508 return true;
2509}
2510
2511bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2512 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2513 Register AddrReg = getRegForValue(BI->getOperand(0));
2514 if (!AddrReg)
2515 return false;
2516
2517 // Authenticated indirectbr is not implemented yet.
2518 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2519 return false;
2520
2521 // Emit the indirect branch.
2522 const MCInstrDesc &II = TII.get(AArch64::BR);
2523 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2525
2526 // Make sure the CFG is up-to-date.
2527 for (const auto *Succ : BI->successors())
2528 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2529
2530 return true;
2531}
2532
2533bool AArch64FastISel::selectCmp(const Instruction *I) {
2534 const CmpInst *CI = cast<CmpInst>(I);
2535
2536 // Vectors of i1 are weird: bail out.
2537 if (CI->getType()->isVectorTy())
2538 return false;
2539
2540 // Try to optimize or fold the cmp.
2541 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2542 Register ResultReg;
2543 switch (Predicate) {
2544 default:
2545 break;
2546   case CmpInst::FCMP_FALSE:
2547     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2548 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2549 TII.get(TargetOpcode::COPY), ResultReg)
2550 .addReg(AArch64::WZR, getKillRegState(true));
2551 break;
2552 case CmpInst::FCMP_TRUE:
2553 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2554 break;
2555 }
2556
2557 if (ResultReg) {
2558 updateValueMap(I, ResultReg);
2559 return true;
2560 }
2561
2562 // Emit the cmp.
2563 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2564 return false;
2565
2566 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2567
2568 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2569 // condition codes are inverted, because they are used by CSINC.
2570 static unsigned CondCodeTable[2][2] = {
2571     { AArch64CC::NE, AArch64CC::VC },
2572     { AArch64CC::PL, AArch64CC::LE }
2573   };
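  // CSINC computes "Rd = cond ? Rn : Rm + 1", so the pair {NE, VC} ORs the
  // EQ and VS tests (unordered or equal) for FCMP_UEQ, and the pair {PL, LE}
  // ORs the MI and GT tests for FCMP_ONE.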
2574 unsigned *CondCodes = nullptr;
2575 switch (Predicate) {
2576 default:
2577 break;
2578 case CmpInst::FCMP_UEQ:
2579 CondCodes = &CondCodeTable[0][0];
2580 break;
2581 case CmpInst::FCMP_ONE:
2582 CondCodes = &CondCodeTable[1][0];
2583 break;
2584 }
2585
2586 if (CondCodes) {
2587 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2588 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2589 TmpReg1)
2590 .addReg(AArch64::WZR, getKillRegState(true))
2591 .addReg(AArch64::WZR, getKillRegState(true))
2592 .addImm(CondCodes[0]);
2593 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2594 ResultReg)
2595 .addReg(TmpReg1, getKillRegState(true))
2596 .addReg(AArch64::WZR, getKillRegState(true))
2597 .addImm(CondCodes[1]);
2598
2599 updateValueMap(I, ResultReg);
2600 return true;
2601 }
2602
2603 // Now set a register based on the comparison.
2604 AArch64CC::CondCode CC = getCompareCC(Predicate);
2605 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2606 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2608 ResultReg)
2609 .addReg(AArch64::WZR, getKillRegState(true))
2610 .addReg(AArch64::WZR, getKillRegState(true))
2611 .addImm(invertedCC);
2612
2613 updateValueMap(I, ResultReg);
2614 return true;
2615}
2616
2617/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2618/// value.
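/// For example, "select i1 %c, i1 true, i1 %b" becomes "ORR %c, %b" and
/// "select i1 %c, i1 %a, i1 false" becomes "AND %c, %a", avoiding a CSEL.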
2619bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2620 if (!SI->getType()->isIntegerTy(1))
2621 return false;
2622
2623 const Value *Src1Val, *Src2Val;
2624 unsigned Opc = 0;
2625 bool NeedExtraOp = false;
2626 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2627 if (CI->isOne()) {
2628 Src1Val = SI->getCondition();
2629 Src2Val = SI->getFalseValue();
2630 Opc = AArch64::ORRWrr;
2631 } else {
2632 assert(CI->isZero());
2633 Src1Val = SI->getFalseValue();
2634 Src2Val = SI->getCondition();
2635 Opc = AArch64::BICWrr;
2636 }
2637 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2638 if (CI->isOne()) {
2639 Src1Val = SI->getCondition();
2640 Src2Val = SI->getTrueValue();
2641 Opc = AArch64::ORRWrr;
2642 NeedExtraOp = true;
2643 } else {
2644 assert(CI->isZero());
2645 Src1Val = SI->getCondition();
2646 Src2Val = SI->getTrueValue();
2647 Opc = AArch64::ANDWrr;
2648 }
2649 }
2650
2651 if (!Opc)
2652 return false;
2653
2654 Register Src1Reg = getRegForValue(Src1Val);
2655 if (!Src1Reg)
2656 return false;
2657
2658 Register Src2Reg = getRegForValue(Src2Val);
2659 if (!Src2Reg)
2660 return false;
2661
2662 if (NeedExtraOp)
2663 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2664
2665 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2666 Src2Reg);
2667 updateValueMap(SI, ResultReg);
2668 return true;
2669}
2670
2671bool AArch64FastISel::selectSelect(const Instruction *I) {
2672 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2673 MVT VT;
2674 if (!isTypeSupported(I->getType(), VT))
2675 return false;
2676
2677 unsigned Opc;
2678 const TargetRegisterClass *RC;
2679 switch (VT.SimpleTy) {
2680 default:
2681 return false;
2682 case MVT::i1:
2683 case MVT::i8:
2684 case MVT::i16:
2685 case MVT::i32:
2686 Opc = AArch64::CSELWr;
2687 RC = &AArch64::GPR32RegClass;
2688 break;
2689 case MVT::i64:
2690 Opc = AArch64::CSELXr;
2691 RC = &AArch64::GPR64RegClass;
2692 break;
2693 case MVT::f32:
2694 Opc = AArch64::FCSELSrrr;
2695 RC = &AArch64::FPR32RegClass;
2696 break;
2697 case MVT::f64:
2698 Opc = AArch64::FCSELDrrr;
2699 RC = &AArch64::FPR64RegClass;
2700 break;
2701 }
2702
2703 const SelectInst *SI = cast<SelectInst>(I);
2704 const Value *Cond = SI->getCondition();
2705   AArch64CC::CondCode CC = AArch64CC::NE;
2706   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2707
2708 if (optimizeSelect(SI))
2709 return true;
2710
2711 // Try to pickup the flags, so we don't have to emit another compare.
2712 if (foldXALUIntrinsic(CC, I, Cond)) {
2713 // Fake request the condition to force emission of the XALU intrinsic.
2714 Register CondReg = getRegForValue(Cond);
2715 if (!CondReg)
2716 return false;
2717 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2718 isValueAvailable(Cond)) {
2719 const auto *Cmp = cast<CmpInst>(Cond);
2720 // Try to optimize or fold the cmp.
2721 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2722 const Value *FoldSelect = nullptr;
2723 switch (Predicate) {
2724 default:
2725 break;
2726     case CmpInst::FCMP_FALSE:
2727       FoldSelect = SI->getFalseValue();
2728 break;
2729 case CmpInst::FCMP_TRUE:
2730 FoldSelect = SI->getTrueValue();
2731 break;
2732 }
2733
2734 if (FoldSelect) {
2735 Register SrcReg = getRegForValue(FoldSelect);
2736 if (!SrcReg)
2737 return false;
2738
2739 updateValueMap(I, SrcReg);
2740 return true;
2741 }
2742
2743 // Emit the cmp.
2744 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2745 return false;
2746
2747 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2748 CC = getCompareCC(Predicate);
2749 switch (Predicate) {
2750 default:
2751 break;
2752 case CmpInst::FCMP_UEQ:
2753 ExtraCC = AArch64CC::EQ;
2754 CC = AArch64CC::VS;
2755 break;
2756 case CmpInst::FCMP_ONE:
2757 ExtraCC = AArch64CC::MI;
2758 CC = AArch64CC::GT;
2759 break;
2760 }
2761 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2762 } else {
2763 Register CondReg = getRegForValue(Cond);
2764 if (!CondReg)
2765 return false;
2766
2767 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768 CondReg = constrainOperandRegClass(II, CondReg, 1);
2769
2770 // Emit a TST instruction (ANDS wzr, reg, #imm).
2771 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2772 AArch64::WZR)
2773 .addReg(CondReg)
2774         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2775   }
2776
2777 Register Src1Reg = getRegForValue(SI->getTrueValue());
2778 Register Src2Reg = getRegForValue(SI->getFalseValue());
2779
2780 if (!Src1Reg || !Src2Reg)
2781 return false;
2782
2783 if (ExtraCC != AArch64CC::AL)
2784 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2785
2786 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2787 updateValueMap(I, ResultReg);
2788 return true;
2789}
2790
2791bool AArch64FastISel::selectFPExt(const Instruction *I) {
2792 Value *V = I->getOperand(0);
2793 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2794 return false;
2795
2796 Register Op = getRegForValue(V);
2797 if (Op == 0)
2798 return false;
2799
2800 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2801 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2802 ResultReg).addReg(Op);
2803 updateValueMap(I, ResultReg);
2804 return true;
2805}
2806
2807bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2808 Value *V = I->getOperand(0);
2809 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2810 return false;
2811
2812 Register Op = getRegForValue(V);
2813 if (Op == 0)
2814 return false;
2815
2816 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2817 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2818 ResultReg).addReg(Op);
2819 updateValueMap(I, ResultReg);
2820 return true;
2821}
2822
2823// FPToUI and FPToSI
2824bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2825 MVT DestVT;
2826 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2827 return false;
2828
2829 Register SrcReg = getRegForValue(I->getOperand(0));
2830 if (!SrcReg)
2831 return false;
2832
2833 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2834 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2835 return false;
2836
2837 unsigned Opc;
2838 if (SrcVT == MVT::f64) {
2839 if (Signed)
2840 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2841 else
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2843 } else {
2844 if (Signed)
2845 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2846 else
2847 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2848 }
2849 Register ResultReg = createResultReg(
2850 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2851 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2852 .addReg(SrcReg);
2853 updateValueMap(I, ResultReg);
2854 return true;
2855}
2856
2857bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2858 MVT DestVT;
2859 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2860 return false;
2861 // Let regular ISEL handle FP16
2862 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2863 return false;
2864
2865 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2866 "Unexpected value type.");
2867
2868 Register SrcReg = getRegForValue(I->getOperand(0));
2869 if (!SrcReg)
2870 return false;
2871
2872 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2873
2874 // Handle sign-extension.
2875 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2876 SrcReg =
2877 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2878 if (!SrcReg)
2879 return false;
2880 }
2881
2882 unsigned Opc;
2883 if (SrcVT == MVT::i64) {
2884 if (Signed)
2885 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2886 else
2887 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2888 } else {
2889 if (Signed)
2890 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2891 else
2892 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2893 }
2894
2895 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2896 updateValueMap(I, ResultReg);
2897 return true;
2898}
2899
2900bool AArch64FastISel::fastLowerArguments() {
2901 if (!FuncInfo.CanLowerReturn)
2902 return false;
2903
2904 const Function *F = FuncInfo.Fn;
2905 if (F->isVarArg())
2906 return false;
2907
2908 CallingConv::ID CC = F->getCallingConv();
2909 if (CC != CallingConv::C && CC != CallingConv::Swift)
2910 return false;
2911
2912 if (Subtarget->hasCustomCallingConv())
2913 return false;
2914
2915 // Only handle simple cases of up to 8 GPR and FPR each.
2916 unsigned GPRCnt = 0;
2917 unsigned FPRCnt = 0;
2918 for (auto const &Arg : F->args()) {
2919 if (Arg.hasAttribute(Attribute::ByVal) ||
2920 Arg.hasAttribute(Attribute::InReg) ||
2921 Arg.hasAttribute(Attribute::StructRet) ||
2922 Arg.hasAttribute(Attribute::SwiftSelf) ||
2923 Arg.hasAttribute(Attribute::SwiftAsync) ||
2924 Arg.hasAttribute(Attribute::SwiftError) ||
2925 Arg.hasAttribute(Attribute::Nest))
2926 return false;
2927
2928 Type *ArgTy = Arg.getType();
2929 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2930 return false;
2931
2932 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2933 if (!ArgVT.isSimple())
2934 return false;
2935
2936 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2937 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2938 return false;
2939
2940 if (VT.isVector() &&
2941 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2942 return false;
2943
2944 if (VT >= MVT::i1 && VT <= MVT::i64)
2945 ++GPRCnt;
2946 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2947 VT.is128BitVector())
2948 ++FPRCnt;
2949 else
2950 return false;
2951
2952 if (GPRCnt > 8 || FPRCnt > 8)
2953 return false;
2954 }
2955
2956 static const MCPhysReg Registers[6][8] = {
2957 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2958 AArch64::W5, AArch64::W6, AArch64::W7 },
2959 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2960 AArch64::X5, AArch64::X6, AArch64::X7 },
2961 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2962 AArch64::H5, AArch64::H6, AArch64::H7 },
2963 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2964 AArch64::S5, AArch64::S6, AArch64::S7 },
2965 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2966 AArch64::D5, AArch64::D6, AArch64::D7 },
2967 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2968 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2969 };
2970
2971 unsigned GPRIdx = 0;
2972 unsigned FPRIdx = 0;
2973 for (auto const &Arg : F->args()) {
2974 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2975 unsigned SrcReg;
2976 const TargetRegisterClass *RC;
2977 if (VT >= MVT::i1 && VT <= MVT::i32) {
2978 SrcReg = Registers[0][GPRIdx++];
2979 RC = &AArch64::GPR32RegClass;
2980 VT = MVT::i32;
2981 } else if (VT == MVT::i64) {
2982 SrcReg = Registers[1][GPRIdx++];
2983 RC = &AArch64::GPR64RegClass;
2984 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2985 SrcReg = Registers[2][FPRIdx++];
2986 RC = &AArch64::FPR16RegClass;
2987 } else if (VT == MVT::f32) {
2988 SrcReg = Registers[3][FPRIdx++];
2989 RC = &AArch64::FPR32RegClass;
2990 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2991 SrcReg = Registers[4][FPRIdx++];
2992 RC = &AArch64::FPR64RegClass;
2993 } else if (VT.is128BitVector()) {
2994 SrcReg = Registers[5][FPRIdx++];
2995 RC = &AArch64::FPR128RegClass;
2996 } else
2997 llvm_unreachable("Unexpected value type.");
2998
2999 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3000 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3001 // Without this, EmitLiveInCopies may eliminate the livein if its only
3002 // use is a bitcast (which isn't turned into an instruction).
3003 Register ResultReg = createResultReg(RC);
3004 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3005 TII.get(TargetOpcode::COPY), ResultReg)
3006 .addReg(DstReg, getKillRegState(true));
3007 updateValueMap(&Arg, ResultReg);
3008 }
3009 return true;
3010}
3011
3012bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3013 SmallVectorImpl<MVT> &OutVTs,
3014 SmallVectorImpl<Type *> &OrigTys,
3015 unsigned &NumBytes) {
3016 CallingConv::ID CC = CLI.CallConv;
3017   SmallVector<CCValAssign, 16> ArgLocs;
3018   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3019 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3020 CCAssignFnForCall(CC));
3021
3022 // Get a count of how many bytes are to be pushed on the stack.
3023 NumBytes = CCInfo.getStackSize();
3024
3025 // Issue CALLSEQ_START
3026 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3028 .addImm(NumBytes).addImm(0);
3029
3030 // Process the args.
3031 for (CCValAssign &VA : ArgLocs) {
3032 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3033 MVT ArgVT = OutVTs[VA.getValNo()];
3034
3035 Register ArgReg = getRegForValue(ArgVal);
3036 if (!ArgReg)
3037 return false;
3038
3039 // Handle arg promotion: SExt, ZExt, AExt.
3040 switch (VA.getLocInfo()) {
3041 case CCValAssign::Full:
3042 break;
3043 case CCValAssign::SExt: {
3044 MVT DestVT = VA.getLocVT();
3045 MVT SrcVT = ArgVT;
3046 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3047 if (!ArgReg)
3048 return false;
3049 break;
3050 }
3051 case CCValAssign::AExt:
3052 // Intentional fall-through.
3053 case CCValAssign::ZExt: {
3054 MVT DestVT = VA.getLocVT();
3055 MVT SrcVT = ArgVT;
3056 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3057 if (!ArgReg)
3058 return false;
3059 break;
3060 }
3061 default:
3062 llvm_unreachable("Unknown arg promotion!");
3063 }
3064
3065 // Now copy/store arg to correct locations.
3066 if (VA.isRegLoc() && !VA.needsCustom()) {
3067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3068 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3069 CLI.OutRegs.push_back(VA.getLocReg());
3070 } else if (VA.needsCustom()) {
3071 // FIXME: Handle custom args.
3072 return false;
3073 } else {
3074 assert(VA.isMemLoc() && "Assuming store on stack.");
3075
3076 // Don't emit stores for undef values.
3077 if (isa<UndefValue>(ArgVal))
3078 continue;
3079
3080 // Need to store on the stack.
3081 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3082
3083 unsigned BEAlign = 0;
3084 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3085 BEAlign = 8 - ArgSize;
3086
3087 Address Addr;
3088 Addr.setKind(Address::RegBase);
3089 Addr.setReg(AArch64::SP);
3090 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3091
3092 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3093 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3094 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3095 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3096
3097 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3098 return false;
3099 }
3100 }
3101 return true;
3102}
3103
3104bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3105 CallingConv::ID CC = CLI.CallConv;
3106
3107 // Issue CALLSEQ_END
3108 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3110 .addImm(NumBytes).addImm(0);
3111
3112 // Now the return values.
3113   SmallVector<CCValAssign, 16> RVLocs;
3114   CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3115 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3116
3117 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3118 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3119 CCValAssign &VA = RVLocs[i];
3120 MVT CopyVT = VA.getValVT();
3121 Register CopyReg = ResultReg + i;
3122
3123 // TODO: Handle big-endian results
3124 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3125 return false;
3126
3127 // Copy result out of their specified physreg.
3128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3129 CopyReg)
3130 .addReg(VA.getLocReg());
3131 CLI.InRegs.push_back(VA.getLocReg());
3132 }
3133
3134 CLI.ResultReg = ResultReg;
3135 CLI.NumResultRegs = RVLocs.size();
3136
3137 return true;
3138}
3139
3140bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3141 CallingConv::ID CC = CLI.CallConv;
3142 bool IsTailCall = CLI.IsTailCall;
3143 bool IsVarArg = CLI.IsVarArg;
3144 const Value *Callee = CLI.Callee;
3145 MCSymbol *Symbol = CLI.Symbol;
3146
3147 if (!Callee && !Symbol)
3148 return false;
3149
3150 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3151 // a bti instruction following the call.
3152 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3153 !Subtarget->noBTIAtReturnTwice() &&
3154 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3158 if (CLI.CB && CLI.CB->isIndirectCall() &&
3159 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3160 return false;
3161
3162 // Allow SelectionDAG isel to handle clang.arc.attachedcall operand bundle.
3163 if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB))
3164 return false;
3165
3166 // Allow SelectionDAG isel to handle tail calls.
3167 if (IsTailCall)
3168 return false;
3169
3170 // FIXME: we could and should support this, but for now correctness at -O0 is
3171 // more important.
3172 if (Subtarget->isTargetILP32())
3173 return false;
3174
3175 CodeModel::Model CM = TM.getCodeModel();
3176 // Only support the small-addressing and large code models.
3177 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3178 return false;
3179
3180 // FIXME: Add large code model support for ELF.
3181 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3182 return false;
3183
3184 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3185 // attribute. Check "RtLibUseGOT" instead.
3186 if (MF->getFunction().getParent()->getRtLibUseGOT())
3187 return false;
3188
3189 // Let SDISel handle vararg functions.
3190 if (IsVarArg)
3191 return false;
3192
3193 if (Subtarget->isWindowsArm64EC())
3194 return false;
3195
3196 for (auto Flag : CLI.OutFlags)
3197 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3198 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3199 return false;
3200
3201 // Set up the argument vectors.
3202 SmallVector<MVT, 16> OutVTs;
3203   SmallVector<Type *, 16> OrigTys;
3204   OutVTs.reserve(CLI.OutVals.size());
3205
3206 for (auto *Val : CLI.OutVals) {
3207 MVT VT;
3208 if (!isTypeLegal(Val->getType(), VT) &&
3209 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3210 return false;
3211
3212 // We don't handle vector parameters yet.
3213 if (VT.isVector() || VT.getSizeInBits() > 64)
3214 return false;
3215
3216 OutVTs.push_back(VT);
3217 OrigTys.push_back(Val->getType());
3218 }
3219
3220 Address Addr;
3221 if (Callee && !computeCallAddress(Callee, Addr))
3222 return false;
3223
3224 // The weak function target may be zero; in that case we must use indirect
3225 // addressing via a stub on windows as it may be out of range for a
3226 // PC-relative jump.
3227 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3228 Addr.getGlobalValue()->hasExternalWeakLinkage())
3229 return false;
3230
3231 // Handle the arguments now that we've gotten them.
3232 unsigned NumBytes;
3233 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3234 return false;
3235
3236 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3237 if (RegInfo->isAnyArgRegReserved(*MF))
3238 RegInfo->emitReservedArgRegCallError(*MF);
3239
3240 // Issue the call.
3241 MachineInstrBuilder MIB;
3242 if (Subtarget->useSmallAddressing()) {
3243 const MCInstrDesc &II =
3244 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3245 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3246 if (Symbol)
3247 MIB.addSym(Symbol, 0);
3248 else if (Addr.getGlobalValue())
3249 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3250 else if (Addr.getReg()) {
3251 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3252 MIB.addReg(Reg);
3253 } else
3254 return false;
3255 } else {
3256 Register CallReg;
3257 if (Symbol) {
3258 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3259 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3260 ADRPReg)
3261           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3262
3263 CallReg = createResultReg(&AArch64::GPR64RegClass);
3264 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3265 TII.get(AArch64::LDRXui), CallReg)
3266 .addReg(ADRPReg)
3267 .addSym(Symbol,
3268                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3269     } else if (Addr.getGlobalValue())
3270 CallReg = materializeGV(Addr.getGlobalValue());
3271 else if (Addr.getReg())
3272 CallReg = Addr.getReg();
3273
3274 if (!CallReg)
3275 return false;
3276
3277 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3278 CallReg = constrainOperandRegClass(II, CallReg, 0);
3279 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3280 }
3281
3282 // Add implicit physical register uses to the call.
3283 for (auto Reg : CLI.OutRegs)
3284     MIB.addReg(Reg, RegState::Implicit);
3285
3286 // Add a register mask with the call-preserved registers.
3287 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3288 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3289
3290 CLI.Call = MIB;
3291
3292 // Finish off the call including any return values.
3293 return finishCall(CLI, NumBytes);
3294}
3295
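// A copy counts as small if it needs at most four accesses of the given
// alignment, or is under 32 bytes when the alignment is unknown; e.g. a
// 16-byte copy with 4-byte alignment is inlined, while a 32-byte copy with
// unknown alignment is left to the memcpy libcall.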
3296bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3297 if (Alignment)
3298 return Len / Alignment->value() <= 4;
3299 else
3300 return Len < 32;
3301}
3302
3303bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3304 uint64_t Len, MaybeAlign Alignment) {
3305 // Make sure we don't bloat code by inlining very large memcpy's.
3306 if (!isMemCpySmall(Len, Alignment))
3307 return false;
3308
3309 int64_t UnscaledOffset = 0;
3310 Address OrigDest = Dest;
3311 Address OrigSrc = Src;
3312
3313 while (Len) {
3314 MVT VT;
3315 if (!Alignment || *Alignment >= 8) {
3316 if (Len >= 8)
3317 VT = MVT::i64;
3318 else if (Len >= 4)
3319 VT = MVT::i32;
3320 else if (Len >= 2)
3321 VT = MVT::i16;
3322 else {
3323 VT = MVT::i8;
3324 }
3325 } else {
3326 assert(Alignment && "Alignment is set in this branch");
3327 // Bound based on alignment.
3328 if (Len >= 4 && *Alignment == 4)
3329 VT = MVT::i32;
3330 else if (Len >= 2 && *Alignment == 2)
3331 VT = MVT::i16;
3332 else {
3333 VT = MVT::i8;
3334 }
3335 }
3336
3337 Register ResultReg = emitLoad(VT, VT, Src);
3338 if (!ResultReg)
3339 return false;
3340
3341 if (!emitStore(VT, ResultReg, Dest))
3342 return false;
3343
3344 int64_t Size = VT.getSizeInBits() / 8;
3345 Len -= Size;
3346 UnscaledOffset += Size;
3347
3348 // We need to recompute the unscaled offset for each iteration.
3349 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3350 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3351 }
3352
3353 return true;
3354}
3355
3356/// Check if it is possible to fold the condition from the XALU intrinsic
3357/// into the user. The condition code will only be updated on success.
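/// For example, given
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, ...
/// the branch can test the V flag (AArch64CC::VS) set by the ADDS that
/// implements the intrinsic instead of materializing %ovf first.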
3358bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3359 const Instruction *I,
3360 const Value *Cond) {
3361   if (!isa<ExtractValueInst>(Cond))
3362     return false;
3363
3364 const auto *EV = cast<ExtractValueInst>(Cond);
3365 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3366 return false;
3367
3368 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3369 MVT RetVT;
3370 const Function *Callee = II->getCalledFunction();
3371 Type *RetTy =
3372 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3373 if (!isTypeLegal(RetTy, RetVT))
3374 return false;
3375
3376 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3377 return false;
3378
3379 const Value *LHS = II->getArgOperand(0);
3380 const Value *RHS = II->getArgOperand(1);
3381
3382 // Canonicalize immediate to the RHS.
3383 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3384 std::swap(LHS, RHS);
3385
3386 // Simplify multiplies.
3387 Intrinsic::ID IID = II->getIntrinsicID();
3388 switch (IID) {
3389 default:
3390 break;
3391 case Intrinsic::smul_with_overflow:
3392 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3393 if (C->getValue() == 2)
3394 IID = Intrinsic::sadd_with_overflow;
3395 break;
3396 case Intrinsic::umul_with_overflow:
3397 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3398 if (C->getValue() == 2)
3399 IID = Intrinsic::uadd_with_overflow;
3400 break;
3401 }
3402
3403 AArch64CC::CondCode TmpCC;
3404 switch (IID) {
3405 default:
3406 return false;
3407 case Intrinsic::sadd_with_overflow:
3408 case Intrinsic::ssub_with_overflow:
3409 TmpCC = AArch64CC::VS;
3410 break;
3411 case Intrinsic::uadd_with_overflow:
3412 TmpCC = AArch64CC::HS;
3413 break;
3414 case Intrinsic::usub_with_overflow:
3415 TmpCC = AArch64CC::LO;
3416 break;
3417 case Intrinsic::smul_with_overflow:
3418 case Intrinsic::umul_with_overflow:
3419 TmpCC = AArch64CC::NE;
3420 break;
3421 }
3422
3423 // Check if both instructions are in the same basic block.
3424 if (!isValueAvailable(II))
3425 return false;
3426
3427 // Make sure nothing is in the way
3428   BasicBlock::const_iterator Start(I);
3429   BasicBlock::const_iterator End(II);
3430   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3431 // We only expect extractvalue instructions between the intrinsic and the
3432 // instruction to be selected.
3433 if (!isa<ExtractValueInst>(Itr))
3434 return false;
3435
3436 // Check that the extractvalue operand comes from the intrinsic.
3437 const auto *EVI = cast<ExtractValueInst>(Itr);
3438 if (EVI->getAggregateOperand() != II)
3439 return false;
3440 }
3441
3442 CC = TmpCC;
3443 return true;
3444}
3445
3446bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3447 // FIXME: Handle more intrinsics.
3448 switch (II->getIntrinsicID()) {
3449 default: return false;
3450 case Intrinsic::frameaddress: {
3451 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3452 MFI.setFrameAddressIsTaken(true);
3453
3454 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3455 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3456 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3457 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3458 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3459 // Recursively load frame address
3460 // ldr x0, [fp]
3461 // ldr x0, [x0]
3462 // ldr x0, [x0]
3463 // ...
3464 Register DestReg;
3465 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3466 while (Depth--) {
3467 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3468 SrcReg, 0);
3469 assert(DestReg && "Unexpected LDR instruction emission failure.");
3470 SrcReg = DestReg;
3471 }
3472
3473 updateValueMap(II, SrcReg);
3474 return true;
3475 }
3476 case Intrinsic::sponentry: {
3477 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3478
3479 // SP = FP + Fixed Object + 16
3480 int FI = MFI.CreateFixedObject(4, 0, false);
3481 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3482 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3483 TII.get(AArch64::ADDXri), ResultReg)
3484 .addFrameIndex(FI)
3485 .addImm(0)
3486 .addImm(0);
3487
3488 updateValueMap(II, ResultReg);
3489 return true;
3490 }
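// Informal reading of the sponentry lowering above (an interpretation, not a
// comment from the source): the fixed object created at offset 0 stands for
// the incoming stack pointer area, so taking its address with ADDXri yields
// the stack pointer value at function entry, which is what llvm.sponentry is
// specified to return.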
3491 case Intrinsic::memcpy:
3492 case Intrinsic::memmove: {
3493 const auto *MTI = cast<MemTransferInst>(II);
3494 // Don't handle volatile.
3495 if (MTI->isVolatile())
3496 return false;
3497
3498 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3499 // we would emit dead code because we don't currently handle memmoves.
3500 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3501 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3502 // Small memcpy's are common enough that we want to do them without a call
3503 // if possible.
3504 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3505 MaybeAlign Alignment;
3506 if (MTI->getDestAlign() || MTI->getSourceAlign())
3507 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3508 MTI->getSourceAlign().valueOrOne());
3509 if (isMemCpySmall(Len, Alignment)) {
3510 Address Dest, Src;
3511 if (!computeAddress(MTI->getRawDest(), Dest) ||
3512 !computeAddress(MTI->getRawSource(), Src))
3513 return false;
3514 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3515 return true;
3516 }
3517 }
3518
3519 if (!MTI->getLength()->getType()->isIntegerTy(64))
3520 return false;
3521
3522 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3523 // Fast instruction selection doesn't support the special
3524 // address spaces.
3525 return false;
3526
3527 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3528 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3529 }
3530 case Intrinsic::memset: {
3531 const MemSetInst *MSI = cast<MemSetInst>(II);
3532 // Don't handle volatile.
3533 if (MSI->isVolatile())
3534 return false;
3535
3536 if (!MSI->getLength()->getType()->isIntegerTy(64))
3537 return false;
3538
3539 if (MSI->getDestAddressSpace() > 255)
3540 // Fast instruction selection doesn't support the special
3541 // address spaces.
3542 return false;
3543
3544 return lowerCallTo(II, "memset", II->arg_size() - 1);
3545 }
3546 case Intrinsic::sin:
3547 case Intrinsic::cos:
3548 case Intrinsic::tan:
3549 case Intrinsic::pow: {
3550 MVT RetVT;
3551 if (!isTypeLegal(II->getType(), RetVT))
3552 return false;
3553
3554 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3555 return false;
3556
3557 static const RTLIB::Libcall LibCallTable[4][2] = {
3558 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3559 {RTLIB::COS_F32, RTLIB::COS_F64},
3560 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3561 {RTLIB::POW_F32, RTLIB::POW_F64}};
3562 RTLIB::Libcall LC;
3563 bool Is64Bit = RetVT == MVT::f64;
3564 switch (II->getIntrinsicID()) {
3565 default:
3566 llvm_unreachable("Unexpected intrinsic.");
3567 case Intrinsic::sin:
3568 LC = LibCallTable[0][Is64Bit];
3569 break;
3570 case Intrinsic::cos:
3571 LC = LibCallTable[1][Is64Bit];
3572 break;
3573 case Intrinsic::tan:
3574 LC = LibCallTable[2][Is64Bit];
3575 break;
3576 case Intrinsic::pow:
3577 LC = LibCallTable[3][Is64Bit];
3578 break;
3579 }
3580
3581 ArgListTy Args;
3582 Args.reserve(II->arg_size());
3583
3584 // Populate the argument list.
3585 for (auto &Arg : II->args())
3586 Args.emplace_back(Arg);
3587
3588 CallLoweringInfo CLI;
3589 MCContext &Ctx = MF->getContext();
3590
3591 RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(LC);
3592 if (LCImpl == RTLIB::Unsupported)
3593 return false;
3594
3595 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
3596 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
3597 CLI.setCallee(DL, Ctx, CC, II->getType(), FuncName, std::move(Args));
3598 if (!lowerCallTo(CLI))
3599 return false;
3600 updateValueMap(II, CLI.ResultReg);
3601 return true;
3602 }
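// Usage sketch (illustrative): with the table above, a call such as
//   %r = call double @llvm.sin.f64(double %x)
// becomes a plain libcall to whatever implementation the libcall lowering
// reports for RTLIB::SIN_F64 (typically "sin"; the f32 variants map to the
// *f forms such as "sinf"), and lowering fails if no implementation is
// available.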
3603 case Intrinsic::fabs: {
3604 MVT VT;
3605 if (!isTypeLegal(II->getType(), VT))
3606 return false;
3607
3608 unsigned Opc;
3609 switch (VT.SimpleTy) {
3610 default:
3611 return false;
3612 case MVT::f32:
3613 Opc = AArch64::FABSSr;
3614 break;
3615 case MVT::f64:
3616 Opc = AArch64::FABSDr;
3617 break;
3618 }
3619 Register SrcReg = getRegForValue(II->getOperand(0));
3620 if (!SrcReg)
3621 return false;
3622 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3624 .addReg(SrcReg);
3625 updateValueMap(II, ResultReg);
3626 return true;
3627 }
3628 case Intrinsic::trap:
3629 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3630 .addImm(1);
3631 return true;
3632 case Intrinsic::debugtrap:
3633 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3634 .addImm(0xF000);
3635 return true;
3636
3637 case Intrinsic::sqrt: {
3638 Type *RetTy = II->getCalledFunction()->getReturnType();
3639
3640 MVT VT;
3641 if (!isTypeLegal(RetTy, VT))
3642 return false;
3643
3644 Register Op0Reg = getRegForValue(II->getOperand(0));
3645 if (!Op0Reg)
3646 return false;
3647
3648 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3649 if (!ResultReg)
3650 return false;
3651
3652 updateValueMap(II, ResultReg);
3653 return true;
3654 }
3655 case Intrinsic::sadd_with_overflow:
3656 case Intrinsic::uadd_with_overflow:
3657 case Intrinsic::ssub_with_overflow:
3658 case Intrinsic::usub_with_overflow:
3659 case Intrinsic::smul_with_overflow:
3660 case Intrinsic::umul_with_overflow: {
3661 // This implements the basic lowering of the xalu with overflow intrinsics.
3662 const Function *Callee = II->getCalledFunction();
3663 auto *Ty = cast<StructType>(Callee->getReturnType());
3664 Type *RetTy = Ty->getTypeAtIndex(0U);
3665
3666 MVT VT;
3667 if (!isTypeLegal(RetTy, VT))
3668 return false;
3669
3670 if (VT != MVT::i32 && VT != MVT::i64)
3671 return false;
3672
3673 const Value *LHS = II->getArgOperand(0);
3674 const Value *RHS = II->getArgOperand(1);
3675 // Canonicalize immediate to the RHS.
3676 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3677 std::swap(LHS, RHS);
3678
3679 // Simplify multiplies.
3680 Intrinsic::ID IID = II->getIntrinsicID();
3681 switch (IID) {
3682 default:
3683 break;
3684 case Intrinsic::smul_with_overflow:
3685 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3686 if (C->getValue() == 2) {
3687 IID = Intrinsic::sadd_with_overflow;
3688 RHS = LHS;
3689 }
3690 break;
3691 case Intrinsic::umul_with_overflow:
3692 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3693 if (C->getValue() == 2) {
3694 IID = Intrinsic::uadd_with_overflow;
3695 RHS = LHS;
3696 }
3697 break;
3698 }
3699
3700 Register ResultReg1, ResultReg2, MulReg;
3701 AArch64CC::CondCode CC = AArch64CC::Invalid;
3702 switch (IID) {
3703 default: llvm_unreachable("Unexpected intrinsic!");
3704 case Intrinsic::sadd_with_overflow:
3705 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3706 CC = AArch64CC::VS;
3707 break;
3708 case Intrinsic::uadd_with_overflow:
3709 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3710 CC = AArch64CC::HS;
3711 break;
3712 case Intrinsic::ssub_with_overflow:
3713 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3714 CC = AArch64CC::VS;
3715 break;
3716 case Intrinsic::usub_with_overflow:
3717 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3718 CC = AArch64CC::LO;
3719 break;
3720 case Intrinsic::smul_with_overflow: {
3721 CC = AArch64CC::NE;
3722 Register LHSReg = getRegForValue(LHS);
3723 if (!LHSReg)
3724 return false;
3725
3726 Register RHSReg = getRegForValue(RHS);
3727 if (!RHSReg)
3728 return false;
3729
3730 if (VT == MVT::i32) {
3731 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3732 Register MulSubReg =
3733 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3734 // cmp xreg, wreg, sxtw
3735 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3736 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3737 /*WantResult=*/false);
3738 MulReg = MulSubReg;
3739 } else {
3740 assert(VT == MVT::i64 && "Unexpected value type.");
3741 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3742 // reused in the next instruction.
3743 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3744 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3745 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3746 /*WantResult=*/false);
3747 }
3748 break;
3749 }
3750 case Intrinsic::umul_with_overflow: {
3751 CC = AArch64CC::NE;
3752 Register LHSReg = getRegForValue(LHS);
3753 if (!LHSReg)
3754 return false;
3755
3756 Register RHSReg = getRegForValue(RHS);
3757 if (!RHSReg)
3758 return false;
3759
3760 if (VT == MVT::i32) {
3761 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3762 // tst xreg, #0xffffffff00000000
3763 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3764 TII.get(AArch64::ANDSXri), AArch64::XZR)
3765 .addReg(MulReg)
3766 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3767 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3768 } else {
3769 assert(VT == MVT::i64 && "Unexpected value type.");
3770 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3771 // reused in the next instruction.
3772 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3773 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3774 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3775 }
3776 break;
3777 }
3778 }
3779
3780 if (MulReg) {
3781 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3782 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3783 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3784 }
3785
3786 if (!ResultReg1)
3787 return false;
3788
3789 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3790 AArch64::WZR, AArch64::WZR,
3791 getInvertedCondCode(CC));
3792 (void)ResultReg2;
3793 assert((ResultReg1 + 1) == ResultReg2 &&
3794 "Nonconsecutive result registers.");
3795 updateValueMap(II, ResultReg1, 2);
3796 return true;
3797 }
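// Rough shape of the code emitted for two common cases above (illustrative
// only; w0/w1/x0 stand in for virtual registers):
//   sadd.with.overflow.i32:  adds w0, wA, wB         // value + flags
//                            cset w1, vs             // overflow bit (CSINC)
//   umul.with.overflow.i32:  umull x0, wA, wB
//                            tst  x0, #0xffffffff00000000
//                            cset w1, ne
// The trailing CSINC with the inverted condition code is what materializes
// the i1 overflow result in the second result register.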
3798 case Intrinsic::aarch64_crc32b:
3799 case Intrinsic::aarch64_crc32h:
3800 case Intrinsic::aarch64_crc32w:
3801 case Intrinsic::aarch64_crc32x:
3802 case Intrinsic::aarch64_crc32cb:
3803 case Intrinsic::aarch64_crc32ch:
3804 case Intrinsic::aarch64_crc32cw:
3805 case Intrinsic::aarch64_crc32cx: {
3806 if (!Subtarget->hasCRC())
3807 return false;
3808
3809 unsigned Opc;
3810 switch (II->getIntrinsicID()) {
3811 default:
3812 llvm_unreachable("Unexpected intrinsic!");
3813 case Intrinsic::aarch64_crc32b:
3814 Opc = AArch64::CRC32Brr;
3815 break;
3816 case Intrinsic::aarch64_crc32h:
3817 Opc = AArch64::CRC32Hrr;
3818 break;
3819 case Intrinsic::aarch64_crc32w:
3820 Opc = AArch64::CRC32Wrr;
3821 break;
3822 case Intrinsic::aarch64_crc32x:
3823 Opc = AArch64::CRC32Xrr;
3824 break;
3825 case Intrinsic::aarch64_crc32cb:
3826 Opc = AArch64::CRC32CBrr;
3827 break;
3828 case Intrinsic::aarch64_crc32ch:
3829 Opc = AArch64::CRC32CHrr;
3830 break;
3831 case Intrinsic::aarch64_crc32cw:
3832 Opc = AArch64::CRC32CWrr;
3833 break;
3834 case Intrinsic::aarch64_crc32cx:
3835 Opc = AArch64::CRC32CXrr;
3836 break;
3837 }
3838
3839 Register LHSReg = getRegForValue(II->getArgOperand(0));
3840 Register RHSReg = getRegForValue(II->getArgOperand(1));
3841 if (!LHSReg || !RHSReg)
3842 return false;
3843
3844 Register ResultReg =
3845 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3846 updateValueMap(II, ResultReg);
3847 return true;
3848 }
3849 }
3850 return false;
3851}
3852
3853bool AArch64FastISel::selectRet(const Instruction *I) {
3854 const ReturnInst *Ret = cast<ReturnInst>(I);
3855 const Function &F = *I->getParent()->getParent();
3856
3857 if (!FuncInfo.CanLowerReturn)
3858 return false;
3859
3860 if (F.isVarArg())
3861 return false;
3862
3863 if (TLI.supportSwiftError() &&
3864 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3865 return false;
3866
3867 if (TLI.supportSplitCSR(FuncInfo.MF))
3868 return false;
3869
3870 // Build a list of return value registers.
3871 SmallVector<Register, 4> RetRegs;
3872
3873 if (Ret->getNumOperands() > 0) {
3874 CallingConv::ID CC = F.getCallingConv();
3875 SmallVector<ISD::OutputArg, 4> Outs;
3876 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3877
3878 // Analyze operands of the call, assigning locations to each operand.
3879 SmallVector<CCValAssign, 16> ValLocs;
3880 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3881 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3882
3883 // Only handle a single return value for now.
3884 if (ValLocs.size() != 1)
3885 return false;
3886
3887 CCValAssign &VA = ValLocs[0];
3888 const Value *RV = Ret->getOperand(0);
3889
3890 // Don't bother handling odd stuff for now.
3891 if ((VA.getLocInfo() != CCValAssign::Full) &&
3892 (VA.getLocInfo() != CCValAssign::BCvt))
3893 return false;
3894
3895 // Only handle register returns for now.
3896 if (!VA.isRegLoc())
3897 return false;
3898
3899 Register Reg = getRegForValue(RV);
3900 if (!Reg)
3901 return false;
3902
3903 Register SrcReg = Reg + VA.getValNo();
3904 Register DestReg = VA.getLocReg();
3905 // Avoid a cross-class copy. This is very unlikely.
3906 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3907 return false;
3908
3909 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3910 if (!RVEVT.isSimple())
3911 return false;
3912
3913 // Vectors (of > 1 lane) in big endian need tricky handling.
3914 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3915 !Subtarget->isLittleEndian())
3916 return false;
3917
3918 MVT RVVT = RVEVT.getSimpleVT();
3919 if (RVVT == MVT::f128)
3920 return false;
3921
3922 MVT DestVT = VA.getValVT();
3923 // Special handling for extended integers.
3924 if (RVVT != DestVT) {
3925 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3926 return false;
3927
3928 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3929 return false;
3930
3931 bool IsZExt = Outs[0].Flags.isZExt();
3932 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3933 if (!SrcReg)
3934 return false;
3935 }
3936
3937 // "Callee" (i.e. value producer) zero extends pointers at function
3938 // boundary.
3939 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3940 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3941
3942 // Make the copy.
3943 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3944 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3945
3946 // Add register to return instruction.
3947 RetRegs.push_back(VA.getLocReg());
3948 }
3949
3950 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3951 TII.get(AArch64::RET_ReallyLR));
3952 for (Register RetReg : RetRegs)
3953 MIB.addReg(RetReg, RegState::Implicit);
3954 return true;
3955}
3956
3957bool AArch64FastISel::selectTrunc(const Instruction *I) {
3958 Type *DestTy = I->getType();
3959 Value *Op = I->getOperand(0);
3960 Type *SrcTy = Op->getType();
3961
3962 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3963 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3964 if (!SrcEVT.isSimple())
3965 return false;
3966 if (!DestEVT.isSimple())
3967 return false;
3968
3969 MVT SrcVT = SrcEVT.getSimpleVT();
3970 MVT DestVT = DestEVT.getSimpleVT();
3971
3972 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3973 SrcVT != MVT::i8)
3974 return false;
3975 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3976 DestVT != MVT::i1)
3977 return false;
3978
3979 Register SrcReg = getRegForValue(Op);
3980 if (!SrcReg)
3981 return false;
3982
3983 // If we're truncating from i64 to a smaller non-legal type then generate an
3984 // AND. Otherwise, we know the high bits are undefined and a truncate only
3985 // generates a COPY. We cannot also mark the source register as the result
3986 // register, because this can incorrectly transfer the kill flag onto the
3987 // source register.
3988 Register ResultReg;
3989 if (SrcVT == MVT::i64) {
3990 uint64_t Mask = 0;
3991 switch (DestVT.SimpleTy) {
3992 default:
3993 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3994 return false;
3995 case MVT::i1:
3996 Mask = 0x1;
3997 break;
3998 case MVT::i8:
3999 Mask = 0xff;
4000 break;
4001 case MVT::i16:
4002 Mask = 0xffff;
4003 break;
4004 }
4005 // Issue an extract_subreg to get the lower 32-bits.
4006 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
4007 AArch64::sub_32);
4008 // Create the AND instruction which performs the actual truncation.
4009 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4010 assert(ResultReg && "Unexpected AND instruction emission failure.");
4011 } else {
4012 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4013 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4014 TII.get(TargetOpcode::COPY), ResultReg)
4015 .addReg(SrcReg);
4016 }
4017
4018 updateValueMap(I, ResultReg);
4019 return true;
4020}
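// Example of the i64 path above (a sketch; register names are placeholders):
// truncating an i64 value to i8 extracts the sub_32 register and masks it,
//   and w0, w1, #0xff
// while truncating from i32 or narrower just issues a COPY, since the high
// bits are already irrelevant to any legal user of the result.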
4021
4022Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4023 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4024 DestVT == MVT::i64) &&
4025 "Unexpected value type.");
4026 // Handle i8 and i16 as i32.
4027 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4028 DestVT = MVT::i32;
4029
4030 if (IsZExt) {
4031 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4032 assert(ResultReg && "Unexpected AND instruction emission failure.");
4033 if (DestVT == MVT::i64) {
4034 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4035 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4036 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4038 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4039 .addImm(0)
4040 .addReg(ResultReg)
4041 .addImm(AArch64::sub_32);
4042 ResultReg = Reg64;
4043 }
4044 return ResultReg;
4045 } else {
4046 if (DestVT == MVT::i64) {
4047 // FIXME: We're SExt i1 to i64.
4048 return Register();
4049 }
4050 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4051 0, 0);
4052 }
4053}
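// Quick reference for the two paths above (illustrative): a zero-extended i1
// becomes "and w0, w0, #0x1" (plus SUBREG_TO_REG when widening to i64), and a
// sign-extended i1 to i32 becomes SBFMWri with immr=0 and imms=0, i.e.
// "sbfx w0, w0, #0, #1".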
4054
4055Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4056 unsigned Opc;
4057 Register ZReg;
4058 switch (RetVT.SimpleTy) {
4059 default:
4060 return Register();
4061 case MVT::i8:
4062 case MVT::i16:
4063 case MVT::i32:
4064 RetVT = MVT::i32;
4065 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4066 case MVT::i64:
4067 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4068 }
4069
4070 const TargetRegisterClass *RC =
4071 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4072 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4073}
4074
4075Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4076 if (RetVT != MVT::i64)
4077 return Register();
4078
4079 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4080 Op0, Op1, AArch64::XZR);
4081}
4082
4083Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4084 if (RetVT != MVT::i64)
4085 return Register();
4086
4087 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4088 Op0, Op1, AArch64::XZR);
4089}
4090
4091Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4092 Register Op1Reg) {
4093 unsigned Opc = 0;
4094 bool NeedTrunc = false;
4095 uint64_t Mask = 0;
4096 switch (RetVT.SimpleTy) {
4097 default:
4098 return Register();
4099 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4100 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4101 case MVT::i32: Opc = AArch64::LSLVWr; break;
4102 case MVT::i64: Opc = AArch64::LSLVXr; break;
4103 }
4104
4105 const TargetRegisterClass *RC =
4106 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4107 if (NeedTrunc)
4108 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4109
4110 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4111 if (NeedTrunc)
4112 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4113 return ResultReg;
4114}
4115
4116Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4117 uint64_t Shift, bool IsZExt) {
4118 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4119 "Unexpected source/return type pair.");
4120 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4121 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4122 "Unexpected source value type.");
4123 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4124 RetVT == MVT::i64) && "Unexpected return value type.");
4125
4126 bool Is64Bit = (RetVT == MVT::i64);
4127 unsigned RegSize = Is64Bit ? 64 : 32;
4128 unsigned DstBits = RetVT.getSizeInBits();
4129 unsigned SrcBits = SrcVT.getSizeInBits();
4130 const TargetRegisterClass *RC =
4131 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4132
4133 // Just emit a copy for "zero" shifts.
4134 if (Shift == 0) {
4135 if (RetVT == SrcVT) {
4136 Register ResultReg = createResultReg(RC);
4137 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4138 TII.get(TargetOpcode::COPY), ResultReg)
4139 .addReg(Op0);
4140 return ResultReg;
4141 } else
4142 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4143 }
4144
4145 // Don't deal with undefined shifts.
4146 if (Shift >= DstBits)
4147 return Register();
4148
4149 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4150 // {S|U}BFM Wd, Wn, #r, #s
4151 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4152
4153 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4154 // %2 = shl i16 %1, 4
4155 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4156 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4157 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4158 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4159
4160 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4161 // %2 = shl i16 %1, 8
4162 // Wd<32+7-24,32-24> = Wn<7:0>
4163 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4164 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4165 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4166
4167 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4168 // %2 = shl i16 %1, 12
4169 // Wd<32+3-20,32-20> = Wn<3:0>
4170 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4171 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4172 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4173
4174 unsigned ImmR = RegSize - Shift;
4175 // Limit the width to the length of the source type.
4176 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4177 static const unsigned OpcTable[2][2] = {
4178 {AArch64::SBFMWri, AArch64::SBFMXri},
4179 {AArch64::UBFMWri, AArch64::UBFMXri}
4180 };
4181 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4182 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4183 Register TmpReg = MRI.createVirtualRegister(RC);
4184 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4185 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4186 .addImm(0)
4187 .addReg(Op0)
4188 .addImm(AArch64::sub_32);
4189 Op0 = TmpReg;
4190 }
4191 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4192}
4193
4194Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4195 Register Op1Reg) {
4196 unsigned Opc = 0;
4197 bool NeedTrunc = false;
4198 uint64_t Mask = 0;
4199 switch (RetVT.SimpleTy) {
4200 default:
4201 return Register();
4202 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4203 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4204 case MVT::i32: Opc = AArch64::LSRVWr; break;
4205 case MVT::i64: Opc = AArch64::LSRVXr; break;
4206 }
4207
4208 const TargetRegisterClass *RC =
4209 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4210 if (NeedTrunc) {
4211 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4212 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4213 }
4214 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4215 if (NeedTrunc)
4216 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4217 return ResultReg;
4218}
4219
4220Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4221 uint64_t Shift, bool IsZExt) {
4222 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4223 "Unexpected source/return type pair.");
4224 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4225 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4226 "Unexpected source value type.");
4227 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4228 RetVT == MVT::i64) && "Unexpected return value type.");
4229
4230 bool Is64Bit = (RetVT == MVT::i64);
4231 unsigned RegSize = Is64Bit ? 64 : 32;
4232 unsigned DstBits = RetVT.getSizeInBits();
4233 unsigned SrcBits = SrcVT.getSizeInBits();
4234 const TargetRegisterClass *RC =
4235 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4236
4237 // Just emit a copy for "zero" shifts.
4238 if (Shift == 0) {
4239 if (RetVT == SrcVT) {
4240 Register ResultReg = createResultReg(RC);
4241 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4242 TII.get(TargetOpcode::COPY), ResultReg)
4243 .addReg(Op0);
4244 return ResultReg;
4245 } else
4246 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4247 }
4248
4249 // Don't deal with undefined shifts.
4250 if (Shift >= DstBits)
4251 return Register();
4252
4253 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4254 // {S|U}BFM Wd, Wn, #r, #s
4255 // Wd<s-r:0> = Wn<s:r> when r <= s
4256
4257 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4258 // %2 = lshr i16 %1, 4
4259 // Wd<7-4:0> = Wn<7:4>
4260 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4261 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4262 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4263
4264 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4265 // %2 = lshr i16 %1, 8
4266 // Wd<7-7,0> = Wn<7:7>
4267 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4268 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4269 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4270
4271 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4272 // %2 = lshr i16 %1, 12
4273 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4274 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4275 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4276 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4277
4278 if (Shift >= SrcBits && IsZExt)
4279 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4280
4281 // It is not possible to fold a sign-extend into the LShr instruction. In this
4282 // case emit a sign-extend.
4283 if (!IsZExt) {
4284 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4285 if (!Op0)
4286 return Register();
4287 SrcVT = RetVT;
4288 SrcBits = SrcVT.getSizeInBits();
4289 IsZExt = true;
4290 }
4291
4292 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4293 unsigned ImmS = SrcBits - 1;
4294 static const unsigned OpcTable[2][2] = {
4295 {AArch64::SBFMWri, AArch64::SBFMXri},
4296 {AArch64::UBFMWri, AArch64::UBFMXri}
4297 };
4298 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4299 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4300 Register TmpReg = MRI.createVirtualRegister(RC);
4301 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4302 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4303 .addImm(0)
4304 .addReg(Op0)
4305 .addImm(AArch64::sub_32);
4306 Op0 = TmpReg;
4307 }
4308 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4309}
4310
4311Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4312 Register Op1Reg) {
4313 unsigned Opc = 0;
4314 bool NeedTrunc = false;
4315 uint64_t Mask = 0;
4316 switch (RetVT.SimpleTy) {
4317 default:
4318 return Register();
4319 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4320 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4321 case MVT::i32: Opc = AArch64::ASRVWr; break;
4322 case MVT::i64: Opc = AArch64::ASRVXr; break;
4323 }
4324
4325 const TargetRegisterClass *RC =
4326 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4327 if (NeedTrunc) {
4328 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4329 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4330 }
4331 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4332 if (NeedTrunc)
4333 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4334 return ResultReg;
4335}
4336
4337Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4338 uint64_t Shift, bool IsZExt) {
4339 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4340 "Unexpected source/return type pair.");
4341 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4342 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4343 "Unexpected source value type.");
4344 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4345 RetVT == MVT::i64) && "Unexpected return value type.");
4346
4347 bool Is64Bit = (RetVT == MVT::i64);
4348 unsigned RegSize = Is64Bit ? 64 : 32;
4349 unsigned DstBits = RetVT.getSizeInBits();
4350 unsigned SrcBits = SrcVT.getSizeInBits();
4351 const TargetRegisterClass *RC =
4352 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4353
4354 // Just emit a copy for "zero" shifts.
4355 if (Shift == 0) {
4356 if (RetVT == SrcVT) {
4357 Register ResultReg = createResultReg(RC);
4358 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4359 TII.get(TargetOpcode::COPY), ResultReg)
4360 .addReg(Op0);
4361 return ResultReg;
4362 } else
4363 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4364 }
4365
4366 // Don't deal with undefined shifts.
4367 if (Shift >= DstBits)
4368 return Register();
4369
4370 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4371 // {S|U}BFM Wd, Wn, #r, #s
4372 // Wd<s-r:0> = Wn<s:r> when r <= s
4373
4374 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4375 // %2 = ashr i16 %1, 4
4376 // Wd<7-4:0> = Wn<7:4>
4377 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4378 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4379 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4380
4381 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4382 // %2 = ashr i16 %1, 8
4383 // Wd<7-7,0> = Wn<7:7>
4384 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4385 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4386 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4387
4388 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4389 // %2 = ashr i16 %1, 12
4390 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4391 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4392 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4393 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4394
4395 if (Shift >= SrcBits && IsZExt)
4396 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4397
4398 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4399 unsigned ImmS = SrcBits - 1;
4400 static const unsigned OpcTable[2][2] = {
4401 {AArch64::SBFMWri, AArch64::SBFMXri},
4402 {AArch64::UBFMWri, AArch64::UBFMXri}
4403 };
4404 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4405 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4406 Register TmpReg = MRI.createVirtualRegister(RC);
4407 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4408 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4409 .addImm(0)
4410 .addReg(Op0)
4411 .addImm(AArch64::sub_32);
4412 Op0 = TmpReg;
4413 }
4414 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4415}
4416
4417Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4418 bool IsZExt) {
4419 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4420
4421 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4422 // DestVT are odd things, so test to make sure that they are both types we can
4423 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4424 // bail out to SelectionDAG.
4425 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4426 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4427 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4428 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4429 return Register();
4430
4431 unsigned Opc;
4432 unsigned Imm = 0;
4433
4434 switch (SrcVT.SimpleTy) {
4435 default:
4436 return Register();
4437 case MVT::i1:
4438 return emiti1Ext(SrcReg, DestVT, IsZExt);
4439 case MVT::i8:
4440 if (DestVT == MVT::i64)
4441 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4442 else
4443 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4444 Imm = 7;
4445 break;
4446 case MVT::i16:
4447 if (DestVT == MVT::i64)
4448 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4449 else
4450 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4451 Imm = 15;
4452 break;
4453 case MVT::i32:
4454 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4455 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4456 Imm = 31;
4457 break;
4458 }
4459
4460 // Handle i8 and i16 as i32.
4461 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4462 DestVT = MVT::i32;
4463 else if (DestVT == MVT::i64) {
4464 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4465 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4466 TII.get(AArch64::SUBREG_TO_REG), Src64)
4467 .addImm(0)
4468 .addReg(SrcReg)
4469 .addImm(AArch64::sub_32);
4470 SrcReg = Src64;
4471 }
4472
4473 const TargetRegisterClass *RC =
4474 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4475 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4476}
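// Summary of the encodings chosen above (illustrative): with immr=0 and the
// imms value picked per source type, UBFM/SBFM act as plain extends, e.g.
//   zext i8  -> i32 : ubfm w0, w0, #0, #7    (uxtb)
//   sext i16 -> i64 : sbfm x0, x0, #0, #15   (sxth, after SUBREG_TO_REG)
//   sext i32 -> i64 : sbfm x0, x0, #0, #31   (sxtw)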
4477
4478static bool isZExtLoad(const MachineInstr *LI) {
4479 switch (LI->getOpcode()) {
4480 default:
4481 return false;
4482 case AArch64::LDURBBi:
4483 case AArch64::LDURHHi:
4484 case AArch64::LDURWi:
4485 case AArch64::LDRBBui:
4486 case AArch64::LDRHHui:
4487 case AArch64::LDRWui:
4488 case AArch64::LDRBBroX:
4489 case AArch64::LDRHHroX:
4490 case AArch64::LDRWroX:
4491 case AArch64::LDRBBroW:
4492 case AArch64::LDRHHroW:
4493 case AArch64::LDRWroW:
4494 return true;
4495 }
4496}
4497
4498static bool isSExtLoad(const MachineInstr *LI) {
4499 switch (LI->getOpcode()) {
4500 default:
4501 return false;
4502 case AArch64::LDURSBWi:
4503 case AArch64::LDURSHWi:
4504 case AArch64::LDURSBXi:
4505 case AArch64::LDURSHXi:
4506 case AArch64::LDURSWi:
4507 case AArch64::LDRSBWui:
4508 case AArch64::LDRSHWui:
4509 case AArch64::LDRSBXui:
4510 case AArch64::LDRSHXui:
4511 case AArch64::LDRSWui:
4512 case AArch64::LDRSBWroX:
4513 case AArch64::LDRSHWroX:
4514 case AArch64::LDRSBXroX:
4515 case AArch64::LDRSHXroX:
4516 case AArch64::LDRSWroX:
4517 case AArch64::LDRSBWroW:
4518 case AArch64::LDRSHWroW:
4519 case AArch64::LDRSBXroW:
4520 case AArch64::LDRSHXroW:
4521 case AArch64::LDRSWroW:
4522 return true;
4523 }
4524}
4525
4526bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4527 MVT SrcVT) {
4528 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4529 if (!LI || !LI->hasOneUse())
4530 return false;
4531
4532 // Check if the load instruction has already been selected.
4533 Register Reg = lookUpRegForValue(LI);
4534 if (!Reg)
4535 return false;
4536
4537 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4538 if (!MI)
4539 return false;
4540
4541 // Check if the correct load instruction has been emitted - SelectionDAG might
4542 // have emitted a zero-extending load, but we need a sign-extending load.
4543 bool IsZExt = isa<ZExtInst>(I);
4544 const auto *LoadMI = MI;
4545 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4546 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4547 Register LoadReg = MI->getOperand(1).getReg();
4548 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4549 assert(LoadMI && "Expected valid instruction");
4550 }
4551 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4552 return false;
4553
4554 // Nothing to be done.
4555 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4556 updateValueMap(I, Reg);
4557 return true;
4558 }
4559
4560 if (IsZExt) {
4561 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4562 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4563 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4564 .addImm(0)
4565 .addReg(Reg, getKillRegState(true))
4566 .addImm(AArch64::sub_32);
4567 Reg = Reg64;
4568 } else {
4569 assert((MI->getOpcode() == TargetOpcode::COPY &&
4570 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4571 "Expected copy instruction");
4572 Reg = MI->getOperand(1).getReg();
4573 MachineBasicBlock::iterator I(MI);
4574 removeDeadCode(I, std::next(I));
4575 }
4576 updateValueMap(I, Reg);
4577 return true;
4578}
4579
4580bool AArch64FastISel::selectIntExt(const Instruction *I) {
4582 "Unexpected integer extend instruction.");
4583 MVT RetVT;
4584 MVT SrcVT;
4585 if (!isTypeSupported(I->getType(), RetVT))
4586 return false;
4587
4588 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4589 return false;
4590
4591 // Try to optimize already sign-/zero-extended values from load instructions.
4592 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4593 return true;
4594
4595 Register SrcReg = getRegForValue(I->getOperand(0));
4596 if (!SrcReg)
4597 return false;
4598
4599 // Try to optimize already sign-/zero-extended values from function arguments.
4600 bool IsZExt = isa<ZExtInst>(I);
4601 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4602 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4603 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4604 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4605 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4606 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4607 .addImm(0)
4608 .addReg(SrcReg)
4609 .addImm(AArch64::sub_32);
4610 SrcReg = ResultReg;
4611 }
4612
4613 updateValueMap(I, SrcReg);
4614 return true;
4615 }
4616 }
4617
4618 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4619 if (!ResultReg)
4620 return false;
4621
4622 updateValueMap(I, ResultReg);
4623 return true;
4624}
4625
4626bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4627 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4628 if (!DestEVT.isSimple())
4629 return false;
4630
4631 MVT DestVT = DestEVT.getSimpleVT();
4632 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4633 return false;
4634
4635 unsigned DivOpc;
4636 bool Is64bit = (DestVT == MVT::i64);
4637 switch (ISDOpcode) {
4638 default:
4639 return false;
4640 case ISD::SREM:
4641 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4642 break;
4643 case ISD::UREM:
4644 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4645 break;
4646 }
4647 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4648 Register Src0Reg = getRegForValue(I->getOperand(0));
4649 if (!Src0Reg)
4650 return false;
4651
4652 Register Src1Reg = getRegForValue(I->getOperand(1));
4653 if (!Src1Reg)
4654 return false;
4655
4656 const TargetRegisterClass *RC =
4657 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4658 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4659 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4660 // The remainder is computed as numerator - (quotient * denominator) using the
4661 // MSUB instruction.
4662 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4663 updateValueMap(I, ResultReg);
4664 return true;
4665}
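// Illustrative expansion of "srem i32" under this lowering (registers are
// placeholders):
//   sdiv w2, w0, w1          // quotient
//   msub w0, w2, w1, w0      // remainder = w0 - w2 * w1
// urem uses UDIV instead of SDIV, and the i64 case uses the X-register
// opcodes.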
4666
4667bool AArch64FastISel::selectMul(const Instruction *I) {
4668 MVT VT;
4669 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4670 return false;
4671
4672 if (VT.isVector())
4673 return selectBinaryOp(I, ISD::MUL);
4674
4675 const Value *Src0 = I->getOperand(0);
4676 const Value *Src1 = I->getOperand(1);
4677 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4678 if (C->getValue().isPowerOf2())
4679 std::swap(Src0, Src1);
4680
4681 // Try to simplify to a shift instruction.
4682 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4683 if (C->getValue().isPowerOf2()) {
4684 uint64_t ShiftVal = C->getValue().logBase2();
4685 MVT SrcVT = VT;
4686 bool IsZExt = true;
4687 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4688 if (!isIntExtFree(ZExt)) {
4689 MVT VT;
4690 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4691 SrcVT = VT;
4692 IsZExt = true;
4693 Src0 = ZExt->getOperand(0);
4694 }
4695 }
4696 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4697 if (!isIntExtFree(SExt)) {
4698 MVT VT;
4699 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4700 SrcVT = VT;
4701 IsZExt = false;
4702 Src0 = SExt->getOperand(0);
4703 }
4704 }
4705 }
4706
4707 Register Src0Reg = getRegForValue(Src0);
4708 if (!Src0Reg)
4709 return false;
4710
4711 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4712
4713 if (ResultReg) {
4714 updateValueMap(I, ResultReg);
4715 return true;
4716 }
4717 }
4718
4719 Register Src0Reg = getRegForValue(I->getOperand(0));
4720 if (!Src0Reg)
4721 return false;
4722
4723 Register Src1Reg = getRegForValue(I->getOperand(1));
4724 if (!Src1Reg)
4725 return false;
4726
4727 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4728
4729 if (!ResultReg)
4730 return false;
4731
4732 updateValueMap(I, ResultReg);
4733 return true;
4734}
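// Example of the power-of-two fast path above (a sketch): "mul i32 %x, 8"
// becomes a single left shift emitted via emitLSL_ri, i.e. "lsl w0, w0, #3",
// and when %x comes from a zero-/sign-extend that would otherwise cost an
// instruction, that extend is folded into the resulting UBFM/SBFM as well.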
4735
4736bool AArch64FastISel::selectShift(const Instruction *I) {
4737 MVT RetVT;
4738 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4739 return false;
4740
4741 if (RetVT.isVector())
4742 return selectOperator(I, I->getOpcode());
4743
4744 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4745 Register ResultReg;
4746 uint64_t ShiftVal = C->getZExtValue();
4747 MVT SrcVT = RetVT;
4748 bool IsZExt = I->getOpcode() != Instruction::AShr;
4749 const Value *Op0 = I->getOperand(0);
4750 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4751 if (!isIntExtFree(ZExt)) {
4752 MVT TmpVT;
4753 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4754 SrcVT = TmpVT;
4755 IsZExt = true;
4756 Op0 = ZExt->getOperand(0);
4757 }
4758 }
4759 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4760 if (!isIntExtFree(SExt)) {
4761 MVT TmpVT;
4762 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4763 SrcVT = TmpVT;
4764 IsZExt = false;
4765 Op0 = SExt->getOperand(0);
4766 }
4767 }
4768 }
4769
4770 Register Op0Reg = getRegForValue(Op0);
4771 if (!Op0Reg)
4772 return false;
4773
4774 switch (I->getOpcode()) {
4775 default: llvm_unreachable("Unexpected instruction.");
4776 case Instruction::Shl:
4777 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4778 break;
4779 case Instruction::AShr:
4780 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4781 break;
4782 case Instruction::LShr:
4783 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4784 break;
4785 }
4786 if (!ResultReg)
4787 return false;
4788
4789 updateValueMap(I, ResultReg);
4790 return true;
4791 }
4792
4793 Register Op0Reg = getRegForValue(I->getOperand(0));
4794 if (!Op0Reg)
4795 return false;
4796
4797 Register Op1Reg = getRegForValue(I->getOperand(1));
4798 if (!Op1Reg)
4799 return false;
4800
4801 Register ResultReg;
4802 switch (I->getOpcode()) {
4803 default: llvm_unreachable("Unexpected instruction.");
4804 case Instruction::Shl:
4805 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4806 break;
4807 case Instruction::AShr:
4808 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4809 break;
4810 case Instruction::LShr:
4811 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4812 break;
4813 }
4814
4815 if (!ResultReg)
4816 return false;
4817
4818 updateValueMap(I, ResultReg);
4819 return true;
4820}
4821
4822bool AArch64FastISel::selectBitCast(const Instruction *I) {
4823 MVT RetVT, SrcVT;
4824
4825 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4826 return false;
4827 if (!isTypeLegal(I->getType(), RetVT))
4828 return false;
4829
4830 unsigned Opc;
4831 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4832 Opc = AArch64::FMOVWSr;
4833 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4834 Opc = AArch64::FMOVXDr;
4835 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4836 Opc = AArch64::FMOVSWr;
4837 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4838 Opc = AArch64::FMOVDXr;
4839 else
4840 return false;
4841
4842 const TargetRegisterClass *RC = nullptr;
4843 switch (RetVT.SimpleTy) {
4844 default: llvm_unreachable("Unexpected value type.");
4845 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4846 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4847 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4848 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4849 }
4850 Register Op0Reg = getRegForValue(I->getOperand(0));
4851 if (!Op0Reg)
4852 return false;
4853
4854 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4855 if (!ResultReg)
4856 return false;
4857
4858 updateValueMap(I, ResultReg);
4859 return true;
4860}
4861
4862bool AArch64FastISel::selectFRem(const Instruction *I) {
4863 MVT RetVT;
4864 if (!isTypeLegal(I->getType(), RetVT))
4865 return false;
4866
4867 RTLIB::LibcallImpl LCImpl =
4868 LibcallLowering->getLibcallImpl(RTLIB::getREM(RetVT));
4869 if (LCImpl == RTLIB::Unsupported)
4870 return false;
4871
4872 ArgListTy Args;
4873 Args.reserve(I->getNumOperands());
4874
4875 // Populate the argument list.
4876 for (auto &Arg : I->operands())
4877 Args.emplace_back(Arg);
4878
4879 CallLoweringInfo CLI;
4880 MCContext &Ctx = MF->getContext();
4881 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
4882 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);
4883
4884 CLI.setCallee(DL, Ctx, CC, I->getType(), FuncName, std::move(Args));
4885 if (!lowerCallTo(CLI))
4886 return false;
4887 updateValueMap(I, CLI.ResultReg);
4888 return true;
4889}
4890
4891bool AArch64FastISel::selectSDiv(const Instruction *I) {
4892 MVT VT;
4893 if (!isTypeLegal(I->getType(), VT))
4894 return false;
4895
4896 if (!isa<ConstantInt>(I->getOperand(1)))
4897 return selectBinaryOp(I, ISD::SDIV);
4898
4899 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4900 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4901 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4902 return selectBinaryOp(I, ISD::SDIV);
4903
4904 unsigned Lg2 = C.countr_zero();
4905 Register Src0Reg = getRegForValue(I->getOperand(0));
4906 if (!Src0Reg)
4907 return false;
4908
4909 if (cast<BinaryOperator>(I)->isExact()) {
4910 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4911 if (!ResultReg)
4912 return false;
4913 updateValueMap(I, ResultReg);
4914 return true;
4915 }
4916
4917 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4918 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4919 if (!AddReg)
4920 return false;
4921
4922 // (Src0 < 0) ? Pow2 - 1 : 0;
4923 if (!emitICmp_ri(VT, Src0Reg, 0))
4924 return false;
4925
4926 unsigned SelectOpc;
4927 const TargetRegisterClass *RC;
4928 if (VT == MVT::i64) {
4929 SelectOpc = AArch64::CSELXr;
4930 RC = &AArch64::GPR64RegClass;
4931 } else {
4932 SelectOpc = AArch64::CSELWr;
4933 RC = &AArch64::GPR32RegClass;
4934 }
4935 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4936 AArch64CC::LT);
4937 if (!SelectReg)
4938 return false;
4939
4940 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4941 // negate the result.
4942 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4943 Register ResultReg;
4944 if (C.isNegative())
4945 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4946 AArch64_AM::ASR, Lg2);
4947 else
4948 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4949
4950 if (!ResultReg)
4951 return false;
4952
4953 updateValueMap(I, ResultReg);
4954 return true;
4955}
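// Rough shape of the signed divide-by-power-of-two sequence built above
// (illustrative registers, dividing i32 %x by 8):
//   add  w1, w0, #7          // x + (2^3 - 1)
//   cmp  w0, #0
//   csel w1, w1, w0, lt      // use the biased value only when x < 0
//   asr  w0, w1, #3          // negative divisors instead emit neg ..., asr #3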
4956
4957/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4958/// have to duplicate it for AArch64, because otherwise we would fail during the
4959/// sign-extend emission.
4960Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4961 Register IdxN = getRegForValue(Idx);
4962 if (!IdxN)
4963 // Unhandled operand. Halt "fast" selection and bail.
4964 return Register();
4965
4966 // If the index is smaller or larger than intptr_t, truncate or extend it.
4967 MVT PtrVT = TLI.getPointerTy(DL);
4968 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4969 if (IdxVT.bitsLT(PtrVT)) {
4970 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4971 } else if (IdxVT.bitsGT(PtrVT))
4972 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4973 return IdxN;
4974}
4975
4976/// This is mostly a copy of the existing FastISel GEP code, but we have to
4977/// duplicate it for AArch64, because otherwise we would bail out even for
4978/// simple cases. This is because the standard fastEmit functions don't cover
4979/// MUL at all and ADD is lowered very inefficiently.
4980bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4981 if (Subtarget->isTargetILP32())
4982 return false;
4983
4984 Register N = getRegForValue(I->getOperand(0));
4985 if (!N)
4986 return false;
4987
4988 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4989 // into a single N = N + TotalOffset.
4990 uint64_t TotalOffs = 0;
4991 MVT VT = TLI.getPointerTy(DL);
4992 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4993 GTI != E; ++GTI) {
4994 const Value *Idx = GTI.getOperand();
4995 if (auto *StTy = GTI.getStructTypeOrNull()) {
4996 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4997 // N = N + Offset
4998 if (Field)
4999 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
5000 } else {
5001 // If this is a constant subscript, handle it quickly.
5002 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5003 if (CI->isZero())
5004 continue;
5005 // N = N + Offset
5006 TotalOffs += GTI.getSequentialElementStride(DL) *
5007 cast<ConstantInt>(CI)->getSExtValue();
5008 continue;
5009 }
5010 if (TotalOffs) {
5011 N = emitAdd_ri_(VT, N, TotalOffs);
5012 if (!N)
5013 return false;
5014 TotalOffs = 0;
5015 }
5016
5017 // N = N + Idx * ElementSize;
5018 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5019 Register IdxN = getRegForGEPIndex(Idx);
5020 if (!IdxN)
5021 return false;
5022
5023 if (ElementSize != 1) {
5024 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5025 if (!C)
5026 return false;
5027 IdxN = emitMul_rr(VT, IdxN, C);
5028 if (!IdxN)
5029 return false;
5030 }
5031 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5032 if (!N)
5033 return false;
5034 }
5035 }
5036 if (TotalOffs) {
5037 N = emitAdd_ri_(VT, N, TotalOffs);
5038 if (!N)
5039 return false;
5040 }
5041 updateValueMap(I, N);
5042 return true;
5043}
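// Worked example of the accumulation above (a sketch): for
//   %p = getelementptr i32, ptr %base, i64 %i
// the element stride is 4, so a constant 4 is materialized, multiplied with
// the index register via emitMul_rr, and added to the base, while purely
// constant indices are folded into the running TotalOffs and emitted as a
// single trailing ADD.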
5044
5045bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5046 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5047 "cmpxchg survived AtomicExpand at optlevel > -O0");
5048
5049 auto *RetPairTy = cast<StructType>(I->getType());
5050 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5051 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5052 "cmpxchg has a non-i1 status result");
5053
5054 MVT VT;
5055 if (!isTypeLegal(RetTy, VT))
5056 return false;
5057
5058 const TargetRegisterClass *ResRC;
5059 unsigned Opc, CmpOpc;
5060 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5061 // extractvalue selection doesn't support that.
5062 if (VT == MVT::i32) {
5063 Opc = AArch64::CMP_SWAP_32;
5064 CmpOpc = AArch64::SUBSWrs;
5065 ResRC = &AArch64::GPR32RegClass;
5066 } else if (VT == MVT::i64) {
5067 Opc = AArch64::CMP_SWAP_64;
5068 CmpOpc = AArch64::SUBSXrs;
5069 ResRC = &AArch64::GPR64RegClass;
5070 } else {
5071 return false;
5072 }
5073
5074 const MCInstrDesc &II = TII.get(Opc);
5075
5076 Register AddrReg = getRegForValue(I->getPointerOperand());
5077 Register DesiredReg = getRegForValue(I->getCompareOperand());
5078 Register NewReg = getRegForValue(I->getNewValOperand());
5079
5080 if (!AddrReg || !DesiredReg || !NewReg)
5081 return false;
5082
5083 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5084 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5085 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5086
5087 const Register ResultReg1 = createResultReg(ResRC);
5088 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5089 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5090
5091 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5093 .addDef(ResultReg1)
5094 .addDef(ScratchReg)
5095 .addUse(AddrReg)
5096 .addUse(DesiredReg)
5097 .addUse(NewReg);
5098
5099 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5100 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5101 .addUse(ResultReg1)
5102 .addUse(DesiredReg)
5103 .addImm(0);
5104
5105 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5106 .addDef(ResultReg2)
5107 .addUse(AArch64::WZR)
5108 .addUse(AArch64::WZR)
5109 .addImm(AArch64CC::NE);
5110
5111 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5112 updateValueMap(I, ResultReg1, 2);
5113 return true;
5114}
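// Note (informal): CMP_SWAP_32/64 are pseudo-instructions that are expanded
// later (in the pseudo-expansion pass) into a load-exclusive/store-exclusive
// retry loop; the explicit SUBS + CSINC emitted here only rebuilds the i1
// success flag that the cmpxchg result struct expects in the second result
// register.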
5115
5116bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5117 if (TLI.fallBackToDAGISel(*I))
5118 return false;
5119 switch (I->getOpcode()) {
5120 default:
5121 break;
5122 case Instruction::Add:
5123 case Instruction::Sub:
5124 return selectAddSub(I);
5125 case Instruction::Mul:
5126 return selectMul(I);
5127 case Instruction::SDiv:
5128 return selectSDiv(I);
5129 case Instruction::SRem:
5130 if (!selectBinaryOp(I, ISD::SREM))
5131 return selectRem(I, ISD::SREM);
5132 return true;
5133 case Instruction::URem:
5134 if (!selectBinaryOp(I, ISD::UREM))
5135 return selectRem(I, ISD::UREM);
5136 return true;
5137 case Instruction::Shl:
5138 case Instruction::LShr:
5139 case Instruction::AShr:
5140 return selectShift(I);
5141 case Instruction::And:
5142 case Instruction::Or:
5143 case Instruction::Xor:
5144 return selectLogicalOp(I);
5145 case Instruction::Br:
5146 return selectBranch(I);
5147 case Instruction::IndirectBr:
5148 return selectIndirectBr(I);
5149 case Instruction::BitCast:
5150 if (!selectCast(I, ISD::BITCAST))
5151 return selectBitCast(I);
5152 return true;
5153 case Instruction::FPToSI:
5154 if (!selectCast(I, ISD::FP_TO_SINT))
5155 return selectFPToInt(I, /*Signed=*/true);
5156 return true;
5157 case Instruction::FPToUI:
5158 return selectFPToInt(I, /*Signed=*/false);
5159 case Instruction::ZExt:
5160 case Instruction::SExt:
5161 return selectIntExt(I);
5162 case Instruction::Trunc:
5163 if (!selectCast(I, ISD::TRUNCATE))
5164 return selectTrunc(I);
5165 return true;
5166 case Instruction::FPExt:
5167 return selectFPExt(I);
5168 case Instruction::FPTrunc:
5169 return selectFPTrunc(I);
5170 case Instruction::SIToFP:
5171 if (!selectCast(I, ISD::SINT_TO_FP))
5172 return selectIntToFP(I, /*Signed=*/true);
5173 return true;
5174 case Instruction::UIToFP:
5175 return selectIntToFP(I, /*Signed=*/false);
5176 case Instruction::Load:
5177 return selectLoad(I);
5178 case Instruction::Store:
5179 return selectStore(I);
5180 case Instruction::FCmp:
5181 case Instruction::ICmp:
5182 return selectCmp(I);
5183 case Instruction::Select:
5184 return selectSelect(I);
5185 case Instruction::Ret:
5186 return selectRet(I);
5187 case Instruction::FRem:
5188 return selectFRem(I);
5189 case Instruction::GetElementPtr:
5190 return selectGetElementPtr(I);
5191 case Instruction::AtomicCmpXchg:
5192 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5193 }
5194
5195 // fall-back to target-independent instruction selection.
5196 return selectOperator(I, I->getOpcode());
5197}
5198
5199FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5200 const TargetLibraryInfo *LibInfo,
5201 const LibcallLoweringInfo *LibcallLowering) {
5202
5203 SMEAttrs CallerAttrs =
5204 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5205 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5206 CallerAttrs.hasStreamingInterfaceOrBody() ||
5207 CallerAttrs.hasStreamingCompatibleInterface() ||
5208 CallerAttrs.hasAgnosticZAInterface())
5209 return nullptr;
5210 return new AArch64FastISel(FuncInfo, LibInfo, LibcallLowering);
5211}
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
const APFloat & getValueAPF() const
Definition Constants.h:325
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:332
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:329
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Tracks which library functions to use for a particular subtarget.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:100
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:264
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
LLVM_ABI Libcall getREM(EVT VT)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ User
could "use" a pointer
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned getKillRegState(bool B)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.