LLVM 19.0.0git
AArch64FastISel.cpp
Go to the documentation of this file.
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
65#include "llvm/MC/MCSymbol.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <utility>
77
78using namespace llvm;
79
80namespace {
81
82class AArch64FastISel final : public FastISel {
83 class Address {
84 public:
85 using BaseKind = enum {
86 RegBase,
87 FrameIndexBase
88 };
89
90 private:
91 BaseKind Kind = RegBase;
93 union {
94 unsigned Reg;
95 int FI;
96 } Base;
97 unsigned OffsetReg = 0;
98 unsigned Shift = 0;
99 int64_t Offset = 0;
100 const GlobalValue *GV = nullptr;
101
102 public:
103 Address() { Base.Reg = 0; }
104
105 void setKind(BaseKind K) { Kind = K; }
106 BaseKind getKind() const { return Kind; }
107 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
108 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
109 bool isRegBase() const { return Kind == RegBase; }
110 bool isFIBase() const { return Kind == FrameIndexBase; }
111
112 void setReg(unsigned Reg) {
113 assert(isRegBase() && "Invalid base register access!");
114 Base.Reg = Reg;
115 }
116
117 unsigned getReg() const {
118 assert(isRegBase() && "Invalid base register access!");
119 return Base.Reg;
120 }
121
122 void setOffsetReg(unsigned Reg) {
123 OffsetReg = Reg;
124 }
125
126 unsigned getOffsetReg() const {
127 return OffsetReg;
128 }
129
130 void setFI(unsigned FI) {
131 assert(isFIBase() && "Invalid base frame index access!");
132 Base.FI = FI;
133 }
134
135 unsigned getFI() const {
136 assert(isFIBase() && "Invalid base frame index access!");
137 return Base.FI;
138 }
139
140 void setOffset(int64_t O) { Offset = O; }
141 int64_t getOffset() { return Offset; }
142 void setShift(unsigned S) { Shift = S; }
143 unsigned getShift() { return Shift; }
144
145 void setGlobalValue(const GlobalValue *G) { GV = G; }
146 const GlobalValue *getGlobalValue() { return GV; }
147 };
148
149 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
150 /// make the right decision when generating code for different targets.
151 const AArch64Subtarget *Subtarget;
152 LLVMContext *Context;
153
154 bool fastLowerArguments() override;
155 bool fastLowerCall(CallLoweringInfo &CLI) override;
156 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
157
158private:
159 // Selection routines.
160 bool selectAddSub(const Instruction *I);
161 bool selectLogicalOp(const Instruction *I);
162 bool selectLoad(const Instruction *I);
163 bool selectStore(const Instruction *I);
164 bool selectBranch(const Instruction *I);
165 bool selectIndirectBr(const Instruction *I);
166 bool selectCmp(const Instruction *I);
167 bool selectSelect(const Instruction *I);
168 bool selectFPExt(const Instruction *I);
169 bool selectFPTrunc(const Instruction *I);
170 bool selectFPToInt(const Instruction *I, bool Signed);
171 bool selectIntToFP(const Instruction *I, bool Signed);
172 bool selectRem(const Instruction *I, unsigned ISDOpcode);
173 bool selectRet(const Instruction *I);
174 bool selectTrunc(const Instruction *I);
175 bool selectIntExt(const Instruction *I);
176 bool selectMul(const Instruction *I);
177 bool selectShift(const Instruction *I);
178 bool selectBitCast(const Instruction *I);
179 bool selectFRem(const Instruction *I);
180 bool selectSDiv(const Instruction *I);
181 bool selectGetElementPtr(const Instruction *I);
182 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
183
184 // Utility helper routines.
185 bool isTypeLegal(Type *Ty, MVT &VT);
186 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
187 bool isValueAvailable(const Value *V) const;
188 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
189 bool computeCallAddress(const Value *V, Address &Addr);
190 bool simplifyAddress(Address &Addr, MVT VT);
191 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
193 unsigned ScaleFactor, MachineMemOperand *MMO);
194 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
195 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
196 MaybeAlign Alignment);
197 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
198 const Value *Cond);
199 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
200 bool optimizeSelect(const SelectInst *SI);
201 unsigned getRegForGEPIndex(const Value *Idx);
202
203 // Emit helper routines.
204 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
205 const Value *RHS, bool SetFlags = false,
206 bool WantResult = true, bool IsZExt = false);
207 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
208 unsigned RHSReg, bool SetFlags = false,
209 bool WantResult = true);
210 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
211 uint64_t Imm, bool SetFlags = false,
212 bool WantResult = true);
213 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
214 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
215 uint64_t ShiftImm, bool SetFlags = false,
216 bool WantResult = true);
217 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
218 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
219 uint64_t ShiftImm, bool SetFlags = false,
220 bool WantResult = true);
221
222 // Emit functions.
223 bool emitCompareAndBranch(const BranchInst *BI);
224 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
227 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229 MachineMemOperand *MMO = nullptr);
230 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231 MachineMemOperand *MMO = nullptr);
232 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233 MachineMemOperand *MMO = nullptr);
234 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237 bool SetFlags = false, bool WantResult = true,
238 bool IsZExt = false);
239 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
240 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241 bool SetFlags = false, bool WantResult = true,
242 bool IsZExt = false);
243 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244 bool WantResult = true);
245 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
246 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247 bool WantResult = true);
248 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249 const Value *RHS);
250 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251 uint64_t Imm);
252 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253 unsigned RHSReg, uint64_t ShiftImm);
254 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
255 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
258 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260 bool IsZExt = true);
261 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263 bool IsZExt = true);
264 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
265 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
266 bool IsZExt = false);
267
268 unsigned materializeInt(const ConstantInt *CI, MVT VT);
269 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
270 unsigned materializeGV(const GlobalValue *GV);
271
272 // Call handling routines.
273private:
274 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
275 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
276 unsigned &NumBytes);
277 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
278
279public:
280 // Backend specific FastISel code.
281 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
282 unsigned fastMaterializeConstant(const Constant *C) override;
283 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
284
285 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
286 const TargetLibraryInfo *LibInfo)
287 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
288 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
289 Context = &FuncInfo.Fn->getContext();
290 }
291
292 bool fastSelectInstruction(const Instruction *I) override;
293
294#include "AArch64GenFastISel.inc"
295};
296
297} // end anonymous namespace
298
299/// Check if the sign-/zero-extend will be a noop.
300static bool isIntExtFree(const Instruction *I) {
301 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
302 "Unexpected integer extend instruction.");
303 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
304 "Unexpected value type.");
305 bool IsZExt = isa<ZExtInst>(I);
306
307 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
308 if (LI->hasOneUse())
309 return true;
310
311 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
312 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
313 return true;
314
315 return false;
316}
317
318/// Determine the implicit scale factor that is applied by a memory
319/// operation for a given value type.
320static unsigned getImplicitScaleFactor(MVT VT) {
321 switch (VT.SimpleTy) {
322 default:
323 return 0; // invalid
324 case MVT::i1: // fall-through
325 case MVT::i8:
326 return 1;
327 case MVT::i16:
328 return 2;
329 case MVT::i32: // fall-through
330 case MVT::f32:
331 return 4;
332 case MVT::i64: // fall-through
333 case MVT::f64:
334 return 8;
335 }
336}
337
338CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
339 if (CC == CallingConv::GHC)
340 return CC_AArch64_GHC;
343 if (Subtarget->isTargetDarwin())
345 if (Subtarget->isTargetWindows())
346 return CC_AArch64_Win64PCS;
347 return CC_AArch64_AAPCS;
348}
349
350unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
351 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
352 "Alloca should always return a pointer.");
353
354 // Don't handle dynamic allocas.
355 if (!FuncInfo.StaticAllocaMap.count(AI))
356 return 0;
357
359 FuncInfo.StaticAllocaMap.find(AI);
360
361 if (SI != FuncInfo.StaticAllocaMap.end()) {
362 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
363 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
364 ResultReg)
365 .addFrameIndex(SI->second)
366 .addImm(0)
367 .addImm(0);
368 return ResultReg;
369 }
370
371 return 0;
372}
373
374unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
375 if (VT > MVT::i64)
376 return 0;
377
378 if (!CI->isZero())
379 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
380
381 // Create a copy from the zero register to materialize a "0" value.
382 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
383 : &AArch64::GPR32RegClass;
384 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
385 Register ResultReg = createResultReg(RC);
386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
387 ResultReg).addReg(ZeroReg, getKillRegState(true));
388 return ResultReg;
389}
390
391unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
392 // Positive zero (+0.0) has to be materialized with a fmov from the zero
393 // register, because the immediate version of fmov cannot encode zero.
394 if (CFP->isNullValue())
395 return fastMaterializeFloatZero(CFP);
396
397 if (VT != MVT::f32 && VT != MVT::f64)
398 return 0;
399
400 const APFloat Val = CFP->getValueAPF();
401 bool Is64Bit = (VT == MVT::f64);
402 // This checks to see if we can use FMOV instructions to materialize
403 // a constant, otherwise we have to materialize via the constant pool.
404 int Imm =
405 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
406 if (Imm != -1) {
407 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
408 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
409 }
410
411 // For the large code model materialize the FP constant in code.
412 if (TM.getCodeModel() == CodeModel::Large) {
413 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
414 const TargetRegisterClass *RC = Is64Bit ?
415 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
416
417 Register TmpReg = createResultReg(RC);
418 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
420
421 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
423 TII.get(TargetOpcode::COPY), ResultReg)
424 .addReg(TmpReg, getKillRegState(true));
425
426 return ResultReg;
427 }
428
429 // Materialize via constant pool. MachineConstantPool wants an explicit
430 // alignment.
431 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
432
433 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
434 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
435 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
437
438 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
439 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
440 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
441 .addReg(ADRPReg)
443 return ResultReg;
444}
445
446unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
447 // We can't handle thread-local variables quickly yet.
448 if (GV->isThreadLocal())
449 return 0;
450
451 // MachO still uses GOT for large code-model accesses, but ELF requires
452 // movz/movk sequences, which FastISel doesn't handle yet.
453 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
454 return 0;
455
456 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
457
458 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
459 if (!DestEVT.isSimple())
460 return 0;
461
462 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
463 unsigned ResultReg;
464
465 if (OpFlags & AArch64II::MO_GOT) {
466 // ADRP + LDRX
467 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
468 ADRPReg)
469 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
470
471 unsigned LdrOpc;
472 if (Subtarget->isTargetILP32()) {
473 ResultReg = createResultReg(&AArch64::GPR32RegClass);
474 LdrOpc = AArch64::LDRWui;
475 } else {
476 ResultReg = createResultReg(&AArch64::GPR64RegClass);
477 LdrOpc = AArch64::LDRXui;
478 }
479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
480 ResultReg)
481 .addReg(ADRPReg)
483 AArch64II::MO_NC | OpFlags);
484 if (!Subtarget->isTargetILP32())
485 return ResultReg;
486
487 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
488 // so we must extend the result on ILP32.
489 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
491 TII.get(TargetOpcode::SUBREG_TO_REG))
492 .addDef(Result64)
493 .addImm(0)
494 .addReg(ResultReg, RegState::Kill)
495 .addImm(AArch64::sub_32);
496 return Result64;
497 } else {
498 // ADRP + ADDX
499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
500 ADRPReg)
501 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
502
503 if (OpFlags & AArch64II::MO_TAGGED) {
504 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
505 // We do so by creating a MOVK that sets bits 48-63 of the register to
506 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
507 // the small code model so we can assume a binary size of <= 4GB, which
508 // makes the untagged PC relative offset positive. The binary must also be
509 // loaded into address range [0, 2^48). Both of these properties need to
510 // be ensured at runtime when using tagged addresses.
511 //
512 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
513 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
514 // are not exactly 1:1 with FastISel so we cannot easily abstract this
515 // out. At some point, it would be nice to find a way to not have this
516 // duplciate code.
517 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
518 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
519 DstReg)
520 .addReg(ADRPReg)
521 .addGlobalAddress(GV, /*Offset=*/0x100000000,
523 .addImm(48);
524 ADRPReg = DstReg;
525 }
526
527 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
529 ResultReg)
530 .addReg(ADRPReg)
531 .addGlobalAddress(GV, 0,
533 .addImm(0);
534 }
535 return ResultReg;
536}
537
538unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
539 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
540
541 // Only handle simple types.
542 if (!CEVT.isSimple())
543 return 0;
544 MVT VT = CEVT.getSimpleVT();
545 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
546 // 'null' pointers need to have a somewhat special treatment.
547 if (isa<ConstantPointerNull>(C)) {
548 assert(VT == MVT::i64 && "Expected 64-bit pointers");
549 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
550 }
551
552 if (const auto *CI = dyn_cast<ConstantInt>(C))
553 return materializeInt(CI, VT);
554 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
555 return materializeFP(CFP, VT);
556 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
557 return materializeGV(GV);
558
559 return 0;
560}
561
562unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
563 assert(CFP->isNullValue() &&
564 "Floating-point constant is not a positive zero.");
565 MVT VT;
566 if (!isTypeLegal(CFP->getType(), VT))
567 return 0;
568
569 if (VT != MVT::f32 && VT != MVT::f64)
570 return 0;
571
572 bool Is64Bit = (VT == MVT::f64);
573 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
574 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
575 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
576}
577
578/// Check if the multiply is by a power-of-2 constant.
579static bool isMulPowOf2(const Value *I) {
580 if (const auto *MI = dyn_cast<MulOperator>(I)) {
581 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
582 if (C->getValue().isPowerOf2())
583 return true;
584 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
585 if (C->getValue().isPowerOf2())
586 return true;
587 }
588 return false;
589}
590
591// Computes the address to get to an object.
592bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
593{
594 const User *U = nullptr;
595 unsigned Opcode = Instruction::UserOp1;
596 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
597 // Don't walk into other basic blocks unless the object is an alloca from
598 // another block, otherwise it may not have a virtual register assigned.
599 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
600 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
601 Opcode = I->getOpcode();
602 U = I;
603 }
604 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
605 Opcode = C->getOpcode();
606 U = C;
607 }
608
609 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
610 if (Ty->getAddressSpace() > 255)
611 // Fast instruction selection doesn't support the special
612 // address spaces.
613 return false;
614
615 switch (Opcode) {
616 default:
617 break;
618 case Instruction::BitCast:
619 // Look through bitcasts.
620 return computeAddress(U->getOperand(0), Addr, Ty);
621
622 case Instruction::IntToPtr:
623 // Look past no-op inttoptrs.
624 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
625 TLI.getPointerTy(DL))
626 return computeAddress(U->getOperand(0), Addr, Ty);
627 break;
628
629 case Instruction::PtrToInt:
630 // Look past no-op ptrtoints.
631 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
632 return computeAddress(U->getOperand(0), Addr, Ty);
633 break;
634
635 case Instruction::GetElementPtr: {
636 Address SavedAddr = Addr;
637 uint64_t TmpOffset = Addr.getOffset();
638
639 // Iterate through the GEP folding the constants into offsets where
640 // we can.
641 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
642 GTI != E; ++GTI) {
643 const Value *Op = GTI.getOperand();
644 if (StructType *STy = GTI.getStructTypeOrNull()) {
645 const StructLayout *SL = DL.getStructLayout(STy);
646 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
647 TmpOffset += SL->getElementOffset(Idx);
648 } else {
649 uint64_t S = GTI.getSequentialElementStride(DL);
650 while (true) {
651 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
652 // Constant-offset addressing.
653 TmpOffset += CI->getSExtValue() * S;
654 break;
655 }
656 if (canFoldAddIntoGEP(U, Op)) {
657 // A compatible add with a constant operand. Fold the constant.
658 ConstantInt *CI =
659 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
660 TmpOffset += CI->getSExtValue() * S;
661 // Iterate on the other operand.
662 Op = cast<AddOperator>(Op)->getOperand(0);
663 continue;
664 }
665 // Unsupported
666 goto unsupported_gep;
667 }
668 }
669 }
670
671 // Try to grab the base operand now.
672 Addr.setOffset(TmpOffset);
673 if (computeAddress(U->getOperand(0), Addr, Ty))
674 return true;
675
676 // We failed, restore everything and try the other options.
677 Addr = SavedAddr;
678
679 unsupported_gep:
680 break;
681 }
682 case Instruction::Alloca: {
683 const AllocaInst *AI = cast<AllocaInst>(Obj);
685 FuncInfo.StaticAllocaMap.find(AI);
686 if (SI != FuncInfo.StaticAllocaMap.end()) {
687 Addr.setKind(Address::FrameIndexBase);
688 Addr.setFI(SI->second);
689 return true;
690 }
691 break;
692 }
693 case Instruction::Add: {
694 // Adds of constants are common and easy enough.
695 const Value *LHS = U->getOperand(0);
696 const Value *RHS = U->getOperand(1);
697
698 if (isa<ConstantInt>(LHS))
699 std::swap(LHS, RHS);
700
701 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
702 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
703 return computeAddress(LHS, Addr, Ty);
704 }
705
706 Address Backup = Addr;
707 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
708 return true;
709 Addr = Backup;
710
711 break;
712 }
713 case Instruction::Sub: {
714 // Subs of constants are common and easy enough.
715 const Value *LHS = U->getOperand(0);
716 const Value *RHS = U->getOperand(1);
717
718 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
719 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
720 return computeAddress(LHS, Addr, Ty);
721 }
722 break;
723 }
724 case Instruction::Shl: {
725 if (Addr.getOffsetReg())
726 break;
727
728 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
729 if (!CI)
730 break;
731
732 unsigned Val = CI->getZExtValue();
733 if (Val < 1 || Val > 3)
734 break;
735
736 uint64_t NumBytes = 0;
737 if (Ty && Ty->isSized()) {
738 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
739 NumBytes = NumBits / 8;
740 if (!isPowerOf2_64(NumBits))
741 NumBytes = 0;
742 }
743
744 if (NumBytes != (1ULL << Val))
745 break;
746
747 Addr.setShift(Val);
748 Addr.setExtendType(AArch64_AM::LSL);
749
750 const Value *Src = U->getOperand(0);
751 if (const auto *I = dyn_cast<Instruction>(Src)) {
752 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
753 // Fold the zext or sext when it won't become a noop.
754 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
755 if (!isIntExtFree(ZE) &&
756 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
757 Addr.setExtendType(AArch64_AM::UXTW);
758 Src = ZE->getOperand(0);
759 }
760 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
761 if (!isIntExtFree(SE) &&
762 SE->getOperand(0)->getType()->isIntegerTy(32)) {
763 Addr.setExtendType(AArch64_AM::SXTW);
764 Src = SE->getOperand(0);
765 }
766 }
767 }
768 }
769
770 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
771 if (AI->getOpcode() == Instruction::And) {
772 const Value *LHS = AI->getOperand(0);
773 const Value *RHS = AI->getOperand(1);
774
775 if (const auto *C = dyn_cast<ConstantInt>(LHS))
776 if (C->getValue() == 0xffffffff)
777 std::swap(LHS, RHS);
778
779 if (const auto *C = dyn_cast<ConstantInt>(RHS))
780 if (C->getValue() == 0xffffffff) {
781 Addr.setExtendType(AArch64_AM::UXTW);
782 Register Reg = getRegForValue(LHS);
783 if (!Reg)
784 return false;
785 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
786 Addr.setOffsetReg(Reg);
787 return true;
788 }
789 }
790
791 Register Reg = getRegForValue(Src);
792 if (!Reg)
793 return false;
794 Addr.setOffsetReg(Reg);
795 return true;
796 }
797 case Instruction::Mul: {
798 if (Addr.getOffsetReg())
799 break;
800
801 if (!isMulPowOf2(U))
802 break;
803
804 const Value *LHS = U->getOperand(0);
805 const Value *RHS = U->getOperand(1);
806
807 // Canonicalize power-of-2 value to the RHS.
808 if (const auto *C = dyn_cast<ConstantInt>(LHS))
809 if (C->getValue().isPowerOf2())
810 std::swap(LHS, RHS);
811
812 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
813 const auto *C = cast<ConstantInt>(RHS);
814 unsigned Val = C->getValue().logBase2();
815 if (Val < 1 || Val > 3)
816 break;
817
818 uint64_t NumBytes = 0;
819 if (Ty && Ty->isSized()) {
820 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
821 NumBytes = NumBits / 8;
822 if (!isPowerOf2_64(NumBits))
823 NumBytes = 0;
824 }
825
826 if (NumBytes != (1ULL << Val))
827 break;
828
829 Addr.setShift(Val);
830 Addr.setExtendType(AArch64_AM::LSL);
831
832 const Value *Src = LHS;
833 if (const auto *I = dyn_cast<Instruction>(Src)) {
834 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
835 // Fold the zext or sext when it won't become a noop.
836 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
837 if (!isIntExtFree(ZE) &&
838 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
839 Addr.setExtendType(AArch64_AM::UXTW);
840 Src = ZE->getOperand(0);
841 }
842 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
843 if (!isIntExtFree(SE) &&
844 SE->getOperand(0)->getType()->isIntegerTy(32)) {
845 Addr.setExtendType(AArch64_AM::SXTW);
846 Src = SE->getOperand(0);
847 }
848 }
849 }
850 }
851
852 Register Reg = getRegForValue(Src);
853 if (!Reg)
854 return false;
855 Addr.setOffsetReg(Reg);
856 return true;
857 }
858 case Instruction::And: {
859 if (Addr.getOffsetReg())
860 break;
861
862 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
863 break;
864
865 const Value *LHS = U->getOperand(0);
866 const Value *RHS = U->getOperand(1);
867
868 if (const auto *C = dyn_cast<ConstantInt>(LHS))
869 if (C->getValue() == 0xffffffff)
870 std::swap(LHS, RHS);
871
872 if (const auto *C = dyn_cast<ConstantInt>(RHS))
873 if (C->getValue() == 0xffffffff) {
874 Addr.setShift(0);
875 Addr.setExtendType(AArch64_AM::LSL);
876 Addr.setExtendType(AArch64_AM::UXTW);
877
878 Register Reg = getRegForValue(LHS);
879 if (!Reg)
880 return false;
881 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
882 Addr.setOffsetReg(Reg);
883 return true;
884 }
885 break;
886 }
887 case Instruction::SExt:
888 case Instruction::ZExt: {
889 if (!Addr.getReg() || Addr.getOffsetReg())
890 break;
891
892 const Value *Src = nullptr;
893 // Fold the zext or sext when it won't become a noop.
894 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
895 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
896 Addr.setExtendType(AArch64_AM::UXTW);
897 Src = ZE->getOperand(0);
898 }
899 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
900 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
901 Addr.setExtendType(AArch64_AM::SXTW);
902 Src = SE->getOperand(0);
903 }
904 }
905
906 if (!Src)
907 break;
908
909 Addr.setShift(0);
910 Register Reg = getRegForValue(Src);
911 if (!Reg)
912 return false;
913 Addr.setOffsetReg(Reg);
914 return true;
915 }
916 } // end switch
917
918 if (Addr.isRegBase() && !Addr.getReg()) {
919 Register Reg = getRegForValue(Obj);
920 if (!Reg)
921 return false;
922 Addr.setReg(Reg);
923 return true;
924 }
925
926 if (!Addr.getOffsetReg()) {
927 Register Reg = getRegForValue(Obj);
928 if (!Reg)
929 return false;
930 Addr.setOffsetReg(Reg);
931 return true;
932 }
933
934 return false;
935}
936
937bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
938 const User *U = nullptr;
939 unsigned Opcode = Instruction::UserOp1;
940 bool InMBB = true;
941
942 if (const auto *I = dyn_cast<Instruction>(V)) {
943 Opcode = I->getOpcode();
944 U = I;
945 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
946 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
947 Opcode = C->getOpcode();
948 U = C;
949 }
950
951 switch (Opcode) {
952 default: break;
953 case Instruction::BitCast:
954 // Look past bitcasts if its operand is in the same BB.
955 if (InMBB)
956 return computeCallAddress(U->getOperand(0), Addr);
957 break;
958 case Instruction::IntToPtr:
959 // Look past no-op inttoptrs if its operand is in the same BB.
960 if (InMBB &&
961 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
962 TLI.getPointerTy(DL))
963 return computeCallAddress(U->getOperand(0), Addr);
964 break;
965 case Instruction::PtrToInt:
966 // Look past no-op ptrtoints if its operand is in the same BB.
967 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
968 return computeCallAddress(U->getOperand(0), Addr);
969 break;
970 }
971
972 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
973 Addr.setGlobalValue(GV);
974 return true;
975 }
976
977 // If all else fails, try to materialize the value in a register.
978 if (!Addr.getGlobalValue()) {
979 Addr.setReg(getRegForValue(V));
980 return Addr.getReg() != 0;
981 }
982
983 return false;
984}
985
986bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
987 EVT evt = TLI.getValueType(DL, Ty, true);
988
989 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
990 return false;
991
992 // Only handle simple types.
993 if (evt == MVT::Other || !evt.isSimple())
994 return false;
995 VT = evt.getSimpleVT();
996
997 // This is a legal type, but it's not something we handle in fast-isel.
998 if (VT == MVT::f128)
999 return false;
1000
1001 // Handle all other legal types, i.e. a register that will directly hold this
1002 // value.
1003 return TLI.isTypeLegal(VT);
1004}
1005
1006/// Determine if the value type is supported by FastISel.
1007///
1008/// FastISel for AArch64 can handle more value types than are legal. This adds
1009/// simple value type such as i1, i8, and i16.
1010bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1011 if (Ty->isVectorTy() && !IsVectorAllowed)
1012 return false;
1013
1014 if (isTypeLegal(Ty, VT))
1015 return true;
1016
1017 // If this is a type than can be sign or zero-extended to a basic operation
1018 // go ahead and accept it now.
1019 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1020 return true;
1021
1022 return false;
1023}
1024
1025bool AArch64FastISel::isValueAvailable(const Value *V) const {
1026 if (!isa<Instruction>(V))
1027 return true;
1028
1029 const auto *I = cast<Instruction>(V);
1030 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1031}
1032
// Rewrite \p Addr, if necessary, into a form that the AArch64 load/store
// instructions for \p VT can encode directly (frame index, base register plus
// scaled immediate, or base register plus offset register). Returns false if
// the address cannot be simplified.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  // An immediate offset is encodable either as an unscaled signed 9-bit
  // immediate, or as a scaled unsigned 12-bit immediate (a multiple of the
  // access size). Anything else must be lowered into a separate add.
  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Fold the offset register (with its extend/shift, if any) into the base by
  // emitting an explicit add, leaving a plain register base behind.
  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: materialize the (extended and shifted) offset
      // register on its own.
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
1124
1125void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1126 const MachineInstrBuilder &MIB,
1128 unsigned ScaleFactor,
1129 MachineMemOperand *MMO) {
1130 int64_t Offset = Addr.getOffset() / ScaleFactor;
1131 // Frame base works a bit differently. Handle it separately.
1132 if (Addr.isFIBase()) {
1133 int FI = Addr.getFI();
1134 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1135 // and alignment should be based on the VT.
1136 MMO = FuncInfo.MF->getMachineMemOperand(
1137 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1138 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1139 // Now add the rest of the operands.
1140 MIB.addFrameIndex(FI).addImm(Offset);
1141 } else {
1142 assert(Addr.isRegBase() && "Unexpected address kind.");
1143 const MCInstrDesc &II = MIB->getDesc();
1144 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1145 Addr.setReg(
1146 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1147 Addr.setOffsetReg(
1148 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1149 if (Addr.getOffsetReg()) {
1150 assert(Addr.getOffset() == 0 && "Unexpected offset");
1151 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1152 Addr.getExtendType() == AArch64_AM::SXTX;
1153 MIB.addReg(Addr.getReg());
1154 MIB.addReg(Addr.getOffsetReg());
1155 MIB.addImm(IsSigned);
1156 MIB.addImm(Addr.getShift() != 0);
1157 } else
1158 MIB.addReg(Addr.getReg()).addImm(Offset);
1159 }
1160
1161 if (MMO)
1162 MIB.addMemOperand(MMO);
1163}
1164
1165unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1166 const Value *RHS, bool SetFlags,
1167 bool WantResult, bool IsZExt) {
1169 bool NeedExtend = false;
1170 switch (RetVT.SimpleTy) {
1171 default:
1172 return 0;
1173 case MVT::i1:
1174 NeedExtend = true;
1175 break;
1176 case MVT::i8:
1177 NeedExtend = true;
1178 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1179 break;
1180 case MVT::i16:
1181 NeedExtend = true;
1182 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1183 break;
1184 case MVT::i32: // fall-through
1185 case MVT::i64:
1186 break;
1187 }
1188 MVT SrcVT = RetVT;
1189 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1190
1191 // Canonicalize immediates to the RHS first.
1192 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1193 std::swap(LHS, RHS);
1194
1195 // Canonicalize mul by power of 2 to the RHS.
1196 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197 if (isMulPowOf2(LHS))
1198 std::swap(LHS, RHS);
1199
1200 // Canonicalize shift immediate to the RHS.
1201 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1202 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1203 if (isa<ConstantInt>(SI->getOperand(1)))
1204 if (SI->getOpcode() == Instruction::Shl ||
1205 SI->getOpcode() == Instruction::LShr ||
1206 SI->getOpcode() == Instruction::AShr )
1207 std::swap(LHS, RHS);
1208
1209 Register LHSReg = getRegForValue(LHS);
1210 if (!LHSReg)
1211 return 0;
1212
1213 if (NeedExtend)
1214 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1215
1216 unsigned ResultReg = 0;
1217 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1218 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1219 if (C->isNegative())
1220 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1221 WantResult);
1222 else
1223 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1224 WantResult);
1225 } else if (const auto *C = dyn_cast<Constant>(RHS))
1226 if (C->isNullValue())
1227 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1228
1229 if (ResultReg)
1230 return ResultReg;
1231
1232 // Only extend the RHS within the instruction if there is a valid extend type.
1233 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1234 isValueAvailable(RHS)) {
1235 Register RHSReg = getRegForValue(RHS);
1236 if (!RHSReg)
1237 return 0;
1238 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1239 SetFlags, WantResult);
1240 }
1241
1242 // Check if the mul can be folded into the instruction.
1243 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244 if (isMulPowOf2(RHS)) {
1245 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1246 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1247
1248 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1249 if (C->getValue().isPowerOf2())
1250 std::swap(MulLHS, MulRHS);
1251
1252 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1253 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1254 Register RHSReg = getRegForValue(MulLHS);
1255 if (!RHSReg)
1256 return 0;
1257 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1258 ShiftVal, SetFlags, WantResult);
1259 if (ResultReg)
1260 return ResultReg;
1261 }
1262 }
1263
1264 // Check if the shift can be folded into the instruction.
1265 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1266 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1267 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1269 switch (SI->getOpcode()) {
1270 default: break;
1271 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1272 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1273 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1274 }
1275 uint64_t ShiftVal = C->getZExtValue();
1276 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1277 Register RHSReg = getRegForValue(SI->getOperand(0));
1278 if (!RHSReg)
1279 return 0;
1280 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1281 ShiftVal, SetFlags, WantResult);
1282 if (ResultReg)
1283 return ResultReg;
1284 }
1285 }
1286 }
1287 }
1288
1289 Register RHSReg = getRegForValue(RHS);
1290 if (!RHSReg)
1291 return 0;
1292
1293 if (NeedExtend)
1294 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1295
1296 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1297}
1298
1299unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1300 unsigned RHSReg, bool SetFlags,
1301 bool WantResult) {
1302 assert(LHSReg && RHSReg && "Invalid register number.");
1303
1304 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1305 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1306 return 0;
1307
1308 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1309 return 0;
1310
1311 static const unsigned OpcTable[2][2][2] = {
1312 { { AArch64::SUBWrr, AArch64::SUBXrr },
1313 { AArch64::ADDWrr, AArch64::ADDXrr } },
1314 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1315 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1316 };
1317 bool Is64Bit = RetVT == MVT::i64;
1318 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1319 const TargetRegisterClass *RC =
1320 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1321 unsigned ResultReg;
1322 if (WantResult)
1323 ResultReg = createResultReg(RC);
1324 else
1325 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1326
1327 const MCInstrDesc &II = TII.get(Opc);
1328 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1329 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1330 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1331 .addReg(LHSReg)
1332 .addReg(RHSReg);
1333 return ResultReg;
1334}
1335
1336unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1337 uint64_t Imm, bool SetFlags,
1338 bool WantResult) {
1339 assert(LHSReg && "Invalid register number.");
1340
1341 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1342 return 0;
1343
1344 unsigned ShiftImm;
1345 if (isUInt<12>(Imm))
1346 ShiftImm = 0;
1347 else if ((Imm & 0xfff000) == Imm) {
1348 ShiftImm = 12;
1349 Imm >>= 12;
1350 } else
1351 return 0;
1352
1353 static const unsigned OpcTable[2][2][2] = {
1354 { { AArch64::SUBWri, AArch64::SUBXri },
1355 { AArch64::ADDWri, AArch64::ADDXri } },
1356 { { AArch64::SUBSWri, AArch64::SUBSXri },
1357 { AArch64::ADDSWri, AArch64::ADDSXri } }
1358 };
1359 bool Is64Bit = RetVT == MVT::i64;
1360 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1361 const TargetRegisterClass *RC;
1362 if (SetFlags)
1363 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1364 else
1365 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1366 unsigned ResultReg;
1367 if (WantResult)
1368 ResultReg = createResultReg(RC);
1369 else
1370 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1371
1372 const MCInstrDesc &II = TII.get(Opc);
1373 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1375 .addReg(LHSReg)
1376 .addImm(Imm)
1377 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1378 return ResultReg;
1379}
1380
1381unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1382 unsigned RHSReg,
1384 uint64_t ShiftImm, bool SetFlags,
1385 bool WantResult) {
1386 assert(LHSReg && RHSReg && "Invalid register number.");
1387 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1388 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1389
1390 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1391 return 0;
1392
1393 // Don't deal with undefined shifts.
1394 if (ShiftImm >= RetVT.getSizeInBits())
1395 return 0;
1396
1397 static const unsigned OpcTable[2][2][2] = {
1398 { { AArch64::SUBWrs, AArch64::SUBXrs },
1399 { AArch64::ADDWrs, AArch64::ADDXrs } },
1400 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1401 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1402 };
1403 bool Is64Bit = RetVT == MVT::i64;
1404 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1405 const TargetRegisterClass *RC =
1406 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1407 unsigned ResultReg;
1408 if (WantResult)
1409 ResultReg = createResultReg(RC);
1410 else
1411 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1412
1413 const MCInstrDesc &II = TII.get(Opc);
1414 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1415 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1416 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1417 .addReg(LHSReg)
1418 .addReg(RHSReg)
1419 .addImm(getShifterImm(ShiftType, ShiftImm));
1420 return ResultReg;
1421}
1422
1423unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1424 unsigned RHSReg,
1426 uint64_t ShiftImm, bool SetFlags,
1427 bool WantResult) {
1428 assert(LHSReg && RHSReg && "Invalid register number.");
1429 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1430 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1431
1432 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1433 return 0;
1434
1435 if (ShiftImm >= 4)
1436 return 0;
1437
1438 static const unsigned OpcTable[2][2][2] = {
1439 { { AArch64::SUBWrx, AArch64::SUBXrx },
1440 { AArch64::ADDWrx, AArch64::ADDXrx } },
1441 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1442 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1443 };
1444 bool Is64Bit = RetVT == MVT::i64;
1445 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1446 const TargetRegisterClass *RC = nullptr;
1447 if (SetFlags)
1448 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1449 else
1450 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1451 unsigned ResultReg;
1452 if (WantResult)
1453 ResultReg = createResultReg(RC);
1454 else
1455 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1456
1457 const MCInstrDesc &II = TII.get(Opc);
1458 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1459 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1461 .addReg(LHSReg)
1462 .addReg(RHSReg)
1463 .addImm(getArithExtendImm(ExtType, ShiftImm));
1464 return ResultReg;
1465}
1466
1467bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1468 Type *Ty = LHS->getType();
1469 EVT EVT = TLI.getValueType(DL, Ty, true);
1470 if (!EVT.isSimple())
1471 return false;
1472 MVT VT = EVT.getSimpleVT();
1473
1474 switch (VT.SimpleTy) {
1475 default:
1476 return false;
1477 case MVT::i1:
1478 case MVT::i8:
1479 case MVT::i16:
1480 case MVT::i32:
1481 case MVT::i64:
1482 return emitICmp(VT, LHS, RHS, IsZExt);
1483 case MVT::f32:
1484 case MVT::f64:
1485 return emitFCmp(VT, LHS, RHS);
1486 }
1487}
1488
1489bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1490 bool IsZExt) {
1491 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1492 IsZExt) != 0;
1493}
1494
1495bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1496 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1497 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1498}
1499
1500bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1501 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1502 return false;
1503
1504 // Check to see if the 2nd operand is a constant that we can encode directly
1505 // in the compare.
1506 bool UseImm = false;
1507 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1508 if (CFP->isZero() && !CFP->isNegative())
1509 UseImm = true;
1510
1511 Register LHSReg = getRegForValue(LHS);
1512 if (!LHSReg)
1513 return false;
1514
1515 if (UseImm) {
1516 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1518 .addReg(LHSReg);
1519 return true;
1520 }
1521
1522 Register RHSReg = getRegForValue(RHS);
1523 if (!RHSReg)
1524 return false;
1525
1526 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1528 .addReg(LHSReg)
1529 .addReg(RHSReg);
1530 return true;
1531}
1532
1533unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1534 bool SetFlags, bool WantResult, bool IsZExt) {
1535 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1536 IsZExt);
1537}
1538
1539/// This method is a wrapper to simplify add emission.
1540///
1541/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1542/// that fails, then try to materialize the immediate into a register and use
1543/// emitAddSub_rr instead.
1544unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1545 unsigned ResultReg;
1546 if (Imm < 0)
1547 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1548 else
1549 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1550
1551 if (ResultReg)
1552 return ResultReg;
1553
1554 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1555 if (!CReg)
1556 return 0;
1557
1558 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1559 return ResultReg;
1560}
1561
1562unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1563 bool SetFlags, bool WantResult, bool IsZExt) {
1564 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1565 IsZExt);
1566}
1567
1568unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1569 unsigned RHSReg, bool WantResult) {
1570 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1571 /*SetFlags=*/true, WantResult);
1572}
1573
1574unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1575 unsigned RHSReg,
1577 uint64_t ShiftImm, bool WantResult) {
1578 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1579 ShiftImm, /*SetFlags=*/true, WantResult);
1580}
1581
/// Emit a logical operation (AND/OR/XOR, selected by \p ISDOpc) of \p LHS and
/// \p RHS. Constant immediates, multiplies by a power of two, and shift-left
/// amounts on the RHS are folded into the instruction when possible.
/// Returns the result register, or 0 on failure.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  // Try the register-immediate form first.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-2 constant ends up on the RHS of the multiply.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  // Fall back to the plain register-register form.
  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  // Sub-32-bit results are computed in 32-bit registers; mask off the high
  // bits so the value matches the narrow type.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
1659
1660unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1661 unsigned LHSReg, uint64_t Imm) {
1662 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1663 "ISD nodes are not consecutive!");
1664 static const unsigned OpcTable[3][2] = {
1665 { AArch64::ANDWri, AArch64::ANDXri },
1666 { AArch64::ORRWri, AArch64::ORRXri },
1667 { AArch64::EORWri, AArch64::EORXri }
1668 };
1669 const TargetRegisterClass *RC;
1670 unsigned Opc;
1671 unsigned RegSize;
1672 switch (RetVT.SimpleTy) {
1673 default:
1674 return 0;
1675 case MVT::i1:
1676 case MVT::i8:
1677 case MVT::i16:
1678 case MVT::i32: {
1679 unsigned Idx = ISDOpc - ISD::AND;
1680 Opc = OpcTable[Idx][0];
1681 RC = &AArch64::GPR32spRegClass;
1682 RegSize = 32;
1683 break;
1684 }
1685 case MVT::i64:
1686 Opc = OpcTable[ISDOpc - ISD::AND][1];
1687 RC = &AArch64::GPR64spRegClass;
1688 RegSize = 64;
1689 break;
1690 }
1691
1693 return 0;
1694
1695 Register ResultReg =
1696 fastEmitInst_ri(Opc, RC, LHSReg,
1698 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1699 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1700 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1701 }
1702 return ResultReg;
1703}
1704
1705unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1706 unsigned LHSReg, unsigned RHSReg,
1707 uint64_t ShiftImm) {
1708 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709 "ISD nodes are not consecutive!");
1710 static const unsigned OpcTable[3][2] = {
1711 { AArch64::ANDWrs, AArch64::ANDXrs },
1712 { AArch64::ORRWrs, AArch64::ORRXrs },
1713 { AArch64::EORWrs, AArch64::EORXrs }
1714 };
1715
1716 // Don't deal with undefined shifts.
1717 if (ShiftImm >= RetVT.getSizeInBits())
1718 return 0;
1719
1720 const TargetRegisterClass *RC;
1721 unsigned Opc;
1722 switch (RetVT.SimpleTy) {
1723 default:
1724 return 0;
1725 case MVT::i1:
1726 case MVT::i8:
1727 case MVT::i16:
1728 case MVT::i32:
1729 Opc = OpcTable[ISDOpc - ISD::AND][0];
1730 RC = &AArch64::GPR32RegClass;
1731 break;
1732 case MVT::i64:
1733 Opc = OpcTable[ISDOpc - ISD::AND][1];
1734 RC = &AArch64::GPR64RegClass;
1735 break;
1736 }
1737 Register ResultReg =
1738 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1740 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1743 }
1744 return ResultReg;
1745}
1746
1747unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1748 uint64_t Imm) {
1749 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1750}
1751
/// Emit a load of type \p VT from \p Addr, producing a value of type \p RetVT
/// (a following sign-/zero-extend may be folded in, selected by \p WantZExt).
/// Returns the result register, or 0 on failure.
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return 0;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // GP load opcodes, indexed by [WantZExt][2*AddrForm + IsRet64Bit][SizeIdx],
  // where the address forms are: unscaled imm, scaled imm, reg+reg(X),
  // reg+reg(W extended).
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi  },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // FP load opcodes, indexed by [AddrForm][IsF64].
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  // A W-register offset (UXTW/SXTW extend) selects the next table row.
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}
1888
1889bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890 MVT VT;
1891 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892 return false;
1893
1894 if (VT.isVector())
1895 return selectOperator(I, I->getOpcode());
1896
1897 unsigned ResultReg;
1898 switch (I->getOpcode()) {
1899 default:
1900 llvm_unreachable("Unexpected instruction.");
1901 case Instruction::Add:
1902 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903 break;
1904 case Instruction::Sub:
1905 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906 break;
1907 }
1908 if (!ResultReg)
1909 return false;
1910
1911 updateValueMap(I, ResultReg);
1912 return true;
1913}
1914
1915bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916 MVT VT;
1917 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918 return false;
1919
1920 if (VT.isVector())
1921 return selectOperator(I, I->getOpcode());
1922
1923 unsigned ResultReg;
1924 switch (I->getOpcode()) {
1925 default:
1926 llvm_unreachable("Unexpected instruction.");
1927 case Instruction::And:
1928 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929 break;
1930 case Instruction::Or:
1931 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932 break;
1933 case Instruction::Xor:
1934 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935 break;
1936 }
1937 if (!ResultReg)
1938 return false;
1939
1940 updateValueMap(I, ResultReg);
1941 return true;
1942}
1943
1944bool AArch64FastISel::selectLoad(const Instruction *I) {
1945 MVT VT;
1946 // Verify we have a legal type before going any further. Currently, we handle
1947 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950 cast<LoadInst>(I)->isAtomic())
1951 return false;
1952
1953 const Value *SV = I->getOperand(0);
1954 if (TLI.supportSwiftError()) {
1955 // Swifterror values can come from either a function parameter with
1956 // swifterror attribute or an alloca with swifterror attribute.
1957 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958 if (Arg->hasSwiftErrorAttr())
1959 return false;
1960 }
1961
1962 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963 if (Alloca->isSwiftError())
1964 return false;
1965 }
1966 }
1967
1968 // See if we can handle this address.
1969 Address Addr;
1970 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971 return false;
1972
1973 // Fold the following sign-/zero-extend into the load instruction.
1974 bool WantZExt = true;
1975 MVT RetVT = VT;
1976 const Value *IntExtVal = nullptr;
1977 if (I->hasOneUse()) {
1978 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979 if (isTypeSupported(ZE->getType(), RetVT))
1980 IntExtVal = ZE;
1981 else
1982 RetVT = VT;
1983 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984 if (isTypeSupported(SE->getType(), RetVT))
1985 IntExtVal = SE;
1986 else
1987 RetVT = VT;
1988 WantZExt = false;
1989 }
1990 }
1991
1992 unsigned ResultReg =
1993 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994 if (!ResultReg)
1995 return false;
1996
1997 // There are a few different cases we have to handle, because the load or the
1998 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1999 // SelectionDAG. There is also an ordering issue when both instructions are in
2000 // different basic blocks.
2001 // 1.) The load instruction is selected by FastISel, but the integer extend
2002 // not. This usually happens when the integer extend is in a different
2003 // basic block and SelectionDAG took over for that basic block.
2004 // 2.) The load instruction is selected before the integer extend. This only
2005 // happens when the integer extend is in a different basic block.
2006 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007 // by FastISel. This happens if there are instructions between the load
2008 // and the integer extend that couldn't be selected by FastISel.
2009 if (IntExtVal) {
2010 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012 // it when it selects the integer extend.
2013 Register Reg = lookUpRegForValue(IntExtVal);
2014 auto *MI = MRI.getUniqueVRegDef(Reg);
2015 if (!MI) {
2016 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017 if (WantZExt) {
2018 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020 ResultReg = std::prev(I)->getOperand(0).getReg();
2021 removeDeadCode(I, std::next(I));
2022 } else
2023 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024 AArch64::sub_32);
2025 }
2026 updateValueMap(I, ResultReg);
2027 return true;
2028 }
2029
2030 // The integer extend has already been emitted - delete all the instructions
2031 // that have been emitted by the integer extend lowering code and use the
2032 // result from the load instruction directly.
2033 while (MI) {
2034 Reg = 0;
2035 for (auto &Opnd : MI->uses()) {
2036 if (Opnd.isReg()) {
2037 Reg = Opnd.getReg();
2038 break;
2039 }
2040 }
2042 removeDeadCode(I, std::next(I));
2043 MI = nullptr;
2044 if (Reg)
2045 MI = MRI.getUniqueVRegDef(Reg);
2046 }
2047 updateValueMap(IntExtVal, ResultReg);
2048 return true;
2049 }
2050
2051 updateValueMap(I, ResultReg);
2052 return true;
2053}
2054
2055bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2056 unsigned AddrReg,
2057 MachineMemOperand *MMO) {
2058 unsigned Opc;
2059 switch (VT.SimpleTy) {
2060 default: return false;
2061 case MVT::i8: Opc = AArch64::STLRB; break;
2062 case MVT::i16: Opc = AArch64::STLRH; break;
2063 case MVT::i32: Opc = AArch64::STLRW; break;
2064 case MVT::i64: Opc = AArch64::STLRX; break;
2065 }
2066
2067 const MCInstrDesc &II = TII.get(Opc);
2068 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2069 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2070 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2071 .addReg(SrcReg)
2072 .addReg(AddrReg)
2073 .addMemOperand(MMO);
2074 return true;
2075}
2076
2077bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2078 MachineMemOperand *MMO) {
2079 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2080 return false;
2081
2082 // Simplify this down to something we can handle.
2083 if (!simplifyAddress(Addr, VT))
2084 return false;
2085
2086 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2087 if (!ScaleFactor)
2088 llvm_unreachable("Unexpected value type.");
2089
2090 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2091 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2092 bool UseScaled = true;
2093 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2094 UseScaled = false;
2095 ScaleFactor = 1;
2096 }
2097
2098 static const unsigned OpcTable[4][6] = {
2099 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2100 AArch64::STURSi, AArch64::STURDi },
2101 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2102 AArch64::STRSui, AArch64::STRDui },
2103 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2104 AArch64::STRSroX, AArch64::STRDroX },
2105 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2106 AArch64::STRSroW, AArch64::STRDroW }
2107 };
2108
2109 unsigned Opc;
2110 bool VTIsi1 = false;
2111 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2112 Addr.getOffsetReg();
2113 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2114 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2115 Addr.getExtendType() == AArch64_AM::SXTW)
2116 Idx++;
2117
2118 switch (VT.SimpleTy) {
2119 default: llvm_unreachable("Unexpected value type.");
2120 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2121 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2122 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2123 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2124 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2125 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2126 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2127 }
2128
2129 // Storing an i1 requires special handling.
2130 if (VTIsi1 && SrcReg != AArch64::WZR) {
2131 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2132 assert(ANDReg && "Unexpected AND instruction emission failure.");
2133 SrcReg = ANDReg;
2134 }
2135 // Create the base instruction, then add the operands.
2136 const MCInstrDesc &II = TII.get(Opc);
2137 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2139 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2140 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2141
2142 return true;
2143}
2144
2145bool AArch64FastISel::selectStore(const Instruction *I) {
2146 MVT VT;
2147 const Value *Op0 = I->getOperand(0);
2148 // Verify we have a legal type before going any further. Currently, we handle
2149 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2150 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2151 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2152 return false;
2153
2154 const Value *PtrV = I->getOperand(1);
2155 if (TLI.supportSwiftError()) {
2156 // Swifterror values can come from either a function parameter with
2157 // swifterror attribute or an alloca with swifterror attribute.
2158 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2159 if (Arg->hasSwiftErrorAttr())
2160 return false;
2161 }
2162
2163 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2164 if (Alloca->isSwiftError())
2165 return false;
2166 }
2167 }
2168
2169 // Get the value to be stored into a register. Use the zero register directly
2170 // when possible to avoid an unnecessary copy and a wasted register.
2171 unsigned SrcReg = 0;
2172 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2173 if (CI->isZero())
2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2176 if (CF->isZero() && !CF->isNegative()) {
2178 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2179 }
2180 }
2181
2182 if (!SrcReg)
2183 SrcReg = getRegForValue(Op0);
2184
2185 if (!SrcReg)
2186 return false;
2187
2188 auto *SI = cast<StoreInst>(I);
2189
2190 // Try to emit a STLR for seq_cst/release.
2191 if (SI->isAtomic()) {
2192 AtomicOrdering Ord = SI->getOrdering();
2193 // The non-atomic instructions are sufficient for relaxed stores.
2194 if (isReleaseOrStronger(Ord)) {
2195 // The STLR addressing mode only supports a base reg; pass that directly.
2196 Register AddrReg = getRegForValue(PtrV);
2197 return emitStoreRelease(VT, SrcReg, AddrReg,
2198 createMachineMemOperandFor(I));
2199 }
2200 }
2201
2202 // See if we can handle this address.
2203 Address Addr;
2204 if (!computeAddress(PtrV, Addr, Op0->getType()))
2205 return false;
2206
2207 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2208 return false;
2209 return true;
2210}
2211
2213 switch (Pred) {
2214 case CmpInst::FCMP_ONE:
2215 case CmpInst::FCMP_UEQ:
2216 default:
2217 // AL is our "false" for now. The other two need more compares.
2218 return AArch64CC::AL;
2219 case CmpInst::ICMP_EQ:
2220 case CmpInst::FCMP_OEQ:
2221 return AArch64CC::EQ;
2222 case CmpInst::ICMP_SGT:
2223 case CmpInst::FCMP_OGT:
2224 return AArch64CC::GT;
2225 case CmpInst::ICMP_SGE:
2226 case CmpInst::FCMP_OGE:
2227 return AArch64CC::GE;
2228 case CmpInst::ICMP_UGT:
2229 case CmpInst::FCMP_UGT:
2230 return AArch64CC::HI;
2231 case CmpInst::FCMP_OLT:
2232 return AArch64CC::MI;
2233 case CmpInst::ICMP_ULE:
2234 case CmpInst::FCMP_OLE:
2235 return AArch64CC::LS;
2236 case CmpInst::FCMP_ORD:
2237 return AArch64CC::VC;
2238 case CmpInst::FCMP_UNO:
2239 return AArch64CC::VS;
2240 case CmpInst::FCMP_UGE:
2241 return AArch64CC::PL;
2242 case CmpInst::ICMP_SLT:
2243 case CmpInst::FCMP_ULT:
2244 return AArch64CC::LT;
2245 case CmpInst::ICMP_SLE:
2246 case CmpInst::FCMP_ULE:
2247 return AArch64CC::LE;
2248 case CmpInst::FCMP_UNE:
2249 case CmpInst::ICMP_NE:
2250 return AArch64CC::NE;
2251 case CmpInst::ICMP_UGE:
2252 return AArch64CC::HS;
2253 case CmpInst::ICMP_ULT:
2254 return AArch64CC::LO;
2255 }
2256}
2257
2258/// Try to emit a combined compare-and-branch instruction.
2259bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2260 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2261 // will not be produced, as they are conditional branch instructions that do
2262 // not set flags.
2263 if (FuncInfo.MF->getFunction().hasFnAttribute(
2264 Attribute::SpeculativeLoadHardening))
2265 return false;
2266
2267 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2268 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2269 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2270
2271 const Value *LHS = CI->getOperand(0);
2272 const Value *RHS = CI->getOperand(1);
2273
2274 MVT VT;
2275 if (!isTypeSupported(LHS->getType(), VT))
2276 return false;
2277
2278 unsigned BW = VT.getSizeInBits();
2279 if (BW > 64)
2280 return false;
2281
2282 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2283 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2284
2285 // Try to take advantage of fallthrough opportunities.
2286 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2287 std::swap(TBB, FBB);
2289 }
2290
2291 int TestBit = -1;
2292 bool IsCmpNE;
2293 switch (Predicate) {
2294 default:
2295 return false;
2296 case CmpInst::ICMP_EQ:
2297 case CmpInst::ICMP_NE:
2298 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2299 std::swap(LHS, RHS);
2300
2301 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2302 return false;
2303
2304 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2305 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2306 const Value *AndLHS = AI->getOperand(0);
2307 const Value *AndRHS = AI->getOperand(1);
2308
2309 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2310 if (C->getValue().isPowerOf2())
2311 std::swap(AndLHS, AndRHS);
2312
2313 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2314 if (C->getValue().isPowerOf2()) {
2315 TestBit = C->getValue().logBase2();
2316 LHS = AndLHS;
2317 }
2318 }
2319
2320 if (VT == MVT::i1)
2321 TestBit = 0;
2322
2323 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2324 break;
2325 case CmpInst::ICMP_SLT:
2326 case CmpInst::ICMP_SGE:
2327 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2328 return false;
2329
2330 TestBit = BW - 1;
2331 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2332 break;
2333 case CmpInst::ICMP_SGT:
2334 case CmpInst::ICMP_SLE:
2335 if (!isa<ConstantInt>(RHS))
2336 return false;
2337
2338 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2339 return false;
2340
2341 TestBit = BW - 1;
2342 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2343 break;
2344 } // end switch
2345
2346 static const unsigned OpcTable[2][2][2] = {
2347 { {AArch64::CBZW, AArch64::CBZX },
2348 {AArch64::CBNZW, AArch64::CBNZX} },
2349 { {AArch64::TBZW, AArch64::TBZX },
2350 {AArch64::TBNZW, AArch64::TBNZX} }
2351 };
2352
2353 bool IsBitTest = TestBit != -1;
2354 bool Is64Bit = BW == 64;
2355 if (TestBit < 32 && TestBit >= 0)
2356 Is64Bit = false;
2357
2358 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2359 const MCInstrDesc &II = TII.get(Opc);
2360
2361 Register SrcReg = getRegForValue(LHS);
2362 if (!SrcReg)
2363 return false;
2364
2365 if (BW == 64 && !Is64Bit)
2366 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2367
2368 if ((BW < 32) && !IsBitTest)
2369 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2370
2371 // Emit the combined compare and branch instruction.
2372 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2375 .addReg(SrcReg);
2376 if (IsBitTest)
2377 MIB.addImm(TestBit);
2378 MIB.addMBB(TBB);
2379
2380 finishCondBranch(BI->getParent(), TBB, FBB);
2381 return true;
2382}
2383
2384bool AArch64FastISel::selectBranch(const Instruction *I) {
2385 const BranchInst *BI = cast<BranchInst>(I);
2386 if (BI->isUnconditional()) {
2387 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2388 fastEmitBranch(MSucc, BI->getDebugLoc());
2389 return true;
2390 }
2391
2392 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2394
2395 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2396 if (CI->hasOneUse() && isValueAvailable(CI)) {
2397 // Try to optimize or fold the cmp.
2398 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2399 switch (Predicate) {
2400 default:
2401 break;
2403 fastEmitBranch(FBB, MIMD.getDL());
2404 return true;
2405 case CmpInst::FCMP_TRUE:
2406 fastEmitBranch(TBB, MIMD.getDL());
2407 return true;
2408 }
2409
2410 // Try to emit a combined compare-and-branch first.
2411 if (emitCompareAndBranch(BI))
2412 return true;
2413
2414 // Try to take advantage of fallthrough opportunities.
2415 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2416 std::swap(TBB, FBB);
2418 }
2419
2420 // Emit the cmp.
2421 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2422 return false;
2423
2424 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2425 // instruction.
2426 AArch64CC::CondCode CC = getCompareCC(Predicate);
2428 switch (Predicate) {
2429 default:
2430 break;
2431 case CmpInst::FCMP_UEQ:
2432 ExtraCC = AArch64CC::EQ;
2433 CC = AArch64CC::VS;
2434 break;
2435 case CmpInst::FCMP_ONE:
2436 ExtraCC = AArch64CC::MI;
2437 CC = AArch64CC::GT;
2438 break;
2439 }
2440 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2441
2442 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2443 if (ExtraCC != AArch64CC::AL) {
2444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2445 .addImm(ExtraCC)
2446 .addMBB(TBB);
2447 }
2448
2449 // Emit the branch.
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2451 .addImm(CC)
2452 .addMBB(TBB);
2453
2454 finishCondBranch(BI->getParent(), TBB, FBB);
2455 return true;
2456 }
2457 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2458 uint64_t Imm = CI->getZExtValue();
2459 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2461 .addMBB(Target);
2462
2463 // Obtain the branch probability and add the target to the successor list.
2464 if (FuncInfo.BPI) {
2465 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2466 BI->getParent(), Target->getBasicBlock());
2467 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2468 } else
2469 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2470 return true;
2471 } else {
2473 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2474 // Fake request the condition, otherwise the intrinsic might be completely
2475 // optimized away.
2476 Register CondReg = getRegForValue(BI->getCondition());
2477 if (!CondReg)
2478 return false;
2479
2480 // Emit the branch.
2481 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2482 .addImm(CC)
2483 .addMBB(TBB);
2484
2485 finishCondBranch(BI->getParent(), TBB, FBB);
2486 return true;
2487 }
2488 }
2489
2490 Register CondReg = getRegForValue(BI->getCondition());
2491 if (CondReg == 0)
2492 return false;
2493
2494 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495 unsigned Opcode = AArch64::TBNZW;
2496 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497 std::swap(TBB, FBB);
2498 Opcode = AArch64::TBZW;
2499 }
2500
2501 const MCInstrDesc &II = TII.get(Opcode);
2502 Register ConstrainedCondReg
2503 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2505 .addReg(ConstrainedCondReg)
2506 .addImm(0)
2507 .addMBB(TBB);
2508
2509 finishCondBranch(BI->getParent(), TBB, FBB);
2510 return true;
2511}
2512
2513bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515 Register AddrReg = getRegForValue(BI->getOperand(0));
2516 if (AddrReg == 0)
2517 return false;
2518
2519 // Emit the indirect branch.
2520 const MCInstrDesc &II = TII.get(AArch64::BR);
2521 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2523
2524 // Make sure the CFG is up-to-date.
2525 for (const auto *Succ : BI->successors())
2526 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2527
2528 return true;
2529}
2530
2531bool AArch64FastISel::selectCmp(const Instruction *I) {
2532 const CmpInst *CI = cast<CmpInst>(I);
2533
2534 // Vectors of i1 are weird: bail out.
2535 if (CI->getType()->isVectorTy())
2536 return false;
2537
2538 // Try to optimize or fold the cmp.
2539 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540 unsigned ResultReg = 0;
2541 switch (Predicate) {
2542 default:
2543 break;
2545 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2547 TII.get(TargetOpcode::COPY), ResultReg)
2548 .addReg(AArch64::WZR, getKillRegState(true));
2549 break;
2550 case CmpInst::FCMP_TRUE:
2551 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552 break;
2553 }
2554
2555 if (ResultReg) {
2556 updateValueMap(I, ResultReg);
2557 return true;
2558 }
2559
2560 // Emit the cmp.
2561 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562 return false;
2563
2564 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2565
2566 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567 // condition codes are inverted, because they are used by CSINC.
2568 static unsigned CondCodeTable[2][2] = {
2571 };
2572 unsigned *CondCodes = nullptr;
2573 switch (Predicate) {
2574 default:
2575 break;
2576 case CmpInst::FCMP_UEQ:
2577 CondCodes = &CondCodeTable[0][0];
2578 break;
2579 case CmpInst::FCMP_ONE:
2580 CondCodes = &CondCodeTable[1][0];
2581 break;
2582 }
2583
2584 if (CondCodes) {
2585 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2587 TmpReg1)
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addReg(AArch64::WZR, getKillRegState(true))
2590 .addImm(CondCodes[0]);
2591 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2592 ResultReg)
2593 .addReg(TmpReg1, getKillRegState(true))
2594 .addReg(AArch64::WZR, getKillRegState(true))
2595 .addImm(CondCodes[1]);
2596
2597 updateValueMap(I, ResultReg);
2598 return true;
2599 }
2600
2601 // Now set a register based on the comparison.
2602 AArch64CC::CondCode CC = getCompareCC(Predicate);
2603 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2605 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2606 ResultReg)
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addReg(AArch64::WZR, getKillRegState(true))
2609 .addImm(invertedCC);
2610
2611 updateValueMap(I, ResultReg);
2612 return true;
2613}
2614
2615/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616/// value.
2617bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618 if (!SI->getType()->isIntegerTy(1))
2619 return false;
2620
2621 const Value *Src1Val, *Src2Val;
2622 unsigned Opc = 0;
2623 bool NeedExtraOp = false;
2624 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625 if (CI->isOne()) {
2626 Src1Val = SI->getCondition();
2627 Src2Val = SI->getFalseValue();
2628 Opc = AArch64::ORRWrr;
2629 } else {
2630 assert(CI->isZero());
2631 Src1Val = SI->getFalseValue();
2632 Src2Val = SI->getCondition();
2633 Opc = AArch64::BICWrr;
2634 }
2635 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636 if (CI->isOne()) {
2637 Src1Val = SI->getCondition();
2638 Src2Val = SI->getTrueValue();
2639 Opc = AArch64::ORRWrr;
2640 NeedExtraOp = true;
2641 } else {
2642 assert(CI->isZero());
2643 Src1Val = SI->getCondition();
2644 Src2Val = SI->getTrueValue();
2645 Opc = AArch64::ANDWrr;
2646 }
2647 }
2648
2649 if (!Opc)
2650 return false;
2651
2652 Register Src1Reg = getRegForValue(Src1Val);
2653 if (!Src1Reg)
2654 return false;
2655
2656 Register Src2Reg = getRegForValue(Src2Val);
2657 if (!Src2Reg)
2658 return false;
2659
2660 if (NeedExtraOp)
2661 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2662
2663 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2664 Src2Reg);
2665 updateValueMap(SI, ResultReg);
2666 return true;
2667}
2668
2669bool AArch64FastISel::selectSelect(const Instruction *I) {
2670 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2671 MVT VT;
2672 if (!isTypeSupported(I->getType(), VT))
2673 return false;
2674
2675 unsigned Opc;
2676 const TargetRegisterClass *RC;
2677 switch (VT.SimpleTy) {
2678 default:
2679 return false;
2680 case MVT::i1:
2681 case MVT::i8:
2682 case MVT::i16:
2683 case MVT::i32:
2684 Opc = AArch64::CSELWr;
2685 RC = &AArch64::GPR32RegClass;
2686 break;
2687 case MVT::i64:
2688 Opc = AArch64::CSELXr;
2689 RC = &AArch64::GPR64RegClass;
2690 break;
2691 case MVT::f32:
2692 Opc = AArch64::FCSELSrrr;
2693 RC = &AArch64::FPR32RegClass;
2694 break;
2695 case MVT::f64:
2696 Opc = AArch64::FCSELDrrr;
2697 RC = &AArch64::FPR64RegClass;
2698 break;
2699 }
2700
2701 const SelectInst *SI = cast<SelectInst>(I);
2702 const Value *Cond = SI->getCondition();
2705
2706 if (optimizeSelect(SI))
2707 return true;
2708
2709 // Try to pickup the flags, so we don't have to emit another compare.
2710 if (foldXALUIntrinsic(CC, I, Cond)) {
2711 // Fake request the condition to force emission of the XALU intrinsic.
2712 Register CondReg = getRegForValue(Cond);
2713 if (!CondReg)
2714 return false;
2715 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2716 isValueAvailable(Cond)) {
2717 const auto *Cmp = cast<CmpInst>(Cond);
2718 // Try to optimize or fold the cmp.
2719 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2720 const Value *FoldSelect = nullptr;
2721 switch (Predicate) {
2722 default:
2723 break;
2725 FoldSelect = SI->getFalseValue();
2726 break;
2727 case CmpInst::FCMP_TRUE:
2728 FoldSelect = SI->getTrueValue();
2729 break;
2730 }
2731
2732 if (FoldSelect) {
2733 Register SrcReg = getRegForValue(FoldSelect);
2734 if (!SrcReg)
2735 return false;
2736
2737 updateValueMap(I, SrcReg);
2738 return true;
2739 }
2740
2741 // Emit the cmp.
2742 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2743 return false;
2744
2745 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2746 CC = getCompareCC(Predicate);
2747 switch (Predicate) {
2748 default:
2749 break;
2750 case CmpInst::FCMP_UEQ:
2751 ExtraCC = AArch64CC::EQ;
2752 CC = AArch64CC::VS;
2753 break;
2754 case CmpInst::FCMP_ONE:
2755 ExtraCC = AArch64CC::MI;
2756 CC = AArch64CC::GT;
2757 break;
2758 }
2759 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2760 } else {
2761 Register CondReg = getRegForValue(Cond);
2762 if (!CondReg)
2763 return false;
2764
2765 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2766 CondReg = constrainOperandRegClass(II, CondReg, 1);
2767
2768 // Emit a TST instruction (ANDS wzr, reg, #imm).
2769 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2770 AArch64::WZR)
2771 .addReg(CondReg)
2773 }
2774
2775 Register Src1Reg = getRegForValue(SI->getTrueValue());
2776 Register Src2Reg = getRegForValue(SI->getFalseValue());
2777
2778 if (!Src1Reg || !Src2Reg)
2779 return false;
2780
2781 if (ExtraCC != AArch64CC::AL)
2782 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2783
2784 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2785 updateValueMap(I, ResultReg);
2786 return true;
2787}
2788
2789bool AArch64FastISel::selectFPExt(const Instruction *I) {
2790 Value *V = I->getOperand(0);
2791 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2792 return false;
2793
2794 Register Op = getRegForValue(V);
2795 if (Op == 0)
2796 return false;
2797
2798 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2799 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2800 ResultReg).addReg(Op);
2801 updateValueMap(I, ResultReg);
2802 return true;
2803}
2804
2805bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2806 Value *V = I->getOperand(0);
2807 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2808 return false;
2809
2810 Register Op = getRegForValue(V);
2811 if (Op == 0)
2812 return false;
2813
2814 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2815 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2816 ResultReg).addReg(Op);
2817 updateValueMap(I, ResultReg);
2818 return true;
2819}
2820
2821// FPToUI and FPToSI
2822bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2823 MVT DestVT;
2824 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2825 return false;
2826
2827 Register SrcReg = getRegForValue(I->getOperand(0));
2828 if (SrcReg == 0)
2829 return false;
2830
2831 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2832 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2833 return false;
2834
2835 unsigned Opc;
2836 if (SrcVT == MVT::f64) {
2837 if (Signed)
2838 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2839 else
2840 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2841 } else {
2842 if (Signed)
2843 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2844 else
2845 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2846 }
2847 Register ResultReg = createResultReg(
2848 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2849 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2850 .addReg(SrcReg);
2851 updateValueMap(I, ResultReg);
2852 return true;
2853}
2854
2855bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2856 MVT DestVT;
2857 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2858 return false;
2859 // Let regular ISEL handle FP16
2860 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2861 return false;
2862
2863 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2864 "Unexpected value type.");
2865
2866 Register SrcReg = getRegForValue(I->getOperand(0));
2867 if (!SrcReg)
2868 return false;
2869
2870 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2871
2872 // Handle sign-extension.
2873 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2874 SrcReg =
2875 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2876 if (!SrcReg)
2877 return false;
2878 }
2879
2880 unsigned Opc;
2881 if (SrcVT == MVT::i64) {
2882 if (Signed)
2883 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2884 else
2885 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2886 } else {
2887 if (Signed)
2888 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2889 else
2890 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2891 }
2892
2893 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2894 updateValueMap(I, ResultReg);
2895 return true;
2896}
2897
/// Attempt to lower the function's incoming formal arguments entirely in
/// FastISel, by assigning each argument directly to one of the first eight
/// GPR (W/X) or FPR (H/S/D/Q) argument registers per AAPCS64. Returns false
/// to fall back to SelectionDAG for anything the fast path does not handle
/// (varargs, aggregates, special-attribute args, >8 registers per bank, ...).
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  // NOTE(review): the calling-convention condition that guards this early
  // return is not visible in this excerpt -- confirm against the full source.
    return false;

  // Reserved argument registers (custom calling conventions) would invalidate
  // the fixed register tables below.
  if (Subtarget->hasCustomCallingConv())
    return false;

  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    // Arguments with ABI-affecting attributes need the full lowering path.
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftAsync) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple())
      return false;

    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;

    // Vector arguments are only handled with NEON on little-endian targets.
    if (VT.isVector() &&
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;

    // Classify the argument into the GPR or FPR bank and count it.
    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
             VT.is128BitVector())
      ++FPRCnt;
    else
      return false;

    // AAPCS64 passes at most 8 arguments per register bank.
    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  // Argument register tables, indexed by [register width class][arg index].
  static const MCPhysReg Registers[6][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      // Sub-i32 integers are handled in a full W register.
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16 || VT == MVT::bf16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");

    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
3009
/// Lower the outgoing arguments of a call: analyze their locations with the
/// calling convention, emit CALLSEQ_START, and either copy each argument into
/// its assigned register (recording it in CLI.OutRegs) or store it to its
/// assigned stack slot relative to SP. On success, \p NumBytes is set to the
/// stack space the call sequence needs.
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  // NOTE(review): the declaration of ArgLocs (a SmallVector<CCValAssign, N>)
  // is not visible in this excerpt.
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
    .addImm(NumBytes).addImm(0);

  // Process the args.
  for (CCValAssign &VA : ArgLocs) {
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    Register ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
    // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      // On big-endian targets, small arguments occupy the high bytes of
      // their 8-byte stack slot, so bias the offset accordingly.
      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      Align Alignment = DL.getABITypeAlign(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);

      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
        return false;
    }
  }
  return true;
}
3099
/// Finish a lowered call: emit CALLSEQ_END and copy each return value out of
/// its convention-assigned physical register into consecutive virtual result
/// registers, populating CLI.ResultReg/NumResultRegs/InRegs.
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(0);

  // Now the return values.
  // NOTE(review): the declaration of RVLocs (a SmallVector<CCValAssign, N>)
  // is not visible in this excerpt.
  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
  CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));

  // CreateRegs returns the first of a run of consecutive registers; result
  // i lives in ResultReg + i.
  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT CopyVT = VA.getValVT();
    unsigned CopyReg = ResultReg + i;

    // TODO: Handle big-endian results
    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
      return false;

    // Copy result out of their specified physreg.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
            CopyReg)
        .addReg(VA.getLocReg());
    CLI.InRegs.push_back(VA.getLocReg());
  }

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = RVLocs.size();

  return true;
}
3135
/// FastISel entry point for lowering a call. Performs a long series of
/// bail-out checks (cases SelectionDAG must handle), lowers the arguments via
/// processCallArgs, emits the BL/BLR (or, for the large code model, an
/// ADRP+LDR materialization of the callee followed by BLR), and finishes via
/// finishCall. Returns false to fall back to SelectionDAG.
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
  // a bti instruction following the call.
  // NOTE(review): the final conjunct of this condition (the branch-target
  // enforcement query) is not visible in this excerpt.
  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget->noBTIAtReturnTwice() &&
    return false;

  // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
  if (CLI.CB && CLI.CB->isIndirectCall() &&
      CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // FIXME: we could and should support this, but for now correctness at -O0 is
  // more important.
  if (Subtarget->isTargetILP32())
    return false;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small-addressing and large code models.
  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
  // attribute. Check "RtLibUseGOT" instead.
  if (MF->getFunction().getParent()->getRtLibUseGOT())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  if (Subtarget->isWindowsArm64EC())
    return false;

  // Arguments with ABI-affecting flags need the full lowering path.
  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
        Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (Callee && !computeCallAddress(Callee, Addr))
    return false;

  // The weak function target may be zero; in that case we must use indirect
  // addressing via a stub on windows as it may be out of range for a
  // PC-relative jump.
  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
      Addr.getGlobalValue()->hasExternalWeakLinkage())
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  if (RegInfo->isAnyArgRegReserved(*MF))
    RegInfo->emitReservedArgRegCallError(*MF);

  // Issue the call.
  // NOTE(review): the declaration of MIB (MachineInstrBuilder) is not visible
  // in this excerpt.
  if (Subtarget->useSmallAddressing()) {
    // Direct BL when the target is a symbol/global; BLR for a register callee.
    const MCInstrDesc &II =
        TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
    if (Symbol)
      MIB.addSym(Symbol, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg()) {
      // NOTE(review): the declaration of Reg (the constrained callee register)
      // is not visible in this excerpt.
      MIB.addReg(Reg);
    } else
      return false;
  } else {
    // Large code model: materialize the callee address, then call through BLR.
    unsigned CallReg = 0;
    if (Symbol) {
      Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
              ADRPReg)
      // NOTE(review): the .addSym operand of the ADRP, and the flag operand of
      // the LDRXui's .addSym below, are not visible in this excerpt.

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(AArch64::LDRXui), CallReg)
          .addReg(ADRPReg)
          .addSym(Symbol,
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
    CallReg = constrainOperandRegClass(II, CallReg, 0);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, NumBytes);
}
3285
3286bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3287 if (Alignment)
3288 return Len / Alignment->value() <= 4;
3289 else
3290 return Len < 32;
3291}
3292
3293bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3294 uint64_t Len, MaybeAlign Alignment) {
3295 // Make sure we don't bloat code by inlining very large memcpy's.
3296 if (!isMemCpySmall(Len, Alignment))
3297 return false;
3298
3299 int64_t UnscaledOffset = 0;
3300 Address OrigDest = Dest;
3301 Address OrigSrc = Src;
3302
3303 while (Len) {
3304 MVT VT;
3305 if (!Alignment || *Alignment >= 8) {
3306 if (Len >= 8)
3307 VT = MVT::i64;
3308 else if (Len >= 4)
3309 VT = MVT::i32;
3310 else if (Len >= 2)
3311 VT = MVT::i16;
3312 else {
3313 VT = MVT::i8;
3314 }
3315 } else {
3316 assert(Alignment && "Alignment is set in this branch");
3317 // Bound based on alignment.
3318 if (Len >= 4 && *Alignment == 4)
3319 VT = MVT::i32;
3320 else if (Len >= 2 && *Alignment == 2)
3321 VT = MVT::i16;
3322 else {
3323 VT = MVT::i8;
3324 }
3325 }
3326
3327 unsigned ResultReg = emitLoad(VT, VT, Src);
3328 if (!ResultReg)
3329 return false;
3330
3331 if (!emitStore(VT, ResultReg, Dest))
3332 return false;
3333
3334 int64_t Size = VT.getSizeInBits() / 8;
3335 Len -= Size;
3336 UnscaledOffset += Size;
3337
3338 // We need to recompute the unscaled offset for each iteration.
3339 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3340 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3341 }
3342
3343 return true;
3344}
3345
/// Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
///
/// \p Cond must be the overflow bit (extractvalue index 1 is assumed by the
/// callers) of a *.with.overflow intrinsic in the same basic block as \p I,
/// with only extractvalues of that intrinsic between them; \p CC receives the
/// AArch64 condition code that tests the overflow flag set by the intrinsic's
/// lowered form.
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  // The intrinsic returns {result, overflow}; element 0 is the value type.
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);

  // Canonicalize immediate to the RHS.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
    std::swap(LHS, RHS);

  // Simplify multiplies: x * 2 overflows iff x + x overflows, so treat it as
  // an add (must match the rewrite done when the intrinsic itself is lowered).
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::smul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::sadd_with_overflow;
    break;
  case Intrinsic::umul_with_overflow:
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::uadd_with_overflow;
    break;
  }

  // Map the intrinsic to the condition code that reads its overflow flag.
  AArch64CC::CondCode TmpCC;
  switch (IID) {
  default:
    return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
    TmpCC = AArch64CC::VS;
    break;
  case Intrinsic::uadd_with_overflow:
    TmpCC = AArch64CC::HS;
    break;
  case Intrinsic::usub_with_overflow:
    TmpCC = AArch64CC::LO;
    break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    TmpCC = AArch64CC::NE;
    break;
  }

  // Check if both instructions are in the same basic block.
  if (!isValueAvailable(II))
    return false;

  // Make sure nothing is in the way
  // NOTE(review): the declarations of the iterators Start/End (walking
  // backwards from I to II) are not visible in this excerpt.
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  // Only update the out-parameter once folding is known to be possible.
  CC = TmpCC;
  return true;
}
3435
/// FastISel entry point for intrinsic calls. Handles a small set of
/// intrinsics directly (frameaddress, sponentry, small memcpy/memmove,
/// memset/mem* via libcalls, sin/cos/tan/pow via libcalls, fabs, traps, sqrt,
/// the *.with.overflow family, and the CRC32 intrinsics); returns false for
/// everything else so SelectionDAG can lower it.
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure.");
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::sponentry: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();

    // SP = FP + Fixed Object + 16
    int FI = MFI.CreateFixedObject(4, 0, false);
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::ADDXri), ResultReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addImm(0);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      MaybeAlign Alignment;
      if (MTI->getDestAlign() || MTI->getSourceAlign())
        Alignment = std::min(MTI->getDestAlign().valueOrOne(),
                             MTI->getSourceAlign().valueOrOne());
      if (isMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
          return false;
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // The libcall takes a 64-bit length.
    if (!MTI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    // Fall back to a libcall, dropping the trailing isvolatile argument.
    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    return lowerCallTo(II, "memset", II->arg_size() - 1);
  }
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::tan:
  case Intrinsic::pow: {
    MVT RetVT;
    if (!isTypeLegal(II->getType(), RetVT))
      return false;

    if (RetVT != MVT::f32 && RetVT != MVT::f64)
      return false;

    // Libcall table indexed by [intrinsic][is f64].
    static const RTLIB::Libcall LibCallTable[4][2] = {
        {RTLIB::SIN_F32, RTLIB::SIN_F64},
        {RTLIB::COS_F32, RTLIB::COS_F64},
        {RTLIB::TAN_F32, RTLIB::TAN_F64},
        {RTLIB::POW_F32, RTLIB::POW_F64}};
    RTLIB::Libcall LC;
    bool Is64Bit = RetVT == MVT::f64;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
      break;
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
      break;
    case Intrinsic::tan:
      LC = LibCallTable[2][Is64Bit];
      break;
    case Intrinsic::pow:
      LC = LibCallTable[3][Is64Bit];
      break;
    }

    ArgListTy Args;
    Args.reserve(II->arg_size());

    // Populate the argument list.
    for (auto &Arg : II->args()) {
      ArgListEntry Entry;
      Entry.Val = Arg;
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);
    }

    CallLoweringInfo CLI;
    MCContext &Ctx = MF->getContext();
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
                  TLI.getLibcallName(LC), std::move(Args));
    if (!lowerCallTo(CLI))
      return false;
    updateValueMap(II, CLI.ResultReg);
    return true;
  }
  case Intrinsic::fabs: {
    MVT VT;
    if (!isTypeLegal(II->getType(), VT))
      return false;

    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      return false;
    case MVT::f32:
      Opc = AArch64::FABSSr;
      break;
    case MVT::f64:
      Opc = AArch64::FABSDr;
      break;
    }
    Register SrcReg = getRegForValue(II->getOperand(0));
    if (!SrcReg)
      return false;
    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
        .addReg(SrcReg);
    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::trap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  case Intrinsic::debugtrap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
        .addImm(0xF000);
    return true;

  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    Register Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;

    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
      std::swap(LHS, RHS);

    // Simplify multiplies: x * 2 -> x + x, which also overflows identically.
    Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    default:
      break;
    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
          RHS = LHS;
        }
      break;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;
          RHS = LHS;
        }
      break;
    }

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    // NOTE(review): the declaration of CC (AArch64CC::CondCode) is not
    // visible in this excerpt.
    switch (IID) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      Register LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;

      Register RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;

      if (VT == MVT::i32) {
        // Widen to 64 bits, then compare the full product against its
        // sign-extended low half to detect overflow.
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
        Register MulSubReg =
            fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
        // cmp xreg, wreg, sxtw
        emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
                      AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
                      /*WantResult=*/false);
        MulReg = MulSubReg;
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, RHSReg);
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
        // Overflow iff the high half differs from the low half's sign bits.
        emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
                    /*WantResult=*/false);
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      Register LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;

      Register RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;

      if (VT == MVT::i32) {
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
        // tst xreg, #0xffffffff00000000
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::ANDSXri), AArch64::XZR)
            .addReg(MulReg)
            .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, RHSReg);
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
        // Overflow iff the unsigned high half is non-zero.
        emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
      }
      break;
    }
    }

    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    if (!ResultReg1)
      return false;

    // Materialize the overflow bit: CSINC produces 1 on the inverted
    // condition, i.e. exactly when CC (the overflow condition) holds... 0.
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, AArch64::WZR,
                                  getInvertedCondCode(CC));
    (void)ResultReg2;
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  case Intrinsic::aarch64_crc32b:
  case Intrinsic::aarch64_crc32h:
  case Intrinsic::aarch64_crc32w:
  case Intrinsic::aarch64_crc32x:
  case Intrinsic::aarch64_crc32cb:
  case Intrinsic::aarch64_crc32ch:
  case Intrinsic::aarch64_crc32cw:
  case Intrinsic::aarch64_crc32cx: {
    if (!Subtarget->hasCRC())
      return false;

    unsigned Opc;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::aarch64_crc32b:
      Opc = AArch64::CRC32Brr;
      break;
    case Intrinsic::aarch64_crc32h:
      Opc = AArch64::CRC32Hrr;
      break;
    case Intrinsic::aarch64_crc32w:
      Opc = AArch64::CRC32Wrr;
      break;
    case Intrinsic::aarch64_crc32x:
      Opc = AArch64::CRC32Xrr;
      break;
    case Intrinsic::aarch64_crc32cb:
      Opc = AArch64::CRC32CBrr;
      break;
    case Intrinsic::aarch64_crc32ch:
      Opc = AArch64::CRC32CHrr;
      break;
    case Intrinsic::aarch64_crc32cw:
      Opc = AArch64::CRC32CWrr;
      break;
    case Intrinsic::aarch64_crc32cx:
      Opc = AArch64::CRC32CXrr;
      break;
    }

    Register LHSReg = getRegForValue(II->getArgOperand(0));
    Register RHSReg = getRegForValue(II->getArgOperand(1));
    if (!LHSReg || !RHSReg)
      return false;

    Register ResultReg =
        fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
    updateValueMap(II, ResultReg);
    return true;
  }
  }
  return false;
}
3840
/// Lower a 'ret' instruction. Handles at most a single register return value,
/// emitting an integer extension when the value type is narrower than the
/// convention's return type, then emits RET_ReallyLR with the return register
/// as an implicit use. Returns false for anything SelectionDAG must handle
/// (varargs, swifterror, split-CSR, memory returns, f128, ...).
bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  // NOTE(review): the declaration of RetRegs is not visible in this excerpt.

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    // NOTE(review): the declaration of Outs (SmallVector<ISD::OutputArg, N>)
    // is not visible in this excerpt.
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    // NOTE(review): the declaration of ValLocs (SmallVector<CCValAssign, N>)
    // is not visible in this excerpt.
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    Register DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // "Callee" (i.e. value producer) zero extends pointers at function
    // boundary.
    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
      SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}
3944
// Select an integer 'trunc' instruction. On AArch64 a truncate never changes
// the register contents: it is either a plain 32-bit COPY or, when
// truncating i64 to a non-legal narrow type, an AND that masks the value
// down to the destination width.
bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // Only scalar integer truncations are handled here.
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  Register SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generate a COPY. We cannot mark the source register also as result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg);
  }

  updateValueMap(I, ResultReg);
  return true;
}
4009
4010unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4011 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4012 DestVT == MVT::i64) &&
4013 "Unexpected value type.");
4014 // Handle i8 and i16 as i32.
4015 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4016 DestVT = MVT::i32;
4017
4018 if (IsZExt) {
4019 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4020 assert(ResultReg && "Unexpected AND instruction emission failure.");
4021 if (DestVT == MVT::i64) {
4022 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4023 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4024 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4025 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4026 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4027 .addImm(0)
4028 .addReg(ResultReg)
4029 .addImm(AArch64::sub_32);
4030 ResultReg = Reg64;
4031 }
4032 return ResultReg;
4033 } else {
4034 if (DestVT == MVT::i64) {
4035 // FIXME: We're SExt i1 to i64.
4036 return 0;
4037 }
4038 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4039 0, 0);
4040 }
4041}
4042
4043unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4044 unsigned Opc, ZReg;
4045 switch (RetVT.SimpleTy) {
4046 default: return 0;
4047 case MVT::i8:
4048 case MVT::i16:
4049 case MVT::i32:
4050 RetVT = MVT::i32;
4051 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4052 case MVT::i64:
4053 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4054 }
4055
4056 const TargetRegisterClass *RC =
4057 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4058 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4059}
4060
4061unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4062 if (RetVT != MVT::i64)
4063 return 0;
4064
4065 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4066 Op0, Op1, AArch64::XZR);
4067}
4068
4069unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4070 if (RetVT != MVT::i64)
4071 return 0;
4072
4073 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4074 Op0, Op1, AArch64::XZR);
4075}
4076
4077unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4078 unsigned Op1Reg) {
4079 unsigned Opc = 0;
4080 bool NeedTrunc = false;
4081 uint64_t Mask = 0;
4082 switch (RetVT.SimpleTy) {
4083 default: return 0;
4084 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4085 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4086 case MVT::i32: Opc = AArch64::LSLVWr; break;
4087 case MVT::i64: Opc = AArch64::LSLVXr; break;
4088 }
4089
4090 const TargetRegisterClass *RC =
4091 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4092 if (NeedTrunc)
4093 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4094
4095 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4096 if (NeedTrunc)
4097 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4098 return ResultReg;
4099}
4100
// Emit a logical-shift-left by the immediate 'Shift', folding a pending
// zero-/sign-extension from SrcVT to RetVT into a single {U|S}BFM bitfield
// move. Returns the result register, or 0 if the shift is undefined for the
// destination width.
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // Place the narrow source in an X register so the 64-bit bitfield move
    // can consume it; the BFM itself performs the extension.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
4178
4179unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4180 unsigned Op1Reg) {
4181 unsigned Opc = 0;
4182 bool NeedTrunc = false;
4183 uint64_t Mask = 0;
4184 switch (RetVT.SimpleTy) {
4185 default: return 0;
4186 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4187 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4188 case MVT::i32: Opc = AArch64::LSRVWr; break;
4189 case MVT::i64: Opc = AArch64::LSRVXr; break;
4190 }
4191
4192 const TargetRegisterClass *RC =
4193 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4194 if (NeedTrunc) {
4195 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4196 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4197 }
4198 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4199 if (NeedTrunc)
4200 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4201 return ResultReg;
4202}
4203
// Emit a logical-shift-right by the immediate 'Shift', folding a pending
// zero-extension from SrcVT to RetVT into a single UBFM. A sign-extended
// source cannot be folded and is extended explicitly first. Returns the
// result register, or 0 on failure.
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting all source bits out of a zero-extended value yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // Place the narrow source in an X register so the 64-bit bitfield move
    // can consume it.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
4294
4295unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4296 unsigned Op1Reg) {
4297 unsigned Opc = 0;
4298 bool NeedTrunc = false;
4299 uint64_t Mask = 0;
4300 switch (RetVT.SimpleTy) {
4301 default: return 0;
4302 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4303 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4304 case MVT::i32: Opc = AArch64::ASRVWr; break;
4305 case MVT::i64: Opc = AArch64::ASRVXr; break;
4306 }
4307
4308 const TargetRegisterClass *RC =
4309 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4310 if (NeedTrunc) {
4311 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4312 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4313 }
4314 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4315 if (NeedTrunc)
4316 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4317 return ResultReg;
4318}
4319
// Emit an arithmetic-shift-right by the immediate 'Shift', folding a pending
// zero-/sign-extension from SrcVT to RetVT into a single {U|S}BFM. Returns
// the result register, or 0 on failure.
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting all source bits out of a zero-extended value yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // Place the narrow source in an X register so the 64-bit bitfield move
    // can consume it.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
4399
// Emit a sign-/zero-extension of SrcReg from SrcVT to DestVT using a single
// {U|S}BFM bitfield move (i1 sources are delegated to emiti1Ext). Returns
// the result register, or 0 if the type pair must be left to SelectionDAG.
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;  // Index of the highest source bit the BFM copies.

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    // A 64-bit destination needs the source wrapped into an X register so
    // the 64-bit BFM can consume it.
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}
4460
4461static bool isZExtLoad(const MachineInstr *LI) {
4462 switch (LI->getOpcode()) {
4463 default:
4464 return false;
4465 case AArch64::LDURBBi:
4466 case AArch64::LDURHHi:
4467 case AArch64::LDURWi:
4468 case AArch64::LDRBBui:
4469 case AArch64::LDRHHui:
4470 case AArch64::LDRWui:
4471 case AArch64::LDRBBroX:
4472 case AArch64::LDRHHroX:
4473 case AArch64::LDRWroX:
4474 case AArch64::LDRBBroW:
4475 case AArch64::LDRHHroW:
4476 case AArch64::LDRWroW:
4477 return true;
4478 }
4479}
4480
4481static bool isSExtLoad(const MachineInstr *LI) {
4482 switch (LI->getOpcode()) {
4483 default:
4484 return false;
4485 case AArch64::LDURSBWi:
4486 case AArch64::LDURSHWi:
4487 case AArch64::LDURSBXi:
4488 case AArch64::LDURSHXi:
4489 case AArch64::LDURSWi:
4490 case AArch64::LDRSBWui:
4491 case AArch64::LDRSHWui:
4492 case AArch64::LDRSBXui:
4493 case AArch64::LDRSHXui:
4494 case AArch64::LDRSWui:
4495 case AArch64::LDRSBWroX:
4496 case AArch64::LDRSHWroX:
4497 case AArch64::LDRSBXroX:
4498 case AArch64::LDRSHXroX:
4499 case AArch64::LDRSWroX:
4500 case AArch64::LDRSBWroW:
4501 case AArch64::LDRSHWroW:
4502 case AArch64::LDRSBXroW:
4503 case AArch64::LDRSHXroW:
4504 case AArch64::LDRSWroW:
4505 return true;
4506 }
4507}
4508
4509bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4510 MVT SrcVT) {
4511 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4512 if (!LI || !LI->hasOneUse())
4513 return false;
4514
4515 // Check if the load instruction has already been selected.
4516 Register Reg = lookUpRegForValue(LI);
4517 if (!Reg)
4518 return false;
4519
4520 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4521 if (!MI)
4522 return false;
4523
4524 // Check if the correct load instruction has been emitted - SelectionDAG might
4525 // have emitted a zero-extending load, but we need a sign-extending load.
4526 bool IsZExt = isa<ZExtInst>(I);
4527 const auto *LoadMI = MI;
4528 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4529 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4530 Register LoadReg = MI->getOperand(1).getReg();
4531 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4532 assert(LoadMI && "Expected valid instruction");
4533 }
4534 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4535 return false;
4536
4537 // Nothing to be done.
4538 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4539 updateValueMap(I, Reg);
4540 return true;
4541 }
4542
4543 if (IsZExt) {
4544 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4546 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4547 .addImm(0)
4548 .addReg(Reg, getKillRegState(true))
4549 .addImm(AArch64::sub_32);
4550 Reg = Reg64;
4551 } else {
4552 assert((MI->getOpcode() == TargetOpcode::COPY &&
4553 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4554 "Expected copy instruction");
4555 Reg = MI->getOperand(1).getReg();
4557 removeDeadCode(I, std::next(I));
4558 }
4559 updateValueMap(I, Reg);
4560 return true;
4561}
4562
// Select a 'zext'/'sext' instruction. Tries, in order: folding into an
// already-emitted extending load, reusing a zeroext/signext-attributed
// function argument, and finally an explicit bitfield-move extension.
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  Register SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      // The argument carries the matching zeroext/signext attribute, so the
      // value is already extended; for a 64-bit result only wrap the W
      // register into an X register.
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4608
4609bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4610 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4611 if (!DestEVT.isSimple())
4612 return false;
4613
4614 MVT DestVT = DestEVT.getSimpleVT();
4615 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4616 return false;
4617
4618 unsigned DivOpc;
4619 bool Is64bit = (DestVT == MVT::i64);
4620 switch (ISDOpcode) {
4621 default:
4622 return false;
4623 case ISD::SREM:
4624 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4625 break;
4626 case ISD::UREM:
4627 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4628 break;
4629 }
4630 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4631 Register Src0Reg = getRegForValue(I->getOperand(0));
4632 if (!Src0Reg)
4633 return false;
4634
4635 Register Src1Reg = getRegForValue(I->getOperand(1));
4636 if (!Src1Reg)
4637 return false;
4638
4639 const TargetRegisterClass *RC =
4640 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4641 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4642 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4643 // The remainder is computed as numerator - (quotient * denominator) using the
4644 // MSUB instruction.
4645 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4646 updateValueMap(I, ResultReg);
4647 return true;
4648}
4649
// Select a 'mul' instruction. A multiply by a power-of-two constant is
// strength-reduced to an immediate shift (folding a free zext/sext of the
// other operand into the shift); everything else is emitted as MADD.
bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  // Canonicalize a power-of-two constant onto the RHS.
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;  // NOTE: shadows the outer VT; holds the extend's source type.
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;  // NOTE: shadows the outer VT; holds the extend's source type.
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      Register Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);

      // If the shift failed, fall through to the generic multiply below.
      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4719
// Select a shl/lshr/ashr instruction. Constant shift amounts use the
// immediate {U|S}BFM-based forms (folding a free zext/sext of the shifted
// value into the same instruction); variable amounts use the register forms.
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    // Only ashr needs a sign-extended source.
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          // Shift the un-extended source; the extension folds into the BFM.
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    Register Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  // Variable shift amount: use the register-shift instructions.
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4805
4806bool AArch64FastISel::selectBitCast(const Instruction *I) {
4807 MVT RetVT, SrcVT;
4808
4809 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4810 return false;
4811 if (!isTypeLegal(I->getType(), RetVT))
4812 return false;
4813
4814 unsigned Opc;
4815 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4816 Opc = AArch64::FMOVWSr;
4817 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4818 Opc = AArch64::FMOVXDr;
4819 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4820 Opc = AArch64::FMOVSWr;
4821 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4822 Opc = AArch64::FMOVDXr;
4823 else
4824 return false;
4825
4826 const TargetRegisterClass *RC = nullptr;
4827 switch (RetVT.SimpleTy) {
4828 default: llvm_unreachable("Unexpected value type.");
4829 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4830 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4831 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4832 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4833 }
4834 Register Op0Reg = getRegForValue(I->getOperand(0));
4835 if (!Op0Reg)
4836 return false;
4837
4838 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4839 if (!ResultReg)
4840 return false;
4841
4842 updateValueMap(I, ResultReg);
4843 return true;
4844}
4845
4846bool AArch64FastISel::selectFRem(const Instruction *I) {
4847 MVT RetVT;
4848 if (!isTypeLegal(I->getType(), RetVT))
4849 return false;
4850
4851 RTLIB::Libcall LC;
4852 switch (RetVT.SimpleTy) {
4853 default:
4854 return false;
4855 case MVT::f32:
4856 LC = RTLIB::REM_F32;
4857 break;
4858 case MVT::f64:
4859 LC = RTLIB::REM_F64;
4860 break;
4861 }
4862
4863 ArgListTy Args;
4864 Args.reserve(I->getNumOperands());
4865
4866 // Populate the argument list.
4867 for (auto &Arg : I->operands()) {
4868 ArgListEntry Entry;
4869 Entry.Val = Arg;
4870 Entry.Ty = Arg->getType();
4871 Args.push_back(Entry);
4872 }
4873
4874 CallLoweringInfo CLI;
4875 MCContext &Ctx = MF->getContext();
4876 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4877 TLI.getLibcallName(LC), std::move(Args));
4878 if (!lowerCallTo(CLI))
4879 return false;
4880 updateValueMap(I, CLI.ResultReg);
4881 return true;
4882}
4883
4884bool AArch64FastISel::selectSDiv(const Instruction *I) {
4885 MVT VT;
4886 if (!isTypeLegal(I->getType(), VT))
4887 return false;
4888
4889 if (!isa<ConstantInt>(I->getOperand(1)))
4890 return selectBinaryOp(I, ISD::SDIV);
4891
4892 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4893 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4894 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4895 return selectBinaryOp(I, ISD::SDIV);
4896
4897 unsigned Lg2 = C.countr_zero();
4898 Register Src0Reg = getRegForValue(I->getOperand(0));
4899 if (!Src0Reg)
4900 return false;
4901
4902 if (cast<BinaryOperator>(I)->isExact()) {
4903 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4904 if (!ResultReg)
4905 return false;
4906 updateValueMap(I, ResultReg);
4907 return true;
4908 }
4909
4910 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4911 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4912 if (!AddReg)
4913 return false;
4914
4915 // (Src0 < 0) ? Pow2 - 1 : 0;
4916 if (!emitICmp_ri(VT, Src0Reg, 0))
4917 return false;
4918
4919 unsigned SelectOpc;
4920 const TargetRegisterClass *RC;
4921 if (VT == MVT::i64) {
4922 SelectOpc = AArch64::CSELXr;
4923 RC = &AArch64::GPR64RegClass;
4924 } else {
4925 SelectOpc = AArch64::CSELWr;
4926 RC = &AArch64::GPR32RegClass;
4927 }
4928 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4930 if (!SelectReg)
4931 return false;
4932
4933 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4934 // negate the result.
4935 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4936 unsigned ResultReg;
4937 if (C.isNegative())
4938 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4939 AArch64_AM::ASR, Lg2);
4940 else
4941 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4942
4943 if (!ResultReg)
4944 return false;
4945
4946 updateValueMap(I, ResultReg);
4947 return true;
4948}
4949
4950/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4951/// have to duplicate it for AArch64, because otherwise we would fail during the
4952/// sign-extend emission.
4953unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4954 Register IdxN = getRegForValue(Idx);
4955 if (IdxN == 0)
4956 // Unhandled operand. Halt "fast" selection and bail.
4957 return 0;
4958
4959 // If the index is smaller or larger than intptr_t, truncate or extend it.
4960 MVT PtrVT = TLI.getPointerTy(DL);
4961 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4962 if (IdxVT.bitsLT(PtrVT)) {
4963 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4964 } else if (IdxVT.bitsGT(PtrVT))
4965 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4966 return IdxN;
4967}
4968
4969/// This is mostly a copy of the existing FastISel GEP code, but we have to
4970/// duplicate it for AArch64, because otherwise we would bail out even for
4971/// simple cases. This is because the standard fastEmit functions don't cover
4972/// MUL at all and ADD is lowered very inefficientily.
4973bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4974 if (Subtarget->isTargetILP32())
4975 return false;
4976
4977 Register N = getRegForValue(I->getOperand(0));
4978 if (!N)
4979 return false;
4980
4981 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4982 // into a single N = N + TotalOffset.
4983 uint64_t TotalOffs = 0;
4984 MVT VT = TLI.getPointerTy(DL);
4986 GTI != E; ++GTI) {
4987 const Value *Idx = GTI.getOperand();
4988 if (auto *StTy = GTI.getStructTypeOrNull()) {
4989 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4990 // N = N + Offset
4991 if (Field)
4992 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4993 } else {
4994 // If this is a constant subscript, handle it quickly.
4995 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4996 if (CI->isZero())
4997 continue;
4998 // N = N + Offset
4999 TotalOffs += GTI.getSequentialElementStride(DL) *
5000 cast<ConstantInt>(CI)->getSExtValue();
5001 continue;
5002 }
5003 if (TotalOffs) {
5004 N = emitAdd_ri_(VT, N, TotalOffs);
5005 if (!N)
5006 return false;
5007 TotalOffs = 0;
5008 }
5009
5010 // N = N + Idx * ElementSize;
5011 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5012 unsigned IdxN = getRegForGEPIndex(Idx);
5013 if (!IdxN)
5014 return false;
5015
5016 if (ElementSize != 1) {
5017 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5018 if (!C)
5019 return false;
5020 IdxN = emitMul_rr(VT, IdxN, C);
5021 if (!IdxN)
5022 return false;
5023 }
5024 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5025 if (!N)
5026 return false;
5027 }
5028 }
5029 if (TotalOffs) {
5030 N = emitAdd_ri_(VT, N, TotalOffs);
5031 if (!N)
5032 return false;
5033 }
5034 updateValueMap(I, N);
5035 return true;
5036}
5037
5038bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5039 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5040 "cmpxchg survived AtomicExpand at optlevel > -O0");
5041
5042 auto *RetPairTy = cast<StructType>(I->getType());
5043 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5044 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5045 "cmpxchg has a non-i1 status result");
5046
5047 MVT VT;
5048 if (!isTypeLegal(RetTy, VT))
5049 return false;
5050
5051 const TargetRegisterClass *ResRC;
5052 unsigned Opc, CmpOpc;
5053 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5054 // extractvalue selection doesn't support that.
5055 if (VT == MVT::i32) {
5056 Opc = AArch64::CMP_SWAP_32;
5057 CmpOpc = AArch64::SUBSWrs;
5058 ResRC = &AArch64::GPR32RegClass;
5059 } else if (VT == MVT::i64) {
5060 Opc = AArch64::CMP_SWAP_64;
5061 CmpOpc = AArch64::SUBSXrs;
5062 ResRC = &AArch64::GPR64RegClass;
5063 } else {
5064 return false;
5065 }
5066
5067 const MCInstrDesc &II = TII.get(Opc);
5068
5069 const Register AddrReg = constrainOperandRegClass(
5070 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5071 const Register DesiredReg = constrainOperandRegClass(
5072 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5073 const Register NewReg = constrainOperandRegClass(
5074 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5075
5076 const Register ResultReg1 = createResultReg(ResRC);
5077 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5078 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5079
5080 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5081 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5082 .addDef(ResultReg1)
5083 .addDef(ScratchReg)
5084 .addUse(AddrReg)
5085 .addUse(DesiredReg)
5086 .addUse(NewReg);
5087
5088 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5089 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5090 .addUse(ResultReg1)
5091 .addUse(DesiredReg)
5092 .addImm(0);
5093
5094 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5095 .addDef(ResultReg2)
5096 .addUse(AArch64::WZR)
5097 .addUse(AArch64::WZR)
5099
5100 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5101 updateValueMap(I, ResultReg1, 2);
5102 return true;
5103}
5104
5105bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5106 if (TLI.fallBackToDAGISel(*I))
5107 return false;
5108 switch (I->getOpcode()) {
5109 default:
5110 break;
5111 case Instruction::Add:
5112 case Instruction::Sub:
5113 return selectAddSub(I);
5114 case Instruction::Mul:
5115 return selectMul(I);
5116 case Instruction::SDiv:
5117 return selectSDiv(I);
5118 case Instruction::SRem:
5119 if (!selectBinaryOp(I, ISD::SREM))
5120 return selectRem(I, ISD::SREM);
5121 return true;
5122 case Instruction::URem:
5123 if (!selectBinaryOp(I, ISD::UREM))
5124 return selectRem(I, ISD::UREM);
5125 return true;
5126 case Instruction::Shl:
5127 case Instruction::LShr:
5128 case Instruction::AShr:
5129 return selectShift(I);
5130 case Instruction::And:
5131 case Instruction::Or:
5132 case Instruction::Xor:
5133 return selectLogicalOp(I);
5134 case Instruction::Br:
5135 return selectBranch(I);
5136 case Instruction::IndirectBr:
5137 return selectIndirectBr(I);
5138 case Instruction::BitCast:
5140 return selectBitCast(I);
5141 return true;
5142 case Instruction::FPToSI:
5143 if (!selectCast(I, ISD::FP_TO_SINT))
5144 return selectFPToInt(I, /*Signed=*/true);
5145 return true;
5146 case Instruction::FPToUI:
5147 return selectFPToInt(I, /*Signed=*/false);
5148 case Instruction::ZExt:
5149 case Instruction::SExt:
5150 return selectIntExt(I);
5151 case Instruction::Trunc:
5152 if (!selectCast(I, ISD::TRUNCATE))
5153 return selectTrunc(I);
5154 return true;
5155 case Instruction::FPExt:
5156 return selectFPExt(I);
5157 case Instruction::FPTrunc:
5158 return selectFPTrunc(I);
5159 case Instruction::SIToFP:
5160 if (!selectCast(I, ISD::SINT_TO_FP))
5161 return selectIntToFP(I, /*Signed=*/true);
5162 return true;
5163 case Instruction::UIToFP:
5164 return selectIntToFP(I, /*Signed=*/false);
5165 case Instruction::Load:
5166 return selectLoad(I);
5167 case Instruction::Store:
5168 return selectStore(I);
5169 case Instruction::FCmp:
5170 case Instruction::ICmp:
5171 return selectCmp(I);
5172 case Instruction::Select:
5173 return selectSelect(I);
5174 case Instruction::Ret:
5175 return selectRet(I);
5176 case Instruction::FRem:
5177 return selectFRem(I);
5178 case Instruction::GetElementPtr:
5179 return selectGetElementPtr(I);
5180 case Instruction::AtomicCmpXchg:
5181 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5182 }
5183
5184 // fall-back to target-independent instruction selection.
5185 return selectOperator(I, I->getOpcode());
5186}
5187
5189 const TargetLibraryInfo *LibInfo) {
5190
5191 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5192 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5193 CallerAttrs.hasStreamingInterfaceOrBody() ||
5194 CallerAttrs.hasStreamingCompatibleInterface())
5195 return nullptr;
5196 return new AArch64FastISel(FuncInfo, LibInfo);
5197}
unsigned const MachineRegisterInfo * MRI
static bool isIntExtFree(const Instruction *I)
Check if the sign-/zero-extend will be a noop.
static bool isSExtLoad(const MachineInstr *LI)
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred)
static bool isMulPowOf2(const Value *I)
Check if the multiply is by a power-of-2 constant.
static unsigned getImplicitScaleFactor(MVT VT)
Determine the implicit scale factor that is applied by a memory operation for a given value type.
static bool isZExtLoad(const MachineInstr *LI)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec)
Emit a load-pair instruction for frame-destroy.
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec)
Emit a store-pair instruction for frame-setup.
unsigned RegSize
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
Definition: