1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191 MachineMemOperand::Flags Flags,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
298/// Check if the sign-/zero-extend will be a noop.
299static bool isIntExtFree(const Instruction *I) {
300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301 "Unexpected integer extend instruction.");
302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303 "Unexpected value type.");
304 bool IsZExt = isa<ZExtInst>(I);
305
306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307 if (LI->hasOneUse())
308 return true;
309
310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312 return true;
313
314 return false;
315}
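// For illustration: a zero-extend whose operand is a single-use load is free
// because the load itself can be selected as a zero-extending LDRB/LDRH, e.g.
//   %v = load i8, ptr %p
//   %e = zext i8 %v to i32    ; becomes one "ldrb w0, [x0]"
// and an argument already marked zeroext/signext needs no extra extend either.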
316
317/// Determine the implicit scale factor that is applied by a memory
318/// operation for a given value type.
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
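// For example, an i32 access has scale factor 4: the scaled, unsigned-offset
// load/store form ("ldr w0, [x1, #8]") encodes the byte offset divided by 4,
// so only multiples of 4 up to 4095 * 4 fit in the 12-bit immediate field.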
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::WebKit_JS)
339 return CC_AArch64_WebKit_JS;
340 if (CC == CallingConv::GHC)
341 return CC_AArch64_GHC;
342 if (CC == CallingConv::CFGuard_Check)
343 return CC_AArch64_Win64_CFGuard_Check;
344 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
345}
346
347unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
348 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
349 "Alloca should always return a pointer.");
350
351 // Don't handle dynamic allocas.
352 if (!FuncInfo.StaticAllocaMap.count(AI))
353 return 0;
354
355 DenseMap<const AllocaInst *, int>::iterator SI =
356 FuncInfo.StaticAllocaMap.find(AI);
357
358 if (SI != FuncInfo.StaticAllocaMap.end()) {
359 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
360 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
361 ResultReg)
362 .addFrameIndex(SI->second)
363 .addImm(0)
364 .addImm(0);
365 return ResultReg;
366 }
367
368 return 0;
369}
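// The ADDXri built above takes the alloca's frame index as its first source
// operand; frame-index elimination later rewrites it into a concrete
// SP- or FP-relative add such as "add x0, sp, #16".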
370
371unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
372 if (VT > MVT::i64)
373 return 0;
374
375 if (!CI->isZero())
376 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
377
378 // Create a copy from the zero register to materialize a "0" value.
379 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
380 : &AArch64::GPR32RegClass;
381 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
382 Register ResultReg = createResultReg(RC);
383 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
384 ResultReg).addReg(ZeroReg, getKillRegState(true));
385 return ResultReg;
386}
387
388unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
389 // Positive zero (+0.0) has to be materialized with a fmov from the zero
390 // register, because the immediate version of fmov cannot encode zero.
391 if (CFP->isNullValue())
392 return fastMaterializeFloatZero(CFP);
393
394 if (VT != MVT::f32 && VT != MVT::f64)
395 return 0;
396
397 const APFloat Val = CFP->getValueAPF();
398 bool Is64Bit = (VT == MVT::f64);
399 // This checks to see if we can use FMOV instructions to materialize
400 // a constant, otherwise we have to materialize via the constant pool.
401 int Imm =
402 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
403 if (Imm != -1) {
404 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
405 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
406 }
407
408 // For the large code model materialize the FP constant in code.
409 if (TM.getCodeModel() == CodeModel::Large) {
410 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
411 const TargetRegisterClass *RC = Is64Bit ?
412 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
413
414 Register TmpReg = createResultReg(RC);
415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
416 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
417
418 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
420 TII.get(TargetOpcode::COPY), ResultReg)
421 .addReg(TmpReg, getKillRegState(true));
422
423 return ResultReg;
424 }
425
426 // Materialize via constant pool. MachineConstantPool wants an explicit
427 // alignment.
428 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
429
430 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
431 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
432 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
433 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
434
435 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
436 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
437 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
438 .addReg(ADRPReg)
439 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
440 return ResultReg;
441}
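// FMOV (immediate) only encodes values of the form +/-(16..31)/16 * 2^e for a
// small exponent range, so constants like 1.0, 0.5 or -2.5 become a single
// "fmov d0, #1.0" while something like 0.1 is loaded from the constant pool
// via ADRP + LDR (or built in a GPR and copied over under the large code
// model, as above).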
442
443unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
444 // We can't handle thread-local variables quickly yet.
445 if (GV->isThreadLocal())
446 return 0;
447
448 // MachO still uses GOT for large code-model accesses, but ELF requires
449 // movz/movk sequences, which FastISel doesn't handle yet.
450 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
451 return 0;
452
453 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
454
455 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
456 if (!DestEVT.isSimple())
457 return 0;
458
459 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
460 unsigned ResultReg;
461
462 if (OpFlags & AArch64II::MO_GOT) {
463 // ADRP + LDRX
464 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
465 ADRPReg)
466 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
467
468 unsigned LdrOpc;
469 if (Subtarget->isTargetILP32()) {
470 ResultReg = createResultReg(&AArch64::GPR32RegClass);
471 LdrOpc = AArch64::LDRWui;
472 } else {
473 ResultReg = createResultReg(&AArch64::GPR64RegClass);
474 LdrOpc = AArch64::LDRXui;
475 }
476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
477 ResultReg)
478 .addReg(ADRPReg)
479 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
480 AArch64II::MO_NC | OpFlags);
481 if (!Subtarget->isTargetILP32())
482 return ResultReg;
483
484 // LDRWui produces a 32-bit register, but pointers in-register are 64 bits,
485 // so we must extend the result on ILP32.
486 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
487 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
488 TII.get(TargetOpcode::SUBREG_TO_REG))
489 .addDef(Result64)
490 .addImm(0)
491 .addReg(ResultReg, RegState::Kill)
492 .addImm(AArch64::sub_32);
493 return Result64;
494 } else {
495 // ADRP + ADDX
496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
497 ADRPReg)
498 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
499
500 if (OpFlags & AArch64II::MO_TAGGED) {
501 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
502 // We do so by creating a MOVK that sets bits 48-63 of the register to
503 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
504 // the small code model so we can assume a binary size of <= 4GB, which
505 // makes the untagged PC relative offset positive. The binary must also be
506 // loaded into address range [0, 2^48). Both of these properties need to
507 // be ensured at runtime when using tagged addresses.
508 //
509 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
510 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
511 // are not exactly 1:1 with FastISel so we cannot easily abstract this
512 // out. At some point, it would be nice to find a way to not have this
513 // duplicate code.
514 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
515 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
516 DstReg)
517 .addReg(ADRPReg)
518 .addGlobalAddress(GV, /*Offset=*/0x100000000,
519 AArch64II::MO_PREL | AArch64II::MO_G3)
520 .addImm(48);
521 ADRPReg = DstReg;
522 }
523
524 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
525 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
526 ResultReg)
527 .addReg(ADRPReg)
528 .addGlobalAddress(GV, 0,
529 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
530 .addImm(0);
531 }
532 return ResultReg;
533}
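// Typical sequences produced here (ELF small code model syntax shown):
//   direct:  adrp x0, sym              GOT:  adrp x0, :got:sym
//            add  x0, x0, :lo12:sym          ldr  x0, [x0, :got_lo12:sym]
// MachO uses the analogous sym@PAGE / sym@PAGEOFF / sym@GOTPAGE relocations.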
534
535unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
536 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
537
538 // Only handle simple types.
539 if (!CEVT.isSimple())
540 return 0;
541 MVT VT = CEVT.getSimpleVT();
542 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
543 // 'null' pointers need somewhat special treatment.
544 if (isa<ConstantPointerNull>(C)) {
545 assert(VT == MVT::i64 && "Expected 64-bit pointers");
546 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
547 }
548
549 if (const auto *CI = dyn_cast<ConstantInt>(C))
550 return materializeInt(CI, VT);
551 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
552 return materializeFP(CFP, VT);
553 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
554 return materializeGV(GV);
555
556 return 0;
557}
558
559unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
560 assert(CFP->isNullValue() &&
561 "Floating-point constant is not a positive zero.");
562 MVT VT;
563 if (!isTypeLegal(CFP->getType(), VT))
564 return 0;
565
566 if (VT != MVT::f32 && VT != MVT::f64)
567 return 0;
568
569 bool Is64Bit = (VT == MVT::f64);
570 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
571 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
572 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
573}
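// That is, +0.0 becomes a single "fmov s0, wzr" / "fmov d0, xzr" instead of a
// constant-pool load.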
574
575/// Check if the multiply is by a power-of-2 constant.
576static bool isMulPowOf2(const Value *I) {
577 if (const auto *MI = dyn_cast<MulOperator>(I)) {
578 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
579 if (C->getValue().isPowerOf2())
580 return true;
581 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
582 if (C->getValue().isPowerOf2())
583 return true;
584 }
585 return false;
586}
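// This lets a multiply such as "mul i64 %idx, 8" be folded below into an
// addressing mode or shifted-register operand ("lsl #3") instead of emitting a
// real MUL.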
587
588// Computes the address to get to an object.
589bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
590{
591 const User *U = nullptr;
592 unsigned Opcode = Instruction::UserOp1;
593 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
594 // Don't walk into other basic blocks unless the object is an alloca from
595 // another block, otherwise it may not have a virtual register assigned.
596 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
597 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
598 Opcode = I->getOpcode();
599 U = I;
600 }
601 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
602 Opcode = C->getOpcode();
603 U = C;
604 }
605
606 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
607 if (Ty->getAddressSpace() > 255)
608 // Fast instruction selection doesn't support the special
609 // address spaces.
610 return false;
611
612 switch (Opcode) {
613 default:
614 break;
615 case Instruction::BitCast:
616 // Look through bitcasts.
617 return computeAddress(U->getOperand(0), Addr, Ty);
618
619 case Instruction::IntToPtr:
620 // Look past no-op inttoptrs.
621 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
622 TLI.getPointerTy(DL))
623 return computeAddress(U->getOperand(0), Addr, Ty);
624 break;
625
626 case Instruction::PtrToInt:
627 // Look past no-op ptrtoints.
628 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
629 return computeAddress(U->getOperand(0), Addr, Ty);
630 break;
631
632 case Instruction::GetElementPtr: {
633 Address SavedAddr = Addr;
634 uint64_t TmpOffset = Addr.getOffset();
635
636 // Iterate through the GEP folding the constants into offsets where
637 // we can.
638 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
639 GTI != E; ++GTI) {
640 const Value *Op = GTI.getOperand();
641 if (StructType *STy = GTI.getStructTypeOrNull()) {
642 const StructLayout *SL = DL.getStructLayout(STy);
643 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
644 TmpOffset += SL->getElementOffset(Idx);
645 } else {
646 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
647 while (true) {
648 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
649 // Constant-offset addressing.
650 TmpOffset += CI->getSExtValue() * S;
651 break;
652 }
653 if (canFoldAddIntoGEP(U, Op)) {
654 // A compatible add with a constant operand. Fold the constant.
655 ConstantInt *CI =
656 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
657 TmpOffset += CI->getSExtValue() * S;
658 // Iterate on the other operand.
659 Op = cast<AddOperator>(Op)->getOperand(0);
660 continue;
661 }
662 // Unsupported
663 goto unsupported_gep;
664 }
665 }
666 }
667
668 // Try to grab the base operand now.
669 Addr.setOffset(TmpOffset);
670 if (computeAddress(U->getOperand(0), Addr, Ty))
671 return true;
672
673 // We failed, restore everything and try the other options.
674 Addr = SavedAddr;
675
676 unsupported_gep:
677 break;
678 }
679 case Instruction::Alloca: {
680 const AllocaInst *AI = cast<AllocaInst>(Obj);
681 DenseMap<const AllocaInst *, int>::iterator SI =
682 FuncInfo.StaticAllocaMap.find(AI);
683 if (SI != FuncInfo.StaticAllocaMap.end()) {
684 Addr.setKind(Address::FrameIndexBase);
685 Addr.setFI(SI->second);
686 return true;
687 }
688 break;
689 }
690 case Instruction::Add: {
691 // Adds of constants are common and easy enough.
692 const Value *LHS = U->getOperand(0);
693 const Value *RHS = U->getOperand(1);
694
695 if (isa<ConstantInt>(LHS))
696 std::swap(LHS, RHS);
697
698 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
699 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
700 return computeAddress(LHS, Addr, Ty);
701 }
702
703 Address Backup = Addr;
704 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
705 return true;
706 Addr = Backup;
707
708 break;
709 }
710 case Instruction::Sub: {
711 // Subs of constants are common and easy enough.
712 const Value *LHS = U->getOperand(0);
713 const Value *RHS = U->getOperand(1);
714
715 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
716 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
717 return computeAddress(LHS, Addr, Ty);
718 }
719 break;
720 }
721 case Instruction::Shl: {
722 if (Addr.getOffsetReg())
723 break;
724
725 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
726 if (!CI)
727 break;
728
729 unsigned Val = CI->getZExtValue();
730 if (Val < 1 || Val > 3)
731 break;
732
733 uint64_t NumBytes = 0;
734 if (Ty && Ty->isSized()) {
735 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
736 NumBytes = NumBits / 8;
737 if (!isPowerOf2_64(NumBits))
738 NumBytes = 0;
739 }
740
741 if (NumBytes != (1ULL << Val))
742 break;
743
744 Addr.setShift(Val);
745 Addr.setExtendType(AArch64_AM::LSL);
746
747 const Value *Src = U->getOperand(0);
748 if (const auto *I = dyn_cast<Instruction>(Src)) {
749 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
750 // Fold the zext or sext when it won't become a noop.
751 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
752 if (!isIntExtFree(ZE) &&
753 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
754 Addr.setExtendType(AArch64_AM::UXTW);
755 Src = ZE->getOperand(0);
756 }
757 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
758 if (!isIntExtFree(SE) &&
759 SE->getOperand(0)->getType()->isIntegerTy(32)) {
760 Addr.setExtendType(AArch64_AM::SXTW);
761 Src = SE->getOperand(0);
762 }
763 }
764 }
765 }
766
767 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
768 if (AI->getOpcode() == Instruction::And) {
769 const Value *LHS = AI->getOperand(0);
770 const Value *RHS = AI->getOperand(1);
771
772 if (const auto *C = dyn_cast<ConstantInt>(LHS))
773 if (C->getValue() == 0xffffffff)
774 std::swap(LHS, RHS);
775
776 if (const auto *C = dyn_cast<ConstantInt>(RHS))
777 if (C->getValue() == 0xffffffff) {
778 Addr.setExtendType(AArch64_AM::UXTW);
779 Register Reg = getRegForValue(LHS);
780 if (!Reg)
781 return false;
782 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
783 Addr.setOffsetReg(Reg);
784 return true;
785 }
786 }
787
788 Register Reg = getRegForValue(Src);
789 if (!Reg)
790 return false;
791 Addr.setOffsetReg(Reg);
792 return true;
793 }
794 case Instruction::Mul: {
795 if (Addr.getOffsetReg())
796 break;
797
798 if (!isMulPowOf2(U))
799 break;
800
801 const Value *LHS = U->getOperand(0);
802 const Value *RHS = U->getOperand(1);
803
804 // Canonicalize power-of-2 value to the RHS.
805 if (const auto *C = dyn_cast<ConstantInt>(LHS))
806 if (C->getValue().isPowerOf2())
807 std::swap(LHS, RHS);
808
809 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
810 const auto *C = cast<ConstantInt>(RHS);
811 unsigned Val = C->getValue().logBase2();
812 if (Val < 1 || Val > 3)
813 break;
814
815 uint64_t NumBytes = 0;
816 if (Ty && Ty->isSized()) {
817 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
818 NumBytes = NumBits / 8;
819 if (!isPowerOf2_64(NumBits))
820 NumBytes = 0;
821 }
822
823 if (NumBytes != (1ULL << Val))
824 break;
825
826 Addr.setShift(Val);
827 Addr.setExtendType(AArch64_AM::LSL);
828
829 const Value *Src = LHS;
830 if (const auto *I = dyn_cast<Instruction>(Src)) {
831 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
832 // Fold the zext or sext when it won't become a noop.
833 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
834 if (!isIntExtFree(ZE) &&
835 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
836 Addr.setExtendType(AArch64_AM::UXTW);
837 Src = ZE->getOperand(0);
838 }
839 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
840 if (!isIntExtFree(SE) &&
841 SE->getOperand(0)->getType()->isIntegerTy(32)) {
842 Addr.setExtendType(AArch64_AM::SXTW);
843 Src = SE->getOperand(0);
844 }
845 }
846 }
847 }
848
849 Register Reg = getRegForValue(Src);
850 if (!Reg)
851 return false;
852 Addr.setOffsetReg(Reg);
853 return true;
854 }
855 case Instruction::And: {
856 if (Addr.getOffsetReg())
857 break;
858
859 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
860 break;
861
862 const Value *LHS = U->getOperand(0);
863 const Value *RHS = U->getOperand(1);
864
865 if (const auto *C = dyn_cast<ConstantInt>(LHS))
866 if (C->getValue() == 0xffffffff)
867 std::swap(LHS, RHS);
868
869 if (const auto *C = dyn_cast<ConstantInt>(RHS))
870 if (C->getValue() == 0xffffffff) {
871 Addr.setShift(0);
872 Addr.setExtendType(AArch64_AM::LSL);
873 Addr.setExtendType(AArch64_AM::UXTW);
874
875 Register Reg = getRegForValue(LHS);
876 if (!Reg)
877 return false;
878 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
879 Addr.setOffsetReg(Reg);
880 return true;
881 }
882 break;
883 }
884 case Instruction::SExt:
885 case Instruction::ZExt: {
886 if (!Addr.getReg() || Addr.getOffsetReg())
887 break;
888
889 const Value *Src = nullptr;
890 // Fold the zext or sext when it won't become a noop.
891 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
892 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
893 Addr.setExtendType(AArch64_AM::UXTW);
894 Src = ZE->getOperand(0);
895 }
896 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
897 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
898 Addr.setExtendType(AArch64_AM::SXTW);
899 Src = SE->getOperand(0);
900 }
901 }
902
903 if (!Src)
904 break;
905
906 Addr.setShift(0);
907 Register Reg = getRegForValue(Src);
908 if (!Reg)
909 return false;
910 Addr.setOffsetReg(Reg);
911 return true;
912 }
913 } // end switch
914
915 if (Addr.isRegBase() && !Addr.getReg()) {
916 Register Reg = getRegForValue(Obj);
917 if (!Reg)
918 return false;
919 Addr.setReg(Reg);
920 return true;
921 }
922
923 if (!Addr.getOffsetReg()) {
924 Register Reg = getRegForValue(Obj);
925 if (!Reg)
926 return false;
927 Addr.setOffsetReg(Reg);
928 return true;
929 }
930
931 return false;
932}
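// Example of what the Add/Shl folding above enables: an address computed as
//   %off  = shl i64 %idx, 2
//   %addr = add i64 %base, %off
// is recorded as base register + offset register with shift 2, so the access
// is later emitted as a single "ldr w0, [x0, x1, lsl #2]".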
933
934bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
935 const User *U = nullptr;
936 unsigned Opcode = Instruction::UserOp1;
937 bool InMBB = true;
938
939 if (const auto *I = dyn_cast<Instruction>(V)) {
940 Opcode = I->getOpcode();
941 U = I;
942 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
943 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
944 Opcode = C->getOpcode();
945 U = C;
946 }
947
948 switch (Opcode) {
949 default: break;
950 case Instruction::BitCast:
951 // Look past bitcasts if its operand is in the same BB.
952 if (InMBB)
953 return computeCallAddress(U->getOperand(0), Addr);
954 break;
955 case Instruction::IntToPtr:
956 // Look past no-op inttoptrs if its operand is in the same BB.
957 if (InMBB &&
958 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
959 TLI.getPointerTy(DL))
960 return computeCallAddress(U->getOperand(0), Addr);
961 break;
962 case Instruction::PtrToInt:
963 // Look past no-op ptrtoints if its operand is in the same BB.
964 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
965 return computeCallAddress(U->getOperand(0), Addr);
966 break;
967 }
968
969 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
970 Addr.setGlobalValue(GV);
971 return true;
972 }
973
974 // If all else fails, try to materialize the value in a register.
975 if (!Addr.getGlobalValue()) {
976 Addr.setReg(getRegForValue(V));
977 return Addr.getReg() != 0;
978 }
979
980 return false;
981}
982
983bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
984 EVT evt = TLI.getValueType(DL, Ty, true);
985
986 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
987 return false;
988
989 // Only handle simple types.
990 if (evt == MVT::Other || !evt.isSimple())
991 return false;
992 VT = evt.getSimpleVT();
993
994 // This is a legal type, but it's not something we handle in fast-isel.
995 if (VT == MVT::f128)
996 return false;
997
998 // Handle all other legal types, i.e. a register that will directly hold this
999 // value.
1000 return TLI.isTypeLegal(VT);
1001}
1002
1003/// Determine if the value type is supported by FastISel.
1004///
1005/// FastISel for AArch64 can handle more value types than are legal. This adds
1006/// simple value types such as i1, i8, and i16.
1007bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1008 if (Ty->isVectorTy() && !IsVectorAllowed)
1009 return false;
1010
1011 if (isTypeLegal(Ty, VT))
1012 return true;
1013
1014 // If this is a type that can be sign- or zero-extended to a basic operation,
1015 // go ahead and accept it now.
1016 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1017 return true;
1018
1019 return false;
1020}
1021
1022bool AArch64FastISel::isValueAvailable(const Value *V) const {
1023 if (!isa<Instruction>(V))
1024 return true;
1025
1026 const auto *I = cast<Instruction>(V);
1027 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1028}
1029
1030bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1031 if (Subtarget->isTargetILP32())
1032 return false;
1033
1034 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1035 if (!ScaleFactor)
1036 return false;
1037
1038 bool ImmediateOffsetNeedsLowering = false;
1039 bool RegisterOffsetNeedsLowering = false;
1040 int64_t Offset = Addr.getOffset();
1041 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1042 ImmediateOffsetNeedsLowering = true;
1043 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1044 !isUInt<12>(Offset / ScaleFactor))
1045 ImmediateOffsetNeedsLowering = true;
1046
1047 // Cannot encode an offset register and an immediate offset in the same
1048 // instruction. Fold the immediate offset into the load/store instruction and
1049 // emit an additional add to take care of the offset register.
1050 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1051 RegisterOffsetNeedsLowering = true;
1052
1053 // Cannot encode zero register as base.
1054 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1055 RegisterOffsetNeedsLowering = true;
1056
1057 // If this is a stack pointer and the offset needs to be simplified then put
1058 // the alloca address into a register, set the base type back to register and
1059 // continue. This should almost never happen.
1060 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1061 {
1062 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1063 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1064 ResultReg)
1065 .addFrameIndex(Addr.getFI())
1066 .addImm(0)
1067 .addImm(0);
1068 Addr.setKind(Address::RegBase);
1069 Addr.setReg(ResultReg);
1070 }
1071
1072 if (RegisterOffsetNeedsLowering) {
1073 unsigned ResultReg = 0;
1074 if (Addr.getReg()) {
1075 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1076 Addr.getExtendType() == AArch64_AM::UXTW )
1077 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1078 Addr.getOffsetReg(), Addr.getExtendType(),
1079 Addr.getShift());
1080 else
1081 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1082 Addr.getOffsetReg(), AArch64_AM::LSL,
1083 Addr.getShift());
1084 } else {
1085 if (Addr.getExtendType() == AArch64_AM::UXTW)
1086 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1087 Addr.getShift(), /*IsZExt=*/true);
1088 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1089 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1090 Addr.getShift(), /*IsZExt=*/false);
1091 else
1092 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1093 Addr.getShift());
1094 }
1095 if (!ResultReg)
1096 return false;
1097
1098 Addr.setReg(ResultReg);
1099 Addr.setOffsetReg(0);
1100 Addr.setShift(0);
1101 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1102 }
1103
1104 // Since the offset is too large for the load/store instruction, get the
1105 // reg+offset into a register.
1106 if (ImmediateOffsetNeedsLowering) {
1107 unsigned ResultReg;
1108 if (Addr.getReg())
1109 // Try to fold the immediate into the add instruction.
1110 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1111 else
1112 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1113
1114 if (!ResultReg)
1115 return false;
1116 Addr.setReg(ResultReg);
1117 Addr.setOffset(0);
1118 }
1119 return true;
1120}
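// For example, an i32 load at base + 65540 fits neither the 9-bit signed
// unscaled form nor the scaled 12-bit unsigned form (65540 / 4 = 16385), so
// the offset is first folded into an ADD (or a materialized constant) and the
// load is then emitted with a zero immediate off the new base register.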
1121
1122void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1123 const MachineInstrBuilder &MIB,
1124 MachineMemOperand::Flags Flags,
1125 unsigned ScaleFactor,
1126 MachineMemOperand *MMO) {
1127 int64_t Offset = Addr.getOffset() / ScaleFactor;
1128 // Frame base works a bit differently. Handle it separately.
1129 if (Addr.isFIBase()) {
1130 int FI = Addr.getFI();
1131 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1132 // and alignment should be based on the VT.
1133 MMO = FuncInfo.MF->getMachineMemOperand(
1134 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1135 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1136 // Now add the rest of the operands.
1137 MIB.addFrameIndex(FI).addImm(Offset);
1138 } else {
1139 assert(Addr.isRegBase() && "Unexpected address kind.");
1140 const MCInstrDesc &II = MIB->getDesc();
1141 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1142 Addr.setReg(
1143 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1144 Addr.setOffsetReg(
1145 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1146 if (Addr.getOffsetReg()) {
1147 assert(Addr.getOffset() == 0 && "Unexpected offset");
1148 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1149 Addr.getExtendType() == AArch64_AM::SXTX;
1150 MIB.addReg(Addr.getReg());
1151 MIB.addReg(Addr.getOffsetReg());
1152 MIB.addImm(IsSigned);
1153 MIB.addImm(Addr.getShift() != 0);
1154 } else
1155 MIB.addReg(Addr.getReg()).addImm(Offset);
1156 }
1157
1158 if (MMO)
1159 MIB.addMemOperand(MMO);
1160}
1161
1162unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1163 const Value *RHS, bool SetFlags,
1164 bool WantResult, bool IsZExt) {
1165 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1166 bool NeedExtend = false;
1167 switch (RetVT.SimpleTy) {
1168 default:
1169 return 0;
1170 case MVT::i1:
1171 NeedExtend = true;
1172 break;
1173 case MVT::i8:
1174 NeedExtend = true;
1175 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1176 break;
1177 case MVT::i16:
1178 NeedExtend = true;
1179 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1180 break;
1181 case MVT::i32: // fall-through
1182 case MVT::i64:
1183 break;
1184 }
1185 MVT SrcVT = RetVT;
1186 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1187
1188 // Canonicalize immediates to the RHS first.
1189 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1190 std::swap(LHS, RHS);
1191
1192 // Canonicalize mul by power of 2 to the RHS.
1193 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1194 if (isMulPowOf2(LHS))
1195 std::swap(LHS, RHS);
1196
1197 // Canonicalize shift immediate to the RHS.
1198 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1199 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1200 if (isa<ConstantInt>(SI->getOperand(1)))
1201 if (SI->getOpcode() == Instruction::Shl ||
1202 SI->getOpcode() == Instruction::LShr ||
1203 SI->getOpcode() == Instruction::AShr )
1204 std::swap(LHS, RHS);
1205
1206 Register LHSReg = getRegForValue(LHS);
1207 if (!LHSReg)
1208 return 0;
1209
1210 if (NeedExtend)
1211 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1212
1213 unsigned ResultReg = 0;
1214 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1215 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1216 if (C->isNegative())
1217 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1218 WantResult);
1219 else
1220 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1221 WantResult);
1222 } else if (const auto *C = dyn_cast<Constant>(RHS))
1223 if (C->isNullValue())
1224 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1225
1226 if (ResultReg)
1227 return ResultReg;
1228
1229 // Only extend the RHS within the instruction if there is a valid extend type.
1230 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1231 isValueAvailable(RHS)) {
1232 if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1233 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1234 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1235 Register RHSReg = getRegForValue(SI->getOperand(0));
1236 if (!RHSReg)
1237 return 0;
1238 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1239 C->getZExtValue(), SetFlags, WantResult);
1240 }
1241 Register RHSReg = getRegForValue(RHS);
1242 if (!RHSReg)
1243 return 0;
1244 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1245 SetFlags, WantResult);
1246 }
1247
1248 // Check if the mul can be folded into the instruction.
1249 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1250 if (isMulPowOf2(RHS)) {
1251 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1252 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1253
1254 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1255 if (C->getValue().isPowerOf2())
1256 std::swap(MulLHS, MulRHS);
1257
1258 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1259 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1260 Register RHSReg = getRegForValue(MulLHS);
1261 if (!RHSReg)
1262 return 0;
1263 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1264 ShiftVal, SetFlags, WantResult);
1265 if (ResultReg)
1266 return ResultReg;
1267 }
1268 }
1269
1270 // Check if the shift can be folded into the instruction.
1271 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1272 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1273 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1274 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1275 switch (SI->getOpcode()) {
1276 default: break;
1277 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1278 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1279 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1280 }
1281 uint64_t ShiftVal = C->getZExtValue();
1282 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1283 Register RHSReg = getRegForValue(SI->getOperand(0));
1284 if (!RHSReg)
1285 return 0;
1286 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1287 ShiftVal, SetFlags, WantResult);
1288 if (ResultReg)
1289 return ResultReg;
1290 }
1291 }
1292 }
1293 }
1294
1295 Register RHSReg = getRegForValue(RHS);
1296 if (!RHSReg)
1297 return 0;
1298
1299 if (NeedExtend)
1300 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1301
1302 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1303}
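// Net effect: IR like "add i32 %a, (shl i32 %b, 3)" or the equivalent
// "mul i32 %b, 8" form is selected as one "add w0, w1, w2, lsl #3", and an
// i8/i16 RHS can be widened inside the instruction with the extended-register
// form, e.g. "add w0, w1, w2, uxtb".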
1304
1305unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1306 unsigned RHSReg, bool SetFlags,
1307 bool WantResult) {
1308 assert(LHSReg && RHSReg && "Invalid register number.");
1309
1310 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1311 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1312 return 0;
1313
1314 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1315 return 0;
1316
1317 static const unsigned OpcTable[2][2][2] = {
1318 { { AArch64::SUBWrr, AArch64::SUBXrr },
1319 { AArch64::ADDWrr, AArch64::ADDXrr } },
1320 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1321 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1322 };
1323 bool Is64Bit = RetVT == MVT::i64;
1324 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1325 const TargetRegisterClass *RC =
1326 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1327 unsigned ResultReg;
1328 if (WantResult)
1329 ResultReg = createResultReg(RC);
1330 else
1331 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1332
1333 const MCInstrDesc &II = TII.get(Opc);
1334 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1335 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1336 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1337 .addReg(LHSReg)
1338 .addReg(RHSReg);
1339 return ResultReg;
1340}
1341
1342unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1343 uint64_t Imm, bool SetFlags,
1344 bool WantResult) {
1345 assert(LHSReg && "Invalid register number.");
1346
1347 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1348 return 0;
1349
1350 unsigned ShiftImm;
1351 if (isUInt<12>(Imm))
1352 ShiftImm = 0;
1353 else if ((Imm & 0xfff000) == Imm) {
1354 ShiftImm = 12;
1355 Imm >>= 12;
1356 } else
1357 return 0;
1358
1359 static const unsigned OpcTable[2][2][2] = {
1360 { { AArch64::SUBWri, AArch64::SUBXri },
1361 { AArch64::ADDWri, AArch64::ADDXri } },
1362 { { AArch64::SUBSWri, AArch64::SUBSXri },
1363 { AArch64::ADDSWri, AArch64::ADDSXri } }
1364 };
1365 bool Is64Bit = RetVT == MVT::i64;
1366 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1367 const TargetRegisterClass *RC;
1368 if (SetFlags)
1369 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1370 else
1371 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1372 unsigned ResultReg;
1373 if (WantResult)
1374 ResultReg = createResultReg(RC);
1375 else
1376 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1377
1378 const MCInstrDesc &II = TII.get(Opc);
1379 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1381 .addReg(LHSReg)
1382 .addImm(Imm)
1383 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1384 return ResultReg;
1385}
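// The immediate form takes a 12-bit value optionally shifted left by 12, so
// e.g. Imm = 4096 is encoded as "add x0, x1, #1, lsl #12", while Imm = 4097 is
// rejected here and callers such as emitAdd_ri_ fall back to materializing the
// constant into a register.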
1386
1387unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1388 unsigned RHSReg,
1389 AArch64_AM::ShiftExtendType ShiftType,
1390 uint64_t ShiftImm, bool SetFlags,
1391 bool WantResult) {
1392 assert(LHSReg && RHSReg && "Invalid register number.");
1393 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1394 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1395
1396 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1397 return 0;
1398
1399 // Don't deal with undefined shifts.
1400 if (ShiftImm >= RetVT.getSizeInBits())
1401 return 0;
1402
1403 static const unsigned OpcTable[2][2][2] = {
1404 { { AArch64::SUBWrs, AArch64::SUBXrs },
1405 { AArch64::ADDWrs, AArch64::ADDXrs } },
1406 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1407 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1408 };
1409 bool Is64Bit = RetVT == MVT::i64;
1410 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1411 const TargetRegisterClass *RC =
1412 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1413 unsigned ResultReg;
1414 if (WantResult)
1415 ResultReg = createResultReg(RC);
1416 else
1417 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1418
1419 const MCInstrDesc &II = TII.get(Opc);
1420 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1421 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1423 .addReg(LHSReg)
1424 .addReg(RHSReg)
1425 .addImm(getShifterImm(ShiftType, ShiftImm));
1426 return ResultReg;
1427}
1428
1429unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1430 unsigned RHSReg,
1431 AArch64_AM::ShiftExtendType ExtType,
1432 uint64_t ShiftImm, bool SetFlags,
1433 bool WantResult) {
1434 assert(LHSReg && RHSReg && "Invalid register number.");
1435 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1436 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1437
1438 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1439 return 0;
1440
1441 if (ShiftImm >= 4)
1442 return 0;
1443
1444 static const unsigned OpcTable[2][2][2] = {
1445 { { AArch64::SUBWrx, AArch64::SUBXrx },
1446 { AArch64::ADDWrx, AArch64::ADDXrx } },
1447 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1448 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1449 };
1450 bool Is64Bit = RetVT == MVT::i64;
1451 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1452 const TargetRegisterClass *RC = nullptr;
1453 if (SetFlags)
1454 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1455 else
1456 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1457 unsigned ResultReg;
1458 if (WantResult)
1459 ResultReg = createResultReg(RC);
1460 else
1461 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1462
1463 const MCInstrDesc &II = TII.get(Opc);
1464 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1465 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1467 .addReg(LHSReg)
1468 .addReg(RHSReg)
1469 .addImm(getArithExtendImm(ExtType, ShiftImm));
1470 return ResultReg;
1471}
1472
1473bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1474 Type *Ty = LHS->getType();
1475 EVT EVT = TLI.getValueType(DL, Ty, true);
1476 if (!EVT.isSimple())
1477 return false;
1478 MVT VT = EVT.getSimpleVT();
1479
1480 switch (VT.SimpleTy) {
1481 default:
1482 return false;
1483 case MVT::i1:
1484 case MVT::i8:
1485 case MVT::i16:
1486 case MVT::i32:
1487 case MVT::i64:
1488 return emitICmp(VT, LHS, RHS, IsZExt);
1489 case MVT::f32:
1490 case MVT::f64:
1491 return emitFCmp(VT, LHS, RHS);
1492 }
1493}
1494
1495bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1496 bool IsZExt) {
1497 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1498 IsZExt) != 0;
1499}
1500
1501bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1502 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1503 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1504}
1505
1506bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1507 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1508 return false;
1509
1510 // Check to see if the 2nd operand is a constant that we can encode directly
1511 // in the compare.
1512 bool UseImm = false;
1513 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1514 if (CFP->isZero() && !CFP->isNegative())
1515 UseImm = true;
1516
1517 Register LHSReg = getRegForValue(LHS);
1518 if (!LHSReg)
1519 return false;
1520
1521 if (UseImm) {
1522 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1524 .addReg(LHSReg);
1525 return true;
1526 }
1527
1528 Register RHSReg = getRegForValue(RHS);
1529 if (!RHSReg)
1530 return false;
1531
1532 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1533 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1534 .addReg(LHSReg)
1535 .addReg(RHSReg);
1536 return true;
1537}
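// FCMP's only immediate form compares against +0.0 ("fcmp d0, #0.0"); any
// other constant RHS is materialized into an FP register first.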
1538
1539unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1540 bool SetFlags, bool WantResult, bool IsZExt) {
1541 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1542 IsZExt);
1543}
1544
1545/// This method is a wrapper to simplify add emission.
1546///
1547/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1548/// that fails, then try to materialize the immediate into a register and use
1549/// emitAddSub_rr instead.
1550unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1551 unsigned ResultReg;
1552 if (Imm < 0)
1553 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1554 else
1555 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1556
1557 if (ResultReg)
1558 return ResultReg;
1559
1560 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1561 if (!CReg)
1562 return 0;
1563
1564 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1565 return ResultReg;
1566}
1567
1568unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1569 bool SetFlags, bool WantResult, bool IsZExt) {
1570 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1571 IsZExt);
1572}
1573
1574unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1575 unsigned RHSReg, bool WantResult) {
1576 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1577 /*SetFlags=*/true, WantResult);
1578}
1579
1580unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1581 unsigned RHSReg,
1582 AArch64_AM::ShiftExtendType ShiftType,
1583 uint64_t ShiftImm, bool WantResult) {
1584 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1585 ShiftImm, /*SetFlags=*/true, WantResult);
1586}
1587
1588unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1589 const Value *LHS, const Value *RHS) {
1590 // Canonicalize immediates to the RHS first.
1591 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1592 std::swap(LHS, RHS);
1593
1594 // Canonicalize mul by power-of-2 to the RHS.
1595 if (LHS->hasOneUse() && isValueAvailable(LHS))
1596 if (isMulPowOf2(LHS))
1597 std::swap(LHS, RHS);
1598
1599 // Canonicalize shift immediate to the RHS.
1600 if (LHS->hasOneUse() && isValueAvailable(LHS))
1601 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1602 if (isa<ConstantInt>(SI->getOperand(1)))
1603 std::swap(LHS, RHS);
1604
1605 Register LHSReg = getRegForValue(LHS);
1606 if (!LHSReg)
1607 return 0;
1608
1609 unsigned ResultReg = 0;
1610 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1611 uint64_t Imm = C->getZExtValue();
1612 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1613 }
1614 if (ResultReg)
1615 return ResultReg;
1616
1617 // Check if the mul can be folded into the instruction.
1618 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1619 if (isMulPowOf2(RHS)) {
1620 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1621 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1622
1623 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1624 if (C->getValue().isPowerOf2())
1625 std::swap(MulLHS, MulRHS);
1626
1627 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1628 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1629
1630 Register RHSReg = getRegForValue(MulLHS);
1631 if (!RHSReg)
1632 return 0;
1633 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1634 if (ResultReg)
1635 return ResultReg;
1636 }
1637 }
1638
1639 // Check if the shift can be folded into the instruction.
1640 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1641 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1642 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1643 uint64_t ShiftVal = C->getZExtValue();
1644 Register RHSReg = getRegForValue(SI->getOperand(0));
1645 if (!RHSReg)
1646 return 0;
1647 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1648 if (ResultReg)
1649 return ResultReg;
1650 }
1651 }
1652
1653 Register RHSReg = getRegForValue(RHS);
1654 if (!RHSReg)
1655 return 0;
1656
1657 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1658 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1659 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1660 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1661 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1662 }
1663 return ResultReg;
1664}
1665
1666unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1667 unsigned LHSReg, uint64_t Imm) {
1668 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1669 "ISD nodes are not consecutive!");
1670 static const unsigned OpcTable[3][2] = {
1671 { AArch64::ANDWri, AArch64::ANDXri },
1672 { AArch64::ORRWri, AArch64::ORRXri },
1673 { AArch64::EORWri, AArch64::EORXri }
1674 };
1675 const TargetRegisterClass *RC;
1676 unsigned Opc;
1677 unsigned RegSize;
1678 switch (RetVT.SimpleTy) {
1679 default:
1680 return 0;
1681 case MVT::i1:
1682 case MVT::i8:
1683 case MVT::i16:
1684 case MVT::i32: {
1685 unsigned Idx = ISDOpc - ISD::AND;
1686 Opc = OpcTable[Idx][0];
1687 RC = &AArch64::GPR32spRegClass;
1688 RegSize = 32;
1689 break;
1690 }
1691 case MVT::i64:
1692 Opc = OpcTable[ISDOpc - ISD::AND][1];
1693 RC = &AArch64::GPR64spRegClass;
1694 RegSize = 64;
1695 break;
1696 }
1697
1698 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1699 return 0;
1700
1701 Register ResultReg =
1702 fastEmitInst_ri(Opc, RC, LHSReg,
1703 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1704 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1705 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1706 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1707 }
1708 return ResultReg;
1709}
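// AArch64 logical immediates must be a (possibly rotated and replicated) run
// of contiguous ones, so masks like 0xff, 0xffff or 0x0f0f0f0f are accepted
// while an arbitrary value such as 0x123456 fails the isLogicalImmediate check
// above and returns 0.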
1710
1711unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1712 unsigned LHSReg, unsigned RHSReg,
1713 uint64_t ShiftImm) {
1714 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1715 "ISD nodes are not consecutive!");
1716 static const unsigned OpcTable[3][2] = {
1717 { AArch64::ANDWrs, AArch64::ANDXrs },
1718 { AArch64::ORRWrs, AArch64::ORRXrs },
1719 { AArch64::EORWrs, AArch64::EORXrs }
1720 };
1721
1722 // Don't deal with undefined shifts.
1723 if (ShiftImm >= RetVT.getSizeInBits())
1724 return 0;
1725
1726 const TargetRegisterClass *RC;
1727 unsigned Opc;
1728 switch (RetVT.SimpleTy) {
1729 default:
1730 return 0;
1731 case MVT::i1:
1732 case MVT::i8:
1733 case MVT::i16:
1734 case MVT::i32:
1735 Opc = OpcTable[ISDOpc - ISD::AND][0];
1736 RC = &AArch64::GPR32RegClass;
1737 break;
1738 case MVT::i64:
1739 Opc = OpcTable[ISDOpc - ISD::AND][1];
1740 RC = &AArch64::GPR64RegClass;
1741 break;
1742 }
1743 Register ResultReg =
1744 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1745 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1746 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1747 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1748 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1749 }
1750 return ResultReg;
1751}
1752
1753unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1754 uint64_t Imm) {
1755 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1756}
1757
1758unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1759 bool WantZExt, MachineMemOperand *MMO) {
1760 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1761 return 0;
1762
1763 // Simplify this down to something we can handle.
1764 if (!simplifyAddress(Addr, VT))
1765 return 0;
1766
1767 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1768 if (!ScaleFactor)
1769 llvm_unreachable("Unexpected value type.");
1770
1771 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1772 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1773 bool UseScaled = true;
1774 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1775 UseScaled = false;
1776 ScaleFactor = 1;
1777 }
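// Example (illustrative): for an i32 load, offset 4 fits the scaled LDRWui
// form (roughly "ldr w8, [x9, #4]"), whereas offset -8 needs the unscaled,
// signed 9-bit LDURWi form (roughly "ldur w8, [x9, #-8]").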
1778
1779 static const unsigned GPOpcTable[2][8][4] = {
1780 // Sign-extend.
1781 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1782 AArch64::LDURXi },
1783 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1784 AArch64::LDURXi },
1785 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1786 AArch64::LDRXui },
1787 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1788 AArch64::LDRXui },
1789 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1790 AArch64::LDRXroX },
1791 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1792 AArch64::LDRXroX },
1793 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1794 AArch64::LDRXroW },
1795 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1796 AArch64::LDRXroW }
1797 },
1798 // Zero-extend.
1799 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1800 AArch64::LDURXi },
1801 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1802 AArch64::LDURXi },
1803 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1804 AArch64::LDRXui },
1805 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1806 AArch64::LDRXui },
1807 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1808 AArch64::LDRXroX },
1809 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1810 AArch64::LDRXroX },
1811 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1812 AArch64::LDRXroW },
1813 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1814 AArch64::LDRXroW }
1815 }
1816 };
1817
1818 static const unsigned FPOpcTable[4][2] = {
1819 { AArch64::LDURSi, AArch64::LDURDi },
1820 { AArch64::LDRSui, AArch64::LDRDui },
1821 { AArch64::LDRSroX, AArch64::LDRDroX },
1822 { AArch64::LDRSroW, AArch64::LDRDroW }
1823 };
1824
1825 unsigned Opc;
1826 const TargetRegisterClass *RC;
1827 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1828 Addr.getOffsetReg();
1829 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1830 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1831 Addr.getExtendType() == AArch64_AM::SXTW)
1832 Idx++;
1833
1834 bool IsRet64Bit = RetVT == MVT::i64;
1835 switch (VT.SimpleTy) {
1836 default:
1837 llvm_unreachable("Unexpected value type.");
1838 case MVT::i1: // Intentional fall-through.
1839 case MVT::i8:
1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1841 RC = (IsRet64Bit && !WantZExt) ?
1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843 break;
1844 case MVT::i16:
1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1846 RC = (IsRet64Bit && !WantZExt) ?
1847 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1848 break;
1849 case MVT::i32:
1850 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1851 RC = (IsRet64Bit && !WantZExt) ?
1852 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1853 break;
1854 case MVT::i64:
1855 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1856 RC = &AArch64::GPR64RegClass;
1857 break;
1858 case MVT::f32:
1859 Opc = FPOpcTable[Idx][0];
1860 RC = &AArch64::FPR32RegClass;
1861 break;
1862 case MVT::f64:
1863 Opc = FPOpcTable[Idx][1];
1864 RC = &AArch64::FPR64RegClass;
1865 break;
1866 }
1867
1868 // Create the base instruction, then add the operands.
1869 Register ResultReg = createResultReg(RC);
1870 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1871 TII.get(Opc), ResultReg);
1872 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1873
1874 // Loading an i1 requires special handling.
1875 if (VT == MVT::i1) {
1876 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1877 assert(ANDReg && "Unexpected AND instruction emission failure.");
1878 ResultReg = ANDReg;
1879 }
1880
1881 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1882 // the 32bit reg to a 64bit reg.
1883 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1884 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1885 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1886 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1887 .addImm(0)
1888 .addReg(ResultReg, getKillRegState(true))
1889 .addImm(AArch64::sub_32);
1890 ResultReg = Reg64;
1891 }
1892 return ResultReg;
1893}
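// Note (illustrative): i1 loads are masked with AND #1 because only the low
// bit is meaningful, and zero-extending loads to i64 rely on a 32-bit load
// already clearing the upper 32 bits, so only a SUBREG_TO_REG is needed to
// re-class the value as a 64-bit register.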
1894
1895bool AArch64FastISel::selectAddSub(const Instruction *I) {
1896 MVT VT;
1897 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1898 return false;
1899
1900 if (VT.isVector())
1901 return selectOperator(I, I->getOpcode());
1902
1903 unsigned ResultReg;
1904 switch (I->getOpcode()) {
1905 default:
1906 llvm_unreachable("Unexpected instruction.");
1907 case Instruction::Add:
1908 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1909 break;
1910 case Instruction::Sub:
1911 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1912 break;
1913 }
1914 if (!ResultReg)
1915 return false;
1916
1917 updateValueMap(I, ResultReg);
1918 return true;
1919}
1920
1921bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1922 MVT VT;
1923 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1924 return false;
1925
1926 if (VT.isVector())
1927 return selectOperator(I, I->getOpcode());
1928
1929 unsigned ResultReg;
1930 switch (I->getOpcode()) {
1931 default:
1932 llvm_unreachable("Unexpected instruction.");
1933 case Instruction::And:
1934 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1935 break;
1936 case Instruction::Or:
1937 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1938 break;
1939 case Instruction::Xor:
1940 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1941 break;
1942 }
1943 if (!ResultReg)
1944 return false;
1945
1946 updateValueMap(I, ResultReg);
1947 return true;
1948}
1949
1950bool AArch64FastISel::selectLoad(const Instruction *I) {
1951 MVT VT;
1952 // Verify we have a legal type before going any further. Currently, we handle
1953 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1954 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1955 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1956 cast<LoadInst>(I)->isAtomic())
1957 return false;
1958
1959 const Value *SV = I->getOperand(0);
1960 if (TLI.supportSwiftError()) {
1961 // Swifterror values can come from either a function parameter with
1962 // swifterror attribute or an alloca with swifterror attribute.
1963 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1964 if (Arg->hasSwiftErrorAttr())
1965 return false;
1966 }
1967
1968 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1969 if (Alloca->isSwiftError())
1970 return false;
1971 }
1972 }
1973
1974 // See if we can handle this address.
1975 Address Addr;
1976 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1977 return false;
1978
1979 // Fold the following sign-/zero-extend into the load instruction.
1980 bool WantZExt = true;
1981 MVT RetVT = VT;
1982 const Value *IntExtVal = nullptr;
1983 if (I->hasOneUse()) {
1984 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1985 if (isTypeSupported(ZE->getType(), RetVT))
1986 IntExtVal = ZE;
1987 else
1988 RetVT = VT;
1989 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1990 if (isTypeSupported(SE->getType(), RetVT))
1991 IntExtVal = SE;
1992 else
1993 RetVT = VT;
1994 WantZExt = false;
1995 }
1996 }
1997
1998 unsigned ResultReg =
1999 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
2000 if (!ResultReg)
2001 return false;
2002
2003 // There are a few different cases we have to handle, because the load or the
2004 // sign-/zero-extend might not be selected by FastISel if we fall-back to
2005 // SelectionDAG. There is also an ordering issue when both instructions are in
2006 // different basic blocks.
2007 // 1.) The load instruction is selected by FastISel, but the integer extend
2008 // not. This usually happens when the integer extend is in a different
2009 // basic block and SelectionDAG took over for that basic block.
2010 // 2.) The load instruction is selected before the integer extend. This only
2011 // happens when the integer extend is in a different basic block.
2012 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2013 // by FastISel. This happens if there are instructions between the load
2014 // and the integer extend that couldn't be selected by FastISel.
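// Example (illustrative): for
//   %v = load i8, ptr %p
//   %z = zext i8 %v to i64     ; single use of %v
// the extend is folded into the load (roughly "ldrb w8, [x0]" plus a
// SUBREG_TO_REG), and the code below reconciles the value map with whatever
// the extend lowering may already have emitted.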
2015 if (IntExtVal) {
2016 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2017 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2018 // it when it selects the integer extend.
2019 Register Reg = lookUpRegForValue(IntExtVal);
2020 auto *MI = MRI.getUniqueVRegDef(Reg);
2021 if (!MI) {
2022 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2023 if (WantZExt) {
2024 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2025 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2026 ResultReg = std::prev(I)->getOperand(0).getReg();
2027 removeDeadCode(I, std::next(I));
2028 } else
2029 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2030 AArch64::sub_32);
2031 }
2032 updateValueMap(I, ResultReg);
2033 return true;
2034 }
2035
2036 // The integer extend has already been emitted - delete all the instructions
2037 // that have been emitted by the integer extend lowering code and use the
2038 // result from the load instruction directly.
2039 while (MI) {
2040 Reg = 0;
2041 for (auto &Opnd : MI->uses()) {
2042 if (Opnd.isReg()) {
2043 Reg = Opnd.getReg();
2044 break;
2045 }
2046 }
2047 MachineBasicBlock::iterator I(MI);
2048 removeDeadCode(I, std::next(I));
2049 MI = nullptr;
2050 if (Reg)
2051 MI = MRI.getUniqueVRegDef(Reg);
2052 }
2053 updateValueMap(IntExtVal, ResultReg);
2054 return true;
2055 }
2056
2057 updateValueMap(I, ResultReg);
2058 return true;
2059}
2060
2061bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2062 unsigned AddrReg,
2063 MachineMemOperand *MMO) {
2064 unsigned Opc;
2065 switch (VT.SimpleTy) {
2066 default: return false;
2067 case MVT::i8: Opc = AArch64::STLRB; break;
2068 case MVT::i16: Opc = AArch64::STLRH; break;
2069 case MVT::i32: Opc = AArch64::STLRW; break;
2070 case MVT::i64: Opc = AArch64::STLRX; break;
2071 }
2072
2073 const MCInstrDesc &II = TII.get(Opc);
2074 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2075 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2077 .addReg(SrcReg)
2078 .addReg(AddrReg)
2079 .addMemOperand(MMO);
2080 return true;
2081}
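// Example (illustrative): a release store such as
//   store atomic i32 %v, ptr %p release, align 4
// is emitted as roughly "stlr w8, [x9]"; weaker (monotonic/unordered) atomic
// stores keep using the ordinary store path below.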
2082
2083bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2084 MachineMemOperand *MMO) {
2085 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2086 return false;
2087
2088 // Simplify this down to something we can handle.
2089 if (!simplifyAddress(Addr, VT))
2090 return false;
2091
2092 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2093 if (!ScaleFactor)
2094 llvm_unreachable("Unexpected value type.");
2095
2096 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2097 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2098 bool UseScaled = true;
2099 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2100 UseScaled = false;
2101 ScaleFactor = 1;
2102 }
2103
2104 static const unsigned OpcTable[4][6] = {
2105 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2106 AArch64::STURSi, AArch64::STURDi },
2107 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2108 AArch64::STRSui, AArch64::STRDui },
2109 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2110 AArch64::STRSroX, AArch64::STRDroX },
2111 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2112 AArch64::STRSroW, AArch64::STRDroW }
2113 };
2114
2115 unsigned Opc;
2116 bool VTIsi1 = false;
2117 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2118 Addr.getOffsetReg();
2119 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2120 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2121 Addr.getExtendType() == AArch64_AM::SXTW)
2122 Idx++;
2123
2124 switch (VT.SimpleTy) {
2125 default: llvm_unreachable("Unexpected value type.");
2126 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2127 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2128 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2129 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2130 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2131 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2132 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2133 }
2134
2135 // Storing an i1 requires special handling.
2136 if (VTIsi1 && SrcReg != AArch64::WZR) {
2137 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2138 assert(ANDReg && "Unexpected AND instruction emission failure.");
2139 SrcReg = ANDReg;
2140 }
2141 // Create the base instruction, then add the operands.
2142 const MCInstrDesc &II = TII.get(Opc);
2143 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2144 MachineInstrBuilder MIB =
2145 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2146 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2147
2148 return true;
2149}
2150
2151bool AArch64FastISel::selectStore(const Instruction *I) {
2152 MVT VT;
2153 const Value *Op0 = I->getOperand(0);
2154 // Verify we have a legal type before going any further. Currently, we handle
2155 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2156 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2157 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2158 return false;
2159
2160 const Value *PtrV = I->getOperand(1);
2161 if (TLI.supportSwiftError()) {
2162 // Swifterror values can come from either a function parameter with
2163 // swifterror attribute or an alloca with swifterror attribute.
2164 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2165 if (Arg->hasSwiftErrorAttr())
2166 return false;
2167 }
2168
2169 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2170 if (Alloca->isSwiftError())
2171 return false;
2172 }
2173 }
2174
2175 // Get the value to be stored into a register. Use the zero register directly
2176 // when possible to avoid an unnecessary copy and a wasted register.
2177 unsigned SrcReg = 0;
2178 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2179 if (CI->isZero())
2180 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2181 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2182 if (CF->isZero() && !CF->isNegative()) {
2183 VT = MVT::getIntegerVT(VT.getSizeInBits());
2184 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2185 }
2186 }
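// Example (illustrative): "store i32 0, ptr %p" and "store float 0.0, ptr %p"
// both use WZR directly (roughly "str wzr, [x8]") instead of first
// materializing the zero into a register.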
2187
2188 if (!SrcReg)
2189 SrcReg = getRegForValue(Op0);
2190
2191 if (!SrcReg)
2192 return false;
2193
2194 auto *SI = cast<StoreInst>(I);
2195
2196 // Try to emit a STLR for seq_cst/release.
2197 if (SI->isAtomic()) {
2198 AtomicOrdering Ord = SI->getOrdering();
2199 // The non-atomic instructions are sufficient for relaxed stores.
2200 if (isReleaseOrStronger(Ord)) {
2201 // The STLR addressing mode only supports a base reg; pass that directly.
2202 Register AddrReg = getRegForValue(PtrV);
2203 return emitStoreRelease(VT, SrcReg, AddrReg,
2204 createMachineMemOperandFor(I));
2205 }
2206 }
2207
2208 // See if we can handle this address.
2209 Address Addr;
2210 if (!computeAddress(PtrV, Addr, Op0->getType()))
2211 return false;
2212
2213 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2214 return false;
2215 return true;
2216}
2217
2218 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2219 switch (Pred) {
2220 case CmpInst::FCMP_ONE:
2221 case CmpInst::FCMP_UEQ:
2222 default:
2223 // AL is our "false" for now. The other two need more compares.
2224 return AArch64CC::AL;
2225 case CmpInst::ICMP_EQ:
2226 case CmpInst::FCMP_OEQ:
2227 return AArch64CC::EQ;
2228 case CmpInst::ICMP_SGT:
2229 case CmpInst::FCMP_OGT:
2230 return AArch64CC::GT;
2231 case CmpInst::ICMP_SGE:
2232 case CmpInst::FCMP_OGE:
2233 return AArch64CC::GE;
2234 case CmpInst::ICMP_UGT:
2235 case CmpInst::FCMP_UGT:
2236 return AArch64CC::HI;
2237 case CmpInst::FCMP_OLT:
2238 return AArch64CC::MI;
2239 case CmpInst::ICMP_ULE:
2240 case CmpInst::FCMP_OLE:
2241 return AArch64CC::LS;
2242 case CmpInst::FCMP_ORD:
2243 return AArch64CC::VC;
2244 case CmpInst::FCMP_UNO:
2245 return AArch64CC::VS;
2246 case CmpInst::FCMP_UGE:
2247 return AArch64CC::PL;
2248 case CmpInst::ICMP_SLT:
2249 case CmpInst::FCMP_ULT:
2250 return AArch64CC::LT;
2251 case CmpInst::ICMP_SLE:
2252 case CmpInst::FCMP_ULE:
2253 return AArch64CC::LE;
2254 case CmpInst::FCMP_UNE:
2255 case CmpInst::ICMP_NE:
2256 return AArch64CC::NE;
2257 case CmpInst::ICMP_UGE:
2258 return AArch64CC::HS;
2259 case CmpInst::ICMP_ULT:
2260 return AArch64CC::LO;
2261 }
2262}
2263
2264/// Try to emit a combined compare-and-branch instruction.
2265bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2266 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2267 // will not be produced, as they are conditional branch instructions that do
2268 // not set flags.
2269 if (FuncInfo.MF->getFunction().hasFnAttribute(
2270 Attribute::SpeculativeLoadHardening))
2271 return false;
2272
2273 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2274 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2275 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2276
2277 const Value *LHS = CI->getOperand(0);
2278 const Value *RHS = CI->getOperand(1);
2279
2280 MVT VT;
2281 if (!isTypeSupported(LHS->getType(), VT))
2282 return false;
2283
2284 unsigned BW = VT.getSizeInBits();
2285 if (BW > 64)
2286 return false;
2287
2288 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2289 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2290
2291 // Try to take advantage of fallthrough opportunities.
2292 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2293 std::swap(TBB, FBB);
2294 Predicate = CmpInst::getInversePredicate(Predicate);
2295 }
2296
2297 int TestBit = -1;
2298 bool IsCmpNE;
2299 switch (Predicate) {
2300 default:
2301 return false;
2302 case CmpInst::ICMP_EQ:
2303 case CmpInst::ICMP_NE:
2304 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2305 std::swap(LHS, RHS);
2306
2307 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2308 return false;
2309
2310 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2311 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2312 const Value *AndLHS = AI->getOperand(0);
2313 const Value *AndRHS = AI->getOperand(1);
2314
2315 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2316 if (C->getValue().isPowerOf2())
2317 std::swap(AndLHS, AndRHS);
2318
2319 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2320 if (C->getValue().isPowerOf2()) {
2321 TestBit = C->getValue().logBase2();
2322 LHS = AndLHS;
2323 }
2324 }
2325
2326 if (VT == MVT::i1)
2327 TestBit = 0;
2328
2329 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2330 break;
2331 case CmpInst::ICMP_SLT:
2332 case CmpInst::ICMP_SGE:
2333 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2334 return false;
2335
2336 TestBit = BW - 1;
2337 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2338 break;
2339 case CmpInst::ICMP_SGT:
2340 case CmpInst::ICMP_SLE:
2341 if (!isa<ConstantInt>(RHS))
2342 return false;
2343
2344 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2345 return false;
2346
2347 TestBit = BW - 1;
2348 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2349 break;
2350 } // end switch
2351
2352 static const unsigned OpcTable[2][2][2] = {
2353 { {AArch64::CBZW, AArch64::CBZX },
2354 {AArch64::CBNZW, AArch64::CBNZX} },
2355 { {AArch64::TBZW, AArch64::TBZX },
2356 {AArch64::TBNZW, AArch64::TBNZX} }
2357 };
2358
2359 bool IsBitTest = TestBit != -1;
2360 bool Is64Bit = BW == 64;
2361 if (TestBit < 32 && TestBit >= 0)
2362 Is64Bit = false;
2363
2364 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2365 const MCInstrDesc &II = TII.get(Opc);
2366
2367 Register SrcReg = getRegForValue(LHS);
2368 if (!SrcReg)
2369 return false;
2370
2371 if (BW == 64 && !Is64Bit)
2372 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2373
2374 if ((BW < 32) && !IsBitTest)
2375 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2376
2377 // Emit the combined compare and branch instruction.
2378 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2379 MachineInstrBuilder MIB =
2380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2381 .addReg(SrcReg);
2382 if (IsBitTest)
2383 MIB.addImm(TestBit);
2384 MIB.addMBB(TBB);
2385
2386 finishCondBranch(BI->getParent(), TBB, FBB);
2387 return true;
2388}
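// Examples (illustrative): "icmp eq i32 %x, 0; br" becomes roughly
// "cbz w8, <bb>", "icmp ne (and i32 %x, 4), 0; br" becomes "tbnz w8, #2, <bb>",
// and "icmp slt i64 %x, 0; br" becomes a sign-bit test "tbnz x8, #63, <bb>".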
2389
2390bool AArch64FastISel::selectBranch(const Instruction *I) {
2391 const BranchInst *BI = cast<BranchInst>(I);
2392 if (BI->isUnconditional()) {
2393 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2394 fastEmitBranch(MSucc, BI->getDebugLoc());
2395 return true;
2396 }
2397
2398 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2399 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2400
2401 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2402 if (CI->hasOneUse() && isValueAvailable(CI)) {
2403 // Try to optimize or fold the cmp.
2404 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2405 switch (Predicate) {
2406 default:
2407 break;
2408 case CmpInst::FCMP_FALSE:
2409 fastEmitBranch(FBB, MIMD.getDL());
2410 return true;
2411 case CmpInst::FCMP_TRUE:
2412 fastEmitBranch(TBB, MIMD.getDL());
2413 return true;
2414 }
2415
2416 // Try to emit a combined compare-and-branch first.
2417 if (emitCompareAndBranch(BI))
2418 return true;
2419
2420 // Try to take advantage of fallthrough opportunities.
2421 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2422 std::swap(TBB, FBB);
2423 Predicate = CmpInst::getInversePredicate(Predicate);
2424 }
2425
2426 // Emit the cmp.
2427 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2428 return false;
2429
2430 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2431 // instruction.
2432 AArch64CC::CondCode CC = getCompareCC(Predicate);
2433 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2434 switch (Predicate) {
2435 default:
2436 break;
2437 case CmpInst::FCMP_UEQ:
2438 ExtraCC = AArch64CC::EQ;
2439 CC = AArch64CC::VS;
2440 break;
2441 case CmpInst::FCMP_ONE:
2442 ExtraCC = AArch64CC::MI;
2443 CC = AArch64CC::GT;
2444 break;
2445 }
2446 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2447
2448 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2449 if (ExtraCC != AArch64CC::AL) {
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2451 .addImm(ExtraCC)
2452 .addMBB(TBB);
2453 }
2454
2455 // Emit the branch.
2456 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2457 .addImm(CC)
2458 .addMBB(TBB);
2459
2460 finishCondBranch(BI->getParent(), TBB, FBB);
2461 return true;
2462 }
2463 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2464 uint64_t Imm = CI->getZExtValue();
2465 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2467 .addMBB(Target);
2468
2469 // Obtain the branch probability and add the target to the successor list.
2470 if (FuncInfo.BPI) {
2471 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2472 BI->getParent(), Target->getBasicBlock());
2473 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2474 } else
2475 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2476 return true;
2477 } else {
2478 AArch64CC::CondCode CC = AArch64CC::NE;
2479 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2480 // Fake request the condition, otherwise the intrinsic might be completely
2481 // optimized away.
2482 Register CondReg = getRegForValue(BI->getCondition());
2483 if (!CondReg)
2484 return false;
2485
2486 // Emit the branch.
2487 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2488 .addImm(CC)
2489 .addMBB(TBB);
2490
2491 finishCondBranch(BI->getParent(), TBB, FBB);
2492 return true;
2493 }
2494 }
2495
2496 Register CondReg = getRegForValue(BI->getCondition());
2497 if (CondReg == 0)
2498 return false;
2499
2500 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2501 unsigned Opcode = AArch64::TBNZW;
2502 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2503 std::swap(TBB, FBB);
2504 Opcode = AArch64::TBZW;
2505 }
2506
2507 const MCInstrDesc &II = TII.get(Opcode);
2508 Register ConstrainedCondReg
2509 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2511 .addReg(ConstrainedCondReg)
2512 .addImm(0)
2513 .addMBB(TBB);
2514
2515 finishCondBranch(BI->getParent(), TBB, FBB);
2516 return true;
2517}
2518
2519bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2520 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2521 Register AddrReg = getRegForValue(BI->getOperand(0));
2522 if (AddrReg == 0)
2523 return false;
2524
2525 // Emit the indirect branch.
2526 const MCInstrDesc &II = TII.get(AArch64::BR);
2527 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2529
2530 // Make sure the CFG is up-to-date.
2531 for (const auto *Succ : BI->successors())
2532 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2533
2534 return true;
2535}
2536
2537bool AArch64FastISel::selectCmp(const Instruction *I) {
2538 const CmpInst *CI = cast<CmpInst>(I);
2539
2540 // Vectors of i1 are weird: bail out.
2541 if (CI->getType()->isVectorTy())
2542 return false;
2543
2544 // Try to optimize or fold the cmp.
2545 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2546 unsigned ResultReg = 0;
2547 switch (Predicate) {
2548 default:
2549 break;
2550 case CmpInst::FCMP_FALSE:
2551 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2552 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2553 TII.get(TargetOpcode::COPY), ResultReg)
2554 .addReg(AArch64::WZR, getKillRegState(true));
2555 break;
2556 case CmpInst::FCMP_TRUE:
2557 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2558 break;
2559 }
2560
2561 if (ResultReg) {
2562 updateValueMap(I, ResultReg);
2563 return true;
2564 }
2565
2566 // Emit the cmp.
2567 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2568 return false;
2569
2570 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2571
2572 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2573 // condition codes are inverted, because they are used by CSINC.
2574 static unsigned CondCodeTable[2][2] = {
2575 { AArch64CC::NE, AArch64CC::VC },
2576 { AArch64CC::PL, AArch64CC::LE }
2577 };
2578 unsigned *CondCodes = nullptr;
2579 switch (Predicate) {
2580 default:
2581 break;
2582 case CmpInst::FCMP_UEQ:
2583 CondCodes = &CondCodeTable[0][0];
2584 break;
2585 case CmpInst::FCMP_ONE:
2586 CondCodes = &CondCodeTable[1][0];
2587 break;
2588 }
2589
2590 if (CondCodes) {
2591 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2593 TmpReg1)
2594 .addReg(AArch64::WZR, getKillRegState(true))
2595 .addReg(AArch64::WZR, getKillRegState(true))
2596 .addImm(CondCodes[0]);
2597 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2598 ResultReg)
2599 .addReg(TmpReg1, getKillRegState(true))
2600 .addReg(AArch64::WZR, getKillRegState(true))
2601 .addImm(CondCodes[1]);
2602
2603 updateValueMap(I, ResultReg);
2604 return true;
2605 }
2606
2607 // Now set a register based on the comparison.
2608 AArch64CC::CondCode CC = getCompareCC(Predicate);
2609 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2610 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2612 ResultReg)
2613 .addReg(AArch64::WZR, getKillRegState(true))
2614 .addReg(AArch64::WZR, getKillRegState(true))
2615 .addImm(invertedCC);
2616
2617 updateValueMap(I, ResultReg);
2618 return true;
2619}
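// Note (illustrative): "csinc wd, wzr, wzr, <cc>" yields 0 when <cc> holds and
// 1 otherwise, which is why the *inverted* condition is used to materialize
// the boolean; e.g. "icmp eq" compiles to roughly
// "cmp w8, w9; csinc w0, wzr, wzr, ne".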
2620
2621/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2622/// value.
2623bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2624 if (!SI->getType()->isIntegerTy(1))
2625 return false;
2626
2627 const Value *Src1Val, *Src2Val;
2628 unsigned Opc = 0;
2629 bool NeedExtraOp = false;
2630 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2631 if (CI->isOne()) {
2632 Src1Val = SI->getCondition();
2633 Src2Val = SI->getFalseValue();
2634 Opc = AArch64::ORRWrr;
2635 } else {
2636 assert(CI->isZero());
2637 Src1Val = SI->getFalseValue();
2638 Src2Val = SI->getCondition();
2639 Opc = AArch64::BICWrr;
2640 }
2641 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2642 if (CI->isOne()) {
2643 Src1Val = SI->getCondition();
2644 Src2Val = SI->getTrueValue();
2645 Opc = AArch64::ORRWrr;
2646 NeedExtraOp = true;
2647 } else {
2648 assert(CI->isZero());
2649 Src1Val = SI->getCondition();
2650 Src2Val = SI->getTrueValue();
2651 Opc = AArch64::ANDWrr;
2652 }
2653 }
2654
2655 if (!Opc)
2656 return false;
2657
2658 Register Src1Reg = getRegForValue(Src1Val);
2659 if (!Src1Reg)
2660 return false;
2661
2662 Register Src2Reg = getRegForValue(Src2Val);
2663 if (!Src2Reg)
2664 return false;
2665
2666 if (NeedExtraOp)
2667 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2668
2669 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2670 Src2Reg);
2671 updateValueMap(SI, ResultReg);
2672 return true;
2673}
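// Examples (illustrative): "select i1 %c, i1 true, i1 %b" becomes roughly
// "orr" of the condition and %b, and "select i1 %c, i1 false, i1 %b" becomes
// a "bic" (%b & ~%c); the NeedExtraOp case first inverts the condition with an
// EOR #1 before the ORR.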
2674
2675bool AArch64FastISel::selectSelect(const Instruction *I) {
2676 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2677 MVT VT;
2678 if (!isTypeSupported(I->getType(), VT))
2679 return false;
2680
2681 unsigned Opc;
2682 const TargetRegisterClass *RC;
2683 switch (VT.SimpleTy) {
2684 default:
2685 return false;
2686 case MVT::i1:
2687 case MVT::i8:
2688 case MVT::i16:
2689 case MVT::i32:
2690 Opc = AArch64::CSELWr;
2691 RC = &AArch64::GPR32RegClass;
2692 break;
2693 case MVT::i64:
2694 Opc = AArch64::CSELXr;
2695 RC = &AArch64::GPR64RegClass;
2696 break;
2697 case MVT::f32:
2698 Opc = AArch64::FCSELSrrr;
2699 RC = &AArch64::FPR32RegClass;
2700 break;
2701 case MVT::f64:
2702 Opc = AArch64::FCSELDrrr;
2703 RC = &AArch64::FPR64RegClass;
2704 break;
2705 }
2706
2707 const SelectInst *SI = cast<SelectInst>(I);
2708 const Value *Cond = SI->getCondition();
2709 AArch64CC::CondCode CC = AArch64CC::NE;
2710 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2711
2712 if (optimizeSelect(SI))
2713 return true;
2714
2715 // Try to pickup the flags, so we don't have to emit another compare.
2716 if (foldXALUIntrinsic(CC, I, Cond)) {
2717 // Fake request the condition to force emission of the XALU intrinsic.
2718 Register CondReg = getRegForValue(Cond);
2719 if (!CondReg)
2720 return false;
2721 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2722 isValueAvailable(Cond)) {
2723 const auto *Cmp = cast<CmpInst>(Cond);
2724 // Try to optimize or fold the cmp.
2725 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2726 const Value *FoldSelect = nullptr;
2727 switch (Predicate) {
2728 default:
2729 break;
2731 FoldSelect = SI->getFalseValue();
2732 break;
2733 case CmpInst::FCMP_TRUE:
2734 FoldSelect = SI->getTrueValue();
2735 break;
2736 }
2737
2738 if (FoldSelect) {
2739 Register SrcReg = getRegForValue(FoldSelect);
2740 if (!SrcReg)
2741 return false;
2742
2743 updateValueMap(I, SrcReg);
2744 return true;
2745 }
2746
2747 // Emit the cmp.
2748 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749 return false;
2750
2751 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752 CC = getCompareCC(Predicate);
2753 switch (Predicate) {
2754 default:
2755 break;
2756 case CmpInst::FCMP_UEQ:
2757 ExtraCC = AArch64CC::EQ;
2758 CC = AArch64CC::VS;
2759 break;
2760 case CmpInst::FCMP_ONE:
2761 ExtraCC = AArch64CC::MI;
2762 CC = AArch64CC::GT;
2763 break;
2764 }
2765 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766 } else {
2767 Register CondReg = getRegForValue(Cond);
2768 if (!CondReg)
2769 return false;
2770
2771 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2772 CondReg = constrainOperandRegClass(II, CondReg, 1);
2773
2774 // Emit a TST instruction (ANDS wzr, reg, #imm).
2775 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2776 AArch64::WZR)
2777 .addReg(CondReg)
2778 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2779 }
2780
2781 Register Src1Reg = getRegForValue(SI->getTrueValue());
2782 Register Src2Reg = getRegForValue(SI->getFalseValue());
2783
2784 if (!Src1Reg || !Src2Reg)
2785 return false;
2786
2787 if (ExtraCC != AArch64CC::AL)
2788 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2789
2790 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2791 updateValueMap(I, ResultReg);
2792 return true;
2793}
2794
2795bool AArch64FastISel::selectFPExt(const Instruction *I) {
2796 Value *V = I->getOperand(0);
2797 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2798 return false;
2799
2800 Register Op = getRegForValue(V);
2801 if (Op == 0)
2802 return false;
2803
2804 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2805 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2806 ResultReg).addReg(Op);
2807 updateValueMap(I, ResultReg);
2808 return true;
2809}
2810
2811bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2812 Value *V = I->getOperand(0);
2813 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2814 return false;
2815
2816 Register Op = getRegForValue(V);
2817 if (Op == 0)
2818 return false;
2819
2820 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2822 ResultReg).addReg(Op);
2823 updateValueMap(I, ResultReg);
2824 return true;
2825}
2826
2827// FPToUI and FPToSI
2828bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2829 MVT DestVT;
2830 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2831 return false;
2832
2833 Register SrcReg = getRegForValue(I->getOperand(0));
2834 if (SrcReg == 0)
2835 return false;
2836
2837 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2838 if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2839 return false;
2840
2841 unsigned Opc;
2842 if (SrcVT == MVT::f64) {
2843 if (Signed)
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2845 else
2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2847 } else {
2848 if (Signed)
2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2850 else
2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2852 }
2853 Register ResultReg = createResultReg(
2854 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2856 .addReg(SrcReg);
2857 updateValueMap(I, ResultReg);
2858 return true;
2859}
2860
2861bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2862 MVT DestVT;
2863 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2864 return false;
2865 // Let regular ISEL handle FP16
2866 if (DestVT == MVT::f16)
2867 return false;
2868
2869 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2870 "Unexpected value type.");
2871
2872 Register SrcReg = getRegForValue(I->getOperand(0));
2873 if (!SrcReg)
2874 return false;
2875
2876 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2877
2878 // Handle sign-extension.
2879 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2880 SrcReg =
2881 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2882 if (!SrcReg)
2883 return false;
2884 }
2885
2886 unsigned Opc;
2887 if (SrcVT == MVT::i64) {
2888 if (Signed)
2889 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2890 else
2891 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2892 } else {
2893 if (Signed)
2894 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2895 else
2896 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2897 }
2898
2899 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2900 updateValueMap(I, ResultReg);
2901 return true;
2902}
2903
2904bool AArch64FastISel::fastLowerArguments() {
2905 if (!FuncInfo.CanLowerReturn)
2906 return false;
2907
2908 const Function *F = FuncInfo.Fn;
2909 if (F->isVarArg())
2910 return false;
2911
2912 CallingConv::ID CC = F->getCallingConv();
2913 if (CC != CallingConv::C && CC != CallingConv::Swift)
2914 return false;
2915
2916 if (Subtarget->hasCustomCallingConv())
2917 return false;
2918
2919 // Only handle simple cases of up to 8 GPR and FPR each.
2920 unsigned GPRCnt = 0;
2921 unsigned FPRCnt = 0;
2922 for (auto const &Arg : F->args()) {
2923 if (Arg.hasAttribute(Attribute::ByVal) ||
2924 Arg.hasAttribute(Attribute::InReg) ||
2925 Arg.hasAttribute(Attribute::StructRet) ||
2926 Arg.hasAttribute(Attribute::SwiftSelf) ||
2927 Arg.hasAttribute(Attribute::SwiftAsync) ||
2928 Arg.hasAttribute(Attribute::SwiftError) ||
2929 Arg.hasAttribute(Attribute::Nest))
2930 return false;
2931
2932 Type *ArgTy = Arg.getType();
2933 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2934 return false;
2935
2936 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2937 if (!ArgVT.isSimple())
2938 return false;
2939
2940 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2941 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2942 return false;
2943
2944 if (VT.isVector() &&
2945 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2946 return false;
2947
2948 if (VT >= MVT::i1 && VT <= MVT::i64)
2949 ++GPRCnt;
2950 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2951 VT.is128BitVector())
2952 ++FPRCnt;
2953 else
2954 return false;
2955
2956 if (GPRCnt > 8 || FPRCnt > 8)
2957 return false;
2958 }
2959
2960 static const MCPhysReg Registers[6][8] = {
2961 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2962 AArch64::W5, AArch64::W6, AArch64::W7 },
2963 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2964 AArch64::X5, AArch64::X6, AArch64::X7 },
2965 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2966 AArch64::H5, AArch64::H6, AArch64::H7 },
2967 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2968 AArch64::S5, AArch64::S6, AArch64::S7 },
2969 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2970 AArch64::D5, AArch64::D6, AArch64::D7 },
2971 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2972 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2973 };
2974
2975 unsigned GPRIdx = 0;
2976 unsigned FPRIdx = 0;
2977 for (auto const &Arg : F->args()) {
2978 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2979 unsigned SrcReg;
2980 const TargetRegisterClass *RC;
2981 if (VT >= MVT::i1 && VT <= MVT::i32) {
2982 SrcReg = Registers[0][GPRIdx++];
2983 RC = &AArch64::GPR32RegClass;
2984 VT = MVT::i32;
2985 } else if (VT == MVT::i64) {
2986 SrcReg = Registers[1][GPRIdx++];
2987 RC = &AArch64::GPR64RegClass;
2988 } else if (VT == MVT::f16) {
2989 SrcReg = Registers[2][FPRIdx++];
2990 RC = &AArch64::FPR16RegClass;
2991 } else if (VT == MVT::f32) {
2992 SrcReg = Registers[3][FPRIdx++];
2993 RC = &AArch64::FPR32RegClass;
2994 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2995 SrcReg = Registers[4][FPRIdx++];
2996 RC = &AArch64::FPR64RegClass;
2997 } else if (VT.is128BitVector()) {
2998 SrcReg = Registers[5][FPRIdx++];
2999 RC = &AArch64::FPR128RegClass;
3000 } else
3001 llvm_unreachable("Unexpected value type.");
3002
3003 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3004 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3005 // Without this, EmitLiveInCopies may eliminate the livein if its only
3006 // use is a bitcast (which isn't turned into an instruction).
3007 Register ResultReg = createResultReg(RC);
3008 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3009 TII.get(TargetOpcode::COPY), ResultReg)
3010 .addReg(DstReg, getKillRegState(true));
3011 updateValueMap(&Arg, ResultReg);
3012 }
3013 return true;
3014}
3015
3016bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3017 SmallVectorImpl<MVT> &OutVTs,
3018 unsigned &NumBytes) {
3019 CallingConv::ID CC = CLI.CallConv;
3020 SmallVector<CCValAssign, 16> ArgLocs;
3021 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3022 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3023
3024 // Get a count of how many bytes are to be pushed on the stack.
3025 NumBytes = CCInfo.getStackSize();
3026
3027 // Issue CALLSEQ_START
3028 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3030 .addImm(NumBytes).addImm(0);
3031
3032 // Process the args.
3033 for (CCValAssign &VA : ArgLocs) {
3034 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3035 MVT ArgVT = OutVTs[VA.getValNo()];
3036
3037 Register ArgReg = getRegForValue(ArgVal);
3038 if (!ArgReg)
3039 return false;
3040
3041 // Handle arg promotion: SExt, ZExt, AExt.
3042 switch (VA.getLocInfo()) {
3043 case CCValAssign::Full:
3044 break;
3045 case CCValAssign::SExt: {
3046 MVT DestVT = VA.getLocVT();
3047 MVT SrcVT = ArgVT;
3048 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3049 if (!ArgReg)
3050 return false;
3051 break;
3052 }
3053 case CCValAssign::AExt:
3054 // Intentional fall-through.
3055 case CCValAssign::ZExt: {
3056 MVT DestVT = VA.getLocVT();
3057 MVT SrcVT = ArgVT;
3058 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3059 if (!ArgReg)
3060 return false;
3061 break;
3062 }
3063 default:
3064 llvm_unreachable("Unknown arg promotion!");
3065 }
3066
3067 // Now copy/store arg to correct locations.
3068 if (VA.isRegLoc() && !VA.needsCustom()) {
3069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3070 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3071 CLI.OutRegs.push_back(VA.getLocReg());
3072 } else if (VA.needsCustom()) {
3073 // FIXME: Handle custom args.
3074 return false;
3075 } else {
3076 assert(VA.isMemLoc() && "Assuming store on stack.");
3077
3078 // Don't emit stores for undef values.
3079 if (isa<UndefValue>(ArgVal))
3080 continue;
3081
3082 // Need to store on the stack.
3083 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3084
3085 unsigned BEAlign = 0;
3086 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3087 BEAlign = 8 - ArgSize;
3088
3089 Address Addr;
3090 Addr.setKind(Address::RegBase);
3091 Addr.setReg(AArch64::SP);
3092 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3093
3094 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3095 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3096 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3097 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3098
3099 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3100 return false;
3101 }
3102 }
3103 return true;
3104}
3105
3106bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3107 CallingConv::ID CC = CLI.CallConv;
3108
3109 // Issue CALLSEQ_END
3110 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3112 .addImm(NumBytes).addImm(0);
3113
3114 // Now the return values.
3115 SmallVector<CCValAssign, 16> RVLocs;
3116 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3117 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3118
3119 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3120 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3121 CCValAssign &VA = RVLocs[i];
3122 MVT CopyVT = VA.getValVT();
3123 unsigned CopyReg = ResultReg + i;
3124
3125 // TODO: Handle big-endian results
3126 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3127 return false;
3128
3129 // Copy result out of their specified physreg.
3130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3131 CopyReg)
3132 .addReg(VA.getLocReg());
3133 CLI.InRegs.push_back(VA.getLocReg());
3134 }
3135
3136 CLI.ResultReg = ResultReg;
3137 CLI.NumResultRegs = RVLocs.size();
3138
3139 return true;
3140}
3141
3142bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3143 CallingConv::ID CC = CLI.CallConv;
3144 bool IsTailCall = CLI.IsTailCall;
3145 bool IsVarArg = CLI.IsVarArg;
3146 const Value *Callee = CLI.Callee;
3147 MCSymbol *Symbol = CLI.Symbol;
3148
3149 if (!Callee && !Symbol)
3150 return false;
3151
3152 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3153 // a bti instruction following the call.
3154 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3155 !Subtarget->noBTIAtReturnTwice() &&
3156 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3157 return false;
3158
3159 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3160 if (CLI.CB && CLI.CB->isIndirectCall() &&
3161 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3162 return false;
3163
3164 // Allow SelectionDAG isel to handle tail calls.
3165 if (IsTailCall)
3166 return false;
3167
3168 // FIXME: we could and should support this, but for now correctness at -O0 is
3169 // more important.
3170 if (Subtarget->isTargetILP32())
3171 return false;
3172
3173 CodeModel::Model CM = TM.getCodeModel();
3174 // Only support the small-addressing and large code models.
3175 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3176 return false;
3177
3178 // FIXME: Add large code model support for ELF.
3179 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3180 return false;
3181
3182 // Let SDISel handle vararg functions.
3183 if (IsVarArg)
3184 return false;
3185
3186 for (auto Flag : CLI.OutFlags)
3187 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3188 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3189 return false;
3190
3191 // Set up the argument vectors.
3192 SmallVector<MVT, 16> OutVTs;
3193 OutVTs.reserve(CLI.OutVals.size());
3194
3195 for (auto *Val : CLI.OutVals) {
3196 MVT VT;
3197 if (!isTypeLegal(Val->getType(), VT) &&
3198 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3199 return false;
3200
3201 // We don't handle vector parameters yet.
3202 if (VT.isVector() || VT.getSizeInBits() > 64)
3203 return false;
3204
3205 OutVTs.push_back(VT);
3206 }
3207
3208 Address Addr;
3209 if (Callee && !computeCallAddress(Callee, Addr))
3210 return false;
3211
3212 // The weak function target may be zero; in that case we must use indirect
3213 // addressing via a stub on windows as it may be out of range for a
3214 // PC-relative jump.
3215 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3216 Addr.getGlobalValue()->hasExternalWeakLinkage())
3217 return false;
3218
3219 // Handle the arguments now that we've gotten them.
3220 unsigned NumBytes;
3221 if (!processCallArgs(CLI, OutVTs, NumBytes))
3222 return false;
3223
3224 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3225 if (RegInfo->isAnyArgRegReserved(*MF))
3226 RegInfo->emitReservedArgRegCallError(*MF);
3227
3228 // Issue the call.
3229 MachineInstrBuilder MIB;
3230 if (Subtarget->useSmallAddressing()) {
3231 const MCInstrDesc &II =
3232 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3233 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3234 if (Symbol)
3235 MIB.addSym(Symbol, 0);
3236 else if (Addr.getGlobalValue())
3237 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3238 else if (Addr.getReg()) {
3239 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3240 MIB.addReg(Reg);
3241 } else
3242 return false;
3243 } else {
3244 unsigned CallReg = 0;
3245 if (Symbol) {
3246 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3247 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3248 ADRPReg)
3249 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3250
3251 CallReg = createResultReg(&AArch64::GPR64RegClass);
3252 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3253 TII.get(AArch64::LDRXui), CallReg)
3254 .addReg(ADRPReg)
3255 .addSym(Symbol,
3256 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3257 } else if (Addr.getGlobalValue())
3258 CallReg = materializeGV(Addr.getGlobalValue());
3259 else if (Addr.getReg())
3260 CallReg = Addr.getReg();
3261
3262 if (!CallReg)
3263 return false;
3264
3265 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3266 CallReg = constrainOperandRegClass(II, CallReg, 0);
3267 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3268 }
3269
3270 // Add implicit physical register uses to the call.
3271 for (auto Reg : CLI.OutRegs)
3272 MIB.addReg(Reg, RegState::Implicit);
3273
3274 // Add a register mask with the call-preserved registers.
3275 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3276 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3277
3278 CLI.Call = MIB;
3279
3280 // Finish off the call including any return values.
3281 return finishCall(CLI, NumBytes);
3282}
3283
3284bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3285 if (Alignment)
3286 return Len / Alignment->value() <= 4;
3287 else
3288 return Len < 32;
3289}
3290
3291bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3292 uint64_t Len, MaybeAlign Alignment) {
3293 // Make sure we don't bloat code by inlining very large memcpy's.
3294 if (!isMemCpySmall(Len, Alignment))
3295 return false;
3296
3297 int64_t UnscaledOffset = 0;
3298 Address OrigDest = Dest;
3299 Address OrigSrc = Src;
3300
3301 while (Len) {
3302 MVT VT;
3303 if (!Alignment || *Alignment >= 8) {
3304 if (Len >= 8)
3305 VT = MVT::i64;
3306 else if (Len >= 4)
3307 VT = MVT::i32;
3308 else if (Len >= 2)
3309 VT = MVT::i16;
3310 else {
3311 VT = MVT::i8;
3312 }
3313 } else {
3314 assert(Alignment && "Alignment is set in this branch");
3315 // Bound based on alignment.
3316 if (Len >= 4 && *Alignment == 4)
3317 VT = MVT::i32;
3318 else if (Len >= 2 && *Alignment == 2)
3319 VT = MVT::i16;
3320 else {
3321 VT = MVT::i8;
3322 }
3323 }
3324
3325 unsigned ResultReg = emitLoad(VT, VT, Src);
3326 if (!ResultReg)
3327 return false;
3328
3329 if (!emitStore(VT, ResultReg, Dest))
3330 return false;
3331
3332 int64_t Size = VT.getSizeInBits() / 8;
3333 Len -= Size;
3334 UnscaledOffset += Size;
3335
3336 // We need to recompute the unscaled offset for each iteration.
3337 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3338 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3339 }
3340
3341 return true;
3342}
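// Example (illustrative): a 15-byte memcpy with 8-byte-aligned operands is
// inlined as four load/store pairs (i64 at offset 0, i32 at 8, i16 at 12, i8
// at 14); isMemCpySmall() above roughly bounds this to four accesses at the
// known alignment, or to copies under 32 bytes when the alignment is unknown.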
3343
3344/// Check if it is possible to fold the condition from the XALU intrinsic
3345/// into the user. The condition code will only be updated on success.
3346bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3347 const Instruction *I,
3348 const Value *Cond) {
3349 if (!isa<ExtractValueInst>(Cond))
3350 return false;
3351
3352 const auto *EV = cast<ExtractValueInst>(Cond);
3353 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3354 return false;
3355
3356 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3357 MVT RetVT;
3358 const Function *Callee = II->getCalledFunction();
3359 Type *RetTy =
3360 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3361 if (!isTypeLegal(RetTy, RetVT))
3362 return false;
3363
3364 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3365 return false;
3366
3367 const Value *LHS = II->getArgOperand(0);
3368 const Value *RHS = II->getArgOperand(1);
3369
3370 // Canonicalize immediate to the RHS.
3371 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3372 std::swap(LHS, RHS);
3373
3374 // Simplify multiplies.
3375 Intrinsic::ID IID = II->getIntrinsicID();
3376 switch (IID) {
3377 default:
3378 break;
3379 case Intrinsic::smul_with_overflow:
3380 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3381 if (C->getValue() == 2)
3382 IID = Intrinsic::sadd_with_overflow;
3383 break;
3384 case Intrinsic::umul_with_overflow:
3385 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3386 if (C->getValue() == 2)
3387 IID = Intrinsic::uadd_with_overflow;
3388 break;
3389 }
3390
3391 AArch64CC::CondCode TmpCC;
3392 switch (IID) {
3393 default:
3394 return false;
3395 case Intrinsic::sadd_with_overflow:
3396 case Intrinsic::ssub_with_overflow:
3397 TmpCC = AArch64CC::VS;
3398 break;
3399 case Intrinsic::uadd_with_overflow:
3400 TmpCC = AArch64CC::HS;
3401 break;
3402 case Intrinsic::usub_with_overflow:
3403 TmpCC = AArch64CC::LO;
3404 break;
3405 case Intrinsic::smul_with_overflow:
3406 case Intrinsic::umul_with_overflow:
3407 TmpCC = AArch64CC::NE;
3408 break;
3409 }
3410
3411 // Check if both instructions are in the same basic block.
3412 if (!isValueAvailable(II))
3413 return false;
3414
3415 // Make sure nothing is in the way
3416 BasicBlock::const_iterator Start(I);
3417 BasicBlock::const_iterator End(II);
3418 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3419 // We only expect extractvalue instructions between the intrinsic and the
3420 // instruction to be selected.
3421 if (!isa<ExtractValueInst>(Itr))
3422 return false;
3423
3424 // Check that the extractvalue operand comes from the intrinsic.
3425 const auto *EVI = cast<ExtractValueInst>(Itr);
3426 if (EVI->getAggregateOperand() != II)
3427 return false;
3428 }
3429
3430 CC = TmpCC;
3431 return true;
3432}
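// Example (illustrative): for
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov  = extractvalue { i32, i1 } %res, 1
//   br i1 %ov, ...
// the flags set by the ADDS can be reused, so the branch is emitted as roughly
// "b.vs" (uadd -> b.hs, usub -> b.lo, smul/umul -> b.ne).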
3433
3434bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3435 // FIXME: Handle more intrinsics.
3436 switch (II->getIntrinsicID()) {
3437 default: return false;
3438 case Intrinsic::frameaddress: {
3439 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3440 MFI.setFrameAddressIsTaken(true);
3441
3442 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3443 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3444 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3446 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3447 // Recursively load frame address
3448 // ldr x0, [fp]
3449 // ldr x0, [x0]
3450 // ldr x0, [x0]
3451 // ...
3452 unsigned DestReg;
3453 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3454 while (Depth--) {
3455 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3456 SrcReg, 0);
3457 assert(DestReg && "Unexpected LDR instruction emission failure.");
3458 SrcReg = DestReg;
3459 }
3460
3461 updateValueMap(II, SrcReg);
3462 return true;
3463 }
3464 case Intrinsic::sponentry: {
3465 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3466
3467 // SP = FP + Fixed Object + 16
3468 int FI = MFI.CreateFixedObject(4, 0, false);
3469 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3470 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3471 TII.get(AArch64::ADDXri), ResultReg)
3472 .addFrameIndex(FI)
3473 .addImm(0)
3474 .addImm(0);
3475
3476 updateValueMap(II, ResultReg);
3477 return true;
3478 }
3479 case Intrinsic::memcpy:
3480 case Intrinsic::memmove: {
3481 const auto *MTI = cast<MemTransferInst>(II);
3482 // Don't handle volatile.
3483 if (MTI->isVolatile())
3484 return false;
3485
3486 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3487 // we would emit dead code because we don't currently handle memmoves.
3488 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3489 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3490 // Small memcpy's are common enough that we want to do them without a call
3491 // if possible.
3492 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3493 MaybeAlign Alignment;
3494 if (MTI->getDestAlign() || MTI->getSourceAlign())
3495 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3496 MTI->getSourceAlign().valueOrOne());
3497 if (isMemCpySmall(Len, Alignment)) {
3498 Address Dest, Src;
3499 if (!computeAddress(MTI->getRawDest(), Dest) ||
3500 !computeAddress(MTI->getRawSource(), Src))
3501 return false;
3502 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3503 return true;
3504 }
3505 }
3506
3507 if (!MTI->getLength()->getType()->isIntegerTy(64))
3508 return false;
3509
3510 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3511 // Fast instruction selection doesn't support the special
3512 // address spaces.
3513 return false;
3514
3515 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3516 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3517 }
3518 case Intrinsic::memset: {
3519 const MemSetInst *MSI = cast<MemSetInst>(II);
3520 // Don't handle volatile.
3521 if (MSI->isVolatile())
3522 return false;
3523
3524 if (!MSI->getLength()->getType()->isIntegerTy(64))
3525 return false;
3526
3527 if (MSI->getDestAddressSpace() > 255)
3528 // Fast instruction selection doesn't support the special
3529 // address spaces.
3530 return false;
3531
3532 return lowerCallTo(II, "memset", II->arg_size() - 1);
3533 }
3534 case Intrinsic::sin:
3535 case Intrinsic::cos:
3536 case Intrinsic::pow: {
3537 MVT RetVT;
3538 if (!isTypeLegal(II->getType(), RetVT))
3539 return false;
3540
3541 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3542 return false;
3543
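// Table is indexed as [sin|cos|pow][0 = f32, 1 = f64]; the column is picked by
// Is64Bit below.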
3544 static const RTLIB::Libcall LibCallTable[3][2] = {
3545 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3546 { RTLIB::COS_F32, RTLIB::COS_F64 },
3547 { RTLIB::POW_F32, RTLIB::POW_F64 }
3548 };
3549 RTLIB::Libcall LC;
3550 bool Is64Bit = RetVT == MVT::f64;
3551 switch (II->getIntrinsicID()) {
3552 default:
3553 llvm_unreachable("Unexpected intrinsic.");
3554 case Intrinsic::sin:
3555 LC = LibCallTable[0][Is64Bit];
3556 break;
3557 case Intrinsic::cos:
3558 LC = LibCallTable[1][Is64Bit];
3559 break;
3560 case Intrinsic::pow:
3561 LC = LibCallTable[2][Is64Bit];
3562 break;
3563 }
3564
3565 ArgListTy Args;
3566 Args.reserve(II->arg_size());
3567
3568 // Populate the argument list.
3569 for (auto &Arg : II->args()) {
3570 ArgListEntry Entry;
3571 Entry.Val = Arg;
3572 Entry.Ty = Arg->getType();
3573 Args.push_back(Entry);
3574 }
3575
3576 CallLoweringInfo CLI;
3577 MCContext &Ctx = MF->getContext();
3578 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3579 TLI.getLibcallName(LC), std::move(Args));
3580 if (!lowerCallTo(CLI))
3581 return false;
3582 updateValueMap(II, CLI.ResultReg);
3583 return true;
3584 }
3585 case Intrinsic::fabs: {
3586 MVT VT;
3587 if (!isTypeLegal(II->getType(), VT))
3588 return false;
3589
3590 unsigned Opc;
3591 switch (VT.SimpleTy) {
3592 default:
3593 return false;
3594 case MVT::f32:
3595 Opc = AArch64::FABSSr;
3596 break;
3597 case MVT::f64:
3598 Opc = AArch64::FABSDr;
3599 break;
3600 }
3601 Register SrcReg = getRegForValue(II->getOperand(0));
3602 if (!SrcReg)
3603 return false;
3604 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3605 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3606 .addReg(SrcReg);
3607 updateValueMap(II, ResultReg);
3608 return true;
3609 }
3610 case Intrinsic::trap:
3611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3612 .addImm(1);
3613 return true;
3614 case Intrinsic::debugtrap:
3615 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3616 .addImm(0xF000);
3617 return true;
3618
3619 case Intrinsic::sqrt: {
3620 Type *RetTy = II->getCalledFunction()->getReturnType();
3621
3622 MVT VT;
3623 if (!isTypeLegal(RetTy, VT))
3624 return false;
3625
3626 Register Op0Reg = getRegForValue(II->getOperand(0));
3627 if (!Op0Reg)
3628 return false;
3629
3630 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3631 if (!ResultReg)
3632 return false;
3633
3634 updateValueMap(II, ResultReg);
3635 return true;
3636 }
3637 case Intrinsic::sadd_with_overflow:
3638 case Intrinsic::uadd_with_overflow:
3639 case Intrinsic::ssub_with_overflow:
3640 case Intrinsic::usub_with_overflow:
3641 case Intrinsic::smul_with_overflow:
3642 case Intrinsic::umul_with_overflow: {
3643 // This implements the basic lowering of the xalu with overflow intrinsics.
3644 const Function *Callee = II->getCalledFunction();
3645 auto *Ty = cast<StructType>(Callee->getReturnType());
3646 Type *RetTy = Ty->getTypeAtIndex(0U);
3647
3648 MVT VT;
3649 if (!isTypeLegal(RetTy, VT))
3650 return false;
3651
3652 if (VT != MVT::i32 && VT != MVT::i64)
3653 return false;
3654
3655 const Value *LHS = II->getArgOperand(0);
3656 const Value *RHS = II->getArgOperand(1);
3657 // Canonicalize immediate to the RHS.
3658 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3659 std::swap(LHS, RHS);
3660
3661 // Simplify multiplies.
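// A multiply by 2 overflows exactly when the corresponding add overflows, so
// rewrite {s,u}mul.with.overflow(x, 2) as {s,u}add.with.overflow(x, x).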
3662 Intrinsic::ID IID = II->getIntrinsicID();
3663 switch (IID) {
3664 default:
3665 break;
3666 case Intrinsic::smul_with_overflow:
3667 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3668 if (C->getValue() == 2) {
3669 IID = Intrinsic::sadd_with_overflow;
3670 RHS = LHS;
3671 }
3672 break;
3673 case Intrinsic::umul_with_overflow:
3674 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3675 if (C->getValue() == 2) {
3676 IID = Intrinsic::uadd_with_overflow;
3677 RHS = LHS;
3678 }
3679 break;
3680 }
3681
3682 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3683 AArch64CC::CondCode CC = AArch64CC::Invalid;
3684 switch (IID) {
3685 default: llvm_unreachable("Unexpected intrinsic!");
3686 case Intrinsic::sadd_with_overflow:
3687 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3688 CC = AArch64CC::VS;
3689 break;
3690 case Intrinsic::uadd_with_overflow:
3691 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3692 CC = AArch64CC::HS;
3693 break;
3694 case Intrinsic::ssub_with_overflow:
3695 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3696 CC = AArch64CC::VS;
3697 break;
3698 case Intrinsic::usub_with_overflow:
3699 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3700 CC = AArch64CC::LO;
3701 break;
3702 case Intrinsic::smul_with_overflow: {
3703 CC = AArch64CC::NE;
3704 Register LHSReg = getRegForValue(LHS);
3705 if (!LHSReg)
3706 return false;
3707
3708 Register RHSReg = getRegForValue(RHS);
3709 if (!RHSReg)
3710 return false;
3711
3712 if (VT == MVT::i32) {
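// 32-bit case: SMULL produces the full 64-bit product. The multiply overflowed
// iff the product differs from the sign-extension of its low 32 bits, which the
// SXTW compare below checks.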
3713 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3714 Register MulSubReg =
3715 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3716 // cmp xreg, wreg, sxtw
3717 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3718 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3719 /*WantResult=*/false);
3720 MulReg = MulSubReg;
3721 } else {
3722 assert(VT == MVT::i64 && "Unexpected value type.");
3723 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3724 // reused in the next instruction.
3725 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3726 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3727 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3728 /*WantResult=*/false);
3729 }
3730 break;
3731 }
3732 case Intrinsic::umul_with_overflow: {
3733 CC = AArch64CC::NE;
3734 Register LHSReg = getRegForValue(LHS);
3735 if (!LHSReg)
3736 return false;
3737
3738 Register RHSReg = getRegForValue(RHS);
3739 if (!RHSReg)
3740 return false;
3741
3742 if (VT == MVT::i32) {
3743 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3744 // tst xreg, #0xffffffff00000000
3745 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3746 TII.get(AArch64::ANDSXri), AArch64::XZR)
3747 .addReg(MulReg)
3748 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3749 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3750 } else {
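// 64-bit case: the unsigned multiply overflowed iff the high half of the
// 128-bit product (UMULH) is nonzero.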
3751 assert(VT == MVT::i64 && "Unexpected value type.");
3752 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3753 // reused in the next instruction.
3754 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3755 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3756 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3757 }
3758 break;
3759 }
3760 }
3761
3762 if (MulReg) {
3763 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3764 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3765 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3766 }
3767
3768 if (!ResultReg1)
3769 return false;
3770
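// Materialize the overflow flag: CSINC of WZR with the inverted condition
// yields 1 exactly when CC (the overflow condition) holds, and 0 otherwise.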
3771 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3772 AArch64::WZR, AArch64::WZR,
3773 getInvertedCondCode(CC));
3774 (void)ResultReg2;
3775 assert((ResultReg1 + 1) == ResultReg2 &&
3776 "Nonconsecutive result registers.");
3777 updateValueMap(II, ResultReg1, 2);
3778 return true;
3779 }
3780 case Intrinsic::aarch64_crc32b:
3781 case Intrinsic::aarch64_crc32h:
3782 case Intrinsic::aarch64_crc32w:
3783 case Intrinsic::aarch64_crc32x:
3784 case Intrinsic::aarch64_crc32cb:
3785 case Intrinsic::aarch64_crc32ch:
3786 case Intrinsic::aarch64_crc32cw:
3787 case Intrinsic::aarch64_crc32cx: {
3788 if (!Subtarget->hasCRC())
3789 return false;
3790
3791 unsigned Opc;
3792 switch (II->getIntrinsicID()) {
3793 default:
3794 llvm_unreachable("Unexpected intrinsic!");
3795 case Intrinsic::aarch64_crc32b:
3796 Opc = AArch64::CRC32Brr;
3797 break;
3798 case Intrinsic::aarch64_crc32h:
3799 Opc = AArch64::CRC32Hrr;
3800 break;
3801 case Intrinsic::aarch64_crc32w:
3802 Opc = AArch64::CRC32Wrr;
3803 break;
3804 case Intrinsic::aarch64_crc32x:
3805 Opc = AArch64::CRC32Xrr;
3806 break;
3807 case Intrinsic::aarch64_crc32cb:
3808 Opc = AArch64::CRC32CBrr;
3809 break;
3810 case Intrinsic::aarch64_crc32ch:
3811 Opc = AArch64::CRC32CHrr;
3812 break;
3813 case Intrinsic::aarch64_crc32cw:
3814 Opc = AArch64::CRC32CWrr;
3815 break;
3816 case Intrinsic::aarch64_crc32cx:
3817 Opc = AArch64::CRC32CXrr;
3818 break;
3819 }
3820
3821 Register LHSReg = getRegForValue(II->getArgOperand(0));
3822 Register RHSReg = getRegForValue(II->getArgOperand(1));
3823 if (!LHSReg || !RHSReg)
3824 return false;
3825
3826 Register ResultReg =
3827 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3828 updateValueMap(II, ResultReg);
3829 return true;
3830 }
3831 }
3832 return false;
3833}
3834
3835bool AArch64FastISel::selectRet(const Instruction *I) {
3836 const ReturnInst *Ret = cast<ReturnInst>(I);
3837 const Function &F = *I->getParent()->getParent();
3838
3839 if (!FuncInfo.CanLowerReturn)
3840 return false;
3841
3842 if (F.isVarArg())
3843 return false;
3844
3845 if (TLI.supportSwiftError() &&
3846 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3847 return false;
3848
3849 if (TLI.supportSplitCSR(FuncInfo.MF))
3850 return false;
3851
3852 // Build a list of return value registers.
3853 SmallVector<unsigned, 4> RetRegs;
3854
3855 if (Ret->getNumOperands() > 0) {
3856 CallingConv::ID CC = F.getCallingConv();
3857 SmallVector<ISD::OutputArg, 4> Outs;
3858 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3859
3860 // Analyze operands of the call, assigning locations to each operand.
3861 SmallVector<CCValAssign, 16> ValLocs;
3862 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3863 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3864 : RetCC_AArch64_AAPCS;
3865 CCInfo.AnalyzeReturn(Outs, RetCC);
3866
3867 // Only handle a single return value for now.
3868 if (ValLocs.size() != 1)
3869 return false;
3870
3871 CCValAssign &VA = ValLocs[0];
3872 const Value *RV = Ret->getOperand(0);
3873
3874 // Don't bother handling odd stuff for now.
3875 if ((VA.getLocInfo() != CCValAssign::Full) &&
3876 (VA.getLocInfo() != CCValAssign::BCvt))
3877 return false;
3878
3879 // Only handle register returns for now.
3880 if (!VA.isRegLoc())
3881 return false;
3882
3883 Register Reg = getRegForValue(RV);
3884 if (Reg == 0)
3885 return false;
3886
3887 unsigned SrcReg = Reg + VA.getValNo();
3888 Register DestReg = VA.getLocReg();
3889 // Avoid a cross-class copy. This is very unlikely.
3890 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3891 return false;
3892
3893 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3894 if (!RVEVT.isSimple())
3895 return false;
3896
3897 // Vectors (of > 1 lane) in big endian need tricky handling.
3898 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3899 !Subtarget->isLittleEndian())
3900 return false;
3901
3902 MVT RVVT = RVEVT.getSimpleVT();
3903 if (RVVT == MVT::f128)
3904 return false;
3905
3906 MVT DestVT = VA.getValVT();
3907 // Special handling for extended integers.
3908 if (RVVT != DestVT) {
3909 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3910 return false;
3911
3912 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3913 return false;
3914
3915 bool IsZExt = Outs[0].Flags.isZExt();
3916 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3917 if (SrcReg == 0)
3918 return false;
3919 }
3920
3921 // "Callee" (i.e. value producer) zero extends pointers at function
3922 // boundary.
3923 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3924 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3925
3926 // Make the copy.
3927 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3928 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3929
3930 // Add register to return instruction.
3931 RetRegs.push_back(VA.getLocReg());
3932 }
3933
3934 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3935 TII.get(AArch64::RET_ReallyLR));
3936 for (unsigned RetReg : RetRegs)
3937 MIB.addReg(RetReg, RegState::Implicit);
3938 return true;
3939}
3940
3941bool AArch64FastISel::selectTrunc(const Instruction *I) {
3942 Type *DestTy = I->getType();
3943 Value *Op = I->getOperand(0);
3944 Type *SrcTy = Op->getType();
3945
3946 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3947 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3948 if (!SrcEVT.isSimple())
3949 return false;
3950 if (!DestEVT.isSimple())
3951 return false;
3952
3953 MVT SrcVT = SrcEVT.getSimpleVT();
3954 MVT DestVT = DestEVT.getSimpleVT();
3955
3956 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3957 SrcVT != MVT::i8)
3958 return false;
3959 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3960 DestVT != MVT::i1)
3961 return false;
3962
3963 Register SrcReg = getRegForValue(Op);
3964 if (!SrcReg)
3965 return false;
3966
3967 // If we're truncating from i64 to a smaller non-legal type then generate an
3968 // AND. Otherwise, we know the high bits are undefined and a truncate only
3969 // generates a COPY. We cannot mark the source register also as the result
3970 // register, because this can incorrectly transfer the kill flag onto the
3971 // source register.
3972 unsigned ResultReg;
3973 if (SrcVT == MVT::i64) {
3974 uint64_t Mask = 0;
3975 switch (DestVT.SimpleTy) {
3976 default:
3977 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3978 return false;
3979 case MVT::i1:
3980 Mask = 0x1;
3981 break;
3982 case MVT::i8:
3983 Mask = 0xff;
3984 break;
3985 case MVT::i16:
3986 Mask = 0xffff;
3987 break;
3988 }
3989 // Issue an extract_subreg to get the lower 32-bits.
3990 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3991 AArch64::sub_32);
3992 // Create the AND instruction which performs the actual truncation.
3993 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3994 assert(ResultReg && "Unexpected AND instruction emission failure.");
3995 } else {
3996 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3997 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3998 TII.get(TargetOpcode::COPY), ResultReg)
3999 .addReg(SrcReg);
4000 }
4001
4002 updateValueMap(I, ResultReg);
4003 return true;
4004}
4005
4006unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4007 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4008 DestVT == MVT::i64) &&
4009 "Unexpected value type.");
4010 // Handle i8 and i16 as i32.
4011 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4012 DestVT = MVT::i32;
4013
4014 if (IsZExt) {
4015 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4016 assert(ResultReg && "Unexpected AND instruction emission failure.");
4017 if (DestVT == MVT::i64) {
4018 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4019 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4020 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4021 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4022 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4023 .addImm(0)
4024 .addReg(ResultReg)
4025 .addImm(AArch64::sub_32);
4026 ResultReg = Reg64;
4027 }
4028 return ResultReg;
4029 } else {
4030 if (DestVT == MVT::i64) {
4031 // FIXME: We're SExt i1 to i64.
4032 return 0;
4033 }
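// SBFM Wd, Wn, #0, #0 replicates bit 0 across the register, i.e. it
// sign-extends the i1 value.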
4034 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4035 0, 0);
4036 }
4037}
4038
4039unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4040 unsigned Opc, ZReg;
4041 switch (RetVT.SimpleTy) {
4042 default: return 0;
4043 case MVT::i8:
4044 case MVT::i16:
4045 case MVT::i32:
4046 RetVT = MVT::i32;
4047 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4048 case MVT::i64:
4049 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4050 }
4051
4052 const TargetRegisterClass *RC =
4053 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4054 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4055}
4056
4057unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4058 if (RetVT != MVT::i64)
4059 return 0;
4060
4061 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4062 Op0, Op1, AArch64::XZR);
4063}
4064
4065unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066 if (RetVT != MVT::i64)
4067 return 0;
4068
4069 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4070 Op0, Op1, AArch64::XZR);
4071}
4072
4073unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4074 unsigned Op1Reg) {
4075 unsigned Opc = 0;
4076 bool NeedTrunc = false;
4077 uint64_t Mask = 0;
4078 switch (RetVT.SimpleTy) {
4079 default: return 0;
4080 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4081 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4082 case MVT::i32: Opc = AArch64::LSLVWr; break;
4083 case MVT::i64: Opc = AArch64::LSLVXr; break;
4084 }
4085
4086 const TargetRegisterClass *RC =
4087 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4088 if (NeedTrunc)
4089 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4090
4091 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4092 if (NeedTrunc)
4093 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4094 return ResultReg;
4095}
4096
4097unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4098 uint64_t Shift, bool IsZExt) {
4099 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4100 "Unexpected source/return type pair.");
4101 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4102 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4103 "Unexpected source value type.");
4104 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4105 RetVT == MVT::i64) && "Unexpected return value type.");
4106
4107 bool Is64Bit = (RetVT == MVT::i64);
4108 unsigned RegSize = Is64Bit ? 64 : 32;
4109 unsigned DstBits = RetVT.getSizeInBits();
4110 unsigned SrcBits = SrcVT.getSizeInBits();
4111 const TargetRegisterClass *RC =
4112 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4113
4114 // Just emit a copy for "zero" shifts.
4115 if (Shift == 0) {
4116 if (RetVT == SrcVT) {
4117 Register ResultReg = createResultReg(RC);
4118 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4119 TII.get(TargetOpcode::COPY), ResultReg)
4120 .addReg(Op0);
4121 return ResultReg;
4122 } else
4123 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4124 }
4125
4126 // Don't deal with undefined shifts.
4127 if (Shift >= DstBits)
4128 return 0;
4129
4130 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4131 // {S|U}BFM Wd, Wn, #r, #s
4132 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4133
4134 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4135 // %2 = shl i16 %1, 4
4136 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4137 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4138 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4139 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4140
4141 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4142 // %2 = shl i16 %1, 8
4143 // Wd<32+7-24,32-24> = Wn<7:0>
4144 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4145 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4146 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4147
4148 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4149 // %2 = shl i16 %1, 12
4150 // Wd<32+3-20,32-20> = Wn<3:0>
4151 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4152 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4153 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4154
4155 unsigned ImmR = RegSize - Shift;
4156 // Limit the width to the length of the source type.
4157 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4158 static const unsigned OpcTable[2][2] = {
4159 {AArch64::SBFMWri, AArch64::SBFMXri},
4160 {AArch64::UBFMWri, AArch64::UBFMXri}
4161 };
4162 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4163 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
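// The source is 32 bits or narrower but the result is 64 bits wide, so wrap it
// in a SUBREG_TO_REG first so the X-register form of the bitfield move can
// consume it.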
4164 Register TmpReg = MRI.createVirtualRegister(RC);
4165 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4166 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4167 .addImm(0)
4168 .addReg(Op0)
4169 .addImm(AArch64::sub_32);
4170 Op0 = TmpReg;
4171 }
4172 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4173}
4174
4175unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4176 unsigned Op1Reg) {
4177 unsigned Opc = 0;
4178 bool NeedTrunc = false;
4179 uint64_t Mask = 0;
4180 switch (RetVT.SimpleTy) {
4181 default: return 0;
4182 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4183 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4184 case MVT::i32: Opc = AArch64::LSRVWr; break;
4185 case MVT::i64: Opc = AArch64::LSRVXr; break;
4186 }
4187
4188 const TargetRegisterClass *RC =
4189 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4190 if (NeedTrunc) {
4191 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4192 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4193 }
4194 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4195 if (NeedTrunc)
4196 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4197 return ResultReg;
4198}
4199
4200unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4201 uint64_t Shift, bool IsZExt) {
4202 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4203 "Unexpected source/return type pair.");
4204 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4205 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4206 "Unexpected source value type.");
4207 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4208 RetVT == MVT::i64) && "Unexpected return value type.");
4209
4210 bool Is64Bit = (RetVT == MVT::i64);
4211 unsigned RegSize = Is64Bit ? 64 : 32;
4212 unsigned DstBits = RetVT.getSizeInBits();
4213 unsigned SrcBits = SrcVT.getSizeInBits();
4214 const TargetRegisterClass *RC =
4215 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4216
4217 // Just emit a copy for "zero" shifts.
4218 if (Shift == 0) {
4219 if (RetVT == SrcVT) {
4220 Register ResultReg = createResultReg(RC);
4221 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4222 TII.get(TargetOpcode::COPY), ResultReg)
4223 .addReg(Op0);
4224 return ResultReg;
4225 } else
4226 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4227 }
4228
4229 // Don't deal with undefined shifts.
4230 if (Shift >= DstBits)
4231 return 0;
4232
4233 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4234 // {S|U}BFM Wd, Wn, #r, #s
4235 // Wd<s-r:0> = Wn<s:r> when r <= s
4236
4237 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4238 // %2 = lshr i16 %1, 4
4239 // Wd<7-4:0> = Wn<7:4>
4240 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4241 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4242 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4243
4244 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4245 // %2 = lshr i16 %1, 8
4246 // Wd<7-7,0> = Wn<7:7>
4247 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4248 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4250
4251 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4252 // %2 = lshr i16 %1, 12
4253 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4254 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4257
4258 if (Shift >= SrcBits && IsZExt)
4259 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4260
4261 // It is not possible to fold a sign-extend into the LShr instruction. In this
4262 // case emit a sign-extend.
4263 if (!IsZExt) {
4264 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4265 if (!Op0)
4266 return 0;
4267 SrcVT = RetVT;
4268 SrcBits = SrcVT.getSizeInBits();
4269 IsZExt = true;
4270 }
4271
4272 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4273 unsigned ImmS = SrcBits - 1;
4274 static const unsigned OpcTable[2][2] = {
4275 {AArch64::SBFMWri, AArch64::SBFMXri},
4276 {AArch64::UBFMWri, AArch64::UBFMXri}
4277 };
4278 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4279 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4280 Register TmpReg = MRI.createVirtualRegister(RC);
4281 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4282 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4283 .addImm(0)
4284 .addReg(Op0)
4285 .addImm(AArch64::sub_32);
4286 Op0 = TmpReg;
4287 }
4288 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4289}
4290
4291unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4292 unsigned Op1Reg) {
4293 unsigned Opc = 0;
4294 bool NeedTrunc = false;
4295 uint64_t Mask = 0;
4296 switch (RetVT.SimpleTy) {
4297 default: return 0;
4298 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4299 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4300 case MVT::i32: Opc = AArch64::ASRVWr; break;
4301 case MVT::i64: Opc = AArch64::ASRVXr; break;
4302 }
4303
4304 const TargetRegisterClass *RC =
4305 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4306 if (NeedTrunc) {
4307 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4308 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4309 }
4310 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4311 if (NeedTrunc)
4312 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4313 return ResultReg;
4314}
4315
4316unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4317 uint64_t Shift, bool IsZExt) {
4318 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4319 "Unexpected source/return type pair.");
4320 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4321 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4322 "Unexpected source value type.");
4323 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4324 RetVT == MVT::i64) && "Unexpected return value type.");
4325
4326 bool Is64Bit = (RetVT == MVT::i64);
4327 unsigned RegSize = Is64Bit ? 64 : 32;
4328 unsigned DstBits = RetVT.getSizeInBits();
4329 unsigned SrcBits = SrcVT.getSizeInBits();
4330 const TargetRegisterClass *RC =
4331 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4332
4333 // Just emit a copy for "zero" shifts.
4334 if (Shift == 0) {
4335 if (RetVT == SrcVT) {
4336 Register ResultReg = createResultReg(RC);
4337 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4338 TII.get(TargetOpcode::COPY), ResultReg)
4339 .addReg(Op0);
4340 return ResultReg;
4341 } else
4342 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4343 }
4344
4345 // Don't deal with undefined shifts.
4346 if (Shift >= DstBits)
4347 return 0;
4348
4349 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4350 // {S|U}BFM Wd, Wn, #r, #s
4351 // Wd<s-r:0> = Wn<s:r> when r <= s
4352
4353 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4354 // %2 = ashr i16 %1, 4
4355 // Wd<7-4:0> = Wn<7:4>
4356 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4357 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4358 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4359
4360 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4361 // %2 = ashr i16 %1, 8
4362 // Wd<7-7,0> = Wn<7:7>
4363 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4364 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4366
4367 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4368 // %2 = ashr i16 %1, 12
4369 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4370 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4373
4374 if (Shift >= SrcBits && IsZExt)
4375 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4376
4377 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4378 unsigned ImmS = SrcBits - 1;
4379 static const unsigned OpcTable[2][2] = {
4380 {AArch64::SBFMWri, AArch64::SBFMXri},
4381 {AArch64::UBFMWri, AArch64::UBFMXri}
4382 };
4383 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4384 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4385 Register TmpReg = MRI.createVirtualRegister(RC);
4386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4387 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4388 .addImm(0)
4389 .addReg(Op0)
4390 .addImm(AArch64::sub_32);
4391 Op0 = TmpReg;
4392 }
4393 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4394}
4395
4396unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4397 bool IsZExt) {
4398 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4399
4400 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4401 // DestVT are odd things, so test to make sure that they are both types we can
4402 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4403 // bail out to SelectionDAG.
4404 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4405 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4406 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4407 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4408 return 0;
4409
4410 unsigned Opc;
4411 unsigned Imm = 0;
4412
4413 switch (SrcVT.SimpleTy) {
4414 default:
4415 return 0;
4416 case MVT::i1:
4417 return emiti1Ext(SrcReg, DestVT, IsZExt);
4418 case MVT::i8:
4419 if (DestVT == MVT::i64)
4420 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4421 else
4422 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4423 Imm = 7;
4424 break;
4425 case MVT::i16:
4426 if (DestVT == MVT::i64)
4427 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4428 else
4429 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4430 Imm = 15;
4431 break;
4432 case MVT::i32:
4433 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4434 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4435 Imm = 31;
4436 break;
4437 }
4438
4439 // Handle i8 and i16 as i32.
4440 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4441 DestVT = MVT::i32;
4442 else if (DestVT == MVT::i64) {
4443 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4445 TII.get(AArch64::SUBREG_TO_REG), Src64)
4446 .addImm(0)
4447 .addReg(SrcReg)
4448 .addImm(AArch64::sub_32);
4449 SrcReg = Src64;
4450 }
4451
4452 const TargetRegisterClass *RC =
4453 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4454 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4455}
4456
4457static bool isZExtLoad(const MachineInstr *LI) {
4458 switch (LI->getOpcode()) {
4459 default:
4460 return false;
4461 case AArch64::LDURBBi:
4462 case AArch64::LDURHHi:
4463 case AArch64::LDURWi:
4464 case AArch64::LDRBBui:
4465 case AArch64::LDRHHui:
4466 case AArch64::LDRWui:
4467 case AArch64::LDRBBroX:
4468 case AArch64::LDRHHroX:
4469 case AArch64::LDRWroX:
4470 case AArch64::LDRBBroW:
4471 case AArch64::LDRHHroW:
4472 case AArch64::LDRWroW:
4473 return true;
4474 }
4475}
4476
4477static bool isSExtLoad(const MachineInstr *LI) {
4478 switch (LI->getOpcode()) {
4479 default:
4480 return false;
4481 case AArch64::LDURSBWi:
4482 case AArch64::LDURSHWi:
4483 case AArch64::LDURSBXi:
4484 case AArch64::LDURSHXi:
4485 case AArch64::LDURSWi:
4486 case AArch64::LDRSBWui:
4487 case AArch64::LDRSHWui:
4488 case AArch64::LDRSBXui:
4489 case AArch64::LDRSHXui:
4490 case AArch64::LDRSWui:
4491 case AArch64::LDRSBWroX:
4492 case AArch64::LDRSHWroX:
4493 case AArch64::LDRSBXroX:
4494 case AArch64::LDRSHXroX:
4495 case AArch64::LDRSWroX:
4496 case AArch64::LDRSBWroW:
4497 case AArch64::LDRSHWroW:
4498 case AArch64::LDRSBXroW:
4499 case AArch64::LDRSHXroW:
4500 case AArch64::LDRSWroW:
4501 return true;
4502 }
4503}
4504
4505bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4506 MVT SrcVT) {
4507 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4508 if (!LI || !LI->hasOneUse())
4509 return false;
4510
4511 // Check if the load instruction has already been selected.
4512 Register Reg = lookUpRegForValue(LI);
4513 if (!Reg)
4514 return false;
4515
4516 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4517 if (!MI)
4518 return false;
4519
4520 // Check if the correct load instruction has been emitted - SelectionDAG might
4521 // have emitted a zero-extending load, but we need a sign-extending load.
4522 bool IsZExt = isa<ZExtInst>(I);
4523 const auto *LoadMI = MI;
4524 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4525 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4526 Register LoadReg = MI->getOperand(1).getReg();
4527 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4528 assert(LoadMI && "Expected valid instruction");
4529 }
4530 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4531 return false;
4532
4533 // Nothing to be done.
4534 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4535 updateValueMap(I, Reg);
4536 return true;
4537 }
4538
4539 if (IsZExt) {
4540 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4542 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4543 .addImm(0)
4544 .addReg(Reg, getKillRegState(true))
4545 .addImm(AArch64::sub_32);
4546 Reg = Reg64;
4547 } else {
4548 assert((MI->getOpcode() == TargetOpcode::COPY &&
4549 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4550 "Expected copy instruction");
4551 Reg = MI->getOperand(1).getReg();
4552 MachineBasicBlock::iterator I(MI);
4553 removeDeadCode(I, std::next(I));
4554 }
4555 updateValueMap(I, Reg);
4556 return true;
4557}
4558
4559bool AArch64FastISel::selectIntExt(const Instruction *I) {
4560 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4561 "Unexpected integer extend instruction.");
4562 MVT RetVT;
4563 MVT SrcVT;
4564 if (!isTypeSupported(I->getType(), RetVT))
4565 return false;
4566
4567 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4568 return false;
4569
4570 // Try to optimize already sign-/zero-extended values from load instructions.
4571 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4572 return true;
4573
4574 Register SrcReg = getRegForValue(I->getOperand(0));
4575 if (!SrcReg)
4576 return false;
4577
4578 // Try to optimize already sign-/zero-extended values from function arguments.
4579 bool IsZExt = isa<ZExtInst>(I);
4580 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4581 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4582 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4583 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4584 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4585 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4586 .addImm(0)
4587 .addReg(SrcReg)
4588 .addImm(AArch64::sub_32);
4589 SrcReg = ResultReg;
4590 }
4591
4592 updateValueMap(I, SrcReg);
4593 return true;
4594 }
4595 }
4596
4597 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4598 if (!ResultReg)
4599 return false;
4600
4601 updateValueMap(I, ResultReg);
4602 return true;
4603}
4604
4605bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4606 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4607 if (!DestEVT.isSimple())
4608 return false;
4609
4610 MVT DestVT = DestEVT.getSimpleVT();
4611 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4612 return false;
4613
4614 unsigned DivOpc;
4615 bool Is64bit = (DestVT == MVT::i64);
4616 switch (ISDOpcode) {
4617 default:
4618 return false;
4619 case ISD::SREM:
4620 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4621 break;
4622 case ISD::UREM:
4623 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4624 break;
4625 }
4626 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4627 Register Src0Reg = getRegForValue(I->getOperand(0));
4628 if (!Src0Reg)
4629 return false;
4630
4631 Register Src1Reg = getRegForValue(I->getOperand(1));
4632 if (!Src1Reg)
4633 return false;
4634
4635 const TargetRegisterClass *RC =
4636 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4637 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4638 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4639 // The remainder is computed as numerator - (quotient * denominator) using the
4640 // MSUB instruction.
4641 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4642 updateValueMap(I, ResultReg);
4643 return true;
4644}
4645
4646bool AArch64FastISel::selectMul(const Instruction *I) {
4647 MVT VT;
4648 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4649 return false;
4650
4651 if (VT.isVector())
4652 return selectBinaryOp(I, ISD::MUL);
4653
4654 const Value *Src0 = I->getOperand(0);
4655 const Value *Src1 = I->getOperand(1);
4656 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4657 if (C->getValue().isPowerOf2())
4658 std::swap(Src0, Src1);
4659
4660 // Try to simplify to a shift instruction.
4661 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4662 if (C->getValue().isPowerOf2()) {
4663 uint64_t ShiftVal = C->getValue().logBase2();
4664 MVT SrcVT = VT;
4665 bool IsZExt = true;
4666 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4667 if (!isIntExtFree(ZExt)) {
4668 MVT VT;
4669 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4670 SrcVT = VT;
4671 IsZExt = true;
4672 Src0 = ZExt->getOperand(0);
4673 }
4674 }
4675 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4676 if (!isIntExtFree(SExt)) {
4677 MVT VT;
4678 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4679 SrcVT = VT;
4680 IsZExt = false;
4681 Src0 = SExt->getOperand(0);
4682 }
4683 }
4684 }
4685
4686 Register Src0Reg = getRegForValue(Src0);
4687 if (!Src0Reg)
4688 return false;
4689
4690 unsigned ResultReg =
4691 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4692
4693 if (ResultReg) {
4694 updateValueMap(I, ResultReg);
4695 return true;
4696 }
4697 }
4698
4699 Register Src0Reg = getRegForValue(I->getOperand(0));
4700 if (!Src0Reg)
4701 return false;
4702
4703 Register Src1Reg = getRegForValue(I->getOperand(1));
4704 if (!Src1Reg)
4705 return false;
4706
4707 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4708
4709 if (!ResultReg)
4710 return false;
4711
4712 updateValueMap(I, ResultReg);
4713 return true;
4714}
4715
4716bool AArch64FastISel::selectShift(const Instruction *I) {
4717 MVT RetVT;
4718 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4719 return false;
4720
4721 if (RetVT.isVector())
4722 return selectOperator(I, I->getOpcode());
4723
4724 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4725 unsigned ResultReg = 0;
4726 uint64_t ShiftVal = C->getZExtValue();
4727 MVT SrcVT = RetVT;
4728 bool IsZExt = I->getOpcode() != Instruction::AShr;
4729 const Value *Op0 = I->getOperand(0);
4730 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4731 if (!isIntExtFree(ZExt)) {
4732 MVT TmpVT;
4733 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4734 SrcVT = TmpVT;
4735 IsZExt = true;
4736 Op0 = ZExt->getOperand(0);
4737 }
4738 }
4739 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4740 if (!isIntExtFree(SExt)) {
4741 MVT TmpVT;
4742 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4743 SrcVT = TmpVT;
4744 IsZExt = false;
4745 Op0 = SExt->getOperand(0);
4746 }
4747 }
4748 }
4749
4750 Register Op0Reg = getRegForValue(Op0);
4751 if (!Op0Reg)
4752 return false;
4753
4754 switch (I->getOpcode()) {
4755 default: llvm_unreachable("Unexpected instruction.");
4756 case Instruction::Shl:
4757 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4758 break;
4759 case Instruction::AShr:
4760 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4761 break;
4762 case Instruction::LShr:
4763 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4764 break;
4765 }
4766 if (!ResultReg)
4767 return false;
4768
4769 updateValueMap(I, ResultReg);
4770 return true;
4771 }
4772
4773 Register Op0Reg = getRegForValue(I->getOperand(0));
4774 if (!Op0Reg)
4775 return false;
4776
4777 Register Op1Reg = getRegForValue(I->getOperand(1));
4778 if (!Op1Reg)
4779 return false;
4780
4781 unsigned ResultReg = 0;
4782 switch (I->getOpcode()) {
4783 default: llvm_unreachable("Unexpected instruction.");
4784 case Instruction::Shl:
4785 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4786 break;
4787 case Instruction::AShr:
4788 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4789 break;
4790 case Instruction::LShr:
4791 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4792 break;
4793 }
4794
4795 if (!ResultReg)
4796 return false;
4797
4798 updateValueMap(I, ResultReg);
4799 return true;
4800}
4801
4802bool AArch64FastISel::selectBitCast(const Instruction *I) {
4803 MVT RetVT, SrcVT;
4804
4805 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4806 return false;
4807 if (!isTypeLegal(I->getType(), RetVT))
4808 return false;
4809
4810 unsigned Opc;
4811 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4812 Opc = AArch64::FMOVWSr;
4813 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4814 Opc = AArch64::FMOVXDr;
4815 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4816 Opc = AArch64::FMOVSWr;
4817 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4818 Opc = AArch64::FMOVDXr;
4819 else
4820 return false;
4821
4822 const TargetRegisterClass *RC = nullptr;
4823 switch (RetVT.SimpleTy) {
4824 default: llvm_unreachable("Unexpected value type.");
4825 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4826 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4827 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4828 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4829 }
4830 Register Op0Reg = getRegForValue(I->getOperand(0));
4831 if (!Op0Reg)
4832 return false;
4833
4834 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4835 if (!ResultReg)
4836 return false;
4837
4838 updateValueMap(I, ResultReg);
4839 return true;
4840}
4841
4842bool AArch64FastISel::selectFRem(const Instruction *I) {
4843 MVT RetVT;
4844 if (!isTypeLegal(I->getType(), RetVT))
4845 return false;
4846
4847 RTLIB::Libcall LC;
4848 switch (RetVT.SimpleTy) {
4849 default:
4850 return false;
4851 case MVT::f32:
4852 LC = RTLIB::REM_F32;
4853 break;
4854 case MVT::f64:
4855 LC = RTLIB::REM_F64;
4856 break;
4857 }
4858
4859 ArgListTy Args;
4860 Args.reserve(I->getNumOperands());
4861
4862 // Populate the argument list.
4863 for (auto &Arg : I->operands()) {
4864 ArgListEntry Entry;
4865 Entry.Val = Arg;
4866 Entry.Ty = Arg->getType();
4867 Args.push_back(Entry);
4868 }
4869
4870 CallLoweringInfo CLI;
4871 MCContext &Ctx = MF->getContext();
4872 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4873 TLI.getLibcallName(LC), std::move(Args));
4874 if (!lowerCallTo(CLI))
4875 return false;
4876 updateValueMap(I, CLI.ResultReg);
4877 return true;
4878}
4879
4880bool AArch64FastISel::selectSDiv(const Instruction *I) {
4881 MVT VT;
4882 if (!isTypeLegal(I->getType(), VT))
4883 return false;
4884
4885 if (!isa<ConstantInt>(I->getOperand(1)))
4886 return selectBinaryOp(I, ISD::SDIV);
4887
4888 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4889 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4890 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4891 return selectBinaryOp(I, ISD::SDIV);
4892
4893 unsigned Lg2 = C.countr_zero();
4894 Register Src0Reg = getRegForValue(I->getOperand(0));
4895 if (!Src0Reg)
4896 return false;
4897
4898 if (cast<BinaryOperator>(I)->isExact()) {
4899 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4900 if (!ResultReg)
4901 return false;
4902 updateValueMap(I, ResultReg);
4903 return true;
4904 }
4905
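// Signed division by a power of two rounds toward zero, so bias negative
// dividends by (Pow2 - 1) before the arithmetic shift.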
4906 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4907 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4908 if (!AddReg)
4909 return false;
4910
4911 // (Src0 < 0) ? Pow2 - 1 : 0;
4912 if (!emitICmp_ri(VT, Src0Reg, 0))
4913 return false;
4914
4915 unsigned SelectOpc;
4916 const TargetRegisterClass *RC;
4917 if (VT == MVT::i64) {
4918 SelectOpc = AArch64::CSELXr;
4919 RC = &AArch64::GPR64RegClass;
4920 } else {
4921 SelectOpc = AArch64::CSELWr;
4922 RC = &AArch64::GPR32RegClass;
4923 }
4924 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4925 AArch64CC::LT);
4926 if (!SelectReg)
4927 return false;
4928
4929 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4930 // negate the result.
4931 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4932 unsigned ResultReg;
4933 if (C.isNegative())
4934 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4935 AArch64_AM::ASR, Lg2);
4936 else
4937 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4938
4939 if (!ResultReg)
4940 return false;
4941
4942 updateValueMap(I, ResultReg);
4943 return true;
4944}
4945
4946/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4947/// have to duplicate it for AArch64, because otherwise we would fail during the
4948/// sign-extend emission.
4949unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4950 Register IdxN = getRegForValue(Idx);
4951 if (IdxN == 0)
4952 // Unhandled operand. Halt "fast" selection and bail.
4953 return 0;
4954
4955 // If the index is smaller or larger than intptr_t, truncate or extend it.
4956 MVT PtrVT = TLI.getPointerTy(DL);
4957 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4958 if (IdxVT.bitsLT(PtrVT)) {
4959 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4960 } else if (IdxVT.bitsGT(PtrVT))
4961 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4962 return IdxN;
4963}
4964
4965/// This is mostly a copy of the existing FastISel GEP code, but we have to
4966/// duplicate it for AArch64, because otherwise we would bail out even for
4967/// simple cases. This is because the standard fastEmit functions don't cover
4968 /// MUL at all and ADD is lowered very inefficiently.
4969bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4970 if (Subtarget->isTargetILP32())
4971 return false;
4972
4973 Register N = getRegForValue(I->getOperand(0));
4974 if (!N)
4975 return false;
4976
4977 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4978 // into a single N = N + TotalOffset.
4979 uint64_t TotalOffs = 0;
4980 MVT VT = TLI.getPointerTy(DL);
4981 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4982 GTI != E; ++GTI) {
4983 const Value *Idx = GTI.getOperand();
4984 if (auto *StTy = GTI.getStructTypeOrNull()) {
4985 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4986 // N = N + Offset
4987 if (Field)
4988 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4989 } else {
4990 Type *Ty = GTI.getIndexedType();
4991
4992 // If this is a constant subscript, handle it quickly.
4993 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4994 if (CI->isZero())
4995 continue;
4996 // N = N + Offset
4997 TotalOffs +=
4998 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4999 continue;
5000 }
5001 if (TotalOffs) {
5002 N = emitAdd_ri_(VT, N, TotalOffs);
5003 if (!N)
5004 return false;
5005 TotalOffs = 0;
5006 }
5007
5008 // N = N + Idx * ElementSize;
5009 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5010 unsigned IdxN = getRegForGEPIndex(Idx);
5011 if (!IdxN)
5012 return false;
5013
5014 if (ElementSize != 1) {
5015 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5016 if (!C)
5017 return false;
5018 IdxN = emitMul_rr(VT, IdxN, C);
5019 if (!IdxN)
5020 return false;
5021 }
5022 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5023 if (!N)
5024 return false;
5025 }
5026 }
5027 if (TotalOffs) {
5028 N = emitAdd_ri_(VT, N, TotalOffs);
5029 if (!N)
5030 return false;
5031 }
5032 updateValueMap(I, N);
5033 return true;
5034}
5035
5036bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5037 assert(TM.getOptLevel() == CodeGenOpt::None &&
5038 "cmpxchg survived AtomicExpand at optlevel > -O0");
5039
5040 auto *RetPairTy = cast<StructType>(I->getType());
5041 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5042 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5043 "cmpxchg has a non-i1 status result");
5044
5045 MVT VT;
5046 if (!isTypeLegal(RetTy, VT))
5047 return false;
5048
5049 const TargetRegisterClass *ResRC;
5050 unsigned Opc, CmpOpc;
5051 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5052 // extractvalue selection doesn't support that.
5053 if (VT == MVT::i32) {
5054 Opc = AArch64::CMP_SWAP_32;
5055 CmpOpc = AArch64::SUBSWrs;
5056 ResRC = &AArch64::GPR32RegClass;
5057 } else if (VT == MVT::i64) {
5058 Opc = AArch64::CMP_SWAP_64;
5059 CmpOpc = AArch64::SUBSXrs;
5060 ResRC = &AArch64::GPR64RegClass;
5061 } else {
5062 return false;
5063 }
5064
5065 const MCInstrDesc &II = TII.get(Opc);
5066
5067 const Register AddrReg = constrainOperandRegClass(
5068 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5069 const Register DesiredReg = constrainOperandRegClass(
5070 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5071 const Register NewReg = constrainOperandRegClass(
5072 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5073
5074 const Register ResultReg1 = createResultReg(ResRC);
5075 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5076 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5077
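// CMP_SWAP_32/64 is a pseudo (expanded later) that defines the loaded value
// plus a scratch status register and takes the address, the expected value,
// and the new value as operands.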
5078 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5079 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5080 .addDef(ResultReg1)
5081 .addDef(ScratchReg)
5082 .addUse(AddrReg)
5083 .addUse(DesiredReg)
5084 .addUse(NewReg);
5085
5086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5087 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5088 .addUse(ResultReg1)
5089 .addUse(DesiredReg)
5090 .addImm(0);
5091
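// Materialize the i1 success flag: CSINC with NE yields 1 when the compare
// above found the loaded and expected values equal, and 0 otherwise.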
5092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5093 .addDef(ResultReg2)
5094 .addUse(AArch64::WZR)
5095 .addUse(AArch64::WZR)
5096 .addImm(AArch64CC::NE);
5097
5098 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5099 updateValueMap(I, ResultReg1, 2);
5100 return true;
5101}
5102
5103bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5104 if (TLI.fallBackToDAGISel(*I))
5105 return false;
5106 switch (I->getOpcode()) {
5107 default:
5108 break;
5109 case Instruction::Add:
5110 case Instruction::Sub:
5111 return selectAddSub(I);
5112 case Instruction::Mul:
5113 return selectMul(I);
5114 case Instruction::SDiv:
5115 return selectSDiv(I);
5116 case Instruction::SRem:
5117 if (!selectBinaryOp(I, ISD::SREM))
5118 return selectRem(I, ISD::SREM);
5119 return true;
5120 case Instruction::URem:
5121 if (!selectBinaryOp(I, ISD::UREM))
5122 return selectRem(I, ISD::UREM);
5123 return true;
5124 case Instruction::Shl:
5125 case Instruction::LShr:
5126 case Instruction::AShr:
5127 return selectShift(I);
5128 case Instruction::And:
5129 case Instruction::Or:
5130 case Instruction::Xor:
5131 return selectLogicalOp(I);
5132 case Instruction::Br:
5133 return selectBranch(I);
5134 case Instruction::IndirectBr:
5135 return selectIndirectBr(I);
5136 case Instruction::BitCast:
5137 if (!selectCast(I, ISD::BITCAST))
5138 return selectBitCast(I);
5139 return true;
5140 case Instruction::FPToSI:
5141 if (!selectCast(I, ISD::FP_TO_SINT))
5142 return selectFPToInt(I, /*Signed=*/true);
5143 return true;
5144 case Instruction::FPToUI:
5145 return selectFPToInt(I, /*Signed=*/false);
5146 case Instruction::ZExt:
5147 case Instruction::SExt:
5148 return selectIntExt(I);
5149 case Instruction::Trunc:
5150 if (!selectCast(I, ISD::TRUNCATE))
5151 return selectTrunc(I);
5152 return true;
5153 case Instruction::FPExt:
5154 return selectFPExt(I);
5155 case Instruction::FPTrunc:
5156 return selectFPTrunc(I);
5157 case Instruction::SIToFP:
5158 if (!selectCast(I, ISD::SINT_TO_FP))
5159 return selectIntToFP(I, /*Signed=*/true);
5160 return true;
5161 case Instruction::UIToFP:
5162 return selectIntToFP(I, /*Signed=*/false);
5163