1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
16#include "AArch64CallingConvention.h"
17#include "AArch64MachineFunctionInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/Analysis/BranchProbabilityInfo.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/FastISel.h"
29#include "llvm/CodeGen/FunctionLoweringInfo.h"
30#include "llvm/CodeGen/ISDOpcodes.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/RuntimeLibcalls.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/CodeGenTypes/MachineValueType.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
48#include "llvm/IR/DerivedTypes.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GetElementPtrTypeIterator.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
54#include "llvm/IR/Instructions.h"
55#include "llvm/IR/IntrinsicInst.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/Type.h"
60#include "llvm/IR/User.h"
61#include "llvm/IR/Value.h"
62#include "llvm/MC/MCInstrDesc.h"
63#include "llvm/MC/MCRegisterInfo.h"
64#include "llvm/MC/MCSymbol.h"
65#include "llvm/Support/AtomicOrdering.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CodeGen.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/ErrorHandling.h"
70#include "llvm/Support/MathExtras.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
191 MachineMemOperand::Flags Flags,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
298/// Check if the sign-/zero-extend will be a noop.
299static bool isIntExtFree(const Instruction *I) {
300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301 "Unexpected integer extend instruction.");
302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303 "Unexpected value type.");
304 bool IsZExt = isa<ZExtInst>(I);
305
306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307 if (LI->hasOneUse())
308 return true;
309
310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312 return true;
313
314 return false;
315}
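// [Added annotation, not in the upstream source] Sketch of when this helper
// fires: for IR such as
//   %v = load i32, ptr %p      ; single use
//   %e = zext i32 %v to i64
// the extend is treated as free because the load can be selected as a
// zero-extending load, so no separate UBFM/SBFM has to be emitted. The same
// holds for arguments already carrying a matching zeroext/signext attribute.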
316
317/// Determine the implicit scale factor that is applied by a memory
318/// operation for a given value type.
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
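// [Added annotation, not in the upstream source] The scale factor is the unit
// in which the scaled-immediate load/store forms encode their offset. For
// example, an MVT::i32 access returns 4, so a byte offset of 16 would be
// encoded as the unsigned immediate 4 in an LDRWui/STRWui.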
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
340 if (CC == CallingConv::CFGuard_Check)
341 return CC_AArch64_Win64_CFGuard_Check;
342 if (Subtarget->isTargetDarwin())
343 return CC_AArch64_DarwinPCS;
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
350 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
351 "Alloca should always return a pointer.");
352
353 // Don't handle dynamic allocas.
354 if (!FuncInfo.StaticAllocaMap.count(AI))
355 return 0;
356
357 DenseMap<const AllocaInst *, int>::iterator SI =
358 FuncInfo.StaticAllocaMap.find(AI);
359
360 if (SI != FuncInfo.StaticAllocaMap.end()) {
361 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
363 ResultReg)
364 .addFrameIndex(SI->second)
365 .addImm(0)
366 .addImm(0);
367 return ResultReg;
368 }
369
370 return 0;
371}
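// [Added annotation, not in the upstream source] For a static alloca this
// produces roughly
//   %vreg = ADDXri <frame-index>, 0, 0
// i.e. "stack slot address plus zero"; the frame index is rewritten into an
// SP-relative offset later, during frame index elimination.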
372
373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
374 if (VT > MVT::i64)
375 return 0;
376
377 if (!CI->isZero())
378 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
379
380 // Create a copy from the zero register to materialize a "0" value.
381 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
382 : &AArch64::GPR32RegClass;
383 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
384 Register ResultReg = createResultReg(RC);
385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
386 ResultReg).addReg(ZeroReg, getKillRegState(true));
387 return ResultReg;
388}
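// [Added annotation, not in the upstream source] Non-zero constants go
// through the tablegen'd fastEmit_i path (typically a MOVi32imm/MOVi64imm
// pseudo that is expanded later), while zero is just a COPY from WZR/XZR and
// needs no materialization instruction at all.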
389
390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
391 // Positive zero (+0.0) has to be materialized with a fmov from the zero
392 // register, because the immediate version of fmov cannot encode zero.
393 if (CFP->isNullValue())
394 return fastMaterializeFloatZero(CFP);
395
396 if (VT != MVT::f32 && VT != MVT::f64)
397 return 0;
398
399 const APFloat Val = CFP->getValueAPF();
400 bool Is64Bit = (VT == MVT::f64);
401 // This checks to see if we can use FMOV instructions to materialize
402 // a constant, otherwise we have to materialize via the constant pool.
403 int Imm =
404 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
405 if (Imm != -1) {
406 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
407 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
408 }
409
410 // For the large code model materialize the FP constant in code.
411 if (TM.getCodeModel() == CodeModel::Large) {
412 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
413 const TargetRegisterClass *RC = Is64Bit ?
414 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
415
416 Register TmpReg = createResultReg(RC);
417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
418 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
419
420 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
422 TII.get(TargetOpcode::COPY), ResultReg)
423 .addReg(TmpReg, getKillRegState(true));
424
425 return ResultReg;
426 }
427
428 // Materialize via constant pool. MachineConstantPool wants an explicit
429 // alignment.
430 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
431
432 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
433 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
435 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
436
437 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
438 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
440 .addReg(ADRPReg)
441 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
442 return ResultReg;
443}
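// [Added annotation, not in the upstream source] A value such as 2.0 has a
// valid 8-bit FP immediate encoding, so getFP64Imm() succeeds and a single
// FMOVDi is emitted. A value like 0.1 has no such encoding; under the small
// code model it is then loaded from the constant pool via ADRP + LDRDui.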
444
445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
446 // We can't handle thread-local variables quickly yet.
447 if (GV->isThreadLocal())
448 return 0;
449
450 // MachO still uses GOT for large code-model accesses, but ELF requires
451 // movz/movk sequences, which FastISel doesn't handle yet.
452 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
453 return 0;
454
455 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
456
457 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
458 if (!DestEVT.isSimple())
459 return 0;
460
461 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
462 unsigned ResultReg;
463
464 if (OpFlags & AArch64II::MO_GOT) {
465 // ADRP + LDRX
466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
467 ADRPReg)
468 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
469
470 unsigned LdrOpc;
471 if (Subtarget->isTargetILP32()) {
472 ResultReg = createResultReg(&AArch64::GPR32RegClass);
473 LdrOpc = AArch64::LDRWui;
474 } else {
475 ResultReg = createResultReg(&AArch64::GPR64RegClass);
476 LdrOpc = AArch64::LDRXui;
477 }
478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
479 ResultReg)
480 .addReg(ADRPReg)
481 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
482 AArch64II::MO_NC | OpFlags);
483 if (!Subtarget->isTargetILP32())
484 return ResultReg;
485
486 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
487 // so we must extend the result on ILP32.
488 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
490 TII.get(TargetOpcode::SUBREG_TO_REG))
491 .addDef(Result64)
492 .addImm(0)
493 .addReg(ResultReg, RegState::Kill)
494 .addImm(AArch64::sub_32);
495 return Result64;
496 } else {
497 // ADRP + ADDX
498 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
499 ADRPReg)
500 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
501
502 if (OpFlags & AArch64II::MO_TAGGED) {
503 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
504 // We do so by creating a MOVK that sets bits 48-63 of the register to
505 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
506 // the small code model so we can assume a binary size of <= 4GB, which
507 // makes the untagged PC relative offset positive. The binary must also be
508 // loaded into address range [0, 2^48). Both of these properties need to
509 // be ensured at runtime when using tagged addresses.
510 //
511 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
512 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
513 // are not exactly 1:1 with FastISel so we cannot easily abstract this
514 // out. At some point, it would be nice to find a way to not have this
515 // duplicate code.
516 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
518 DstReg)
519 .addReg(ADRPReg)
520 .addGlobalAddress(GV, /*Offset=*/0x100000000,
521 AArch64II::MO_PREL | AArch64II::MO_G3)
522 .addImm(48);
523 ADRPReg = DstReg;
524 }
525
526 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
528 ResultReg)
529 .addReg(ADRPReg)
530 .addGlobalAddress(GV, 0,
531 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
532 .addImm(0);
533 }
534 return ResultReg;
535}
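// [Added annotation, not in the upstream source] The two main shapes produced
// here are, for a GOT-indirect global,
//   adrp x0, :got:g        ;  ldr x0, [x0, :got_lo12:g]
// and, for a directly addressable global,
//   adrp x0, g             ;  add x0, x0, :lo12:g
// with the extra MOVKXi only emitted for tagged (MO_TAGGED) globals.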
536
537unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
538 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
539
540 // Only handle simple types.
541 if (!CEVT.isSimple())
542 return 0;
543 MVT VT = CEVT.getSimpleVT();
544 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
545 // 'null' pointers need somewhat special treatment.
546 if (isa<ConstantPointerNull>(C)) {
547 assert(VT == MVT::i64 && "Expected 64-bit pointers");
548 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
549 }
550
551 if (const auto *CI = dyn_cast<ConstantInt>(C))
552 return materializeInt(CI, VT);
553 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
554 return materializeFP(CFP, VT);
555 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
556 return materializeGV(GV);
557
558 return 0;
559}
560
561unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
562 assert(CFP->isNullValue() &&
563 "Floating-point constant is not a positive zero.");
564 MVT VT;
565 if (!isTypeLegal(CFP->getType(), VT))
566 return 0;
567
568 if (VT != MVT::f32 && VT != MVT::f64)
569 return 0;
570
571 bool Is64Bit = (VT == MVT::f64);
572 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
573 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
574 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
575}
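// [Added annotation, not in the upstream source] +0.0 cannot be encoded as an
// FMOV immediate, so it is produced by moving WZR/XZR into an FP register
// (FMOVWSr/FMOVXDr), which is cheaper than a constant-pool load.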
576
577/// Check if the multiply is by a power-of-2 constant.
578static bool isMulPowOf2(const Value *I) {
579 if (const auto *MI = dyn_cast<MulOperator>(I)) {
580 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
581 if (C->getValue().isPowerOf2())
582 return true;
583 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
584 if (C->getValue().isPowerOf2())
585 return true;
586 }
587 return false;
588}
589
590// Computes the address to get to an object.
591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
592{
593 const User *U = nullptr;
594 unsigned Opcode = Instruction::UserOp1;
595 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
596 // Don't walk into other basic blocks unless the object is an alloca from
597 // another block, otherwise it may not have a virtual register assigned.
598 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
600 Opcode = I->getOpcode();
601 U = I;
602 }
603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
604 Opcode = C->getOpcode();
605 U = C;
606 }
607
608 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
609 if (Ty->getAddressSpace() > 255)
610 // Fast instruction selection doesn't support the special
611 // address spaces.
612 return false;
613
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::BitCast:
618 // Look through bitcasts.
619 return computeAddress(U->getOperand(0), Addr, Ty);
620
621 case Instruction::IntToPtr:
622 // Look past no-op inttoptrs.
623 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
624 TLI.getPointerTy(DL))
625 return computeAddress(U->getOperand(0), Addr, Ty);
626 break;
627
628 case Instruction::PtrToInt:
629 // Look past no-op ptrtoints.
630 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
631 return computeAddress(U->getOperand(0), Addr, Ty);
632 break;
633
634 case Instruction::GetElementPtr: {
635 Address SavedAddr = Addr;
636 uint64_t TmpOffset = Addr.getOffset();
637
638 // Iterate through the GEP folding the constants into offsets where
639 // we can.
640 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
641 GTI != E; ++GTI) {
642 const Value *Op = GTI.getOperand();
643 if (StructType *STy = GTI.getStructTypeOrNull()) {
644 const StructLayout *SL = DL.getStructLayout(STy);
645 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
646 TmpOffset += SL->getElementOffset(Idx);
647 } else {
648 uint64_t S = GTI.getSequentialElementStride(DL);
649 while (true) {
650 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
651 // Constant-offset addressing.
652 TmpOffset += CI->getSExtValue() * S;
653 break;
654 }
655 if (canFoldAddIntoGEP(U, Op)) {
656 // A compatible add with a constant operand. Fold the constant.
657 ConstantInt *CI =
658 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
659 TmpOffset += CI->getSExtValue() * S;
660 // Iterate on the other operand.
661 Op = cast<AddOperator>(Op)->getOperand(0);
662 continue;
663 }
664 // Unsupported
665 goto unsupported_gep;
666 }
667 }
668 }
669
670 // Try to grab the base operand now.
671 Addr.setOffset(TmpOffset);
672 if (computeAddress(U->getOperand(0), Addr, Ty))
673 return true;
674
675 // We failed, restore everything and try the other options.
676 Addr = SavedAddr;
677
678 unsupported_gep:
679 break;
680 }
681 case Instruction::Alloca: {
682 const AllocaInst *AI = cast<AllocaInst>(Obj);
683 DenseMap<const AllocaInst *, int>::iterator SI =
684 FuncInfo.StaticAllocaMap.find(AI);
685 if (SI != FuncInfo.StaticAllocaMap.end()) {
686 Addr.setKind(Address::FrameIndexBase);
687 Addr.setFI(SI->second);
688 return true;
689 }
690 break;
691 }
692 case Instruction::Add: {
693 // Adds of constants are common and easy enough.
694 const Value *LHS = U->getOperand(0);
695 const Value *RHS = U->getOperand(1);
696
697 if (isa<ConstantInt>(LHS))
698 std::swap(LHS, RHS);
699
700 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
701 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
702 return computeAddress(LHS, Addr, Ty);
703 }
704
705 Address Backup = Addr;
706 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
707 return true;
708 Addr = Backup;
709
710 break;
711 }
712 case Instruction::Sub: {
713 // Subs of constants are common and easy enough.
714 const Value *LHS = U->getOperand(0);
715 const Value *RHS = U->getOperand(1);
716
717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
719 return computeAddress(LHS, Addr, Ty);
720 }
721 break;
722 }
723 case Instruction::Shl: {
724 if (Addr.getOffsetReg())
725 break;
726
727 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
728 if (!CI)
729 break;
730
731 unsigned Val = CI->getZExtValue();
732 if (Val < 1 || Val > 3)
733 break;
734
735 uint64_t NumBytes = 0;
736 if (Ty && Ty->isSized()) {
737 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
738 NumBytes = NumBits / 8;
739 if (!isPowerOf2_64(NumBits))
740 NumBytes = 0;
741 }
742
743 if (NumBytes != (1ULL << Val))
744 break;
745
746 Addr.setShift(Val);
747 Addr.setExtendType(AArch64_AM::LSL);
748
749 const Value *Src = U->getOperand(0);
750 if (const auto *I = dyn_cast<Instruction>(Src)) {
751 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
752 // Fold the zext or sext when it won't become a noop.
753 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
754 if (!isIntExtFree(ZE) &&
755 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
756 Addr.setExtendType(AArch64_AM::UXTW);
757 Src = ZE->getOperand(0);
758 }
759 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
760 if (!isIntExtFree(SE) &&
761 SE->getOperand(0)->getType()->isIntegerTy(32)) {
762 Addr.setExtendType(AArch64_AM::SXTW);
763 Src = SE->getOperand(0);
764 }
765 }
766 }
767 }
768
769 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
770 if (AI->getOpcode() == Instruction::And) {
771 const Value *LHS = AI->getOperand(0);
772 const Value *RHS = AI->getOperand(1);
773
774 if (const auto *C = dyn_cast<ConstantInt>(LHS))
775 if (C->getValue() == 0xffffffff)
776 std::swap(LHS, RHS);
777
778 if (const auto *C = dyn_cast<ConstantInt>(RHS))
779 if (C->getValue() == 0xffffffff) {
780 Addr.setExtendType(AArch64_AM::UXTW);
781 Register Reg = getRegForValue(LHS);
782 if (!Reg)
783 return false;
784 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
785 Addr.setOffsetReg(Reg);
786 return true;
787 }
788 }
789
790 Register Reg = getRegForValue(Src);
791 if (!Reg)
792 return false;
793 Addr.setOffsetReg(Reg);
794 return true;
795 }
796 case Instruction::Mul: {
797 if (Addr.getOffsetReg())
798 break;
799
800 if (!isMulPowOf2(U))
801 break;
802
803 const Value *LHS = U->getOperand(0);
804 const Value *RHS = U->getOperand(1);
805
806 // Canonicalize power-of-2 value to the RHS.
807 if (const auto *C = dyn_cast<ConstantInt>(LHS))
808 if (C->getValue().isPowerOf2())
809 std::swap(LHS, RHS);
810
811 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
812 const auto *C = cast<ConstantInt>(RHS);
813 unsigned Val = C->getValue().logBase2();
814 if (Val < 1 || Val > 3)
815 break;
816
817 uint64_t NumBytes = 0;
818 if (Ty && Ty->isSized()) {
819 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
820 NumBytes = NumBits / 8;
821 if (!isPowerOf2_64(NumBits))
822 NumBytes = 0;
823 }
824
825 if (NumBytes != (1ULL << Val))
826 break;
827
828 Addr.setShift(Val);
829 Addr.setExtendType(AArch64_AM::LSL);
830
831 const Value *Src = LHS;
832 if (const auto *I = dyn_cast<Instruction>(Src)) {
833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 // Fold the zext or sext when it won't become a noop.
835 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
836 if (!isIntExtFree(ZE) &&
837 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
838 Addr.setExtendType(AArch64_AM::UXTW);
839 Src = ZE->getOperand(0);
840 }
841 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
842 if (!isIntExtFree(SE) &&
843 SE->getOperand(0)->getType()->isIntegerTy(32)) {
844 Addr.setExtendType(AArch64_AM::SXTW);
845 Src = SE->getOperand(0);
846 }
847 }
848 }
849 }
850
851 Register Reg = getRegForValue(Src);
852 if (!Reg)
853 return false;
854 Addr.setOffsetReg(Reg);
855 return true;
856 }
857 case Instruction::And: {
858 if (Addr.getOffsetReg())
859 break;
860
861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
862 break;
863
864 const Value *LHS = U->getOperand(0);
865 const Value *RHS = U->getOperand(1);
866
867 if (const auto *C = dyn_cast<ConstantInt>(LHS))
868 if (C->getValue() == 0xffffffff)
869 std::swap(LHS, RHS);
870
871 if (const auto *C = dyn_cast<ConstantInt>(RHS))
872 if (C->getValue() == 0xffffffff) {
873 Addr.setShift(0);
874 Addr.setExtendType(AArch64_AM::LSL);
875 Addr.setExtendType(AArch64_AM::UXTW);
876
877 Register Reg = getRegForValue(LHS);
878 if (!Reg)
879 return false;
880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
881 Addr.setOffsetReg(Reg);
882 return true;
883 }
884 break;
885 }
886 case Instruction::SExt:
887 case Instruction::ZExt: {
888 if (!Addr.getReg() || Addr.getOffsetReg())
889 break;
890
891 const Value *Src = nullptr;
892 // Fold the zext or sext when it won't become a noop.
893 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
894 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
895 Addr.setExtendType(AArch64_AM::UXTW);
896 Src = ZE->getOperand(0);
897 }
898 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
899 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
900 Addr.setExtendType(AArch64_AM::SXTW);
901 Src = SE->getOperand(0);
902 }
903 }
904
905 if (!Src)
906 break;
907
908 Addr.setShift(0);
909 Register Reg = getRegForValue(Src);
910 if (!Reg)
911 return false;
912 Addr.setOffsetReg(Reg);
913 return true;
914 }
915 } // end switch
916
917 if (Addr.isRegBase() && !Addr.getReg()) {
918 Register Reg = getRegForValue(Obj);
919 if (!Reg)
920 return false;
921 Addr.setReg(Reg);
922 return true;
923 }
924
925 if (!Addr.getOffsetReg()) {
926 Register Reg = getRegForValue(Obj);
927 if (!Reg)
928 return false;
929 Addr.setOffsetReg(Reg);
930 return true;
931 }
932
933 return false;
934}
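// [Added annotation, not in the upstream source] A typical pattern folded by
// the Add and Shl cases above is
//   %off = shl i64 %idx, 2
//   %p   = add i64 %base, %off
// which ends up with base register = %base, offset register = %idx and
// shift = 2, later matching the reg+reg, LSL #2 addressing mode of a 4-byte
// access. Constant GEP indices are instead folded into the immediate offset.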
935
936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
937 const User *U = nullptr;
938 unsigned Opcode = Instruction::UserOp1;
939 bool InMBB = true;
940
941 if (const auto *I = dyn_cast<Instruction>(V)) {
942 Opcode = I->getOpcode();
943 U = I;
944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
945 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
946 Opcode = C->getOpcode();
947 U = C;
948 }
949
950 switch (Opcode) {
951 default: break;
952 case Instruction::BitCast:
953 // Look past bitcasts if its operand is in the same BB.
954 if (InMBB)
955 return computeCallAddress(U->getOperand(0), Addr);
956 break;
957 case Instruction::IntToPtr:
958 // Look past no-op inttoptrs if its operand is in the same BB.
959 if (InMBB &&
960 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
961 TLI.getPointerTy(DL))
962 return computeCallAddress(U->getOperand(0), Addr);
963 break;
964 case Instruction::PtrToInt:
965 // Look past no-op ptrtoints if its operand is in the same BB.
966 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
967 return computeCallAddress(U->getOperand(0), Addr);
968 break;
969 }
970
971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
972 Addr.setGlobalValue(GV);
973 return true;
974 }
975
976 // If all else fails, try to materialize the value in a register.
977 if (!Addr.getGlobalValue()) {
978 Addr.setReg(getRegForValue(V));
979 return Addr.getReg() != 0;
980 }
981
982 return false;
983}
984
985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
986 EVT evt = TLI.getValueType(DL, Ty, true);
987
988 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
989 return false;
990
991 // Only handle simple types.
992 if (evt == MVT::Other || !evt.isSimple())
993 return false;
994 VT = evt.getSimpleVT();
995
996 // This is a legal type, but it's not something we handle in fast-isel.
997 if (VT == MVT::f128)
998 return false;
999
1000 // Handle all other legal types, i.e. a register that will directly hold this
1001 // value.
1002 return TLI.isTypeLegal(VT);
1003}
1004
1005/// Determine if the value type is supported by FastISel.
1006///
1007/// FastISel for AArch64 can handle more value types than are legal. This adds
1008/// simple value types such as i1, i8, and i16.
1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1010 if (Ty->isVectorTy() && !IsVectorAllowed)
1011 return false;
1012
1013 if (isTypeLegal(Ty, VT))
1014 return true;
1015
1016 // If this is a type that can be sign- or zero-extended to a basic operation
1017 // go ahead and accept it now.
1018 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1019 return true;
1020
1021 return false;
1022}
1023
1024bool AArch64FastISel::isValueAvailable(const Value *V) const {
1025 if (!isa<Instruction>(V))
1026 return true;
1027
1028 const auto *I = cast<Instruction>(V);
1029 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1030}
1031
1032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1033 if (Subtarget->isTargetILP32())
1034 return false;
1035
1036 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1037 if (!ScaleFactor)
1038 return false;
1039
1040 bool ImmediateOffsetNeedsLowering = false;
1041 bool RegisterOffsetNeedsLowering = false;
1042 int64_t Offset = Addr.getOffset();
1043 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1044 ImmediateOffsetNeedsLowering = true;
1045 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1046 !isUInt<12>(Offset / ScaleFactor))
1047 ImmediateOffsetNeedsLowering = true;
1048
1049 // Cannot encode an offset register and an immediate offset in the same
1050 // instruction. Fold the immediate offset into the load/store instruction and
1051 // emit an additional add to take care of the offset register.
1052 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // Cannot encode zero register as base.
1056 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1057 RegisterOffsetNeedsLowering = true;
1058
1059 // If this is a stack pointer and the offset needs to be simplified then put
1060 // the alloca address into a register, set the base type back to register and
1061 // continue. This should almost never happen.
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1063 {
1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1066 ResultReg)
1067 .addFrameIndex(Addr.getFI())
1068 .addImm(0)
1069 .addImm(0);
1070 Addr.setKind(Address::RegBase);
1071 Addr.setReg(ResultReg);
1072 }
1073
1074 if (RegisterOffsetNeedsLowering) {
1075 unsigned ResultReg = 0;
1076 if (Addr.getReg()) {
1077 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1078 Addr.getExtendType() == AArch64_AM::UXTW )
1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), Addr.getExtendType(),
1081 Addr.getShift());
1082 else
1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1084 Addr.getOffsetReg(), AArch64_AM::LSL,
1085 Addr.getShift());
1086 } else {
1087 if (Addr.getExtendType() == AArch64_AM::UXTW)
1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089 Addr.getShift(), /*IsZExt=*/true);
1090 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/false);
1093 else
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1095 Addr.getShift());
1096 }
1097 if (!ResultReg)
1098 return false;
1099
1100 Addr.setReg(ResultReg);
1101 Addr.setOffsetReg(0);
1102 Addr.setShift(0);
1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1104 }
1105
1106 // Since the offset is too large for the load/store instruction get the
1107 // reg+offset into a register.
1108 if (ImmediateOffsetNeedsLowering) {
1109 unsigned ResultReg;
1110 if (Addr.getReg())
1111 // Try to fold the immediate into the add instruction.
1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1113 else
1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1115
1116 if (!ResultReg)
1117 return false;
1118 Addr.setReg(ResultReg);
1119 Addr.setOffset(0);
1120 }
1121 return true;
1122}
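// [Added annotation, not in the upstream source] Concrete cases for an i32
// access: offset -8 fits the signed 9-bit unscaled form (LDUR/STUR), offset
// 4096 fits the scaled unsigned 12-bit form (4096 / 4 = 1024), but an offset
// such as 1 << 24 fits neither, so the code above folds it into the base
// register with emitAdd_ri_ and clears the immediate offset.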
1123
1124void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1125 const MachineInstrBuilder &MIB,
1126 MachineMemOperand::Flags Flags,
1127 unsigned ScaleFactor,
1128 MachineMemOperand *MMO) {
1129 int64_t Offset = Addr.getOffset() / ScaleFactor;
1130 // Frame base works a bit differently. Handle it separately.
1131 if (Addr.isFIBase()) {
1132 int FI = Addr.getFI();
1133 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1134 // and alignment should be based on the VT.
1135 MMO = FuncInfo.MF->getMachineMemOperand(
1136 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1137 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1138 // Now add the rest of the operands.
1139 MIB.addFrameIndex(FI).addImm(Offset);
1140 } else {
1141 assert(Addr.isRegBase() && "Unexpected address kind.");
1142 const MCInstrDesc &II = MIB->getDesc();
1143 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1144 Addr.setReg(
1145 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1146 Addr.setOffsetReg(
1147 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1148 if (Addr.getOffsetReg()) {
1149 assert(Addr.getOffset() == 0 && "Unexpected offset");
1150 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1151 Addr.getExtendType() == AArch64_AM::SXTX;
1152 MIB.addReg(Addr.getReg());
1153 MIB.addReg(Addr.getOffsetReg());
1154 MIB.addImm(IsSigned);
1155 MIB.addImm(Addr.getShift() != 0);
1156 } else
1157 MIB.addReg(Addr.getReg()).addImm(Offset);
1158 }
1159
1160 if (MMO)
1161 MIB.addMemOperand(MMO);
1162}
1163
1164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1165 const Value *RHS, bool SetFlags,
1166 bool WantResult, bool IsZExt) {
1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1168 bool NeedExtend = false;
1169 switch (RetVT.SimpleTy) {
1170 default:
1171 return 0;
1172 case MVT::i1:
1173 NeedExtend = true;
1174 break;
1175 case MVT::i8:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1178 break;
1179 case MVT::i16:
1180 NeedExtend = true;
1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1182 break;
1183 case MVT::i32: // fall-through
1184 case MVT::i64:
1185 break;
1186 }
1187 MVT SrcVT = RetVT;
1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1189
1190 // Canonicalize immediates to the RHS first.
1191 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1192 std::swap(LHS, RHS);
1193
1194 // Canonicalize mul by power of 2 to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1196 if (isMulPowOf2(LHS))
1197 std::swap(LHS, RHS);
1198
1199 // Canonicalize shift immediate to the RHS.
1200 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1201 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1202 if (isa<ConstantInt>(SI->getOperand(1)))
1203 if (SI->getOpcode() == Instruction::Shl ||
1204 SI->getOpcode() == Instruction::LShr ||
1205 SI->getOpcode() == Instruction::AShr )
1206 std::swap(LHS, RHS);
1207
1208 Register LHSReg = getRegForValue(LHS);
1209 if (!LHSReg)
1210 return 0;
1211
1212 if (NeedExtend)
1213 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1214
1215 unsigned ResultReg = 0;
1216 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1217 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1218 if (C->isNegative())
1219 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1220 WantResult);
1221 else
1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1223 WantResult);
1224 } else if (const auto *C = dyn_cast<Constant>(RHS))
1225 if (C->isNullValue())
1226 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1227
1228 if (ResultReg)
1229 return ResultReg;
1230
1231 // Only extend the RHS within the instruction if there is a valid extend type.
1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1233 isValueAvailable(RHS)) {
1234 Register RHSReg = getRegForValue(RHS);
1235 if (!RHSReg)
1236 return 0;
1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1238 SetFlags, WantResult);
1239 }
1240
1241 // Check if the mul can be folded into the instruction.
1242 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243 if (isMulPowOf2(RHS)) {
1244 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1245 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1246
1247 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1248 if (C->getValue().isPowerOf2())
1249 std::swap(MulLHS, MulRHS);
1250
1251 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1252 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1253 Register RHSReg = getRegForValue(MulLHS);
1254 if (!RHSReg)
1255 return 0;
1256 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1257 ShiftVal, SetFlags, WantResult);
1258 if (ResultReg)
1259 return ResultReg;
1260 }
1261 }
1262
1263 // Check if the shift can be folded into the instruction.
1264 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1265 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1266 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1267 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1268 switch (SI->getOpcode()) {
1269 default: break;
1270 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1271 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1272 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1273 }
1274 uint64_t ShiftVal = C->getZExtValue();
1275 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1276 Register RHSReg = getRegForValue(SI->getOperand(0));
1277 if (!RHSReg)
1278 return 0;
1279 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1280 ShiftVal, SetFlags, WantResult);
1281 if (ResultReg)
1282 return ResultReg;
1283 }
1284 }
1285 }
1286 }
1287
1288 Register RHSReg = getRegForValue(RHS);
1289 if (!RHSReg)
1290 return 0;
1291
1292 if (NeedExtend)
1293 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1294
1295 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1296}
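// [Added annotation, not in the upstream source] This routine tries the
// cheapest encoding first. For example, for "add i32 %a, (shl i32 %b, 3)"
// where the shift has a single use in the same block, the shift is folded and
// a single ADDWrs with an LSL #3 shifted operand is emitted instead of a
// separate LSL followed by ADDWrr.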
1297
1298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1299 unsigned RHSReg, bool SetFlags,
1300 bool WantResult) {
1301 assert(LHSReg && RHSReg && "Invalid register number.");
1302
1303 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1304 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1305 return 0;
1306
1307 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1308 return 0;
1309
1310 static const unsigned OpcTable[2][2][2] = {
1311 { { AArch64::SUBWrr, AArch64::SUBXrr },
1312 { AArch64::ADDWrr, AArch64::ADDXrr } },
1313 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1314 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1315 };
1316 bool Is64Bit = RetVT == MVT::i64;
1317 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1318 const TargetRegisterClass *RC =
1319 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1320 unsigned ResultReg;
1321 if (WantResult)
1322 ResultReg = createResultReg(RC);
1323 else
1324 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1325
1326 const MCInstrDesc &II = TII.get(Opc);
1327 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1328 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1329 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1330 .addReg(LHSReg)
1331 .addReg(RHSReg);
1332 return ResultReg;
1333}
1334
1335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1336 uint64_t Imm, bool SetFlags,
1337 bool WantResult) {
1338 assert(LHSReg && "Invalid register number.");
1339
1340 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1341 return 0;
1342
1343 unsigned ShiftImm;
1344 if (isUInt<12>(Imm))
1345 ShiftImm = 0;
1346 else if ((Imm & 0xfff000) == Imm) {
1347 ShiftImm = 12;
1348 Imm >>= 12;
1349 } else
1350 return 0;
1351
1352 static const unsigned OpcTable[2][2][2] = {
1353 { { AArch64::SUBWri, AArch64::SUBXri },
1354 { AArch64::ADDWri, AArch64::ADDXri } },
1355 { { AArch64::SUBSWri, AArch64::SUBSXri },
1356 { AArch64::ADDSWri, AArch64::ADDSXri } }
1357 };
1358 bool Is64Bit = RetVT == MVT::i64;
1359 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1360 const TargetRegisterClass *RC;
1361 if (SetFlags)
1362 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1363 else
1364 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1365 unsigned ResultReg;
1366 if (WantResult)
1367 ResultReg = createResultReg(RC);
1368 else
1369 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1370
1371 const MCInstrDesc &II = TII.get(Opc);
1372 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1374 .addReg(LHSReg)
1375 .addImm(Imm)
1376 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1377 return ResultReg;
1378}
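// [Added annotation, not in the upstream source] The immediate form accepts a
// plain 12-bit value or a 12-bit value shifted left by 12. So Imm = 42
// encodes directly, Imm = 0x7b000 encodes as 0x7b with the LSL #12 shifter,
// and Imm = 0x123456 makes this helper return 0 so the caller falls back to
// materializing the constant and using a register-register form.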
1379
1380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1381 unsigned RHSReg,
1382 AArch64_AM::ShiftExtendType ShiftType,
1383 uint64_t ShiftImm, bool SetFlags,
1384 bool WantResult) {
1385 assert(LHSReg && RHSReg && "Invalid register number.");
1386 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1387 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1388
1389 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1390 return 0;
1391
1392 // Don't deal with undefined shifts.
1393 if (ShiftImm >= RetVT.getSizeInBits())
1394 return 0;
1395
1396 static const unsigned OpcTable[2][2][2] = {
1397 { { AArch64::SUBWrs, AArch64::SUBXrs },
1398 { AArch64::ADDWrs, AArch64::ADDXrs } },
1399 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1400 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1401 };
1402 bool Is64Bit = RetVT == MVT::i64;
1403 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1404 const TargetRegisterClass *RC =
1405 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1406 unsigned ResultReg;
1407 if (WantResult)
1408 ResultReg = createResultReg(RC);
1409 else
1410 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1411
1412 const MCInstrDesc &II = TII.get(Opc);
1413 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1414 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1416 .addReg(LHSReg)
1417 .addReg(RHSReg)
1418 .addImm(getShifterImm(ShiftType, ShiftImm));
1419 return ResultReg;
1420}
1421
1422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1423 unsigned RHSReg,
1424 AArch64_AM::ShiftExtendType ExtType,
1425 uint64_t ShiftImm, bool SetFlags,
1426 bool WantResult) {
1427 assert(LHSReg && RHSReg && "Invalid register number.");
1428 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1429 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1430
1431 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1432 return 0;
1433
1434 if (ShiftImm >= 4)
1435 return 0;
1436
1437 static const unsigned OpcTable[2][2][2] = {
1438 { { AArch64::SUBWrx, AArch64::SUBXrx },
1439 { AArch64::ADDWrx, AArch64::ADDXrx } },
1440 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1441 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1442 };
1443 bool Is64Bit = RetVT == MVT::i64;
1444 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1445 const TargetRegisterClass *RC = nullptr;
1446 if (SetFlags)
1447 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1448 else
1449 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1450 unsigned ResultReg;
1451 if (WantResult)
1452 ResultReg = createResultReg(RC);
1453 else
1454 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1455
1456 const MCInstrDesc &II = TII.get(Opc);
1457 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1458 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1460 .addReg(LHSReg)
1461 .addReg(RHSReg)
1462 .addImm(getArithExtendImm(ExtType, ShiftImm));
1463 return ResultReg;
1464}
1465
1466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1467 Type *Ty = LHS->getType();
1468 EVT EVT = TLI.getValueType(DL, Ty, true);
1469 if (!EVT.isSimple())
1470 return false;
1471 MVT VT = EVT.getSimpleVT();
1472
1473 switch (VT.SimpleTy) {
1474 default:
1475 return false;
1476 case MVT::i1:
1477 case MVT::i8:
1478 case MVT::i16:
1479 case MVT::i32:
1480 case MVT::i64:
1481 return emitICmp(VT, LHS, RHS, IsZExt);
1482 case MVT::f32:
1483 case MVT::f64:
1484 return emitFCmp(VT, LHS, RHS);
1485 }
1486}
1487
1488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1489 bool IsZExt) {
1490 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1491 IsZExt) != 0;
1492}
1493
1494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1495 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1496 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1497}
1498
1499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1500 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1501 return false;
1502
1503 // Check to see if the 2nd operand is a constant that we can encode directly
1504 // in the compare.
1505 bool UseImm = false;
1506 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1507 if (CFP->isZero() && !CFP->isNegative())
1508 UseImm = true;
1509
1510 Register LHSReg = getRegForValue(LHS);
1511 if (!LHSReg)
1512 return false;
1513
1514 if (UseImm) {
1515 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1517 .addReg(LHSReg);
1518 return true;
1519 }
1520
1521 Register RHSReg = getRegForValue(RHS);
1522 if (!RHSReg)
1523 return false;
1524
1525 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1527 .addReg(LHSReg)
1528 .addReg(RHSReg);
1529 return true;
1530}
1531
1532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1533 bool SetFlags, bool WantResult, bool IsZExt) {
1534 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1535 IsZExt);
1536}
1537
1538/// This method is a wrapper to simplify add emission.
1539///
1540/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1541/// that fails, then try to materialize the immediate into a register and use
1542/// emitAddSub_rr instead.
1543unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1544 unsigned ResultReg;
1545 if (Imm < 0)
1546 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1547 else
1548 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1549
1550 if (ResultReg)
1551 return ResultReg;
1552
1553 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1554 if (!CReg)
1555 return 0;
1556
1557 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1558 return ResultReg;
1559}
1560
1561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1562 bool SetFlags, bool WantResult, bool IsZExt) {
1563 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1564 IsZExt);
1565}
1566
1567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1568 unsigned RHSReg, bool WantResult) {
1569 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1570 /*SetFlags=*/true, WantResult);
1571}
1572
1573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1574 unsigned RHSReg,
1575 AArch64_AM::ShiftExtendType ShiftType,
1576 uint64_t ShiftImm, bool WantResult) {
1577 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1578 ShiftImm, /*SetFlags=*/true, WantResult);
1579}
1580
1581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1582 const Value *LHS, const Value *RHS) {
1583 // Canonicalize immediates to the RHS first.
1584 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1585 std::swap(LHS, RHS);
1586
1587 // Canonicalize mul by power-of-2 to the RHS.
1588 if (LHS->hasOneUse() && isValueAvailable(LHS))
1589 if (isMulPowOf2(LHS))
1590 std::swap(LHS, RHS);
1591
1592 // Canonicalize shift immediate to the RHS.
1593 if (LHS->hasOneUse() && isValueAvailable(LHS))
1594 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1595 if (isa<ConstantInt>(SI->getOperand(1)))
1596 std::swap(LHS, RHS);
1597
1598 Register LHSReg = getRegForValue(LHS);
1599 if (!LHSReg)
1600 return 0;
1601
1602 unsigned ResultReg = 0;
1603 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1604 uint64_t Imm = C->getZExtValue();
1605 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1606 }
1607 if (ResultReg)
1608 return ResultReg;
1609
1610 // Check if the mul can be folded into the instruction.
1611 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1612 if (isMulPowOf2(RHS)) {
1613 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1614 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1615
1616 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1617 if (C->getValue().isPowerOf2())
1618 std::swap(MulLHS, MulRHS);
1619
1620 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1621 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1622
1623 Register RHSReg = getRegForValue(MulLHS);
1624 if (!RHSReg)
1625 return 0;
1626 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1627 if (ResultReg)
1628 return ResultReg;
1629 }
1630 }
1631
1632 // Check if the shift can be folded into the instruction.
1633 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1634 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1635 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1636 uint64_t ShiftVal = C->getZExtValue();
1637 Register RHSReg = getRegForValue(SI->getOperand(0));
1638 if (!RHSReg)
1639 return 0;
1640 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1641 if (ResultReg)
1642 return ResultReg;
1643 }
1644 }
1645
1646 Register RHSReg = getRegForValue(RHS);
1647 if (!RHSReg)
1648 return 0;
1649
1650 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1651 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1652 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1653 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1654 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1655 }
1656 return ResultReg;
1657}
1658
1659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1660 unsigned LHSReg, uint64_t Imm) {
1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 "ISD nodes are not consecutive!");
1663 static const unsigned OpcTable[3][2] = {
1664 { AArch64::ANDWri, AArch64::ANDXri },
1665 { AArch64::ORRWri, AArch64::ORRXri },
1666 { AArch64::EORWri, AArch64::EORXri }
1667 };
1668 const TargetRegisterClass *RC;
1669 unsigned Opc;
1670 unsigned RegSize;
1671 switch (RetVT.SimpleTy) {
1672 default:
1673 return 0;
1674 case MVT::i1:
1675 case MVT::i8:
1676 case MVT::i16:
1677 case MVT::i32: {
1678 unsigned Idx = ISDOpc - ISD::AND;
1679 Opc = OpcTable[Idx][0];
1680 RC = &AArch64::GPR32spRegClass;
1681 RegSize = 32;
1682 break;
1683 }
1684 case MVT::i64:
1685 Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 RC = &AArch64::GPR64spRegClass;
1687 RegSize = 64;
1688 break;
1689 }
1690
1691 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692 return 0;
1693
1694 Register ResultReg =
1695 fastEmitInst_ri(Opc, RC, LHSReg,
1696 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1700 }
1701 return ResultReg;
1702}
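// [Added annotation, not in the upstream source] ANDWri/ORRWri/EORWri take a
// bitmask ("logical") immediate, so the constant is first validated with
// isLogicalImmediate and then encoded with encodeLogicalImmediate. A mask
// such as 0xff or 0xffff0000 qualifies; an arbitrary value like 0x12345 does
// not, and the helper returns 0 so the caller uses a register operand instead.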
1703
1704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 unsigned LHSReg, unsigned RHSReg,
1706 uint64_t ShiftImm) {
1707 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708 "ISD nodes are not consecutive!");
1709 static const unsigned OpcTable[3][2] = {
1710 { AArch64::ANDWrs, AArch64::ANDXrs },
1711 { AArch64::ORRWrs, AArch64::ORRXrs },
1712 { AArch64::EORWrs, AArch64::EORXrs }
1713 };
1714
1715 // Don't deal with undefined shifts.
1716 if (ShiftImm >= RetVT.getSizeInBits())
1717 return 0;
1718
1719 const TargetRegisterClass *RC;
1720 unsigned Opc;
1721 switch (RetVT.SimpleTy) {
1722 default:
1723 return 0;
1724 case MVT::i1:
1725 case MVT::i8:
1726 case MVT::i16:
1727 case MVT::i32:
1728 Opc = OpcTable[ISDOpc - ISD::AND][0];
1729 RC = &AArch64::GPR32RegClass;
1730 break;
1731 case MVT::i64:
1732 Opc = OpcTable[ISDOpc - ISD::AND][1];
1733 RC = &AArch64::GPR64RegClass;
1734 break;
1735 }
1736 Register ResultReg =
1737 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1738 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1742 }
1743 return ResultReg;
1744}
1745
1746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1747 uint64_t Imm) {
1748 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1749}
1750
1751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752 bool WantZExt, MachineMemOperand *MMO) {
1753 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754 return 0;
1755
1756 // Simplify this down to something we can handle.
1757 if (!simplifyAddress(Addr, VT))
1758 return 0;
1759
1760 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761 if (!ScaleFactor)
1762 llvm_unreachable("Unexpected value type.");
1763
1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
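// For example (informal): with VT == MVT::i32 (ScaleFactor == 4) an offset of
// +16 fits the scaled form (LDRWui with imm 16/4 == 4), while an offset of -8
// is not representable there and falls back to the unscaled LDURWi form.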
1766 bool UseScaled = true;
1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768 UseScaled = false;
1769 ScaleFactor = 1;
1770 }
1771
1772 static const unsigned GPOpcTable[2][8][4] = {
1773 // Sign-extend.
1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1775 AArch64::LDURXi },
1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1777 AArch64::LDURXi },
1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1779 AArch64::LDRXui },
1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1781 AArch64::LDRXui },
1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783 AArch64::LDRXroX },
1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785 AArch64::LDRXroX },
1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787 AArch64::LDRXroW },
1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789 AArch64::LDRXroW }
1790 },
1791 // Zero-extend.
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1793 AArch64::LDURXi },
1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1795 AArch64::LDURXi },
1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1797 AArch64::LDRXui },
1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1799 AArch64::LDRXui },
1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1801 AArch64::LDRXroX },
1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1803 AArch64::LDRXroX },
1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1805 AArch64::LDRXroW },
1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1807 AArch64::LDRXroW }
1808 }
1809 };
1810
1811 static const unsigned FPOpcTable[4][2] = {
1812 { AArch64::LDURSi, AArch64::LDURDi },
1813 { AArch64::LDRSui, AArch64::LDRDui },
1814 { AArch64::LDRSroX, AArch64::LDRDroX },
1815 { AArch64::LDRSroW, AArch64::LDRDroW }
1816 };
1817
1818 unsigned Opc;
1819 const TargetRegisterClass *RC;
1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821 Addr.getOffsetReg();
1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824 Addr.getExtendType() == AArch64_AM::SXTW)
1825 Idx++;
1826
1827 bool IsRet64Bit = RetVT == MVT::i64;
1828 switch (VT.SimpleTy) {
1829 default:
1830 llvm_unreachable("Unexpected value type.");
1831 case MVT::i1: // Intentional fall-through.
1832 case MVT::i8:
1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834 RC = (IsRet64Bit && !WantZExt) ?
1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836 break;
1837 case MVT::i16:
1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839 RC = (IsRet64Bit && !WantZExt) ?
1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841 break;
1842 case MVT::i32:
1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844 RC = (IsRet64Bit && !WantZExt) ?
1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846 break;
1847 case MVT::i64:
1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849 RC = &AArch64::GPR64RegClass;
1850 break;
1851 case MVT::f32:
1852 Opc = FPOpcTable[Idx][0];
1853 RC = &AArch64::FPR32RegClass;
1854 break;
1855 case MVT::f64:
1856 Opc = FPOpcTable[Idx][1];
1857 RC = &AArch64::FPR64RegClass;
1858 break;
1859 }
1860
1861 // Create the base instruction, then add the operands.
1862 Register ResultReg = createResultReg(RC);
1863 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1864 TII.get(Opc), ResultReg);
1865 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
1867 // Loading an i1 requires special handling.
1868 if (VT == MVT::i1) {
1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1870 assert(ANDReg && "Unexpected AND instruction emission failure.");
1871 ResultReg = ANDReg;
1872 }
1873
1874 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1875 // the 32bit reg to a 64bit reg.
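// E.g. a load feeding 'zext i32 ... to i64' becomes a 32-bit LDRWui plus a
// SUBREG_TO_REG into sub_32; writing a W register already zeroes the upper
// 32 bits, so no extra masking instruction is needed.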
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1879 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880 .addImm(0)
1881 .addReg(ResultReg, getKillRegState(true))
1882 .addImm(AArch64::sub_32);
1883 ResultReg = Reg64;
1884 }
1885 return ResultReg;
1886}
1887
1888bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889 MVT VT;
1890 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891 return false;
1892
1893 if (VT.isVector())
1894 return selectOperator(I, I->getOpcode());
1895
1896 unsigned ResultReg;
1897 switch (I->getOpcode()) {
1898 default:
1899 llvm_unreachable("Unexpected instruction.");
1900 case Instruction::Add:
1901 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902 break;
1903 case Instruction::Sub:
1904 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905 break;
1906 }
1907 if (!ResultReg)
1908 return false;
1909
1910 updateValueMap(I, ResultReg);
1911 return true;
1912}
1913
1914bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915 MVT VT;
1916 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917 return false;
1918
1919 if (VT.isVector())
1920 return selectOperator(I, I->getOpcode());
1921
1922 unsigned ResultReg;
1923 switch (I->getOpcode()) {
1924 default:
1925 llvm_unreachable("Unexpected instruction.");
1926 case Instruction::And:
1927 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 case Instruction::Or:
1930 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1932 case Instruction::Xor:
1933 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934 break;
1935 }
1936 if (!ResultReg)
1937 return false;
1938
1939 updateValueMap(I, ResultReg);
1940 return true;
1941}
1942
1943bool AArch64FastISel::selectLoad(const Instruction *I) {
1944 MVT VT;
1945 // Verify we have a legal type before going any further. Currently, we handle
1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949 cast<LoadInst>(I)->isAtomic())
1950 return false;
1951
1952 const Value *SV = I->getOperand(0);
1953 if (TLI.supportSwiftError()) {
1954 // Swifterror values can come from either a function parameter with
1955 // swifterror attribute or an alloca with swifterror attribute.
1956 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957 if (Arg->hasSwiftErrorAttr())
1958 return false;
1959 }
1960
1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962 if (Alloca->isSwiftError())
1963 return false;
1964 }
1965 }
1966
1967 // See if we can handle this address.
1968 Address Addr;
1969 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970 return false;
1971
1972 // Fold the following sign-/zero-extend into the load instruction.
1973 bool WantZExt = true;
1974 MVT RetVT = VT;
1975 const Value *IntExtVal = nullptr;
1976 if (I->hasOneUse()) {
1977 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978 if (isTypeSupported(ZE->getType(), RetVT))
1979 IntExtVal = ZE;
1980 else
1981 RetVT = VT;
1982 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983 if (isTypeSupported(SE->getType(), RetVT))
1984 IntExtVal = SE;
1985 else
1986 RetVT = VT;
1987 WantZExt = false;
1988 }
1989 }
1990
1991 unsigned ResultReg =
1992 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993 if (!ResultReg)
1994 return false;
1995
1996 // There are a few different cases we have to handle, because the load or the
1997 // sign-/zero-extend might not be selected by FastISel if we fall back to
1998 // SelectionDAG. There is also an ordering issue when both instructions are in
1999 // different basic blocks.
2000 // 1.) The load instruction is selected by FastISel, but the integer extend
2001 // is not. This usually happens when the integer extend is in a different
2002 // basic block and SelectionDAG took over for that basic block.
2003 // 2.) The load instruction is selected before the integer extend. This only
2004 // happens when the integer extend is in a different basic block.
2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006 // by FastISel. This happens if there are instructions between the load
2007 // and the integer extend that couldn't be selected by FastISel.
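// For instance (case 2.), the load and its only use may be split across blocks:
//   bb1: %v = load i32, ptr %p
//        br label %bb2
//   bb2: %e = zext i32 %v to i64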
2008 if (IntExtVal) {
2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011 // it when it selects the integer extend.
2012 Register Reg = lookUpRegForValue(IntExtVal);
2013 auto *MI = MRI.getUniqueVRegDef(Reg);
2014 if (!MI) {
2015 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016 if (WantZExt) {
2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2019 ResultReg = std::prev(I)->getOperand(0).getReg();
2020 removeDeadCode(I, std::next(I));
2021 } else
2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023 AArch64::sub_32);
2024 }
2025 updateValueMap(I, ResultReg);
2026 return true;
2027 }
2028
2029 // The integer extend has already been emitted - delete all the instructions
2030 // that have been emitted by the integer extend lowering code and use the
2031 // result from the load instruction directly.
2032 while (MI) {
2033 Reg = 0;
2034 for (auto &Opnd : MI->uses()) {
2035 if (Opnd.isReg()) {
2036 Reg = Opnd.getReg();
2037 break;
2038 }
2039 }
2040 MachineBasicBlock::iterator I(MI);
2041 removeDeadCode(I, std::next(I));
2042 MI = nullptr;
2043 if (Reg)
2044 MI = MRI.getUniqueVRegDef(Reg);
2045 }
2046 updateValueMap(IntExtVal, ResultReg);
2047 return true;
2048 }
2049
2050 updateValueMap(I, ResultReg);
2051 return true;
2052}
2053
2054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055 unsigned AddrReg,
2056 MachineMemOperand *MMO) {
2057 unsigned Opc;
2058 switch (VT.SimpleTy) {
2059 default: return false;
2060 case MVT::i8: Opc = AArch64::STLRB; break;
2061 case MVT::i16: Opc = AArch64::STLRH; break;
2062 case MVT::i32: Opc = AArch64::STLRW; break;
2063 case MVT::i64: Opc = AArch64::STLRX; break;
2064 }
2065
2066 const MCInstrDesc &II = TII.get(Opc);
2067 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2070 .addReg(SrcReg)
2071 .addReg(AddrReg)
2072 .addMemOperand(MMO);
2073 return true;
2074}
2075
2076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077 MachineMemOperand *MMO) {
2078 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079 return false;
2080
2081 // Simplify this down to something we can handle.
2082 if (!simplifyAddress(Addr, VT))
2083 return false;
2084
2085 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086 if (!ScaleFactor)
2087 llvm_unreachable("Unexpected value type.");
2088
2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091 bool UseScaled = true;
2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093 UseScaled = false;
2094 ScaleFactor = 1;
2095 }
2096
2097 static const unsigned OpcTable[4][6] = {
2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099 AArch64::STURSi, AArch64::STURDi },
2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101 AArch64::STRSui, AArch64::STRDui },
2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103 AArch64::STRSroX, AArch64::STRDroX },
2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105 AArch64::STRSroW, AArch64::STRDroW }
2106 };
2107
2108 unsigned Opc;
2109 bool VTIsi1 = false;
2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111 Addr.getOffsetReg();
2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114 Addr.getExtendType() == AArch64_AM::SXTW)
2115 Idx++;
2116
2117 switch (VT.SimpleTy) {
2118 default: llvm_unreachable("Unexpected value type.");
2119 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2120 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126 }
2127
2128 // Storing an i1 requires special handling.
2129 if (VTIsi1 && SrcReg != AArch64::WZR) {
2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2131 assert(ANDReg && "Unexpected AND instruction emission failure.");
2132 SrcReg = ANDReg;
2133 }
2134 // Create the base instruction, then add the operands.
2135 const MCInstrDesc &II = TII.get(Opc);
2136 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137 MachineInstrBuilder MIB =
2138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2139 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141 return true;
2142}
2143
2144bool AArch64FastISel::selectStore(const Instruction *I) {
2145 MVT VT;
2146 const Value *Op0 = I->getOperand(0);
2147 // Verify we have a legal type before going any further. Currently, we handle
2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151 return false;
2152
2153 const Value *PtrV = I->getOperand(1);
2154 if (TLI.supportSwiftError()) {
2155 // Swifterror values can come from either a function parameter with
2156 // swifterror attribute or an alloca with swifterror attribute.
2157 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158 if (Arg->hasSwiftErrorAttr())
2159 return false;
2160 }
2161
2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163 if (Alloca->isSwiftError())
2164 return false;
2165 }
2166 }
2167
2168 // Get the value to be stored into a register. Use the zero register directly
2169 // when possible to avoid an unnecessary copy and a wasted register.
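// E.g. 'store i32 0, ptr %p' is emitted as 'str wzr, [xN]' rather than first
// materializing the zero constant in a scratch register.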
2170 unsigned SrcReg = 0;
2171 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172 if (CI->isZero())
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175 if (CF->isZero() && !CF->isNegative()) {
2176 VT = MVT::getIntegerVT(VT.getSizeInBits());
2177 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178 }
2179 }
2180
2181 if (!SrcReg)
2182 SrcReg = getRegForValue(Op0);
2183
2184 if (!SrcReg)
2185 return false;
2186
2187 auto *SI = cast<StoreInst>(I);
2188
2189 // Try to emit a STLR for seq_cst/release.
2190 if (SI->isAtomic()) {
2191 AtomicOrdering Ord = SI->getOrdering();
2192 // The non-atomic instructions are sufficient for relaxed stores.
2193 if (isReleaseOrStronger(Ord)) {
2194 // The STLR addressing mode only supports a base reg; pass that directly.
2195 Register AddrReg = getRegForValue(PtrV);
2196 return emitStoreRelease(VT, SrcReg, AddrReg,
2197 createMachineMemOperandFor(I));
2198 }
2199 }
2200
2201 // See if we can handle this address.
2202 Address Addr;
2203 if (!computeAddress(PtrV, Addr, Op0->getType()))
2204 return false;
2205
2206 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207 return false;
2208 return true;
2209}
2210
2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212 switch (Pred) {
2213 case CmpInst::FCMP_ONE:
2214 case CmpInst::FCMP_UEQ:
2215 default:
2216 // AL is our "false" for now. The other two need more compares.
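// (FCMP_ONE is OLT || OGT and FCMP_UEQ is OEQ || UNO, so neither maps onto a
// single condition code; callers handle them with an extra branch or CSINC on
// the same flags.)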
2217 return AArch64CC::AL;
2218 case CmpInst::ICMP_EQ:
2219 case CmpInst::FCMP_OEQ:
2220 return AArch64CC::EQ;
2221 case CmpInst::ICMP_SGT:
2222 case CmpInst::FCMP_OGT:
2223 return AArch64CC::GT;
2224 case CmpInst::ICMP_SGE:
2225 case CmpInst::FCMP_OGE:
2226 return AArch64CC::GE;
2227 case CmpInst::ICMP_UGT:
2228 case CmpInst::FCMP_UGT:
2229 return AArch64CC::HI;
2230 case CmpInst::FCMP_OLT:
2231 return AArch64CC::MI;
2232 case CmpInst::ICMP_ULE:
2233 case CmpInst::FCMP_OLE:
2234 return AArch64CC::LS;
2235 case CmpInst::FCMP_ORD:
2236 return AArch64CC::VC;
2237 case CmpInst::FCMP_UNO:
2238 return AArch64CC::VS;
2239 case CmpInst::FCMP_UGE:
2240 return AArch64CC::PL;
2241 case CmpInst::ICMP_SLT:
2242 case CmpInst::FCMP_ULT:
2243 return AArch64CC::LT;
2244 case CmpInst::ICMP_SLE:
2245 case CmpInst::FCMP_ULE:
2246 return AArch64CC::LE;
2247 case CmpInst::FCMP_UNE:
2248 case CmpInst::ICMP_NE:
2249 return AArch64CC::NE;
2250 case CmpInst::ICMP_UGE:
2251 return AArch64CC::HS;
2252 case CmpInst::ICMP_ULT:
2253 return AArch64CC::LO;
2254 }
2255}
2256
2257/// Try to emit a combined compare-and-branch instruction.
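/// For example, 'icmp eq %x, 0' feeding a branch becomes CBZ, a branch on
/// '(and %x, 1 << N) != 0' becomes TBNZ #N, and a signed compare against zero
/// ('icmp slt %x, 0') becomes a sign-bit test, TBNZ #(BW - 1).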
2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2260 // will not be produced, as they are conditional branch instructions that do
2261 // not set flags.
2262 if (FuncInfo.MF->getFunction().hasFnAttribute(
2263 Attribute::SpeculativeLoadHardening))
2264 return false;
2265
2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2267 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2269
2270 const Value *LHS = CI->getOperand(0);
2271 const Value *RHS = CI->getOperand(1);
2272
2273 MVT VT;
2274 if (!isTypeSupported(LHS->getType(), VT))
2275 return false;
2276
2277 unsigned BW = VT.getSizeInBits();
2278 if (BW > 64)
2279 return false;
2280
2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2283
2284 // Try to take advantage of fallthrough opportunities.
2285 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2286 std::swap(TBB, FBB);
2287 Predicate = CmpInst::getInversePredicate(Predicate);
2288 }
2289
2290 int TestBit = -1;
2291 bool IsCmpNE;
2292 switch (Predicate) {
2293 default:
2294 return false;
2295 case CmpInst::ICMP_EQ:
2296 case CmpInst::ICMP_NE:
2297 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2298 std::swap(LHS, RHS);
2299
2300 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2301 return false;
2302
2303 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2305 const Value *AndLHS = AI->getOperand(0);
2306 const Value *AndRHS = AI->getOperand(1);
2307
2308 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2309 if (C->getValue().isPowerOf2())
2310 std::swap(AndLHS, AndRHS);
2311
2312 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2313 if (C->getValue().isPowerOf2()) {
2314 TestBit = C->getValue().logBase2();
2315 LHS = AndLHS;
2316 }
2317 }
2318
2319 if (VT == MVT::i1)
2320 TestBit = 0;
2321
2322 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2323 break;
2324 case CmpInst::ICMP_SLT:
2325 case CmpInst::ICMP_SGE:
2326 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2327 return false;
2328
2329 TestBit = BW - 1;
2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2331 break;
2332 case CmpInst::ICMP_SGT:
2333 case CmpInst::ICMP_SLE:
2334 if (!isa<ConstantInt>(RHS))
2335 return false;
2336
2337 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2338 return false;
2339
2340 TestBit = BW - 1;
2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2342 break;
2343 } // end switch
2344
2345 static const unsigned OpcTable[2][2][2] = {
2346 { {AArch64::CBZW, AArch64::CBZX },
2347 {AArch64::CBNZW, AArch64::CBNZX} },
2348 { {AArch64::TBZW, AArch64::TBZX },
2349 {AArch64::TBNZW, AArch64::TBNZX} }
2350 };
2351
2352 bool IsBitTest = TestBit != -1;
2353 bool Is64Bit = BW == 64;
2354 if (TestBit < 32 && TestBit >= 0)
2355 Is64Bit = false;
2356
2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2358 const MCInstrDesc &II = TII.get(Opc);
2359
2360 Register SrcReg = getRegForValue(LHS);
2361 if (!SrcReg)
2362 return false;
2363
2364 if (BW == 64 && !Is64Bit)
2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2366
2367 if ((BW < 32) && !IsBitTest)
2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369
2370 // Emit the combined compare and branch instruction.
2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372 MachineInstrBuilder MIB =
2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2374 .addReg(SrcReg);
2375 if (IsBitTest)
2376 MIB.addImm(TestBit);
2377 MIB.addMBB(TBB);
2378
2379 finishCondBranch(BI->getParent(), TBB, FBB);
2380 return true;
2381}
2382
2383bool AArch64FastISel::selectBranch(const Instruction *I) {
2384 const BranchInst *BI = cast<BranchInst>(I);
2385 if (BI->isUnconditional()) {
2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387 fastEmitBranch(MSucc, BI->getDebugLoc());
2388 return true;
2389 }
2390
2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393
2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395 if (CI->hasOneUse() && isValueAvailable(CI)) {
2396 // Try to optimize or fold the cmp.
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398 switch (Predicate) {
2399 default:
2400 break;
2401 case CmpInst::FCMP_FALSE:
2402 fastEmitBranch(FBB, MIMD.getDL());
2403 return true;
2404 case CmpInst::FCMP_TRUE:
2405 fastEmitBranch(TBB, MIMD.getDL());
2406 return true;
2407 }
2408
2409 // Try to emit a combined compare-and-branch first.
2410 if (emitCompareAndBranch(BI))
2411 return true;
2412
2413 // Try to take advantage of fallthrough opportunities.
2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415 std::swap(TBB, FBB);
2416 Predicate = CmpInst::getInversePredicate(Predicate);
2417 }
2418
2419 // Emit the cmp.
2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421 return false;
2422
2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424 // instruction.
2425 AArch64CC::CondCode CC = getCompareCC(Predicate);
2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427 switch (Predicate) {
2428 default:
2429 break;
2430 case CmpInst::FCMP_UEQ:
2431 ExtraCC = AArch64CC::EQ;
2432 CC = AArch64CC::VS;
2433 break;
2434 case CmpInst::FCMP_ONE:
2435 ExtraCC = AArch64CC::MI;
2436 CC = AArch64CC::GT;
2437 break;
2438 }
2439 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440
2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442 if (ExtraCC != AArch64CC::AL) {
2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2444 .addImm(ExtraCC)
2445 .addMBB(TBB);
2446 }
2447
2448 // Emit the branch.
2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450 .addImm(CC)
2451 .addMBB(TBB);
2452
2453 finishCondBranch(BI->getParent(), TBB, FBB);
2454 return true;
2455 }
2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457 uint64_t Imm = CI->getZExtValue();
2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2460 .addMBB(Target);
2461
2462 // Obtain the branch probability and add the target to the successor list.
2463 if (FuncInfo.BPI) {
2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465 BI->getParent(), Target->getBasicBlock());
2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467 } else
2468 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469 return true;
2470 } else {
2471 AArch64CC::CondCode CC = AArch64CC::NE;
2472 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473 // Fake request the condition, otherwise the intrinsic might be completely
2474 // optimized away.
2475 Register CondReg = getRegForValue(BI->getCondition());
2476 if (!CondReg)
2477 return false;
2478
2479 // Emit the branch.
2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2481 .addImm(CC)
2482 .addMBB(TBB);
2483
2484 finishCondBranch(BI->getParent(), TBB, FBB);
2485 return true;
2486 }
2487 }
2488
2489 Register CondReg = getRegForValue(BI->getCondition());
2490 if (CondReg == 0)
2491 return false;
2492
2493 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2494 unsigned Opcode = AArch64::TBNZW;
2495 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2496 std::swap(TBB, FBB);
2497 Opcode = AArch64::TBZW;
2498 }
2499
2500 const MCInstrDesc &II = TII.get(Opcode);
2501 Register ConstrainedCondReg
2502 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2503 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2504 .addReg(ConstrainedCondReg)
2505 .addImm(0)
2506 .addMBB(TBB);
2507
2508 finishCondBranch(BI->getParent(), TBB, FBB);
2509 return true;
2510}
2511
2512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2513 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2514 Register AddrReg = getRegForValue(BI->getOperand(0));
2515 if (AddrReg == 0)
2516 return false;
2517
2518 // Emit the indirect branch.
2519 const MCInstrDesc &II = TII.get(AArch64::BR);
2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2522
2523 // Make sure the CFG is up-to-date.
2524 for (const auto *Succ : BI->successors())
2525 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2526
2527 return true;
2528}
2529
2530bool AArch64FastISel::selectCmp(const Instruction *I) {
2531 const CmpInst *CI = cast<CmpInst>(I);
2532
2533 // Vectors of i1 are weird: bail out.
2534 if (CI->getType()->isVectorTy())
2535 return false;
2536
2537 // Try to optimize or fold the cmp.
2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2539 unsigned ResultReg = 0;
2540 switch (Predicate) {
2541 default:
2542 break;
2543 case CmpInst::FCMP_FALSE:
2544 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2546 TII.get(TargetOpcode::COPY), ResultReg)
2547 .addReg(AArch64::WZR, getKillRegState(true));
2548 break;
2549 case CmpInst::FCMP_TRUE:
2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2551 break;
2552 }
2553
2554 if (ResultReg) {
2555 updateValueMap(I, ResultReg);
2556 return true;
2557 }
2558
2559 // Emit the cmp.
2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2561 return false;
2562
2563 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2564
2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2566 // condition codes are inverted, because they are used by CSINC.
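// E.g. FCMP_UEQ is built as (OEQ || UNO): the first CSINC materializes the OEQ
// result, and the second keeps it when VC (ordered) holds or forces a 1 when
// VS (unordered) holds. FCMP_ONE is handled analogously as (OLT || OGT).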
2567 static unsigned CondCodeTable[2][2] = {
2568 { AArch64CC::NE, AArch64CC::VC },
2569 { AArch64CC::PL, AArch64CC::LE }
2570 };
2571 unsigned *CondCodes = nullptr;
2572 switch (Predicate) {
2573 default:
2574 break;
2575 case CmpInst::FCMP_UEQ:
2576 CondCodes = &CondCodeTable[0][0];
2577 break;
2578 case CmpInst::FCMP_ONE:
2579 CondCodes = &CondCodeTable[1][0];
2580 break;
2581 }
2582
2583 if (CondCodes) {
2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2586 TmpReg1)
2587 .addReg(AArch64::WZR, getKillRegState(true))
2588 .addReg(AArch64::WZR, getKillRegState(true))
2589 .addImm(CondCodes[0]);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 ResultReg)
2592 .addReg(TmpReg1, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[1]);
2595
2596 updateValueMap(I, ResultReg);
2597 return true;
2598 }
2599
2600 // Now set a register based on the comparison.
2601 AArch64CC::CondCode CC = getCompareCC(Predicate);
2602 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2605 ResultReg)
2606 .addReg(AArch64::WZR, getKillRegState(true))
2607 .addReg(AArch64::WZR, getKillRegState(true))
2608 .addImm(invertedCC);
2609
2610 updateValueMap(I, ResultReg);
2611 return true;
2612}
2613
2614/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2615/// value.
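/// E.g. 'select i1 %c, i1 true, i1 %b' lowers to ORR of %c and %b, and
/// 'select i1 %c, i1 %a, i1 false' lowers to AND of %c and %a, avoiding a
/// compare-plus-CSEL sequence.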
2616bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2617 if (!SI->getType()->isIntegerTy(1))
2618 return false;
2619
2620 const Value *Src1Val, *Src2Val;
2621 unsigned Opc = 0;
2622 bool NeedExtraOp = false;
2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2624 if (CI->isOne()) {
2625 Src1Val = SI->getCondition();
2626 Src2Val = SI->getFalseValue();
2627 Opc = AArch64::ORRWrr;
2628 } else {
2629 assert(CI->isZero());
2630 Src1Val = SI->getFalseValue();
2631 Src2Val = SI->getCondition();
2632 Opc = AArch64::BICWrr;
2633 }
2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2635 if (CI->isOne()) {
2636 Src1Val = SI->getCondition();
2637 Src2Val = SI->getTrueValue();
2638 Opc = AArch64::ORRWrr;
2639 NeedExtraOp = true;
2640 } else {
2641 assert(CI->isZero());
2642 Src1Val = SI->getCondition();
2643 Src2Val = SI->getTrueValue();
2644 Opc = AArch64::ANDWrr;
2645 }
2646 }
2647
2648 if (!Opc)
2649 return false;
2650
2651 Register Src1Reg = getRegForValue(Src1Val);
2652 if (!Src1Reg)
2653 return false;
2654
2655 Register Src2Reg = getRegForValue(Src2Val);
2656 if (!Src2Reg)
2657 return false;
2658
2659 if (NeedExtraOp)
2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2661
2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2663 Src2Reg);
2664 updateValueMap(SI, ResultReg);
2665 return true;
2666}
2667
2668bool AArch64FastISel::selectSelect(const Instruction *I) {
2669 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2670 MVT VT;
2671 if (!isTypeSupported(I->getType(), VT))
2672 return false;
2673
2674 unsigned Opc;
2675 const TargetRegisterClass *RC;
2676 switch (VT.SimpleTy) {
2677 default:
2678 return false;
2679 case MVT::i1:
2680 case MVT::i8:
2681 case MVT::i16:
2682 case MVT::i32:
2683 Opc = AArch64::CSELWr;
2684 RC = &AArch64::GPR32RegClass;
2685 break;
2686 case MVT::i64:
2687 Opc = AArch64::CSELXr;
2688 RC = &AArch64::GPR64RegClass;
2689 break;
2690 case MVT::f32:
2691 Opc = AArch64::FCSELSrrr;
2692 RC = &AArch64::FPR32RegClass;
2693 break;
2694 case MVT::f64:
2695 Opc = AArch64::FCSELDrrr;
2696 RC = &AArch64::FPR64RegClass;
2697 break;
2698 }
2699
2700 const SelectInst *SI = cast<SelectInst>(I);
2701 const Value *Cond = SI->getCondition();
2702 AArch64CC::CondCode CC = AArch64CC::NE;
2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2704
2705 if (optimizeSelect(SI))
2706 return true;
2707
2708 // Try to pick up the flags, so we don't have to emit another compare.
2709 if (foldXALUIntrinsic(CC, I, Cond)) {
2710 // Fake request the condition to force emission of the XALU intrinsic.
2711 Register CondReg = getRegForValue(Cond);
2712 if (!CondReg)
2713 return false;
2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2715 isValueAvailable(Cond)) {
2716 const auto *Cmp = cast<CmpInst>(Cond);
2717 // Try to optimize or fold the cmp.
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2719 const Value *FoldSelect = nullptr;
2720 switch (Predicate) {
2721 default:
2722 break;
2723 case CmpInst::FCMP_FALSE:
2724 FoldSelect = SI->getFalseValue();
2725 break;
2726 case CmpInst::FCMP_TRUE:
2727 FoldSelect = SI->getTrueValue();
2728 break;
2729 }
2730
2731 if (FoldSelect) {
2732 Register SrcReg = getRegForValue(FoldSelect);
2733 if (!SrcReg)
2734 return false;
2735
2736 updateValueMap(I, SrcReg);
2737 return true;
2738 }
2739
2740 // Emit the cmp.
2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742 return false;
2743
2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745 CC = getCompareCC(Predicate);
2746 switch (Predicate) {
2747 default:
2748 break;
2749 case CmpInst::FCMP_UEQ:
2750 ExtraCC = AArch64CC::EQ;
2751 CC = AArch64CC::VS;
2752 break;
2753 case CmpInst::FCMP_ONE:
2754 ExtraCC = AArch64CC::MI;
2755 CC = AArch64CC::GT;
2756 break;
2757 }
2758 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759 } else {
2760 Register CondReg = getRegForValue(Cond);
2761 if (!CondReg)
2762 return false;
2763
2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2765 CondReg = constrainOperandRegClass(II, CondReg, 1);
2766
2767 // Emit a TST instruction (ANDS wzr, reg, #imm).
2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2769 AArch64::WZR)
2770 .addReg(CondReg)
2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2772 }
2773
2774 Register Src1Reg = getRegForValue(SI->getTrueValue());
2775 Register Src2Reg = getRegForValue(SI->getFalseValue());
2776
2777 if (!Src1Reg || !Src2Reg)
2778 return false;
2779
2780 if (ExtraCC != AArch64CC::AL)
2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2782
2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2784 updateValueMap(I, ResultReg);
2785 return true;
2786}
2787
2788bool AArch64FastISel::selectFPExt(const Instruction *I) {
2789 Value *V = I->getOperand(0);
2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2791 return false;
2792
2793 Register Op = getRegForValue(V);
2794 if (Op == 0)
2795 return false;
2796
2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2799 ResultReg).addReg(Op);
2800 updateValueMap(I, ResultReg);
2801 return true;
2802}
2803
2804bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2805 Value *V = I->getOperand(0);
2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2807 return false;
2808
2809 Register Op = getRegForValue(V);
2810 if (Op == 0)
2811 return false;
2812
2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2815 ResultReg).addReg(Op);
2816 updateValueMap(I, ResultReg);
2817 return true;
2818}
2819
2820// FPToUI and FPToSI
2821bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2822 MVT DestVT;
2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2824 return false;
2825
2826 Register SrcReg = getRegForValue(I->getOperand(0));
2827 if (SrcReg == 0)
2828 return false;
2829
2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2832 return false;
2833
2834 unsigned Opc;
2835 if (SrcVT == MVT::f64) {
2836 if (Signed)
2837 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2838 else
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2840 } else {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2845 }
2846 Register ResultReg = createResultReg(
2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2849 .addReg(SrcReg);
2850 updateValueMap(I, ResultReg);
2851 return true;
2852}
2853
2854bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2855 MVT DestVT;
2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2857 return false;
2858 // Let regular ISEL handle FP16
2859 if (DestVT == MVT::f16)
2860 return false;
2861
2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2863 "Unexpected value type.");
2864
2865 Register SrcReg = getRegForValue(I->getOperand(0));
2866 if (!SrcReg)
2867 return false;
2868
2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2870
2871 // Handle sign-extension.
2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2873 SrcReg =
2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2875 if (!SrcReg)
2876 return false;
2877 }
2878
2879 unsigned Opc;
2880 if (SrcVT == MVT::i64) {
2881 if (Signed)
2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2883 else
2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2885 } else {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2890 }
2891
2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2893 updateValueMap(I, ResultReg);
2894 return true;
2895}
2896
2897bool AArch64FastISel::fastLowerArguments() {
2898 if (!FuncInfo.CanLowerReturn)
2899 return false;
2900
2901 const Function *F = FuncInfo.Fn;
2902 if (F->isVarArg())
2903 return false;
2904
2905 CallingConv::ID CC = F->getCallingConv();
2906 if (CC != CallingConv::C && CC != CallingConv::Swift)
2907 return false;
2908
2909 if (Subtarget->hasCustomCallingConv())
2910 return false;
2911
2912 // Only handle simple cases of up to 8 GPR and FPR each.
2913 unsigned GPRCnt = 0;
2914 unsigned FPRCnt = 0;
2915 for (auto const &Arg : F->args()) {
2916 if (Arg.hasAttribute(Attribute::ByVal) ||
2917 Arg.hasAttribute(Attribute::InReg) ||
2918 Arg.hasAttribute(Attribute::StructRet) ||
2919 Arg.hasAttribute(Attribute::SwiftSelf) ||
2920 Arg.hasAttribute(Attribute::SwiftAsync) ||
2921 Arg.hasAttribute(Attribute::SwiftError) ||
2922 Arg.hasAttribute(Attribute::Nest))
2923 return false;
2924
2925 Type *ArgTy = Arg.getType();
2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2927 return false;
2928
2929 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2930 if (!ArgVT.isSimple())
2931 return false;
2932
2933 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2935 return false;
2936
2937 if (VT.isVector() &&
2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2939 return false;
2940
2941 if (VT >= MVT::i1 && VT <= MVT::i64)
2942 ++GPRCnt;
2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2944 VT.is128BitVector())
2945 ++FPRCnt;
2946 else
2947 return false;
2948
2949 if (GPRCnt > 8 || FPRCnt > 8)
2950 return false;
2951 }
2952
2953 static const MCPhysReg Registers[6][8] = {
2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2955 AArch64::W5, AArch64::W6, AArch64::W7 },
2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2957 AArch64::X5, AArch64::X6, AArch64::X7 },
2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2959 AArch64::H5, AArch64::H6, AArch64::H7 },
2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2961 AArch64::S5, AArch64::S6, AArch64::S7 },
2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2963 AArch64::D5, AArch64::D6, AArch64::D7 },
2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2966 };
2967
2968 unsigned GPRIdx = 0;
2969 unsigned FPRIdx = 0;
2970 for (auto const &Arg : F->args()) {
2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2972 unsigned SrcReg;
2973 const TargetRegisterClass *RC;
2974 if (VT >= MVT::i1 && VT <= MVT::i32) {
2975 SrcReg = Registers[0][GPRIdx++];
2976 RC = &AArch64::GPR32RegClass;
2977 VT = MVT::i32;
2978 } else if (VT == MVT::i64) {
2979 SrcReg = Registers[1][GPRIdx++];
2980 RC = &AArch64::GPR64RegClass;
2981 } else if (VT == MVT::f16) {
2982 SrcReg = Registers[2][FPRIdx++];
2983 RC = &AArch64::FPR16RegClass;
2984 } else if (VT == MVT::f32) {
2985 SrcReg = Registers[3][FPRIdx++];
2986 RC = &AArch64::FPR32RegClass;
2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2988 SrcReg = Registers[4][FPRIdx++];
2989 RC = &AArch64::FPR64RegClass;
2990 } else if (VT.is128BitVector()) {
2991 SrcReg = Registers[5][FPRIdx++];
2992 RC = &AArch64::FPR128RegClass;
2993 } else
2994 llvm_unreachable("Unexpected value type.");
2995
2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2998 // Without this, EmitLiveInCopies may eliminate the livein if its only
2999 // use is a bitcast (which isn't turned into an instruction).
3000 Register ResultReg = createResultReg(RC);
3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3002 TII.get(TargetOpcode::COPY), ResultReg)
3003 .addReg(DstReg, getKillRegState(true));
3004 updateValueMap(&Arg, ResultReg);
3005 }
3006 return true;
3007}
3008
3009bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3010 SmallVectorImpl<MVT> &OutVTs,
3011 unsigned &NumBytes) {
3012 CallingConv::ID CC = CLI.CallConv;
3013 SmallVector<CCValAssign, 16> ArgLocs;
3014 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3015 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3016
3017 // Get a count of how many bytes are to be pushed on the stack.
3018 NumBytes = CCInfo.getStackSize();
3019
3020 // Issue CALLSEQ_START
3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3022 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3023 .addImm(NumBytes).addImm(0);
3024
3025 // Process the args.
3026 for (CCValAssign &VA : ArgLocs) {
3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3028 MVT ArgVT = OutVTs[VA.getValNo()];
3029
3030 Register ArgReg = getRegForValue(ArgVal);
3031 if (!ArgReg)
3032 return false;
3033
3034 // Handle arg promotion: SExt, ZExt, AExt.
3035 switch (VA.getLocInfo()) {
3036 case CCValAssign::Full:
3037 break;
3038 case CCValAssign::SExt: {
3039 MVT DestVT = VA.getLocVT();
3040 MVT SrcVT = ArgVT;
3041 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3042 if (!ArgReg)
3043 return false;
3044 break;
3045 }
3046 case CCValAssign::AExt:
3047 // Intentional fall-through.
3048 case CCValAssign::ZExt: {
3049 MVT DestVT = VA.getLocVT();
3050 MVT SrcVT = ArgVT;
3051 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3052 if (!ArgReg)
3053 return false;
3054 break;
3055 }
3056 default:
3057 llvm_unreachable("Unknown arg promotion!");
3058 }
3059
3060 // Now copy/store arg to correct locations.
3061 if (VA.isRegLoc() && !VA.needsCustom()) {
3062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3063 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3064 CLI.OutRegs.push_back(VA.getLocReg());
3065 } else if (VA.needsCustom()) {
3066 // FIXME: Handle custom args.
3067 return false;
3068 } else {
3069 assert(VA.isMemLoc() && "Assuming store on stack.");
3070
3071 // Don't emit stores for undef values.
3072 if (isa<UndefValue>(ArgVal))
3073 continue;
3074
3075 // Need to store on the stack.
3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3077
3078 unsigned BEAlign = 0;
3079 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3080 BEAlign = 8 - ArgSize;
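// E.g. on a big-endian target a 4-byte argument gets BEAlign == 4, so it is
// stored into the low-order bytes of its 8-byte stack slot.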
3081
3082 Address Addr;
3083 Addr.setKind(Address::RegBase);
3084 Addr.setReg(AArch64::SP);
3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3086
3087 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3089 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3090 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3091
3092 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3093 return false;
3094 }
3095 }
3096 return true;
3097}
3098
3099bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3100 CallingConv::ID CC = CLI.CallConv;
3101
3102 // Issue CALLSEQ_END
3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3105 .addImm(NumBytes).addImm(0);
3106
3107 // Now the return values.
3108 SmallVector<CCValAssign, 16> RVLocs;
3109 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3110 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3111
3112 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3113 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3114 CCValAssign &VA = RVLocs[i];
3115 MVT CopyVT = VA.getValVT();
3116 unsigned CopyReg = ResultReg + i;
3117
3118 // TODO: Handle big-endian results
3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3120 return false;
3121
3122 // Copy result out of their specified physreg.
3123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3124 CopyReg)
3125 .addReg(VA.getLocReg());
3126 CLI.InRegs.push_back(VA.getLocReg());
3127 }
3128
3129 CLI.ResultReg = ResultReg;
3130 CLI.NumResultRegs = RVLocs.size();
3131
3132 return true;
3133}
3134
3135bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3136 CallingConv::ID CC = CLI.CallConv;
3137 bool IsTailCall = CLI.IsTailCall;
3138 bool IsVarArg = CLI.IsVarArg;
3139 const Value *Callee = CLI.Callee;
3140 MCSymbol *Symbol = CLI.Symbol;
3141
3142 if (!Callee && !Symbol)
3143 return false;
3144
3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3146 // a bti instruction following the call.
3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3148 !Subtarget->noBTIAtReturnTwice() &&
3149 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3150 return false;
3151
3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3153 if (CLI.CB && CLI.CB->isIndirectCall() &&
3154 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle tail calls.
3158 if (IsTailCall)
3159 return false;
3160
3161 // FIXME: we could and should support this, but for now correctness at -O0 is
3162 // more important.
3163 if (Subtarget->isTargetILP32())
3164 return false;
3165
3166 CodeModel::Model CM = TM.getCodeModel();
3167 // Only support the small-addressing and large code models.
3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3169 return false;
3170
3171 // FIXME: Add large code model support for ELF.
3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3173 return false;
3174
3175 // Let SDISel handle vararg functions.
3176 if (IsVarArg)
3177 return false;
3178
3179 if (Subtarget->isWindowsArm64EC())
3180 return false;
3181
3182 for (auto Flag : CLI.OutFlags)
3183 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3184 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3185 return false;
3186
3187 // Set up the argument vectors.
3188 SmallVector<MVT, 16> OutVTs;
3189 OutVTs.reserve(CLI.OutVals.size());
3190
3191 for (auto *Val : CLI.OutVals) {
3192 MVT VT;
3193 if (!isTypeLegal(Val->getType(), VT) &&
3194 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3195 return false;
3196
3197 // We don't handle vector parameters yet.
3198 if (VT.isVector() || VT.getSizeInBits() > 64)
3199 return false;
3200
3201 OutVTs.push_back(VT);
3202 }
3203
3204 Address Addr;
3205 if (Callee && !computeCallAddress(Callee, Addr))
3206 return false;
3207
3208 // The weak function target may be zero; in that case we must use indirect
3209 // addressing via a stub on Windows as it may be out of range for a
3210 // PC-relative jump.
3211 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3212 Addr.getGlobalValue()->hasExternalWeakLinkage())
3213 return false;
3214
3215 // Handle the arguments now that we've gotten them.
3216 unsigned NumBytes;
3217 if (!processCallArgs(CLI, OutVTs, NumBytes))
3218 return false;
3219
3220 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3221 if (RegInfo->isAnyArgRegReserved(*MF))
3222 RegInfo->emitReservedArgRegCallError(*MF);
3223
3224 // Issue the call.
3225 MachineInstrBuilder MIB;
3226 if (Subtarget->useSmallAddressing()) {
3227 const MCInstrDesc &II =
3228 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3229 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3230 if (Symbol)
3231 MIB.addSym(Symbol, 0);
3232 else if (Addr.getGlobalValue())
3233 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3234 else if (Addr.getReg()) {
3235 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3236 MIB.addReg(Reg);
3237 } else
3238 return false;
3239 } else {
3240 unsigned CallReg = 0;
3241 if (Symbol) {
3242 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3243 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3244 ADRPReg)
3245 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3246
3247 CallReg = createResultReg(&AArch64::GPR64RegClass);
3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3249 TII.get(AArch64::LDRXui), CallReg)
3250 .addReg(ADRPReg)
3251 .addSym(Symbol,
3252 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3253 } else if (Addr.getGlobalValue())
3254 CallReg = materializeGV(Addr.getGlobalValue());
3255 else if (Addr.getReg())
3256 CallReg = Addr.getReg();
3257
3258 if (!CallReg)
3259 return false;
3260
3261 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3262 CallReg = constrainOperandRegClass(II, CallReg, 0);
3263 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3264 }
3265
3266 // Add implicit physical register uses to the call.
3267 for (auto Reg : CLI.OutRegs)
3268 MIB.addReg(Reg, RegState::Implicit);
3269
3270 // Add a register mask with the call-preserved registers.
3271 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3272 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3273
3274 CLI.Call = MIB;
3275
3276 // Finish off the call including any return values.
3277 return finishCall(CLI, NumBytes);
3278}
3279
3280bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3281 if (Alignment)
3282 return Len / Alignment->value() <= 4;
3283 else
3284 return Len < 32;
3285}
3286
3287bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3288 uint64_t Len, MaybeAlign Alignment) {
3289 // Make sure we don't bloat code by inlining very large memcpy's.
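// E.g. a 16-byte memcpy with 8-byte alignment is inlined as two 8-byte
// load/store pairs; copies that fail isMemCpySmall fall back to the normal
// memcpy libcall lowering.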
3290 if (!isMemCpySmall(Len, Alignment))
3291 return false;
3292
3293 int64_t UnscaledOffset = 0;
3294 Address OrigDest = Dest;
3295 Address OrigSrc = Src;
3296
3297 while (Len) {
3298 MVT VT;
3299 if (!Alignment || *Alignment >= 8) {
3300 if (Len >= 8)
3301 VT = MVT::i64;
3302 else if (Len >= 4)
3303 VT = MVT::i32;
3304 else if (Len >= 2)
3305 VT = MVT::i16;
3306 else {
3307 VT = MVT::i8;
3308 }
3309 } else {
3310 assert(Alignment && "Alignment is set in this branch");
3311 // Bound based on alignment.
3312 if (Len >= 4 && *Alignment == 4)
3313 VT = MVT::i32;
3314 else if (Len >= 2 && *Alignment == 2)
3315 VT = MVT::i16;
3316 else {
3317 VT = MVT::i8;
3318 }
3319 }
3320
3321 unsigned ResultReg = emitLoad(VT, VT, Src);
3322 if (!ResultReg)
3323 return false;
3324
3325 if (!emitStore(VT, ResultReg, Dest))
3326 return false;
3327
3328 int64_t Size = VT.getSizeInBits() / 8;
3329 Len -= Size;
3330 UnscaledOffset += Size;
3331
3332 // We need to recompute the unscaled offset for each iteration.
3333 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3334 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3335 }
3336
3337 return true;
3338}
3339
3340/// Check if it is possible to fold the condition from the XALU intrinsic
3341/// into the user. The condition code will only be updated on success.
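/// E.g. when '%r = call { i32, i1 } @llvm.sadd.with.overflow.i32(...)' is
/// followed by an extractvalue of the overflow bit that only feeds a branch or
/// select, the user can test the flags directly (B.VS) instead of re-checking
/// the extracted i1 value.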
3342bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3343 const Instruction *I,
3344 const Value *Cond) {
3345 if (!isa<ExtractValueInst>(Cond))
3346 return false;
3347
3348 const auto *EV = cast<ExtractValueInst>(Cond);
3349 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3350 return false;
3351
3352 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3353 MVT RetVT;
3354 const Function *Callee = II->getCalledFunction();
3355 Type *RetTy =
3356 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3357 if (!isTypeLegal(RetTy, RetVT))
3358 return false;
3359
3360 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3361 return false;
3362
3363 const Value *LHS = II->getArgOperand(0);
3364 const Value *RHS = II->getArgOperand(1);
3365
3366 // Canonicalize immediate to the RHS.
3367 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3368 std::swap(LHS, RHS);
3369
3370 // Simplify multiplies.
3371 Intrinsic::ID IID = II->getIntrinsicID();
3372 switch (IID) {
3373 default:
3374 break;
3375 case Intrinsic::smul_with_overflow:
3376 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3377 if (C->getValue() == 2)
3378 IID = Intrinsic::sadd_with_overflow;
3379 break;
3380 case Intrinsic::umul_with_overflow:
3381 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3382 if (C->getValue() == 2)
3383 IID = Intrinsic::uadd_with_overflow;
3384 break;
3385 }
3386
3387 AArch64CC::CondCode TmpCC;
3388 switch (IID) {
3389 default:
3390 return false;
3391 case Intrinsic::sadd_with_overflow:
3392 case Intrinsic::ssub_with_overflow:
3393 TmpCC = AArch64CC::VS;
3394 break;
3395 case Intrinsic::uadd_with_overflow:
3396 TmpCC = AArch64CC::HS;
3397 break;
3398 case Intrinsic::usub_with_overflow:
3399 TmpCC = AArch64CC::LO;
3400 break;
3401 case Intrinsic::smul_with_overflow:
3402 case Intrinsic::umul_with_overflow:
3403 TmpCC = AArch64CC::NE;
3404 break;
3405 }
3406
3407 // Check if both instructions are in the same basic block.
3408 if (!isValueAvailable(II))
3409 return false;
3410
3411 // Make sure nothing is in the way
3412 BasicBlock::const_iterator Start(I);
3413 BasicBlock::const_iterator End(II);
3414 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3415 // We only expect extractvalue instructions between the intrinsic and the
3416 // instruction to be selected.
3417 if (!isa<ExtractValueInst>(Itr))
3418 return false;
3419
3420 // Check that the extractvalue operand comes from the intrinsic.
3421 const auto *EVI = cast<ExtractValueInst>(Itr);
3422 if (EVI->getAggregateOperand() != II)
3423 return false;
3424 }
3425
3426 CC = TmpCC;
3427 return true;
3428}
3429
3430bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3431 // FIXME: Handle more intrinsics.
3432 switch (II->getIntrinsicID()) {
3433 default: return false;
3434 case Intrinsic::frameaddress: {
3435 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3436 MFI.setFrameAddressIsTaken(true);
3437
3438 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3439 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3440 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3442 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3443 // Recursively load frame address
3444 // ldr x0, [fp]
3445 // ldr x0, [x0]
3446 // ldr x0, [x0]
3447 // ...
3448 unsigned DestReg;
3449 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3450 while (Depth--) {
3451 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3452 SrcReg, 0);
3453 assert(DestReg && "Unexpected LDR instruction emission failure.");
3454 SrcReg = DestReg;
3455 }
3456
3457 updateValueMap(II, SrcReg);
3458 return true;
3459 }
3460 case Intrinsic::sponentry: {
3461 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3462
3463 // SP = FP + Fixed Object + 16
3464 int FI = MFI.CreateFixedObject(4, 0, false);
3465 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3467 TII.get(AArch64::ADDXri), ResultReg)
3468 .addFrameIndex(FI)
3469 .addImm(0)
3470 .addImm(0);
3471
3472 updateValueMap(II, ResultReg);
3473 return true;
3474 }
3475 case Intrinsic::memcpy:
3476 case Intrinsic::memmove: {
3477 const auto *MTI = cast<MemTransferInst>(II);
3478 // Don't handle volatile.
3479 if (MTI->isVolatile())
3480 return false;
3481
3482 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3483 // we would emit dead code because we don't currently handle memmoves.
3484 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3485 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3486 // Small memcpy's are common enough that we want to do them without a call
3487 // if possible.
3488 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3489 MaybeAlign Alignment;
3490 if (MTI->getDestAlign() || MTI->getSourceAlign())
3491 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3492 MTI->getSourceAlign().valueOrOne());
3493 if (isMemCpySmall(Len, Alignment)) {
3494 Address Dest, Src;
3495 if (!computeAddress(MTI->getRawDest(), Dest) ||
3496 !computeAddress(MTI->getRawSource(), Src))
3497 return false;
3498 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3499 return true;
3500 }
3501 }
3502
3503 if (!MTI->getLength()->getType()->isIntegerTy(64))
3504 return false;
3505
3506 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3507 // Fast instruction selection doesn't support the special
3508 // address spaces.
3509 return false;
3510
3511 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3512 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3513 }
3514 case Intrinsic::memset: {
3515 const MemSetInst *MSI = cast<MemSetInst>(II);
3516 // Don't handle volatile.
3517 if (MSI->isVolatile())
3518 return false;
3519
3520 if (!MSI->getLength()->getType()->isIntegerTy(64))
3521 return false;
3522
3523 if (MSI->getDestAddressSpace() > 255)
3524 // Fast instruction selection doesn't support the special
3525 // address spaces.
3526 return false;
3527
3528 return lowerCallTo(II, "memset", II->arg_size() - 1);
3529 }
3530 case Intrinsic::sin:
3531 case Intrinsic::cos:
3532 case Intrinsic::pow: {
3533 MVT RetVT;
3534 if (!isTypeLegal(II->getType(), RetVT))
3535 return false;
3536
3537 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3538 return false;
3539
3540 static const RTLIB::Libcall LibCallTable[3][2] = {
3541 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3542 { RTLIB::COS_F32, RTLIB::COS_F64 },
3543 { RTLIB::POW_F32, RTLIB::POW_F64 }
3544 };
3545 RTLIB::Libcall LC;
3546 bool Is64Bit = RetVT == MVT::f64;
3547 switch (II->getIntrinsicID()) {
3548 default:
3549 llvm_unreachable("Unexpected intrinsic.");
3550 case Intrinsic::sin:
3551 LC = LibCallTable[0][Is64Bit];
3552 break;
3553 case Intrinsic::cos:
3554 LC = LibCallTable[1][Is64Bit];
3555 break;
3556 case Intrinsic::pow:
3557 LC = LibCallTable[2][Is64Bit];
3558 break;
3559 }
3560
3561 ArgListTy Args;
3562 Args.reserve(II->arg_size());
3563
3564 // Populate the argument list.
3565 for (auto &Arg : II->args()) {
3566 ArgListEntry Entry;
3567 Entry.Val = Arg;
3568 Entry.Ty = Arg->getType();
3569 Args.push_back(Entry);
3570 }
3571
3572 CallLoweringInfo CLI;
3573 MCContext &Ctx = MF->getContext();
3574 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3575 TLI.getLibcallName(LC), std::move(Args));
3576 if (!lowerCallTo(CLI))
3577 return false;
3578 updateValueMap(II, CLI.ResultReg);
3579 return true;
3580 }
3581 case Intrinsic::fabs: {
3582 MVT VT;
3583 if (!isTypeLegal(II->getType(), VT))
3584 return false;
3585
3586 unsigned Opc;
3587 switch (VT.SimpleTy) {
3588 default:
3589 return false;
3590 case MVT::f32:
3591 Opc = AArch64::FABSSr;
3592 break;
3593 case MVT::f64:
3594 Opc = AArch64::FABSDr;
3595 break;
3596 }
3597 Register SrcReg = getRegForValue(II->getOperand(0));
3598 if (!SrcReg)
3599 return false;
3600 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3601 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3602 .addReg(SrcReg);
3603 updateValueMap(II, ResultReg);
3604 return true;
3605 }
3606 case Intrinsic::trap:
3607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3608 .addImm(1);
3609 return true;
3610 case Intrinsic::debugtrap:
3611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3612 .addImm(0xF000);
3613 return true;
3614
3615 case Intrinsic::sqrt: {
3616    Type *RetTy = II->getCalledFunction()->getReturnType();
3617
3618 MVT VT;
3619 if (!isTypeLegal(RetTy, VT))
3620 return false;
3621
3622 Register Op0Reg = getRegForValue(II->getOperand(0));
3623 if (!Op0Reg)
3624 return false;
3625
3626 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3627 if (!ResultReg)
3628 return false;
3629
3630 updateValueMap(II, ResultReg);
3631 return true;
3632 }
3633 case Intrinsic::sadd_with_overflow:
3634 case Intrinsic::uadd_with_overflow:
3635 case Intrinsic::ssub_with_overflow:
3636 case Intrinsic::usub_with_overflow:
3637 case Intrinsic::smul_with_overflow:
3638 case Intrinsic::umul_with_overflow: {
3639 // This implements the basic lowering of the xalu with overflow intrinsics.
3640 const Function *Callee = II->getCalledFunction();
3641 auto *Ty = cast<StructType>(Callee->getReturnType());
3642 Type *RetTy = Ty->getTypeAtIndex(0U);
3643
3644 MVT VT;
3645 if (!isTypeLegal(RetTy, VT))
3646 return false;
3647
3648 if (VT != MVT::i32 && VT != MVT::i64)
3649 return false;
3650
3651 const Value *LHS = II->getArgOperand(0);
3652 const Value *RHS = II->getArgOperand(1);
3653 // Canonicalize immediate to the RHS.
3654 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3655 std::swap(LHS, RHS);
3656
3657 // Simplify multiplies.
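    // Multiplying by 2 overflows exactly when x + x overflows, so rewrite the
    // multiply as an add-with-overflow; the flag-setting ADD below is cheaper
    // than the multiply-and-compare sequences used for general multiplies.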
3658 Intrinsic::ID IID = II->getIntrinsicID();
3659 switch (IID) {
3660 default:
3661 break;
3662 case Intrinsic::smul_with_overflow:
3663 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3664 if (C->getValue() == 2) {
3665 IID = Intrinsic::sadd_with_overflow;
3666 RHS = LHS;
3667 }
3668 break;
3669 case Intrinsic::umul_with_overflow:
3670 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3671 if (C->getValue() == 2) {
3672 IID = Intrinsic::uadd_with_overflow;
3673 RHS = LHS;
3674 }
3675 break;
3676 }
3677
3678 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3679    AArch64CC::CondCode CC = AArch64CC::Invalid;
3680    switch (IID) {
3681 default: llvm_unreachable("Unexpected intrinsic!");
3682 case Intrinsic::sadd_with_overflow:
3683 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3684 CC = AArch64CC::VS;
3685 break;
3686 case Intrinsic::uadd_with_overflow:
3687 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3688 CC = AArch64CC::HS;
3689 break;
3690 case Intrinsic::ssub_with_overflow:
3691 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3692 CC = AArch64CC::VS;
3693 break;
3694 case Intrinsic::usub_with_overflow:
3695 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3696 CC = AArch64CC::LO;
3697 break;
3698 case Intrinsic::smul_with_overflow: {
3699 CC = AArch64CC::NE;
3700 Register LHSReg = getRegForValue(LHS);
3701 if (!LHSReg)
3702 return false;
3703
3704 Register RHSReg = getRegForValue(RHS);
3705 if (!RHSReg)
3706 return false;
3707
3708 if (VT == MVT::i32) {
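        // For 32-bit signed multiplies, compute the full 64-bit product with
        // SMULL and compare it against its sign-extended low 32 bits; the two
        // differ exactly when the 32-bit signed multiply overflows.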
3709 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3710 Register MulSubReg =
3711 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3712 // cmp xreg, wreg, sxtw
3713 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3714 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3715 /*WantResult=*/false);
3716 MulReg = MulSubReg;
3717 } else {
3718 assert(VT == MVT::i64 && "Unexpected value type.");
3719 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3720 // reused in the next instruction.
3721 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3722 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3723 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3724 /*WantResult=*/false);
3725 }
3726 break;
3727 }
3728 case Intrinsic::umul_with_overflow: {
3729 CC = AArch64CC::NE;
3730 Register LHSReg = getRegForValue(LHS);
3731 if (!LHSReg)
3732 return false;
3733
3734 Register RHSReg = getRegForValue(RHS);
3735 if (!RHSReg)
3736 return false;
3737
3738 if (VT == MVT::i32) {
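        // For 32-bit unsigned multiplies, compute the full 64-bit product with
        // UMULL; any set bit in the upper 32 bits means the 32-bit result
        // overflowed, which the ANDS against XZR below detects.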
3739 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3740 // tst xreg, #0xffffffff00000000
3741 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3742 TII.get(AArch64::ANDSXri), AArch64::XZR)
3743 .addReg(MulReg)
3744 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3745 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3746 } else {
3747 assert(VT == MVT::i64 && "Unexpected value type.");
3748 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3749 // reused in the next instruction.
3750 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3751 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3752 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3753 }
3754 break;
3755 }
3756 }
3757
3758 if (MulReg) {
3759 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3760 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3761 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3762 }
3763
3764 if (!ResultReg1)
3765 return false;
3766
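    // Materialize the overflow bit: CSINC with the inverted condition yields 1
    // when CC holds and 0 otherwise. updateValueMap maps the two struct fields
    // to ResultReg1 and ResultReg1 + 1, so the CSINC result must be allocated
    // immediately after ResultReg1 (checked by the assert below).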
3767 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3768 AArch64::WZR, AArch64::WZR,
3769 getInvertedCondCode(CC));
3770 (void)ResultReg2;
3771 assert((ResultReg1 + 1) == ResultReg2 &&
3772 "Nonconsecutive result registers.");
3773 updateValueMap(II, ResultReg1, 2);
3774 return true;
3775 }
3776 case Intrinsic::aarch64_crc32b:
3777 case Intrinsic::aarch64_crc32h:
3778 case Intrinsic::aarch64_crc32w:
3779 case Intrinsic::aarch64_crc32x:
3780 case Intrinsic::aarch64_crc32cb:
3781 case Intrinsic::aarch64_crc32ch:
3782 case Intrinsic::aarch64_crc32cw:
3783 case Intrinsic::aarch64_crc32cx: {
3784 if (!Subtarget->hasCRC())
3785 return false;
3786
3787 unsigned Opc;
3788 switch (II->getIntrinsicID()) {
3789 default:
3790 llvm_unreachable("Unexpected intrinsic!");
3791 case Intrinsic::aarch64_crc32b:
3792 Opc = AArch64::CRC32Brr;
3793 break;
3794 case Intrinsic::aarch64_crc32h:
3795 Opc = AArch64::CRC32Hrr;
3796 break;
3797 case Intrinsic::aarch64_crc32w:
3798 Opc = AArch64::CRC32Wrr;
3799 break;
3800 case Intrinsic::aarch64_crc32x:
3801 Opc = AArch64::CRC32Xrr;
3802 break;
3803 case Intrinsic::aarch64_crc32cb:
3804 Opc = AArch64::CRC32CBrr;
3805 break;
3806 case Intrinsic::aarch64_crc32ch:
3807 Opc = AArch64::CRC32CHrr;
3808 break;
3809 case Intrinsic::aarch64_crc32cw:
3810 Opc = AArch64::CRC32CWrr;
3811 break;
3812 case Intrinsic::aarch64_crc32cx:
3813 Opc = AArch64::CRC32CXrr;
3814 break;
3815 }
3816
3817 Register LHSReg = getRegForValue(II->getArgOperand(0));
3818 Register RHSReg = getRegForValue(II->getArgOperand(1));
3819 if (!LHSReg || !RHSReg)
3820 return false;
3821
3822 Register ResultReg =
3823 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3824 updateValueMap(II, ResultReg);
3825 return true;
3826 }
3827 }
3828 return false;
3829}
3830
3831bool AArch64FastISel::selectRet(const Instruction *I) {
3832 const ReturnInst *Ret = cast<ReturnInst>(I);
3833 const Function &F = *I->getParent()->getParent();
3834
3835 if (!FuncInfo.CanLowerReturn)
3836 return false;
3837
3838 if (F.isVarArg())
3839 return false;
3840
3841 if (TLI.supportSwiftError() &&
3842 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3843 return false;
3844
3845 if (TLI.supportSplitCSR(FuncInfo.MF))
3846 return false;
3847
3848 // Build a list of return value registers.
3849  SmallVector<unsigned, 4> RetRegs;
3850
3851 if (Ret->getNumOperands() > 0) {
3852 CallingConv::ID CC = F.getCallingConv();
3853    SmallVector<ISD::OutputArg, 4> Outs;
3854    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3855
3856 // Analyze operands of the call, assigning locations to each operand.
3857    SmallVector<CCValAssign, 16> ValLocs;
3858    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3859 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3860
3861 // Only handle a single return value for now.
3862 if (ValLocs.size() != 1)
3863 return false;
3864
3865 CCValAssign &VA = ValLocs[0];
3866 const Value *RV = Ret->getOperand(0);
3867
3868 // Don't bother handling odd stuff for now.
3869 if ((VA.getLocInfo() != CCValAssign::Full) &&
3870 (VA.getLocInfo() != CCValAssign::BCvt))
3871 return false;
3872
3873 // Only handle register returns for now.
3874 if (!VA.isRegLoc())
3875 return false;
3876
3877 Register Reg = getRegForValue(RV);
3878 if (Reg == 0)
3879 return false;
3880
3881 unsigned SrcReg = Reg + VA.getValNo();
3882 Register DestReg = VA.getLocReg();
3883 // Avoid a cross-class copy. This is very unlikely.
3884 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3885 return false;
3886
3887 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3888 if (!RVEVT.isSimple())
3889 return false;
3890
3891 // Vectors (of > 1 lane) in big endian need tricky handling.
3892 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3893 !Subtarget->isLittleEndian())
3894 return false;
3895
3896 MVT RVVT = RVEVT.getSimpleVT();
3897 if (RVVT == MVT::f128)
3898 return false;
3899
3900 MVT DestVT = VA.getValVT();
3901 // Special handling for extended integers.
3902 if (RVVT != DestVT) {
3903 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3904 return false;
3905
3906 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3907 return false;
3908
3909 bool IsZExt = Outs[0].Flags.isZExt();
3910 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3911 if (SrcReg == 0)
3912 return false;
3913 }
3914
3915 // "Callee" (i.e. value producer) zero extends pointers at function
3916 // boundary.
3917 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3918 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3919
3920 // Make the copy.
3921 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3922 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3923
3924 // Add register to return instruction.
3925 RetRegs.push_back(VA.getLocReg());
3926 }
3927
3928 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3929 TII.get(AArch64::RET_ReallyLR));
3930 for (unsigned RetReg : RetRegs)
3931 MIB.addReg(RetReg, RegState::Implicit);
3932 return true;
3933}
3934
3935bool AArch64FastISel::selectTrunc(const Instruction *I) {
3936 Type *DestTy = I->getType();
3937 Value *Op = I->getOperand(0);
3938 Type *SrcTy = Op->getType();
3939
3940 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3941 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3942 if (!SrcEVT.isSimple())
3943 return false;
3944 if (!DestEVT.isSimple())
3945 return false;
3946
3947 MVT SrcVT = SrcEVT.getSimpleVT();
3948 MVT DestVT = DestEVT.getSimpleVT();
3949
3950 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3951 SrcVT != MVT::i8)
3952 return false;
3953 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3954 DestVT != MVT::i1)
3955 return false;
3956
3957 Register SrcReg = getRegForValue(Op);
3958 if (!SrcReg)
3959 return false;
3960
3961  // If we're truncating from i64 to a smaller non-legal type then generate an
3962  // AND. Otherwise, we know the high bits are undefined and a truncate only
3963  // generates a COPY. We cannot also mark the source register as the result
3964  // register, because this can incorrectly transfer the kill flag onto the
3965  // source register.
3966 unsigned ResultReg;
3967 if (SrcVT == MVT::i64) {
3968 uint64_t Mask = 0;
3969 switch (DestVT.SimpleTy) {
3970 default:
3971 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3972 return false;
3973 case MVT::i1:
3974 Mask = 0x1;
3975 break;
3976 case MVT::i8:
3977 Mask = 0xff;
3978 break;
3979 case MVT::i16:
3980 Mask = 0xffff;
3981 break;
3982 }
3983 // Issue an extract_subreg to get the lower 32-bits.
3984 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3985 AArch64::sub_32);
3986 // Create the AND instruction which performs the actual truncation.
3987 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3988 assert(ResultReg && "Unexpected AND instruction emission failure.");
3989 } else {
3990 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3991 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3992 TII.get(TargetOpcode::COPY), ResultReg)
3993 .addReg(SrcReg);
3994 }
3995
3996 updateValueMap(I, ResultReg);
3997 return true;
3998}
3999
4000unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4001 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4002 DestVT == MVT::i64) &&
4003 "Unexpected value type.");
4004 // Handle i8 and i16 as i32.
4005 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4006 DestVT = MVT::i32;
4007
4008 if (IsZExt) {
4009 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4010 assert(ResultReg && "Unexpected AND instruction emission failure.");
4011 if (DestVT == MVT::i64) {
4012 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4013 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4014 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4015 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4016 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4017 .addImm(0)
4018 .addReg(ResultReg)
4019 .addImm(AArch64::sub_32);
4020 ResultReg = Reg64;
4021 }
4022 return ResultReg;
4023 } else {
4024 if (DestVT == MVT::i64) {
4025 // FIXME: We're SExt i1 to i64.
4026 return 0;
4027 }
4028 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4029 0, 0);
4030 }
4031}
4032
4033unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4034 unsigned Opc, ZReg;
4035 switch (RetVT.SimpleTy) {
4036 default: return 0;
4037 case MVT::i8:
4038 case MVT::i16:
4039 case MVT::i32:
4040 RetVT = MVT::i32;
4041 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4042 case MVT::i64:
4043 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4044 }
4045
4046 const TargetRegisterClass *RC =
4047 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4048 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4049}
4050
4051unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4052 if (RetVT != MVT::i64)
4053 return 0;
4054
4055 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4056 Op0, Op1, AArch64::XZR);
4057}
4058
4059unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4060 if (RetVT != MVT::i64)
4061 return 0;
4062
4063 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4064 Op0, Op1, AArch64::XZR);
4065}
4066
4067unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4068 unsigned Op1Reg) {
4069 unsigned Opc = 0;
4070 bool NeedTrunc = false;
4071 uint64_t Mask = 0;
4072 switch (RetVT.SimpleTy) {
4073 default: return 0;
4074 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4075 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4076 case MVT::i32: Opc = AArch64::LSLVWr; break;
4077 case MVT::i64: Opc = AArch64::LSLVXr; break;
4078 }
4079
4080 const TargetRegisterClass *RC =
4081 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4082 if (NeedTrunc)
4083 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4084
4085 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4086 if (NeedTrunc)
4087 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4088 return ResultReg;
4089}
4090
4091unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4092 uint64_t Shift, bool IsZExt) {
4093 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4094 "Unexpected source/return type pair.");
4095 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4096 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4097 "Unexpected source value type.");
4098 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4099 RetVT == MVT::i64) && "Unexpected return value type.");
4100
4101 bool Is64Bit = (RetVT == MVT::i64);
4102 unsigned RegSize = Is64Bit ? 64 : 32;
4103 unsigned DstBits = RetVT.getSizeInBits();
4104 unsigned SrcBits = SrcVT.getSizeInBits();
4105 const TargetRegisterClass *RC =
4106 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4107
4108 // Just emit a copy for "zero" shifts.
4109 if (Shift == 0) {
4110 if (RetVT == SrcVT) {
4111 Register ResultReg = createResultReg(RC);
4112 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4113 TII.get(TargetOpcode::COPY), ResultReg)
4114 .addReg(Op0);
4115 return ResultReg;
4116 } else
4117 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4118 }
4119
4120 // Don't deal with undefined shifts.
4121 if (Shift >= DstBits)
4122 return 0;
4123
4124 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4125 // {S|U}BFM Wd, Wn, #r, #s
4126 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4127
4128 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4129 // %2 = shl i16 %1, 4
4130 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4131 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4132 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4133 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4134
4135 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4136 // %2 = shl i16 %1, 8
4137 // Wd<32+7-24,32-24> = Wn<7:0>
4138 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4139 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4140 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4141
4142 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4143 // %2 = shl i16 %1, 12
4144 // Wd<32+3-20,32-20> = Wn<3:0>
4145 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4146 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4147 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4148
4149 unsigned ImmR = RegSize - Shift;
4150 // Limit the width to the length of the source type.
4151 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4152 static const unsigned OpcTable[2][2] = {
4153 {AArch64::SBFMWri, AArch64::SBFMXri},
4154 {AArch64::UBFMWri, AArch64::UBFMXri}
4155 };
4156 unsigned Opc = OpcTable[IsZExt][Is64Bit];
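  // A 64-bit bitfield move needs a 64-bit source operand; if the source value
  // lives in a 32-bit register, wrap it in SUBREG_TO_REG so it is presented as
  // the low half of an X register.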
4157 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4158 Register TmpReg = MRI.createVirtualRegister(RC);
4159 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4160 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4161 .addImm(0)
4162 .addReg(Op0)
4163 .addImm(AArch64::sub_32);
4164 Op0 = TmpReg;
4165 }
4166 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4167}
4168
4169unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4170 unsigned Op1Reg) {
4171 unsigned Opc = 0;
4172 bool NeedTrunc = false;
4173 uint64_t Mask = 0;
4174 switch (RetVT.SimpleTy) {
4175 default: return 0;
4176 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4177 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4178 case MVT::i32: Opc = AArch64::LSRVWr; break;
4179 case MVT::i64: Opc = AArch64::LSRVXr; break;
4180 }
4181
4182 const TargetRegisterClass *RC =
4183 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4184 if (NeedTrunc) {
4185 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4186 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4187 }
4188 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4189 if (NeedTrunc)
4190 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4191 return ResultReg;
4192}
4193
4194unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4195 uint64_t Shift, bool IsZExt) {
4196 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4197 "Unexpected source/return type pair.");
4198 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4199 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4200 "Unexpected source value type.");
4201 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4202 RetVT == MVT::i64) && "Unexpected return value type.");
4203
4204 bool Is64Bit = (RetVT == MVT::i64);
4205 unsigned RegSize = Is64Bit ? 64 : 32;
4206 unsigned DstBits = RetVT.getSizeInBits();
4207 unsigned SrcBits = SrcVT.getSizeInBits();
4208 const TargetRegisterClass *RC =
4209 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4210
4211 // Just emit a copy for "zero" shifts.
4212 if (Shift == 0) {
4213 if (RetVT == SrcVT) {
4214 Register ResultReg = createResultReg(RC);
4215 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4216 TII.get(TargetOpcode::COPY), ResultReg)
4217 .addReg(Op0);
4218 return ResultReg;
4219 } else
4220 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4221 }
4222
4223 // Don't deal with undefined shifts.
4224 if (Shift >= DstBits)
4225 return 0;
4226
4227 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4228 // {S|U}BFM Wd, Wn, #r, #s
4229 // Wd<s-r:0> = Wn<s:r> when r <= s
4230
4231 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4232 // %2 = lshr i16 %1, 4
4233 // Wd<7-4:0> = Wn<7:4>
4234 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4235 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4236 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4237
4238 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4239 // %2 = lshr i16 %1, 8
4240 // Wd<7-7,0> = Wn<7:7>
4241 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4242 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4243 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4244
4245 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4246 // %2 = lshr i16 %1, 12
4247 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4248 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4250 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4251
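  // A zero-extended value shifted right by its source width or more is always
  // zero, so just materialize the constant.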
4252 if (Shift >= SrcBits && IsZExt)
4253 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4254
4255 // It is not possible to fold a sign-extend into the LShr instruction. In this
4256 // case emit a sign-extend.
4257 if (!IsZExt) {
4258 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4259 if (!Op0)
4260 return 0;
4261 SrcVT = RetVT;
4262 SrcBits = SrcVT.getSizeInBits();
4263 IsZExt = true;
4264 }
4265
4266 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4267 unsigned ImmS = SrcBits - 1;
4268 static const unsigned OpcTable[2][2] = {
4269 {AArch64::SBFMWri, AArch64::SBFMXri},
4270 {AArch64::UBFMWri, AArch64::UBFMXri}
4271 };
4272 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4273 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4274 Register TmpReg = MRI.createVirtualRegister(RC);
4275 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4276 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4277 .addImm(0)
4278 .addReg(Op0)
4279 .addImm(AArch64::sub_32);
4280 Op0 = TmpReg;
4281 }
4282 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4283}
4284
4285unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4286 unsigned Op1Reg) {
4287 unsigned Opc = 0;
4288 bool NeedTrunc = false;
4289 uint64_t Mask = 0;
4290 switch (RetVT.SimpleTy) {
4291 default: return 0;
4292 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4293 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4294 case MVT::i32: Opc = AArch64::ASRVWr; break;
4295 case MVT::i64: Opc = AArch64::ASRVXr; break;
4296 }
4297
4298 const TargetRegisterClass *RC =
4299 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4300 if (NeedTrunc) {
4301 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4302 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4303 }
4304 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4305 if (NeedTrunc)
4306 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4307 return ResultReg;
4308}
4309
4310unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4311 uint64_t Shift, bool IsZExt) {
4312 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4313 "Unexpected source/return type pair.");
4314 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4315 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4316 "Unexpected source value type.");
4317 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4318 RetVT == MVT::i64) && "Unexpected return value type.");
4319
4320 bool Is64Bit = (RetVT == MVT::i64);
4321 unsigned RegSize = Is64Bit ? 64 : 32;
4322 unsigned DstBits = RetVT.getSizeInBits();
4323 unsigned SrcBits = SrcVT.getSizeInBits();
4324 const TargetRegisterClass *RC =
4325 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4326
4327 // Just emit a copy for "zero" shifts.
4328 if (Shift == 0) {
4329 if (RetVT == SrcVT) {
4330 Register ResultReg = createResultReg(RC);
4331 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4332 TII.get(TargetOpcode::COPY), ResultReg)
4333 .addReg(Op0);
4334 return ResultReg;
4335 } else
4336 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4337 }
4338
4339 // Don't deal with undefined shifts.
4340 if (Shift >= DstBits)
4341 return 0;
4342
4343 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4344 // {S|U}BFM Wd, Wn, #r, #s
4345 // Wd<s-r:0> = Wn<s:r> when r <= s
4346
4347 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4348 // %2 = ashr i16 %1, 4
4349 // Wd<7-4:0> = Wn<7:4>
4350 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4351 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4352 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4353
4354 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4355 // %2 = ashr i16 %1, 8
4356 // Wd<7-7,0> = Wn<7:7>
4357 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4358 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4359 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4360
4361 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362 // %2 = ashr i16 %1, 12
4363 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4364 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4366 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4367
4368 if (Shift >= SrcBits && IsZExt)
4369 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4370
4371 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4372 unsigned ImmS = SrcBits - 1;
4373 static const unsigned OpcTable[2][2] = {
4374 {AArch64::SBFMWri, AArch64::SBFMXri},
4375 {AArch64::UBFMWri, AArch64::UBFMXri}
4376 };
4377 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4378 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4379 Register TmpReg = MRI.createVirtualRegister(RC);
4380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4381 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4382 .addImm(0)
4383 .addReg(Op0)
4384 .addImm(AArch64::sub_32);
4385 Op0 = TmpReg;
4386 }
4387 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4388}
4389
4390unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4391 bool IsZExt) {
4392 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4393
4394 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4395 // DestVT are odd things, so test to make sure that they are both types we can
4396 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4397 // bail out to SelectionDAG.
4398 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4399 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4400 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4401 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4402 return 0;
4403
4404 unsigned Opc;
4405 unsigned Imm = 0;
4406
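  // Imm becomes the 's' field of the {S|U}BFM: the index of the highest source
  // bit to preserve (7, 15 or 31 for i8, i16 or i32 sources).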
4407 switch (SrcVT.SimpleTy) {
4408 default:
4409 return 0;
4410 case MVT::i1:
4411 return emiti1Ext(SrcReg, DestVT, IsZExt);
4412 case MVT::i8:
4413 if (DestVT == MVT::i64)
4414 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4415 else
4416 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4417 Imm = 7;
4418 break;
4419 case MVT::i16:
4420 if (DestVT == MVT::i64)
4421 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4422 else
4423 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4424 Imm = 15;
4425 break;
4426 case MVT::i32:
4427 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4428 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429 Imm = 31;
4430 break;
4431 }
4432
4433 // Handle i8 and i16 as i32.
4434 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4435 DestVT = MVT::i32;
4436 else if (DestVT == MVT::i64) {
4437 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4438 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4439 TII.get(AArch64::SUBREG_TO_REG), Src64)
4440 .addImm(0)
4441 .addReg(SrcReg)
4442 .addImm(AArch64::sub_32);
4443 SrcReg = Src64;
4444 }
4445
4446 const TargetRegisterClass *RC =
4447 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4448 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4449}
4450
4451static bool isZExtLoad(const MachineInstr *LI) {
4452 switch (LI->getOpcode()) {
4453 default:
4454 return false;
4455 case AArch64::LDURBBi:
4456 case AArch64::LDURHHi:
4457 case AArch64::LDURWi:
4458 case AArch64::LDRBBui:
4459 case AArch64::LDRHHui:
4460 case AArch64::LDRWui:
4461 case AArch64::LDRBBroX:
4462 case AArch64::LDRHHroX:
4463 case AArch64::LDRWroX:
4464 case AArch64::LDRBBroW:
4465 case AArch64::LDRHHroW:
4466 case AArch64::LDRWroW:
4467 return true;
4468 }
4469}
4470
4471static bool isSExtLoad(const MachineInstr *LI) {
4472 switch (LI->getOpcode()) {
4473 default:
4474 return false;
4475 case AArch64::LDURSBWi:
4476 case AArch64::LDURSHWi:
4477 case AArch64::LDURSBXi:
4478 case AArch64::LDURSHXi:
4479 case AArch64::LDURSWi:
4480 case AArch64::LDRSBWui:
4481 case AArch64::LDRSHWui:
4482 case AArch64::LDRSBXui:
4483 case AArch64::LDRSHXui:
4484 case AArch64::LDRSWui:
4485 case AArch64::LDRSBWroX:
4486 case AArch64::LDRSHWroX:
4487 case AArch64::LDRSBXroX:
4488 case AArch64::LDRSHXroX:
4489 case AArch64::LDRSWroX:
4490 case AArch64::LDRSBWroW:
4491 case AArch64::LDRSHWroW:
4492 case AArch64::LDRSBXroW:
4493 case AArch64::LDRSHXroW:
4494 case AArch64::LDRSWroW:
4495 return true;
4496 }
4497}
4498
4499bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4500 MVT SrcVT) {
4501 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4502 if (!LI || !LI->hasOneUse())
4503 return false;
4504
4505 // Check if the load instruction has already been selected.
4506 Register Reg = lookUpRegForValue(LI);
4507 if (!Reg)
4508 return false;
4509
4510 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4511 if (!MI)
4512 return false;
4513
4514 // Check if the correct load instruction has been emitted - SelectionDAG might
4515 // have emitted a zero-extending load, but we need a sign-extending load.
4516 bool IsZExt = isa<ZExtInst>(I);
4517 const auto *LoadMI = MI;
4518 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4519 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4520 Register LoadReg = MI->getOperand(1).getReg();
4521 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4522 assert(LoadMI && "Expected valid instruction");
4523 }
4524 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4525 return false;
4526
4527 // Nothing to be done.
4528 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4529 updateValueMap(I, Reg);
4530 return true;
4531 }
4532
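  // The extend needs a 64-bit result. For a zero-extend it is enough to wrap
  // the 32-bit load result in SUBREG_TO_REG, since 32-bit loads already clear
  // the upper bits. For a sign-extend, the sign-extending load has already
  // defined a full 64-bit register; reuse it and delete the truncating COPY.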
4533 if (IsZExt) {
4534 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4535 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4536 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4537 .addImm(0)
4538 .addReg(Reg, getKillRegState(true))
4539 .addImm(AArch64::sub_32);
4540 Reg = Reg64;
4541 } else {
4542 assert((MI->getOpcode() == TargetOpcode::COPY &&
4543 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4544 "Expected copy instruction");
4545 Reg = MI->getOperand(1).getReg();
4546    MachineBasicBlock::iterator I(MI);
4547    removeDeadCode(I, std::next(I));
4548 }
4549 updateValueMap(I, Reg);
4550 return true;
4551}
4552
4553bool AArch64FastISel::selectIntExt(const Instruction *I) {
4554 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4555 "Unexpected integer extend instruction.");
4556 MVT RetVT;
4557 MVT SrcVT;
4558 if (!isTypeSupported(I->getType(), RetVT))
4559 return false;
4560
4561 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4562 return false;
4563
4564 // Try to optimize already sign-/zero-extended values from load instructions.
4565 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4566 return true;
4567
4568 Register SrcReg = getRegForValue(I->getOperand(0));
4569 if (!SrcReg)
4570 return false;
4571
4572 // Try to optimize already sign-/zero-extended values from function arguments.
4573 bool IsZExt = isa<ZExtInst>(I);
4574 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4575 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4576 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4577 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4578 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4579 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4580 .addImm(0)
4581 .addReg(SrcReg)
4582 .addImm(AArch64::sub_32);
4583 SrcReg = ResultReg;
4584 }
4585
4586 updateValueMap(I, SrcReg);
4587 return true;
4588 }
4589 }
4590
4591 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4592 if (!ResultReg)
4593 return false;
4594
4595 updateValueMap(I, ResultReg);
4596 return true;
4597}
4598
4599bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4600 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4601 if (!DestEVT.isSimple())
4602 return false;
4603
4604 MVT DestVT = DestEVT.getSimpleVT();
4605 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4606 return false;
4607
4608 unsigned DivOpc;
4609 bool Is64bit = (DestVT == MVT::i64);
4610 switch (ISDOpcode) {
4611 default:
4612 return false;
4613 case ISD::SREM:
4614 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4615 break;
4616 case ISD::UREM:
4617 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4618 break;
4619 }
4620 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4621 Register Src0Reg = getRegForValue(I->getOperand(0));
4622 if (!Src0Reg)
4623 return false;
4624
4625 Register Src1Reg = getRegForValue(I->getOperand(1));
4626 if (!Src1Reg)
4627 return false;
4628
4629 const TargetRegisterClass *RC =
4630 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4631 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4632 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4633 // The remainder is computed as numerator - (quotient * denominator) using the
4634 // MSUB instruction.
4635 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4636 updateValueMap(I, ResultReg);
4637 return true;
4638}
4639
4640bool AArch64FastISel::selectMul(const Instruction *I) {
4641 MVT VT;
4642 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4643 return false;
4644
4645 if (VT.isVector())
4646 return selectBinaryOp(I, ISD::MUL);
4647
4648 const Value *Src0 = I->getOperand(0);
4649 const Value *Src1 = I->getOperand(1);
4650 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4651 if (C->getValue().isPowerOf2())
4652 std::swap(Src0, Src1);
4653
4654 // Try to simplify to a shift instruction.
4655 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4656 if (C->getValue().isPowerOf2()) {
4657 uint64_t ShiftVal = C->getValue().logBase2();
4658 MVT SrcVT = VT;
4659 bool IsZExt = true;
4660 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4661 if (!isIntExtFree(ZExt)) {
4662 MVT VT;
4663 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4664 SrcVT = VT;
4665 IsZExt = true;
4666 Src0 = ZExt->getOperand(0);
4667 }
4668 }
4669 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4670 if (!isIntExtFree(SExt)) {
4671 MVT VT;
4672 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4673 SrcVT = VT;
4674 IsZExt = false;
4675 Src0 = SExt->getOperand(0);
4676 }
4677 }
4678 }
4679
4680 Register Src0Reg = getRegForValue(Src0);
4681 if (!Src0Reg)
4682 return false;
4683
4684 unsigned ResultReg =
4685 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4686
4687 if (ResultReg) {
4688 updateValueMap(I, ResultReg);
4689 return true;
4690 }
4691 }
4692
4693 Register Src0Reg = getRegForValue(I->getOperand(0));
4694 if (!Src0Reg)
4695 return false;
4696
4697 Register Src1Reg = getRegForValue(I->getOperand(1));
4698 if (!Src1Reg)
4699 return false;
4700
4701 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4702
4703 if (!ResultReg)
4704 return false;
4705
4706 updateValueMap(I, ResultReg);
4707 return true;
4708}
4709
4710bool AArch64FastISel::selectShift(const Instruction *I) {
4711 MVT RetVT;
4712 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4713 return false;
4714
4715 if (RetVT.isVector())
4716 return selectOperator(I, I->getOpcode());
4717
4718 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4719 unsigned ResultReg = 0;
4720 uint64_t ShiftVal = C->getZExtValue();
4721 MVT SrcVT = RetVT;
4722 bool IsZExt = I->getOpcode() != Instruction::AShr;
4723 const Value *Op0 = I->getOperand(0);
4724 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4725 if (!isIntExtFree(ZExt)) {
4726 MVT TmpVT;
4727 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4728 SrcVT = TmpVT;
4729 IsZExt = true;
4730 Op0 = ZExt->getOperand(0);
4731 }
4732 }
4733 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4734 if (!isIntExtFree(SExt)) {
4735 MVT TmpVT;
4736 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4737 SrcVT = TmpVT;
4738 IsZExt = false;
4739 Op0 = SExt->getOperand(0);
4740 }
4741 }
4742 }
4743
4744 Register Op0Reg = getRegForValue(Op0);
4745 if (!Op0Reg)
4746 return false;
4747
4748 switch (I->getOpcode()) {
4749 default: llvm_unreachable("Unexpected instruction.");
4750 case Instruction::Shl:
4751 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4752 break;
4753 case Instruction::AShr:
4754 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4755 break;
4756 case Instruction::LShr:
4757 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4758 break;
4759 }
4760 if (!ResultReg)
4761 return false;
4762
4763 updateValueMap(I, ResultReg);
4764 return true;
4765 }
4766
4767 Register Op0Reg = getRegForValue(I->getOperand(0));
4768 if (!Op0Reg)
4769 return false;
4770
4771 Register Op1Reg = getRegForValue(I->getOperand(1));
4772 if (!Op1Reg)
4773 return false;
4774
4775 unsigned ResultReg = 0;
4776 switch (I->getOpcode()) {
4777 default: llvm_unreachable("Unexpected instruction.");
4778 case Instruction::Shl:
4779 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4780 break;
4781 case Instruction::AShr:
4782 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4783 break;
4784 case Instruction::LShr:
4785 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4786 break;
4787 }
4788
4789 if (!ResultReg)
4790 return false;
4791
4792 updateValueMap(I, ResultReg);
4793 return true;
4794}
4795
4796bool AArch64FastISel::selectBitCast(const Instruction *I) {
4797 MVT RetVT, SrcVT;
4798
4799 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4800 return false;
4801 if (!isTypeLegal(I->getType(), RetVT))
4802 return false;
4803
4804 unsigned Opc;
4805 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4806 Opc = AArch64::FMOVWSr;
4807 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4808 Opc = AArch64::FMOVXDr;
4809 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4810 Opc = AArch64::FMOVSWr;
4811 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4812 Opc = AArch64::FMOVDXr;
4813 else
4814 return false;
4815
4816 const TargetRegisterClass *RC = nullptr;
4817 switch (RetVT.SimpleTy) {
4818 default: llvm_unreachable("Unexpected value type.");
4819 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4820 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4821 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4822 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4823 }
4824 Register Op0Reg = getRegForValue(I->getOperand(0));
4825 if (!Op0Reg)
4826 return false;
4827
4828 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4829 if (!ResultReg)
4830 return false;
4831
4832 updateValueMap(I, ResultReg);
4833 return true;
4834}
4835
4836bool AArch64FastISel::selectFRem(const Instruction *I) {
4837 MVT RetVT;
4838 if (!isTypeLegal(I->getType(), RetVT))
4839 return false;
4840
4841 RTLIB::Libcall LC;
4842 switch (RetVT.SimpleTy) {
4843 default:
4844 return false;
4845 case MVT::f32:
4846 LC = RTLIB::REM_F32;
4847 break;
4848 case MVT::f64:
4849 LC = RTLIB::REM_F64;
4850 break;
4851 }
4852
4853 ArgListTy Args;
4854 Args.reserve(I->getNumOperands());
4855
4856 // Populate the argument list.
4857 for (auto &Arg : I->operands()) {
4858 ArgListEntry Entry;
4859 Entry.Val = Arg;
4860 Entry.Ty = Arg->getType();
4861 Args.push_back(Entry);
4862 }
4863
4864 CallLoweringInfo CLI;
4865 MCContext &Ctx = MF->getContext();
4866 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4867 TLI.getLibcallName(LC), std::move(Args));
4868 if (!lowerCallTo(CLI))
4869 return false;
4870 updateValueMap(I, CLI.ResultReg);
4871 return true;
4872}
4873
4874bool AArch64FastISel::selectSDiv(const Instruction *I) {
4875 MVT VT;
4876 if (!isTypeLegal(I->getType(), VT))
4877 return false;
4878
4879 if (!isa<ConstantInt>(I->getOperand(1)))
4880 return selectBinaryOp(I, ISD::SDIV);
4881
4882 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4883 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4884 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4885 return selectBinaryOp(I, ISD::SDIV);
4886
4887 unsigned Lg2 = C.countr_zero();
4888 Register Src0Reg = getRegForValue(I->getOperand(0));
4889 if (!Src0Reg)
4890 return false;
4891
4892 if (cast<BinaryOperator>(I)->isExact()) {
4893 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4894 if (!ResultReg)
4895 return false;
4896 updateValueMap(I, ResultReg);
4897 return true;
4898 }
4899
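  // An arithmetic shift rounds toward negative infinity, but sdiv must round
  // toward zero. Compensate by adding (2^Lg2 - 1) to negative dividends first:
  // compute Src0 + (2^Lg2 - 1), then select it over Src0 when Src0 < 0.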
4900 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4901 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4902 if (!AddReg)
4903 return false;
4904
4905 // (Src0 < 0) ? Pow2 - 1 : 0;
4906 if (!emitICmp_ri(VT, Src0Reg, 0))
4907 return false;
4908
4909 unsigned SelectOpc;
4910 const TargetRegisterClass *RC;
4911 if (VT == MVT::i64) {
4912 SelectOpc = AArch64::CSELXr;
4913 RC = &AArch64::GPR64RegClass;
4914 } else {
4915 SelectOpc = AArch64::CSELWr;
4916 RC = &AArch64::GPR32RegClass;
4917 }
4918 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4919                                        AArch64CC::LT);
4920  if (!SelectReg)
4921 return false;
4922
4923 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4924 // negate the result.
4925 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4926 unsigned ResultReg;
4927 if (C.isNegative())
4928 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4929 AArch64_AM::ASR, Lg2);
4930 else
4931 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4932
4933 if (!ResultReg)
4934 return false;
4935
4936 updateValueMap(I, ResultReg);
4937 return true;
4938}
4939
4940/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4941/// have to duplicate it for AArch64, because otherwise we would fail during the
4942/// sign-extend emission.
4943unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4944 Register IdxN = getRegForValue(Idx);
4945 if (IdxN == 0)
4946 // Unhandled operand. Halt "fast" selection and bail.
4947 return 0;
4948
4949 // If the index is smaller or larger than intptr_t, truncate or extend it.
4950 MVT PtrVT = TLI.getPointerTy(DL);
4951 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4952 if (IdxVT.bitsLT(PtrVT)) {
4953 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4954 } else if (IdxVT.bitsGT(PtrVT))
4955 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4956 return IdxN;
4957}
4958
4959/// This is mostly a copy of the existing FastISel GEP code, but we have to
4960/// duplicate it for AArch64, because otherwise we would bail out even for
4961/// simple cases. This is because the standard fastEmit functions don't cover
4962/// MUL at all and ADD is lowered very inefficiently.
4963bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4964 if (Subtarget->isTargetILP32())
4965 return false;
4966
4967 Register N = getRegForValue(I->getOperand(0));
4968 if (!N)
4969 return false;
4970
4971 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4972 // into a single N = N + TotalOffset.
4973 uint64_t TotalOffs = 0;
4974 MVT VT = TLI.getPointerTy(DL);
4975  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4976       GTI != E; ++GTI) {
4977 const Value *Idx = GTI.getOperand();
4978 if (auto *StTy = GTI.getStructTypeOrNull()) {
4979 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4980 // N = N + Offset
4981 if (Field)
4982 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4983 } else {
4984 // If this is a constant subscript, handle it quickly.
4985 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4986 if (CI->isZero())
4987 continue;
4988 // N = N + Offset
4989 TotalOffs += GTI.getSequentialElementStride(DL) *
4990 cast<ConstantInt>(CI)->getSExtValue();
4991 continue;
4992 }
4993 if (TotalOffs) {
4994 N = emitAdd_ri_(VT, N, TotalOffs);
4995 if (!N)
4996 return false;
4997 TotalOffs = 0;
4998 }
4999
5000 // N = N + Idx * ElementSize;
5001 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5002 unsigned IdxN = getRegForGEPIndex(Idx);
5003 if (!IdxN)
5004 return false;
5005
5006 if (ElementSize != 1) {
5007 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5008 if (!C)
5009 return false;
5010 IdxN = emitMul_rr(VT, IdxN, C);
5011 if (!IdxN)
5012 return false;
5013 }
5014 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5015 if (!N)
5016 return false;
5017 }
5018 }
5019 if (TotalOffs) {
5020 N = emitAdd_ri_(VT, N, TotalOffs);
5021 if (!N)
5022 return false;
5023 }
5024 updateValueMap(I, N);
5025 return true;
5026}
5027
5028bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5029 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5030 "cmpxchg survived AtomicExpand at optlevel > -O0");
5031
5032 auto *RetPairTy = cast<StructType>(I->getType());
5033 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5034 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5035 "cmpxchg has a non-i1 status result");
5036
5037 MVT VT;
5038 if (!isTypeLegal(RetTy, VT))
5039 return false;
5040
5041 const TargetRegisterClass *ResRC;
5042 unsigned Opc, CmpOpc;
5043 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5044 // extractvalue selection doesn't support that.
5045 if (VT == MVT::i32) {
5046 Opc = AArch64::CMP_SWAP_32;
5047 CmpOpc = AArch64::SUBSWrs;
5048 ResRC = &AArch64::GPR32RegClass;
5049 } else if (VT == MVT::i64) {
5050 Opc = AArch64::CMP_SWAP_64;
5051 CmpOpc = AArch64::SUBSXrs;
5052 ResRC = &AArch64::GPR64RegClass;
5053 } else {
5054 return false;
5055 }
5056
5057 const MCInstrDesc &II = TII.get(Opc);
5058
5059 const Register AddrReg = constrainOperandRegClass(
5060 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5061 const Register DesiredReg = constrainOperandRegClass(
5062 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5063 const Register NewReg = constrainOperandRegClass(
5064 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5065
5066 const Register ResultReg1 = createResultReg(ResRC);
5067 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5068 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5069
5070 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5071 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5072 .addDef(ResultReg1)
5073 .addDef(ScratchReg)
5074 .addUse(AddrReg)
5075 .addUse(DesiredReg)
5076 .addUse(NewReg);
5077
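  // CMP_SWAP returns the value that was in memory in ResultReg1. Rebuild the
  // i1 success flag by comparing it against the expected value and using CSINC
  // to produce 1 on equality and 0 otherwise.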
5078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5079 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5080 .addUse(ResultReg1)
5081 .addUse(DesiredReg)
5082 .addImm(0);
5083
5084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5085 .addDef(ResultReg2)
5086 .addUse(AArch64::WZR)
5087 .addUse(AArch64::WZR)
5088      .addImm(AArch64CC::NE);
5089
5090 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5091 updateValueMap(I, ResultReg1, 2);
5092 return true;
5093}
5094
5095bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5096 if (TLI.fallBackToDAGISel(*I))
5097 return false;
5098 switch (I->getOpcode()) {
5099 default:
5100 break;
5101 case Instruction::Add:
5102 case Instruction::Sub:
5103 return selectAddSub(I);
5104 case Instruction::Mul:
5105 return selectMul(I);
5106 case Instruction::SDiv:
5107 return selectSDiv(I);
5108 case Instruction::SRem:
5109 if (!selectBinaryOp(I, ISD::SREM))
5110 return selectRem(I, ISD::SREM);
5111 return true;
5112 case Instruction::URem:
5113 if (!selectBinaryOp(I, ISD::UREM))
5114 return selectRem(I, ISD::UREM);
5115 return true;
5116 case Instruction::Shl:
5117 case Instruction::LShr:
5118 case Instruction::AShr:
5119 return selectShift(I);
5120 case Instruction::And:
5121 case Instruction::Or:
5122 case Instruction::Xor:
5123 return selectLogicalOp(I);
5124 case Instruction::Br:
5125 return selectBranch(I);
5126 case Instruction::IndirectBr:
5127 return selectIndirectBr(I);
5128 case Instruction::BitCast:
5129    if (!selectCast(I, ISD::BITCAST))
5130      return selectBitCast(I);
5131 return true;
5132 case Instruction::FPToSI:
5133 if (!selectCast(I, ISD::FP_TO_SINT))
5134 return selectFPToInt(I, /*Signed=*/true);
5135 return true;
5136 case Instruction::FPToUI:
5137 return selectFPToInt(I, /*Signed=*/false);
5138 case Instruction::ZExt:
5139 case Instruction::SExt:
5140 return selectIntExt(I);
5141 case Instruction::Trunc:
5142 if (!selectCast(I, ISD::TRUNCATE))
5143 return selectTrunc(I);
5144 return true;
5145 case Instruction::FPExt:
5146 return selectFPExt(I);
5147 case Instruction::FPTrunc:
5148 return selectFPTrunc(I);
5149 case Instruction::SIToFP:
5150 if (!selectCast(I, ISD::SINT_TO_FP))
5151 return selectIntToFP(I, /*Signed=*/true);
5152 return true;
5153 case Instruction::UIToFP:
5154 return selectIntToFP(I, /*Signed=*/false);
5155 case Instruction::Load:
5156 return selectLoad(I);
5157 case Instruction::Store:
5158 return selectStore(I);
5159 case Instruction::FCmp:
5160 case Instruction::ICmp:
5161 return selectCmp(I);
5162 case Instruction::Select:
5163 return selectSelect(I);
5164 case Instruction::Ret:
5165 return selectRet(I);
5166 case Instruction::FRem:
5167 return selectFRem(I);
5168 case Instruction::GetElementPtr:
5169 return selectGetElementPtr(I);
5170 case Instruction::AtomicCmpXchg:
5171 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5172 }
5173
5174 // fall-back to target-independent instruction selection.
5175 return selectOperator(I, I->getOpcode());
5176}
5177
5178FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5179                                  const TargetLibraryInfo *LibInfo) {
5180
5181 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5182 if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
5183 CallerAttrs.hasStreamingCompatibleInterface())
5184 return nullptr;
5185 return new AArch64FastISel(FuncInfo, LibInfo);
5186}