LLVM 20.0.0git
AArch64FastISel.cpp
Go to the documentation of this file.
1//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90 BaseKind Kind = RegBase;
92 union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 unsigned OffsetReg = 0;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(unsigned Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg;
114 }
115
116 unsigned getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(unsigned Reg) {
122 OffsetReg = Reg;
123 }
124
125 unsigned getOffsetReg() const {
126 return OffsetReg;
127 }
128
129 void setFI(unsigned FI) {
130 assert(isFIBase() && "Invalid base frame index access!");
131 Base.FI = FI;
132 }
133
134 unsigned getFI() const {
135 assert(isFIBase() && "Invalid base frame index access!");
136 return Base.FI;
137 }
138
139 void setOffset(int64_t O) { Offset = O; }
140 int64_t getOffset() { return Offset; }
141 void setShift(unsigned S) { Shift = S; }
142 unsigned getShift() { return Shift; }
143
144 void setGlobalValue(const GlobalValue *G) { GV = G; }
145 const GlobalValue *getGlobalValue() { return GV; }
146 };
147
148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
149 /// make the right decision when generating code for different targets.
150 const AArch64Subtarget *Subtarget;
151 LLVMContext *Context;
152
153 bool fastLowerArguments() override;
154 bool fastLowerCall(CallLoweringInfo &CLI) override;
155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
156
157private:
158 // Selection routines.
159 bool selectAddSub(const Instruction *I);
160 bool selectLogicalOp(const Instruction *I);
161 bool selectLoad(const Instruction *I);
162 bool selectStore(const Instruction *I);
163 bool selectBranch(const Instruction *I);
164 bool selectIndirectBr(const Instruction *I);
165 bool selectCmp(const Instruction *I);
166 bool selectSelect(const Instruction *I);
167 bool selectFPExt(const Instruction *I);
168 bool selectFPTrunc(const Instruction *I);
169 bool selectFPToInt(const Instruction *I, bool Signed);
170 bool selectIntToFP(const Instruction *I, bool Signed);
171 bool selectRem(const Instruction *I, unsigned ISDOpcode);
172 bool selectRet(const Instruction *I);
173 bool selectTrunc(const Instruction *I);
174 bool selectIntExt(const Instruction *I);
175 bool selectMul(const Instruction *I);
176 bool selectShift(const Instruction *I);
177 bool selectBitCast(const Instruction *I);
178 bool selectFRem(const Instruction *I);
179 bool selectSDiv(const Instruction *I);
180 bool selectGetElementPtr(const Instruction *I);
181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
182
183 // Utility helper routines.
184 bool isTypeLegal(Type *Ty, MVT &VT);
185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
186 bool isValueAvailable(const Value *V) const;
187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
188 bool computeCallAddress(const Value *V, Address &Addr);
189 bool simplifyAddress(Address &Addr, MVT VT);
190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
192 unsigned ScaleFactor, MachineMemOperand *MMO);
193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
195 MaybeAlign Alignment);
196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
197 const Value *Cond);
198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
199 bool optimizeSelect(const SelectInst *SI);
200 unsigned getRegForGEPIndex(const Value *Idx);
201
202 // Emit helper routines.
203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
204 const Value *RHS, bool SetFlags = false,
205 bool WantResult = true, bool IsZExt = false);
206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
207 unsigned RHSReg, bool SetFlags = false,
208 bool WantResult = true);
209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
210 uint64_t Imm, bool SetFlags = false,
211 bool WantResult = true);
212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
218 uint64_t ShiftImm, bool SetFlags = false,
219 bool WantResult = true);
220
221 // Emit functions.
222 bool emitCompareAndBranch(const BranchInst *BI);
223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228 MachineMemOperand *MMO = nullptr);
229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230 MachineMemOperand *MMO = nullptr);
231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232 MachineMemOperand *MMO = nullptr);
233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236 bool SetFlags = false, bool WantResult = true,
237 bool IsZExt = false);
238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240 bool SetFlags = false, bool WantResult = true,
241 bool IsZExt = false);
242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243 bool WantResult = true);
244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
246 bool WantResult = true);
247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
248 const Value *RHS);
249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250 uint64_t Imm);
251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 unsigned RHSReg, uint64_t ShiftImm);
253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
259 bool IsZExt = true);
260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
262 bool IsZExt = true);
263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
265 bool IsZExt = false);
266
267 unsigned materializeInt(const ConstantInt *CI, MVT VT);
268 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
269 unsigned materializeGV(const GlobalValue *GV);
270
271 // Call handling routines.
272private:
273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
275 unsigned &NumBytes);
276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
277
278public:
279 // Backend specific FastISel code.
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
281 unsigned fastMaterializeConstant(const Constant *C) override;
282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
283
284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
285 const TargetLibraryInfo *LibInfo)
286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
288 Context = &FuncInfo.Fn->getContext();
289 }
290
291 bool fastSelectInstruction(const Instruction *I) override;
292
293#include "AArch64GenFastISel.inc"
294};
295
296} // end anonymous namespace
297
298/// Check if the sign-/zero-extend will be a noop.
299static bool isIntExtFree(const Instruction *I) {
300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301 "Unexpected integer extend instruction.");
302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303 "Unexpected value type.");
304 bool IsZExt = isa<ZExtInst>(I);
305
306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307 if (LI->hasOneUse())
308 return true;
309
310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312 return true;
313
314 return false;
315}
316
317/// Determine the implicit scale factor that is applied by a memory
318/// operation for a given value type.
319static unsigned getImplicitScaleFactor(MVT VT) {
320 switch (VT.SimpleTy) {
321 default:
322 return 0; // invalid
323 case MVT::i1: // fall-through
324 case MVT::i8:
325 return 1;
326 case MVT::i16:
327 return 2;
328 case MVT::i32: // fall-through
329 case MVT::f32:
330 return 4;
331 case MVT::i64: // fall-through
332 case MVT::f64:
333 return 8;
334 }
335}
336
337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338 if (CC == CallingConv::GHC)
339 return CC_AArch64_GHC;
342 if (Subtarget->isTargetDarwin())
344 if (Subtarget->isTargetWindows())
345 return CC_AArch64_Win64PCS;
346 return CC_AArch64_AAPCS;
347}
348
349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
350 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
351 "Alloca should always return a pointer.");
352
353 // Don't handle dynamic allocas.
354 if (!FuncInfo.StaticAllocaMap.count(AI))
355 return 0;
356
358 FuncInfo.StaticAllocaMap.find(AI);
359
360 if (SI != FuncInfo.StaticAllocaMap.end()) {
361 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
363 ResultReg)
364 .addFrameIndex(SI->second)
365 .addImm(0)
366 .addImm(0);
367 return ResultReg;
368 }
369
370 return 0;
371}
372
373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
374 if (VT > MVT::i64)
375 return 0;
376
377 if (!CI->isZero())
378 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
379
380 // Create a copy from the zero register to materialize a "0" value.
381 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
382 : &AArch64::GPR32RegClass;
383 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
384 Register ResultReg = createResultReg(RC);
385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
386 ResultReg).addReg(ZeroReg, getKillRegState(true));
387 return ResultReg;
388}
389
390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
391 // Positive zero (+0.0) has to be materialized with a fmov from the zero
392 // register, because the immediate version of fmov cannot encode zero.
393 if (CFP->isNullValue())
394 return fastMaterializeFloatZero(CFP);
395
396 if (VT != MVT::f32 && VT != MVT::f64)
397 return 0;
398
399 const APFloat Val = CFP->getValueAPF();
400 bool Is64Bit = (VT == MVT::f64);
401 // This checks to see if we can use FMOV instructions to materialize
402 // a constant, otherwise we have to materialize via the constant pool.
403 int Imm =
404 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
405 if (Imm != -1) {
406 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
407 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
408 }
409
410 // For the large code model materialize the FP constant in code.
411 if (TM.getCodeModel() == CodeModel::Large) {
412 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
413 const TargetRegisterClass *RC = Is64Bit ?
414 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
415
416 Register TmpReg = createResultReg(RC);
417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
419
420 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
422 TII.get(TargetOpcode::COPY), ResultReg)
423 .addReg(TmpReg, getKillRegState(true));
424
425 return ResultReg;
426 }
427
428 // Materialize via constant pool. MachineConstantPool wants an explicit
429 // alignment.
430 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
431
432 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
433 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
436
437 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
438 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
440 .addReg(ADRPReg)
442 return ResultReg;
443}
444
445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
446 // We can't handle thread-local variables quickly yet.
447 if (GV->isThreadLocal())
448 return 0;
449
450 // MachO still uses GOT for large code-model accesses, but ELF requires
451 // movz/movk sequences, which FastISel doesn't handle yet.
452 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
453 return 0;
454
455 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
456 return 0;
457
458 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
459
460 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
461 if (!DestEVT.isSimple())
462 return 0;
463
464 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
465 unsigned ResultReg;
466
467 if (OpFlags & AArch64II::MO_GOT) {
468 // ADRP + LDRX
469 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
470 ADRPReg)
471 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
472
473 unsigned LdrOpc;
474 if (Subtarget->isTargetILP32()) {
475 ResultReg = createResultReg(&AArch64::GPR32RegClass);
476 LdrOpc = AArch64::LDRWui;
477 } else {
478 ResultReg = createResultReg(&AArch64::GPR64RegClass);
479 LdrOpc = AArch64::LDRXui;
480 }
481 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
482 ResultReg)
483 .addReg(ADRPReg)
485 AArch64II::MO_NC | OpFlags);
486 if (!Subtarget->isTargetILP32())
487 return ResultReg;
488
489 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
490 // so we must extend the result on ILP32.
491 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
492 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
493 TII.get(TargetOpcode::SUBREG_TO_REG))
494 .addDef(Result64)
495 .addImm(0)
496 .addReg(ResultReg, RegState::Kill)
497 .addImm(AArch64::sub_32);
498 return Result64;
499 } else {
500 // ADRP + ADDX
501 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
502 ADRPReg)
503 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
504
505 if (OpFlags & AArch64II::MO_TAGGED) {
506 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
507 // We do so by creating a MOVK that sets bits 48-63 of the register to
508 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
509 // the small code model so we can assume a binary size of <= 4GB, which
510 // makes the untagged PC relative offset positive. The binary must also be
511 // loaded into address range [0, 2^48). Both of these properties need to
512 // be ensured at runtime when using tagged addresses.
513 //
514 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
515 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
516 // are not exactly 1:1 with FastISel so we cannot easily abstract this
517 // out. At some point, it would be nice to find a way to not have this
518 // duplciate code.
519 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
520 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
521 DstReg)
522 .addReg(ADRPReg)
523 .addGlobalAddress(GV, /*Offset=*/0x100000000,
525 .addImm(48);
526 ADRPReg = DstReg;
527 }
528
529 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
530 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
531 ResultReg)
532 .addReg(ADRPReg)
533 .addGlobalAddress(GV, 0,
535 .addImm(0);
536 }
537 return ResultReg;
538}
539
540unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
541 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
542
543 // Only handle simple types.
544 if (!CEVT.isSimple())
545 return 0;
546 MVT VT = CEVT.getSimpleVT();
547 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
548 // 'null' pointers need to have a somewhat special treatment.
549 if (isa<ConstantPointerNull>(C)) {
550 assert(VT == MVT::i64 && "Expected 64-bit pointers");
551 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
552 }
553
554 if (const auto *CI = dyn_cast<ConstantInt>(C))
555 return materializeInt(CI, VT);
556 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
557 return materializeFP(CFP, VT);
558 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
559 return materializeGV(GV);
560
561 return 0;
562}
563
564unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
565 assert(CFP->isNullValue() &&
566 "Floating-point constant is not a positive zero.");
567 MVT VT;
568 if (!isTypeLegal(CFP->getType(), VT))
569 return 0;
570
571 if (VT != MVT::f32 && VT != MVT::f64)
572 return 0;
573
574 bool Is64Bit = (VT == MVT::f64);
575 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
576 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
577 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
578}
579
580/// Check if the multiply is by a power-of-2 constant.
581static bool isMulPowOf2(const Value *I) {
582 if (const auto *MI = dyn_cast<MulOperator>(I)) {
583 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
584 if (C->getValue().isPowerOf2())
585 return true;
586 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
587 if (C->getValue().isPowerOf2())
588 return true;
589 }
590 return false;
591}
592
593// Computes the address to get to an object.
594bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
595{
596 const User *U = nullptr;
597 unsigned Opcode = Instruction::UserOp1;
598 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
599 // Don't walk into other basic blocks unless the object is an alloca from
600 // another block, otherwise it may not have a virtual register assigned.
601 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
602 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
603 Opcode = I->getOpcode();
604 U = I;
605 }
606 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
607 Opcode = C->getOpcode();
608 U = C;
609 }
610
611 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
612 if (Ty->getAddressSpace() > 255)
613 // Fast instruction selection doesn't support the special
614 // address spaces.
615 return false;
616
617 switch (Opcode) {
618 default:
619 break;
620 case Instruction::BitCast:
621 // Look through bitcasts.
622 return computeAddress(U->getOperand(0), Addr, Ty);
623
624 case Instruction::IntToPtr:
625 // Look past no-op inttoptrs.
626 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
627 TLI.getPointerTy(DL))
628 return computeAddress(U->getOperand(0), Addr, Ty);
629 break;
630
631 case Instruction::PtrToInt:
632 // Look past no-op ptrtoints.
633 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
634 return computeAddress(U->getOperand(0), Addr, Ty);
635 break;
636
637 case Instruction::GetElementPtr: {
638 Address SavedAddr = Addr;
639 uint64_t TmpOffset = Addr.getOffset();
640
641 // Iterate through the GEP folding the constants into offsets where
642 // we can.
643 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
644 GTI != E; ++GTI) {
645 const Value *Op = GTI.getOperand();
646 if (StructType *STy = GTI.getStructTypeOrNull()) {
647 const StructLayout *SL = DL.getStructLayout(STy);
648 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
649 TmpOffset += SL->getElementOffset(Idx);
650 } else {
651 uint64_t S = GTI.getSequentialElementStride(DL);
652 while (true) {
653 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
654 // Constant-offset addressing.
655 TmpOffset += CI->getSExtValue() * S;
656 break;
657 }
658 if (canFoldAddIntoGEP(U, Op)) {
659 // A compatible add with a constant operand. Fold the constant.
660 ConstantInt *CI =
661 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
662 TmpOffset += CI->getSExtValue() * S;
663 // Iterate on the other operand.
664 Op = cast<AddOperator>(Op)->getOperand(0);
665 continue;
666 }
667 // Unsupported
668 goto unsupported_gep;
669 }
670 }
671 }
672
673 // Try to grab the base operand now.
674 Addr.setOffset(TmpOffset);
675 if (computeAddress(U->getOperand(0), Addr, Ty))
676 return true;
677
678 // We failed, restore everything and try the other options.
679 Addr = SavedAddr;
680
681 unsupported_gep:
682 break;
683 }
684 case Instruction::Alloca: {
685 const AllocaInst *AI = cast<AllocaInst>(Obj);
687 FuncInfo.StaticAllocaMap.find(AI);
688 if (SI != FuncInfo.StaticAllocaMap.end()) {
689 Addr.setKind(Address::FrameIndexBase);
690 Addr.setFI(SI->second);
691 return true;
692 }
693 break;
694 }
695 case Instruction::Add: {
696 // Adds of constants are common and easy enough.
697 const Value *LHS = U->getOperand(0);
698 const Value *RHS = U->getOperand(1);
699
700 if (isa<ConstantInt>(LHS))
701 std::swap(LHS, RHS);
702
703 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
704 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
705 return computeAddress(LHS, Addr, Ty);
706 }
707
708 Address Backup = Addr;
709 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
710 return true;
711 Addr = Backup;
712
713 break;
714 }
715 case Instruction::Sub: {
716 // Subs of constants are common and easy enough.
717 const Value *LHS = U->getOperand(0);
718 const Value *RHS = U->getOperand(1);
719
720 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
721 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
722 return computeAddress(LHS, Addr, Ty);
723 }
724 break;
725 }
726 case Instruction::Shl: {
727 if (Addr.getOffsetReg())
728 break;
729
730 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
731 if (!CI)
732 break;
733
734 unsigned Val = CI->getZExtValue();
735 if (Val < 1 || Val > 3)
736 break;
737
738 uint64_t NumBytes = 0;
739 if (Ty && Ty->isSized()) {
740 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
741 NumBytes = NumBits / 8;
742 if (!isPowerOf2_64(NumBits))
743 NumBytes = 0;
744 }
745
746 if (NumBytes != (1ULL << Val))
747 break;
748
749 Addr.setShift(Val);
750 Addr.setExtendType(AArch64_AM::LSL);
751
752 const Value *Src = U->getOperand(0);
753 if (const auto *I = dyn_cast<Instruction>(Src)) {
754 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
755 // Fold the zext or sext when it won't become a noop.
756 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
757 if (!isIntExtFree(ZE) &&
758 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
759 Addr.setExtendType(AArch64_AM::UXTW);
760 Src = ZE->getOperand(0);
761 }
762 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
763 if (!isIntExtFree(SE) &&
764 SE->getOperand(0)->getType()->isIntegerTy(32)) {
765 Addr.setExtendType(AArch64_AM::SXTW);
766 Src = SE->getOperand(0);
767 }
768 }
769 }
770 }
771
772 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
773 if (AI->getOpcode() == Instruction::And) {
774 const Value *LHS = AI->getOperand(0);
775 const Value *RHS = AI->getOperand(1);
776
777 if (const auto *C = dyn_cast<ConstantInt>(LHS))
778 if (C->getValue() == 0xffffffff)
779 std::swap(LHS, RHS);
780
781 if (const auto *C = dyn_cast<ConstantInt>(RHS))
782 if (C->getValue() == 0xffffffff) {
783 Addr.setExtendType(AArch64_AM::UXTW);
784 Register Reg = getRegForValue(LHS);
785 if (!Reg)
786 return false;
787 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
788 Addr.setOffsetReg(Reg);
789 return true;
790 }
791 }
792
793 Register Reg = getRegForValue(Src);
794 if (!Reg)
795 return false;
796 Addr.setOffsetReg(Reg);
797 return true;
798 }
799 case Instruction::Mul: {
800 if (Addr.getOffsetReg())
801 break;
802
803 if (!isMulPowOf2(U))
804 break;
805
806 const Value *LHS = U->getOperand(0);
807 const Value *RHS = U->getOperand(1);
808
809 // Canonicalize power-of-2 value to the RHS.
810 if (const auto *C = dyn_cast<ConstantInt>(LHS))
811 if (C->getValue().isPowerOf2())
812 std::swap(LHS, RHS);
813
814 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
815 const auto *C = cast<ConstantInt>(RHS);
816 unsigned Val = C->getValue().logBase2();
817 if (Val < 1 || Val > 3)
818 break;
819
820 uint64_t NumBytes = 0;
821 if (Ty && Ty->isSized()) {
822 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
823 NumBytes = NumBits / 8;
824 if (!isPowerOf2_64(NumBits))
825 NumBytes = 0;
826 }
827
828 if (NumBytes != (1ULL << Val))
829 break;
830
831 Addr.setShift(Val);
832 Addr.setExtendType(AArch64_AM::LSL);
833
834 const Value *Src = LHS;
835 if (const auto *I = dyn_cast<Instruction>(Src)) {
836 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
837 // Fold the zext or sext when it won't become a noop.
838 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
839 if (!isIntExtFree(ZE) &&
840 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
841 Addr.setExtendType(AArch64_AM::UXTW);
842 Src = ZE->getOperand(0);
843 }
844 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
845 if (!isIntExtFree(SE) &&
846 SE->getOperand(0)->getType()->isIntegerTy(32)) {
847 Addr.setExtendType(AArch64_AM::SXTW);
848 Src = SE->getOperand(0);
849 }
850 }
851 }
852 }
853
854 Register Reg = getRegForValue(Src);
855 if (!Reg)
856 return false;
857 Addr.setOffsetReg(Reg);
858 return true;
859 }
860 case Instruction::And: {
861 if (Addr.getOffsetReg())
862 break;
863
864 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
865 break;
866
867 const Value *LHS = U->getOperand(0);
868 const Value *RHS = U->getOperand(1);
869
870 if (const auto *C = dyn_cast<ConstantInt>(LHS))
871 if (C->getValue() == 0xffffffff)
872 std::swap(LHS, RHS);
873
874 if (const auto *C = dyn_cast<ConstantInt>(RHS))
875 if (C->getValue() == 0xffffffff) {
876 Addr.setShift(0);
877 Addr.setExtendType(AArch64_AM::LSL);
878 Addr.setExtendType(AArch64_AM::UXTW);
879
880 Register Reg = getRegForValue(LHS);
881 if (!Reg)
882 return false;
883 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
884 Addr.setOffsetReg(Reg);
885 return true;
886 }
887 break;
888 }
889 case Instruction::SExt:
890 case Instruction::ZExt: {
891 if (!Addr.getReg() || Addr.getOffsetReg())
892 break;
893
894 const Value *Src = nullptr;
895 // Fold the zext or sext when it won't become a noop.
896 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
897 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
898 Addr.setExtendType(AArch64_AM::UXTW);
899 Src = ZE->getOperand(0);
900 }
901 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
902 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
903 Addr.setExtendType(AArch64_AM::SXTW);
904 Src = SE->getOperand(0);
905 }
906 }
907
908 if (!Src)
909 break;
910
911 Addr.setShift(0);
912 Register Reg = getRegForValue(Src);
913 if (!Reg)
914 return false;
915 Addr.setOffsetReg(Reg);
916 return true;
917 }
918 } // end switch
919
920 if (Addr.isRegBase() && !Addr.getReg()) {
921 Register Reg = getRegForValue(Obj);
922 if (!Reg)
923 return false;
924 Addr.setReg(Reg);
925 return true;
926 }
927
928 if (!Addr.getOffsetReg()) {
929 Register Reg = getRegForValue(Obj);
930 if (!Reg)
931 return false;
932 Addr.setOffsetReg(Reg);
933 return true;
934 }
935
936 return false;
937}
938
939bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
940 const User *U = nullptr;
941 unsigned Opcode = Instruction::UserOp1;
942 bool InMBB = true;
943
944 if (const auto *I = dyn_cast<Instruction>(V)) {
945 Opcode = I->getOpcode();
946 U = I;
947 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
948 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
949 Opcode = C->getOpcode();
950 U = C;
951 }
952
953 switch (Opcode) {
954 default: break;
955 case Instruction::BitCast:
956 // Look past bitcasts if its operand is in the same BB.
957 if (InMBB)
958 return computeCallAddress(U->getOperand(0), Addr);
959 break;
960 case Instruction::IntToPtr:
961 // Look past no-op inttoptrs if its operand is in the same BB.
962 if (InMBB &&
963 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
964 TLI.getPointerTy(DL))
965 return computeCallAddress(U->getOperand(0), Addr);
966 break;
967 case Instruction::PtrToInt:
968 // Look past no-op ptrtoints if its operand is in the same BB.
969 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
970 return computeCallAddress(U->getOperand(0), Addr);
971 break;
972 }
973
974 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
975 Addr.setGlobalValue(GV);
976 return true;
977 }
978
979 // If all else fails, try to materialize the value in a register.
980 if (!Addr.getGlobalValue()) {
981 Addr.setReg(getRegForValue(V));
982 return Addr.getReg() != 0;
983 }
984
985 return false;
986}
987
988bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
989 EVT evt = TLI.getValueType(DL, Ty, true);
990
991 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
992 return false;
993
994 // Only handle simple types.
995 if (evt == MVT::Other || !evt.isSimple())
996 return false;
997 VT = evt.getSimpleVT();
998
999 // This is a legal type, but it's not something we handle in fast-isel.
1000 if (VT == MVT::f128)
1001 return false;
1002
1003 // Handle all other legal types, i.e. a register that will directly hold this
1004 // value.
1005 return TLI.isTypeLegal(VT);
1006}
1007
1008/// Determine if the value type is supported by FastISel.
1009///
1010/// FastISel for AArch64 can handle more value types than are legal. This adds
1011/// simple value type such as i1, i8, and i16.
1012bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1013 if (Ty->isVectorTy() && !IsVectorAllowed)
1014 return false;
1015
1016 if (isTypeLegal(Ty, VT))
1017 return true;
1018
1019 // If this is a type than can be sign or zero-extended to a basic operation
1020 // go ahead and accept it now.
1021 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1022 return true;
1023
1024 return false;
1025}
1026
1027bool AArch64FastISel::isValueAvailable(const Value *V) const {
1028 if (!isa<Instruction>(V))
1029 return true;
1030
1031 const auto *I = cast<Instruction>(V);
1032 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1033}
1034
1035bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1036 if (Subtarget->isTargetILP32())
1037 return false;
1038
1039 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1040 if (!ScaleFactor)
1041 return false;
1042
1043 bool ImmediateOffsetNeedsLowering = false;
1044 bool RegisterOffsetNeedsLowering = false;
1045 int64_t Offset = Addr.getOffset();
1046 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1047 ImmediateOffsetNeedsLowering = true;
1048 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1049 !isUInt<12>(Offset / ScaleFactor))
1050 ImmediateOffsetNeedsLowering = true;
1051
1052 // Cannot encode an offset register and an immediate offset in the same
1053 // instruction. Fold the immediate offset into the load/store instruction and
1054 // emit an additional add to take care of the offset register.
1055 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1056 RegisterOffsetNeedsLowering = true;
1057
1058 // Cannot encode zero register as base.
1059 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1060 RegisterOffsetNeedsLowering = true;
1061
1062 // If this is a stack pointer and the offset needs to be simplified then put
1063 // the alloca address into a register, set the base type back to register and
1064 // continue. This should almost never happen.
1065 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1066 {
1067 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1068 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1069 ResultReg)
1070 .addFrameIndex(Addr.getFI())
1071 .addImm(0)
1072 .addImm(0);
1073 Addr.setKind(Address::RegBase);
1074 Addr.setReg(ResultReg);
1075 }
1076
1077 if (RegisterOffsetNeedsLowering) {
1078 unsigned ResultReg = 0;
1079 if (Addr.getReg()) {
1080 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1081 Addr.getExtendType() == AArch64_AM::UXTW )
1082 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1083 Addr.getOffsetReg(), Addr.getExtendType(),
1084 Addr.getShift());
1085 else
1086 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1087 Addr.getOffsetReg(), AArch64_AM::LSL,
1088 Addr.getShift());
1089 } else {
1090 if (Addr.getExtendType() == AArch64_AM::UXTW)
1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1092 Addr.getShift(), /*IsZExt=*/true);
1093 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1095 Addr.getShift(), /*IsZExt=*/false);
1096 else
1097 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1098 Addr.getShift());
1099 }
1100 if (!ResultReg)
1101 return false;
1102
1103 Addr.setReg(ResultReg);
1104 Addr.setOffsetReg(0);
1105 Addr.setShift(0);
1106 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1107 }
1108
1109 // Since the offset is too large for the load/store instruction get the
1110 // reg+offset into a register.
1111 if (ImmediateOffsetNeedsLowering) {
1112 unsigned ResultReg;
1113 if (Addr.getReg())
1114 // Try to fold the immediate into the add instruction.
1115 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1116 else
1117 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1118
1119 if (!ResultReg)
1120 return false;
1121 Addr.setReg(ResultReg);
1122 Addr.setOffset(0);
1123 }
1124 return true;
1125}
1126
1127void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1128 const MachineInstrBuilder &MIB,
1130 unsigned ScaleFactor,
1131 MachineMemOperand *MMO) {
1132 int64_t Offset = Addr.getOffset() / ScaleFactor;
1133 // Frame base works a bit differently. Handle it separately.
1134 if (Addr.isFIBase()) {
1135 int FI = Addr.getFI();
1136 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1137 // and alignment should be based on the VT.
1138 MMO = FuncInfo.MF->getMachineMemOperand(
1139 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1140 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1141 // Now add the rest of the operands.
1142 MIB.addFrameIndex(FI).addImm(Offset);
1143 } else {
1144 assert(Addr.isRegBase() && "Unexpected address kind.");
1145 const MCInstrDesc &II = MIB->getDesc();
1146 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1147 Addr.setReg(
1148 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1149 Addr.setOffsetReg(
1150 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1151 if (Addr.getOffsetReg()) {
1152 assert(Addr.getOffset() == 0 && "Unexpected offset");
1153 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1154 Addr.getExtendType() == AArch64_AM::SXTX;
1155 MIB.addReg(Addr.getReg());
1156 MIB.addReg(Addr.getOffsetReg());
1157 MIB.addImm(IsSigned);
1158 MIB.addImm(Addr.getShift() != 0);
1159 } else
1160 MIB.addReg(Addr.getReg()).addImm(Offset);
1161 }
1162
1163 if (MMO)
1164 MIB.addMemOperand(MMO);
1165}
1166
1167unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1168 const Value *RHS, bool SetFlags,
1169 bool WantResult, bool IsZExt) {
1171 bool NeedExtend = false;
1172 switch (RetVT.SimpleTy) {
1173 default:
1174 return 0;
1175 case MVT::i1:
1176 NeedExtend = true;
1177 break;
1178 case MVT::i8:
1179 NeedExtend = true;
1180 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1181 break;
1182 case MVT::i16:
1183 NeedExtend = true;
1184 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1185 break;
1186 case MVT::i32: // fall-through
1187 case MVT::i64:
1188 break;
1189 }
1190 MVT SrcVT = RetVT;
1191 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1192
1193 // Canonicalize immediates to the RHS first.
1194 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1195 std::swap(LHS, RHS);
1196
1197 // Canonicalize mul by power of 2 to the RHS.
1198 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1199 if (isMulPowOf2(LHS))
1200 std::swap(LHS, RHS);
1201
1202 // Canonicalize shift immediate to the RHS.
1203 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1204 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1205 if (isa<ConstantInt>(SI->getOperand(1)))
1206 if (SI->getOpcode() == Instruction::Shl ||
1207 SI->getOpcode() == Instruction::LShr ||
1208 SI->getOpcode() == Instruction::AShr )
1209 std::swap(LHS, RHS);
1210
1211 Register LHSReg = getRegForValue(LHS);
1212 if (!LHSReg)
1213 return 0;
1214
1215 if (NeedExtend)
1216 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1217
1218 unsigned ResultReg = 0;
1219 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1220 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1221 if (C->isNegative())
1222 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1223 WantResult);
1224 else
1225 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1226 WantResult);
1227 } else if (const auto *C = dyn_cast<Constant>(RHS))
1228 if (C->isNullValue())
1229 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1230
1231 if (ResultReg)
1232 return ResultReg;
1233
1234 // Only extend the RHS within the instruction if there is a valid extend type.
1235 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1236 isValueAvailable(RHS)) {
1237 Register RHSReg = getRegForValue(RHS);
1238 if (!RHSReg)
1239 return 0;
1240 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1241 SetFlags, WantResult);
1242 }
1243
1244 // Check if the mul can be folded into the instruction.
1245 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1246 if (isMulPowOf2(RHS)) {
1247 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1248 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1249
1250 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1251 if (C->getValue().isPowerOf2())
1252 std::swap(MulLHS, MulRHS);
1253
1254 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1255 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1256 Register RHSReg = getRegForValue(MulLHS);
1257 if (!RHSReg)
1258 return 0;
1259 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1260 ShiftVal, SetFlags, WantResult);
1261 if (ResultReg)
1262 return ResultReg;
1263 }
1264 }
1265
1266 // Check if the shift can be folded into the instruction.
1267 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1268 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1269 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1271 switch (SI->getOpcode()) {
1272 default: break;
1273 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1274 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1275 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1276 }
1277 uint64_t ShiftVal = C->getZExtValue();
1278 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1279 Register RHSReg = getRegForValue(SI->getOperand(0));
1280 if (!RHSReg)
1281 return 0;
1282 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1283 ShiftVal, SetFlags, WantResult);
1284 if (ResultReg)
1285 return ResultReg;
1286 }
1287 }
1288 }
1289 }
1290
1291 Register RHSReg = getRegForValue(RHS);
1292 if (!RHSReg)
1293 return 0;
1294
1295 if (NeedExtend)
1296 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1297
1298 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1299}
1300
1301unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1302 unsigned RHSReg, bool SetFlags,
1303 bool WantResult) {
1304 assert(LHSReg && RHSReg && "Invalid register number.");
1305
1306 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1307 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1308 return 0;
1309
1310 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1311 return 0;
1312
1313 static const unsigned OpcTable[2][2][2] = {
1314 { { AArch64::SUBWrr, AArch64::SUBXrr },
1315 { AArch64::ADDWrr, AArch64::ADDXrr } },
1316 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1317 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1318 };
1319 bool Is64Bit = RetVT == MVT::i64;
1320 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1321 const TargetRegisterClass *RC =
1322 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1323 unsigned ResultReg;
1324 if (WantResult)
1325 ResultReg = createResultReg(RC);
1326 else
1327 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1328
1329 const MCInstrDesc &II = TII.get(Opc);
1330 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1331 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1332 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1333 .addReg(LHSReg)
1334 .addReg(RHSReg);
1335 return ResultReg;
1336}
1337
1338unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1339 uint64_t Imm, bool SetFlags,
1340 bool WantResult) {
1341 assert(LHSReg && "Invalid register number.");
1342
1343 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1344 return 0;
1345
1346 unsigned ShiftImm;
1347 if (isUInt<12>(Imm))
1348 ShiftImm = 0;
1349 else if ((Imm & 0xfff000) == Imm) {
1350 ShiftImm = 12;
1351 Imm >>= 12;
1352 } else
1353 return 0;
1354
1355 static const unsigned OpcTable[2][2][2] = {
1356 { { AArch64::SUBWri, AArch64::SUBXri },
1357 { AArch64::ADDWri, AArch64::ADDXri } },
1358 { { AArch64::SUBSWri, AArch64::SUBSXri },
1359 { AArch64::ADDSWri, AArch64::ADDSXri } }
1360 };
1361 bool Is64Bit = RetVT == MVT::i64;
1362 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1363 const TargetRegisterClass *RC;
1364 if (SetFlags)
1365 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1366 else
1367 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1368 unsigned ResultReg;
1369 if (WantResult)
1370 ResultReg = createResultReg(RC);
1371 else
1372 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1373
1374 const MCInstrDesc &II = TII.get(Opc);
1375 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1376 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1377 .addReg(LHSReg)
1378 .addImm(Imm)
1379 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1380 return ResultReg;
1381}
1382
1383unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1384 unsigned RHSReg,
1386 uint64_t ShiftImm, bool SetFlags,
1387 bool WantResult) {
1388 assert(LHSReg && RHSReg && "Invalid register number.");
1389 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1390 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1391
1392 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1393 return 0;
1394
1395 // Don't deal with undefined shifts.
1396 if (ShiftImm >= RetVT.getSizeInBits())
1397 return 0;
1398
1399 static const unsigned OpcTable[2][2][2] = {
1400 { { AArch64::SUBWrs, AArch64::SUBXrs },
1401 { AArch64::ADDWrs, AArch64::ADDXrs } },
1402 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1403 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1404 };
1405 bool Is64Bit = RetVT == MVT::i64;
1406 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1407 const TargetRegisterClass *RC =
1408 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1409 unsigned ResultReg;
1410 if (WantResult)
1411 ResultReg = createResultReg(RC);
1412 else
1413 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1414
1415 const MCInstrDesc &II = TII.get(Opc);
1416 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1417 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1418 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1419 .addReg(LHSReg)
1420 .addReg(RHSReg)
1421 .addImm(getShifterImm(ShiftType, ShiftImm));
1422 return ResultReg;
1423}
1424
1425unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1426 unsigned RHSReg,
1428 uint64_t ShiftImm, bool SetFlags,
1429 bool WantResult) {
1430 assert(LHSReg && RHSReg && "Invalid register number.");
1431 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1432 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1433
1434 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1435 return 0;
1436
1437 if (ShiftImm >= 4)
1438 return 0;
1439
1440 static const unsigned OpcTable[2][2][2] = {
1441 { { AArch64::SUBWrx, AArch64::SUBXrx },
1442 { AArch64::ADDWrx, AArch64::ADDXrx } },
1443 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1444 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1445 };
1446 bool Is64Bit = RetVT == MVT::i64;
1447 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1448 const TargetRegisterClass *RC = nullptr;
1449 if (SetFlags)
1450 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1451 else
1452 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1453 unsigned ResultReg;
1454 if (WantResult)
1455 ResultReg = createResultReg(RC);
1456 else
1457 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1458
1459 const MCInstrDesc &II = TII.get(Opc);
1460 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1461 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1463 .addReg(LHSReg)
1464 .addReg(RHSReg)
1465 .addImm(getArithExtendImm(ExtType, ShiftImm));
1466 return ResultReg;
1467}
1468
1469bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1470 Type *Ty = LHS->getType();
1471 EVT EVT = TLI.getValueType(DL, Ty, true);
1472 if (!EVT.isSimple())
1473 return false;
1474 MVT VT = EVT.getSimpleVT();
1475
1476 switch (VT.SimpleTy) {
1477 default:
1478 return false;
1479 case MVT::i1:
1480 case MVT::i8:
1481 case MVT::i16:
1482 case MVT::i32:
1483 case MVT::i64:
1484 return emitICmp(VT, LHS, RHS, IsZExt);
1485 case MVT::f32:
1486 case MVT::f64:
1487 return emitFCmp(VT, LHS, RHS);
1488 }
1489}
1490
1491bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1492 bool IsZExt) {
1493 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1494 IsZExt) != 0;
1495}
1496
1497bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1498 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1499 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1500}
1501
1502bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1503 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1504 return false;
1505
1506 // Check to see if the 2nd operand is a constant that we can encode directly
1507 // in the compare.
1508 bool UseImm = false;
1509 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1510 if (CFP->isZero() && !CFP->isNegative())
1511 UseImm = true;
1512
1513 Register LHSReg = getRegForValue(LHS);
1514 if (!LHSReg)
1515 return false;
1516
1517 if (UseImm) {
1518 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1519 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1520 .addReg(LHSReg);
1521 return true;
1522 }
1523
1524 Register RHSReg = getRegForValue(RHS);
1525 if (!RHSReg)
1526 return false;
1527
1528 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1529 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1530 .addReg(LHSReg)
1531 .addReg(RHSReg);
1532 return true;
1533}
1534
1535unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1536 bool SetFlags, bool WantResult, bool IsZExt) {
1537 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1538 IsZExt);
1539}
1540
1541/// This method is a wrapper to simplify add emission.
1542///
1543/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1544/// that fails, then try to materialize the immediate into a register and use
1545/// emitAddSub_rr instead.
1546unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1547 unsigned ResultReg;
1548 if (Imm < 0)
1549 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1550 else
1551 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1552
1553 if (ResultReg)
1554 return ResultReg;
1555
1556 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1557 if (!CReg)
1558 return 0;
1559
1560 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1561 return ResultReg;
1562}
1563
1564unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1565 bool SetFlags, bool WantResult, bool IsZExt) {
1566 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1567 IsZExt);
1568}
1569
1570unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1571 unsigned RHSReg, bool WantResult) {
1572 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1573 /*SetFlags=*/true, WantResult);
1574}
1575
1576unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1577 unsigned RHSReg,
1579 uint64_t ShiftImm, bool WantResult) {
1580 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1581 ShiftImm, /*SetFlags=*/true, WantResult);
1582}
1583
1584unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1585 const Value *LHS, const Value *RHS) {
1586 // Canonicalize immediates to the RHS first.
1587 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1588 std::swap(LHS, RHS);
1589
1590 // Canonicalize mul by power-of-2 to the RHS.
1591 if (LHS->hasOneUse() && isValueAvailable(LHS))
1592 if (isMulPowOf2(LHS))
1593 std::swap(LHS, RHS);
1594
1595 // Canonicalize shift immediate to the RHS.
1596 if (LHS->hasOneUse() && isValueAvailable(LHS))
1597 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1598 if (isa<ConstantInt>(SI->getOperand(1)))
1599 std::swap(LHS, RHS);
1600
1601 Register LHSReg = getRegForValue(LHS);
1602 if (!LHSReg)
1603 return 0;
1604
1605 unsigned ResultReg = 0;
1606 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1607 uint64_t Imm = C->getZExtValue();
1608 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1609 }
1610 if (ResultReg)
1611 return ResultReg;
1612
1613 // Check if the mul can be folded into the instruction.
1614 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1615 if (isMulPowOf2(RHS)) {
1616 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1617 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1618
1619 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1620 if (C->getValue().isPowerOf2())
1621 std::swap(MulLHS, MulRHS);
1622
1623 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1624 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1625
1626 Register RHSReg = getRegForValue(MulLHS);
1627 if (!RHSReg)
1628 return 0;
1629 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1630 if (ResultReg)
1631 return ResultReg;
1632 }
1633 }
1634
1635 // Check if the shift can be folded into the instruction.
1636 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1637 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1638 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1639 uint64_t ShiftVal = C->getZExtValue();
1640 Register RHSReg = getRegForValue(SI->getOperand(0));
1641 if (!RHSReg)
1642 return 0;
1643 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1644 if (ResultReg)
1645 return ResultReg;
1646 }
1647 }
1648
1649 Register RHSReg = getRegForValue(RHS);
1650 if (!RHSReg)
1651 return 0;
1652
1653 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1654 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1655 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1656 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1657 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1658 }
1659 return ResultReg;
1660}
1661
1662unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1663 unsigned LHSReg, uint64_t Imm) {
1664 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1665 "ISD nodes are not consecutive!");
1666 static const unsigned OpcTable[3][2] = {
1667 { AArch64::ANDWri, AArch64::ANDXri },
1668 { AArch64::ORRWri, AArch64::ORRXri },
1669 { AArch64::EORWri, AArch64::EORXri }
1670 };
1671 const TargetRegisterClass *RC;
1672 unsigned Opc;
1673 unsigned RegSize;
1674 switch (RetVT.SimpleTy) {
1675 default:
1676 return 0;
1677 case MVT::i1:
1678 case MVT::i8:
1679 case MVT::i16:
1680 case MVT::i32: {
1681 unsigned Idx = ISDOpc - ISD::AND;
1682 Opc = OpcTable[Idx][0];
1683 RC = &AArch64::GPR32spRegClass;
1684 RegSize = 32;
1685 break;
1686 }
1687 case MVT::i64:
1688 Opc = OpcTable[ISDOpc - ISD::AND][1];
1689 RC = &AArch64::GPR64spRegClass;
1690 RegSize = 64;
1691 break;
1692 }
1693
1695 return 0;
1696
1697 Register ResultReg =
1698 fastEmitInst_ri(Opc, RC, LHSReg,
1700 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1701 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1702 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1703 }
1704 return ResultReg;
1705}
1706
1707unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1708 unsigned LHSReg, unsigned RHSReg,
1709 uint64_t ShiftImm) {
1710 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1711 "ISD nodes are not consecutive!");
1712 static const unsigned OpcTable[3][2] = {
1713 { AArch64::ANDWrs, AArch64::ANDXrs },
1714 { AArch64::ORRWrs, AArch64::ORRXrs },
1715 { AArch64::EORWrs, AArch64::EORXrs }
1716 };
1717
1718 // Don't deal with undefined shifts.
1719 if (ShiftImm >= RetVT.getSizeInBits())
1720 return 0;
1721
1722 const TargetRegisterClass *RC;
1723 unsigned Opc;
1724 switch (RetVT.SimpleTy) {
1725 default:
1726 return 0;
1727 case MVT::i1:
1728 case MVT::i8:
1729 case MVT::i16:
1730 case MVT::i32:
1731 Opc = OpcTable[ISDOpc - ISD::AND][0];
1732 RC = &AArch64::GPR32RegClass;
1733 break;
1734 case MVT::i64:
1735 Opc = OpcTable[ISDOpc - ISD::AND][1];
1736 RC = &AArch64::GPR64RegClass;
1737 break;
1738 }
1739 Register ResultReg =
1740 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1742 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1743 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1744 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1745 }
1746 return ResultReg;
1747}
1748
1749unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1750 uint64_t Imm) {
1751 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1752}
1753
1754unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1755 bool WantZExt, MachineMemOperand *MMO) {
1756 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1757 return 0;
1758
1759 // Simplify this down to something we can handle.
1760 if (!simplifyAddress(Addr, VT))
1761 return 0;
1762
1763 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1764 if (!ScaleFactor)
1765 llvm_unreachable("Unexpected value type.");
1766
1767 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1768 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1769 bool UseScaled = true;
1770 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1771 UseScaled = false;
1772 ScaleFactor = 1;
1773 }
1774
1775 static const unsigned GPOpcTable[2][8][4] = {
1776 // Sign-extend.
1777 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1778 AArch64::LDURXi },
1779 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1780 AArch64::LDURXi },
1781 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1782 AArch64::LDRXui },
1783 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1784 AArch64::LDRXui },
1785 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1786 AArch64::LDRXroX },
1787 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1788 AArch64::LDRXroX },
1789 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1790 AArch64::LDRXroW },
1791 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1792 AArch64::LDRXroW }
1793 },
1794 // Zero-extend.
1795 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796 AArch64::LDURXi },
1797 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1798 AArch64::LDURXi },
1799 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800 AArch64::LDRXui },
1801 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1802 AArch64::LDRXui },
1803 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804 AArch64::LDRXroX },
1805 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1806 AArch64::LDRXroX },
1807 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808 AArch64::LDRXroW },
1809 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1810 AArch64::LDRXroW }
1811 }
1812 };
1813
1814 static const unsigned FPOpcTable[4][2] = {
1815 { AArch64::LDURSi, AArch64::LDURDi },
1816 { AArch64::LDRSui, AArch64::LDRDui },
1817 { AArch64::LDRSroX, AArch64::LDRDroX },
1818 { AArch64::LDRSroW, AArch64::LDRDroW }
1819 };
1820
1821 unsigned Opc;
1822 const TargetRegisterClass *RC;
1823 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1824 Addr.getOffsetReg();
1825 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1826 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1827 Addr.getExtendType() == AArch64_AM::SXTW)
1828 Idx++;
1829
1830 bool IsRet64Bit = RetVT == MVT::i64;
1831 switch (VT.SimpleTy) {
1832 default:
1833 llvm_unreachable("Unexpected value type.");
1834 case MVT::i1: // Intentional fall-through.
1835 case MVT::i8:
1836 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1837 RC = (IsRet64Bit && !WantZExt) ?
1838 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1839 break;
1840 case MVT::i16:
1841 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1842 RC = (IsRet64Bit && !WantZExt) ?
1843 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1844 break;
1845 case MVT::i32:
1846 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1847 RC = (IsRet64Bit && !WantZExt) ?
1848 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1849 break;
1850 case MVT::i64:
1851 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1852 RC = &AArch64::GPR64RegClass;
1853 break;
1854 case MVT::f32:
1855 Opc = FPOpcTable[Idx][0];
1856 RC = &AArch64::FPR32RegClass;
1857 break;
1858 case MVT::f64:
1859 Opc = FPOpcTable[Idx][1];
1860 RC = &AArch64::FPR64RegClass;
1861 break;
1862 }
1863
1864 // Create the base instruction, then add the operands.
1865 Register ResultReg = createResultReg(RC);
1866 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1867 TII.get(Opc), ResultReg);
1868 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1869
1870 // Loading an i1 requires special handling.
1871 if (VT == MVT::i1) {
1872 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1873 assert(ANDReg && "Unexpected AND instruction emission failure.");
1874 ResultReg = ANDReg;
1875 }
1876
1877 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1878 // the 32bit reg to a 64bit reg.
1879 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1880 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1881 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1882 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1883 .addImm(0)
1884 .addReg(ResultReg, getKillRegState(true))
1885 .addImm(AArch64::sub_32);
1886 ResultReg = Reg64;
1887 }
1888 return ResultReg;
1889}
1890
1891bool AArch64FastISel::selectAddSub(const Instruction *I) {
1892 MVT VT;
1893 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1894 return false;
1895
1896 if (VT.isVector())
1897 return selectOperator(I, I->getOpcode());
1898
1899 unsigned ResultReg;
1900 switch (I->getOpcode()) {
1901 default:
1902 llvm_unreachable("Unexpected instruction.");
1903 case Instruction::Add:
1904 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1905 break;
1906 case Instruction::Sub:
1907 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1908 break;
1909 }
1910 if (!ResultReg)
1911 return false;
1912
1913 updateValueMap(I, ResultReg);
1914 return true;
1915}
1916
1917bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1918 MVT VT;
1919 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1920 return false;
1921
1922 if (VT.isVector())
1923 return selectOperator(I, I->getOpcode());
1924
1925 unsigned ResultReg;
1926 switch (I->getOpcode()) {
1927 default:
1928 llvm_unreachable("Unexpected instruction.");
1929 case Instruction::And:
1930 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1932 case Instruction::Or:
1933 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1934 break;
1935 case Instruction::Xor:
1936 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1937 break;
1938 }
1939 if (!ResultReg)
1940 return false;
1941
1942 updateValueMap(I, ResultReg);
1943 return true;
1944}
1945
1946bool AArch64FastISel::selectLoad(const Instruction *I) {
1947 MVT VT;
1948 // Verify we have a legal type before going any further. Currently, we handle
1949 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1950 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1951 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1952 cast<LoadInst>(I)->isAtomic())
1953 return false;
1954
1955 const Value *SV = I->getOperand(0);
1956 if (TLI.supportSwiftError()) {
1957 // Swifterror values can come from either a function parameter with
1958 // swifterror attribute or an alloca with swifterror attribute.
1959 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1960 if (Arg->hasSwiftErrorAttr())
1961 return false;
1962 }
1963
1964 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1965 if (Alloca->isSwiftError())
1966 return false;
1967 }
1968 }
1969
1970 // See if we can handle this address.
1971 Address Addr;
1972 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1973 return false;
1974
1975 // Fold the following sign-/zero-extend into the load instruction.
1976 bool WantZExt = true;
1977 MVT RetVT = VT;
1978 const Value *IntExtVal = nullptr;
1979 if (I->hasOneUse()) {
1980 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1981 if (isTypeSupported(ZE->getType(), RetVT))
1982 IntExtVal = ZE;
1983 else
1984 RetVT = VT;
1985 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1986 if (isTypeSupported(SE->getType(), RetVT))
1987 IntExtVal = SE;
1988 else
1989 RetVT = VT;
1990 WantZExt = false;
1991 }
1992 }
1993
1994 unsigned ResultReg =
1995 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1996 if (!ResultReg)
1997 return false;
1998
1999 // There are a few different cases we have to handle, because the load or the
2000 // sign-/zero-extend might not be selected by FastISel if we fall-back to
2001 // SelectionDAG. There is also an ordering issue when both instructions are in
2002 // different basic blocks.
2003 // 1.) The load instruction is selected by FastISel, but the integer extend
2004 // not. This usually happens when the integer extend is in a different
2005 // basic block and SelectionDAG took over for that basic block.
2006 // 2.) The load instruction is selected before the integer extend. This only
2007 // happens when the integer extend is in a different basic block.
2008 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2009 // by FastISel. This happens if there are instructions between the load
2010 // and the integer extend that couldn't be selected by FastISel.
2011 if (IntExtVal) {
2012 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2013 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2014 // it when it selects the integer extend.
2015 Register Reg = lookUpRegForValue(IntExtVal);
2016 auto *MI = MRI.getUniqueVRegDef(Reg);
2017 if (!MI) {
2018 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2019 if (WantZExt) {
2020 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2021 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2022 ResultReg = std::prev(I)->getOperand(0).getReg();
2023 removeDeadCode(I, std::next(I));
2024 } else
2025 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2026 AArch64::sub_32);
2027 }
2028 updateValueMap(I, ResultReg);
2029 return true;
2030 }
2031
2032 // The integer extend has already been emitted - delete all the instructions
2033 // that have been emitted by the integer extend lowering code and use the
2034 // result from the load instruction directly.
2035 while (MI) {
2036 Reg = 0;
2037 for (auto &Opnd : MI->uses()) {
2038 if (Opnd.isReg()) {
2039 Reg = Opnd.getReg();
2040 break;
2041 }
2042 }
2044 removeDeadCode(I, std::next(I));
2045 MI = nullptr;
2046 if (Reg)
2047 MI = MRI.getUniqueVRegDef(Reg);
2048 }
2049 updateValueMap(IntExtVal, ResultReg);
2050 return true;
2051 }
2052
2053 updateValueMap(I, ResultReg);
2054 return true;
2055}
2056
2057bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2058 unsigned AddrReg,
2059 MachineMemOperand *MMO) {
2060 unsigned Opc;
2061 switch (VT.SimpleTy) {
2062 default: return false;
2063 case MVT::i8: Opc = AArch64::STLRB; break;
2064 case MVT::i16: Opc = AArch64::STLRH; break;
2065 case MVT::i32: Opc = AArch64::STLRW; break;
2066 case MVT::i64: Opc = AArch64::STLRX; break;
2067 }
2068
2069 const MCInstrDesc &II = TII.get(Opc);
2070 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2071 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2072 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2073 .addReg(SrcReg)
2074 .addReg(AddrReg)
2075 .addMemOperand(MMO);
2076 return true;
2077}
2078
2079bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2080 MachineMemOperand *MMO) {
2081 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2082 return false;
2083
2084 // Simplify this down to something we can handle.
2085 if (!simplifyAddress(Addr, VT))
2086 return false;
2087
2088 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2089 if (!ScaleFactor)
2090 llvm_unreachable("Unexpected value type.");
2091
2092 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2093 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2094 bool UseScaled = true;
2095 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2096 UseScaled = false;
2097 ScaleFactor = 1;
2098 }
2099
2100 static const unsigned OpcTable[4][6] = {
2101 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2102 AArch64::STURSi, AArch64::STURDi },
2103 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2104 AArch64::STRSui, AArch64::STRDui },
2105 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2106 AArch64::STRSroX, AArch64::STRDroX },
2107 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2108 AArch64::STRSroW, AArch64::STRDroW }
2109 };
2110
2111 unsigned Opc;
2112 bool VTIsi1 = false;
2113 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2114 Addr.getOffsetReg();
2115 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2116 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2117 Addr.getExtendType() == AArch64_AM::SXTW)
2118 Idx++;
2119
2120 switch (VT.SimpleTy) {
2121 default: llvm_unreachable("Unexpected value type.");
2122 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2123 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2124 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2125 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2126 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2127 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2128 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2129 }
2130
2131 // Storing an i1 requires special handling.
2132 if (VTIsi1 && SrcReg != AArch64::WZR) {
2133 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2134 assert(ANDReg && "Unexpected AND instruction emission failure.");
2135 SrcReg = ANDReg;
2136 }
2137 // Create the base instruction, then add the operands.
2138 const MCInstrDesc &II = TII.get(Opc);
2139 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2141 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2142 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2143
2144 return true;
2145}
2146
2147bool AArch64FastISel::selectStore(const Instruction *I) {
2148 MVT VT;
2149 const Value *Op0 = I->getOperand(0);
2150 // Verify we have a legal type before going any further. Currently, we handle
2151 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2152 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2153 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2154 return false;
2155
2156 const Value *PtrV = I->getOperand(1);
2157 if (TLI.supportSwiftError()) {
2158 // Swifterror values can come from either a function parameter with
2159 // swifterror attribute or an alloca with swifterror attribute.
2160 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2161 if (Arg->hasSwiftErrorAttr())
2162 return false;
2163 }
2164
2165 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2166 if (Alloca->isSwiftError())
2167 return false;
2168 }
2169 }
2170
2171 // Get the value to be stored into a register. Use the zero register directly
2172 // when possible to avoid an unnecessary copy and a wasted register.
2173 unsigned SrcReg = 0;
2174 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2175 if (CI->isZero())
2176 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2177 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2178 if (CF->isZero() && !CF->isNegative()) {
2180 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2181 }
2182 }
2183
2184 if (!SrcReg)
2185 SrcReg = getRegForValue(Op0);
2186
2187 if (!SrcReg)
2188 return false;
2189
2190 auto *SI = cast<StoreInst>(I);
2191
2192 // Try to emit a STLR for seq_cst/release.
2193 if (SI->isAtomic()) {
2194 AtomicOrdering Ord = SI->getOrdering();
2195 // The non-atomic instructions are sufficient for relaxed stores.
2196 if (isReleaseOrStronger(Ord)) {
2197 // The STLR addressing mode only supports a base reg; pass that directly.
2198 Register AddrReg = getRegForValue(PtrV);
2199 return emitStoreRelease(VT, SrcReg, AddrReg,
2200 createMachineMemOperandFor(I));
2201 }
2202 }
2203
2204 // See if we can handle this address.
2205 Address Addr;
2206 if (!computeAddress(PtrV, Addr, Op0->getType()))
2207 return false;
2208
2209 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2210 return false;
2211 return true;
2212}
2213
2215 switch (Pred) {
2216 case CmpInst::FCMP_ONE:
2217 case CmpInst::FCMP_UEQ:
2218 default:
2219 // AL is our "false" for now. The other two need more compares.
2220 return AArch64CC::AL;
2221 case CmpInst::ICMP_EQ:
2222 case CmpInst::FCMP_OEQ:
2223 return AArch64CC::EQ;
2224 case CmpInst::ICMP_SGT:
2225 case CmpInst::FCMP_OGT:
2226 return AArch64CC::GT;
2227 case CmpInst::ICMP_SGE:
2228 case CmpInst::FCMP_OGE:
2229 return AArch64CC::GE;
2230 case CmpInst::ICMP_UGT:
2231 case CmpInst::FCMP_UGT:
2232 return AArch64CC::HI;
2233 case CmpInst::FCMP_OLT:
2234 return AArch64CC::MI;
2235 case CmpInst::ICMP_ULE:
2236 case CmpInst::FCMP_OLE:
2237 return AArch64CC::LS;
2238 case CmpInst::FCMP_ORD:
2239 return AArch64CC::VC;
2240 case CmpInst::FCMP_UNO:
2241 return AArch64CC::VS;
2242 case CmpInst::FCMP_UGE:
2243 return AArch64CC::PL;
2244 case CmpInst::ICMP_SLT:
2245 case CmpInst::FCMP_ULT:
2246 return AArch64CC::LT;
2247 case CmpInst::ICMP_SLE:
2248 case CmpInst::FCMP_ULE:
2249 return AArch64CC::LE;
2250 case CmpInst::FCMP_UNE:
2251 case CmpInst::ICMP_NE:
2252 return AArch64CC::NE;
2253 case CmpInst::ICMP_UGE:
2254 return AArch64CC::HS;
2255 case CmpInst::ICMP_ULT:
2256 return AArch64CC::LO;
2257 }
2258}
2259
2260/// Try to emit a combined compare-and-branch instruction.
2261bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2262 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2263 // will not be produced, as they are conditional branch instructions that do
2264 // not set flags.
2265 if (FuncInfo.MF->getFunction().hasFnAttribute(
2266 Attribute::SpeculativeLoadHardening))
2267 return false;
2268
2269 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2270 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2271 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2272
2273 const Value *LHS = CI->getOperand(0);
2274 const Value *RHS = CI->getOperand(1);
2275
2276 MVT VT;
2277 if (!isTypeSupported(LHS->getType(), VT))
2278 return false;
2279
2280 unsigned BW = VT.getSizeInBits();
2281 if (BW > 64)
2282 return false;
2283
2284 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2285 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2286
2287 // Try to take advantage of fallthrough opportunities.
2288 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2289 std::swap(TBB, FBB);
2291 }
2292
2293 int TestBit = -1;
2294 bool IsCmpNE;
2295 switch (Predicate) {
2296 default:
2297 return false;
2298 case CmpInst::ICMP_EQ:
2299 case CmpInst::ICMP_NE:
2300 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2301 std::swap(LHS, RHS);
2302
2303 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2304 return false;
2305
2306 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2307 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2308 const Value *AndLHS = AI->getOperand(0);
2309 const Value *AndRHS = AI->getOperand(1);
2310
2311 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2312 if (C->getValue().isPowerOf2())
2313 std::swap(AndLHS, AndRHS);
2314
2315 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2316 if (C->getValue().isPowerOf2()) {
2317 TestBit = C->getValue().logBase2();
2318 LHS = AndLHS;
2319 }
2320 }
2321
2322 if (VT == MVT::i1)
2323 TestBit = 0;
2324
2325 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2326 break;
2327 case CmpInst::ICMP_SLT:
2328 case CmpInst::ICMP_SGE:
2329 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2330 return false;
2331
2332 TestBit = BW - 1;
2333 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2334 break;
2335 case CmpInst::ICMP_SGT:
2336 case CmpInst::ICMP_SLE:
2337 if (!isa<ConstantInt>(RHS))
2338 return false;
2339
2340 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2341 return false;
2342
2343 TestBit = BW - 1;
2344 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2345 break;
2346 } // end switch
2347
2348 static const unsigned OpcTable[2][2][2] = {
2349 { {AArch64::CBZW, AArch64::CBZX },
2350 {AArch64::CBNZW, AArch64::CBNZX} },
2351 { {AArch64::TBZW, AArch64::TBZX },
2352 {AArch64::TBNZW, AArch64::TBNZX} }
2353 };
2354
2355 bool IsBitTest = TestBit != -1;
2356 bool Is64Bit = BW == 64;
2357 if (TestBit < 32 && TestBit >= 0)
2358 Is64Bit = false;
2359
2360 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2361 const MCInstrDesc &II = TII.get(Opc);
2362
2363 Register SrcReg = getRegForValue(LHS);
2364 if (!SrcReg)
2365 return false;
2366
2367 if (BW == 64 && !Is64Bit)
2368 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2369
2370 if ((BW < 32) && !IsBitTest)
2371 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2372
2373 // Emit the combined compare and branch instruction.
2374 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2376 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2377 .addReg(SrcReg);
2378 if (IsBitTest)
2379 MIB.addImm(TestBit);
2380 MIB.addMBB(TBB);
2381
2382 finishCondBranch(BI->getParent(), TBB, FBB);
2383 return true;
2384}
2385
2386bool AArch64FastISel::selectBranch(const Instruction *I) {
2387 const BranchInst *BI = cast<BranchInst>(I);
2388 if (BI->isUnconditional()) {
2389 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2390 fastEmitBranch(MSucc, BI->getDebugLoc());
2391 return true;
2392 }
2393
2394 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2395 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2396
2397 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2398 if (CI->hasOneUse() && isValueAvailable(CI)) {
2399 // Try to optimize or fold the cmp.
2400 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2401 switch (Predicate) {
2402 default:
2403 break;
2405 fastEmitBranch(FBB, MIMD.getDL());
2406 return true;
2407 case CmpInst::FCMP_TRUE:
2408 fastEmitBranch(TBB, MIMD.getDL());
2409 return true;
2410 }
2411
2412 // Try to emit a combined compare-and-branch first.
2413 if (emitCompareAndBranch(BI))
2414 return true;
2415
2416 // Try to take advantage of fallthrough opportunities.
2417 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2418 std::swap(TBB, FBB);
2420 }
2421
2422 // Emit the cmp.
2423 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2424 return false;
2425
2426 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2427 // instruction.
2428 AArch64CC::CondCode CC = getCompareCC(Predicate);
2430 switch (Predicate) {
2431 default:
2432 break;
2433 case CmpInst::FCMP_UEQ:
2434 ExtraCC = AArch64CC::EQ;
2435 CC = AArch64CC::VS;
2436 break;
2437 case CmpInst::FCMP_ONE:
2438 ExtraCC = AArch64CC::MI;
2439 CC = AArch64CC::GT;
2440 break;
2441 }
2442 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2443
2444 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2445 if (ExtraCC != AArch64CC::AL) {
2446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2447 .addImm(ExtraCC)
2448 .addMBB(TBB);
2449 }
2450
2451 // Emit the branch.
2452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2453 .addImm(CC)
2454 .addMBB(TBB);
2455
2456 finishCondBranch(BI->getParent(), TBB, FBB);
2457 return true;
2458 }
2459 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2460 uint64_t Imm = CI->getZExtValue();
2461 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2463 .addMBB(Target);
2464
2465 // Obtain the branch probability and add the target to the successor list.
2466 if (FuncInfo.BPI) {
2467 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2468 BI->getParent(), Target->getBasicBlock());
2469 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2470 } else
2471 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2472 return true;
2473 } else {
2475 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2476 // Fake request the condition, otherwise the intrinsic might be completely
2477 // optimized away.
2478 Register CondReg = getRegForValue(BI->getCondition());
2479 if (!CondReg)
2480 return false;
2481
2482 // Emit the branch.
2483 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2484 .addImm(CC)
2485 .addMBB(TBB);
2486
2487 finishCondBranch(BI->getParent(), TBB, FBB);
2488 return true;
2489 }
2490 }
2491
2492 Register CondReg = getRegForValue(BI->getCondition());
2493 if (CondReg == 0)
2494 return false;
2495
2496 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2497 unsigned Opcode = AArch64::TBNZW;
2498 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2499 std::swap(TBB, FBB);
2500 Opcode = AArch64::TBZW;
2501 }
2502
2503 const MCInstrDesc &II = TII.get(Opcode);
2504 Register ConstrainedCondReg
2505 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2506 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2507 .addReg(ConstrainedCondReg)
2508 .addImm(0)
2509 .addMBB(TBB);
2510
2511 finishCondBranch(BI->getParent(), TBB, FBB);
2512 return true;
2513}
2514
2515bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2516 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2517 Register AddrReg = getRegForValue(BI->getOperand(0));
2518 if (AddrReg == 0)
2519 return false;
2520
2521 // Authenticated indirectbr is not implemented yet.
2522 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2523 return false;
2524
2525 // Emit the indirect branch.
2526 const MCInstrDesc &II = TII.get(AArch64::BR);
2527 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2529
2530 // Make sure the CFG is up-to-date.
2531 for (const auto *Succ : BI->successors())
2532 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2533
2534 return true;
2535}
2536
2537bool AArch64FastISel::selectCmp(const Instruction *I) {
2538 const CmpInst *CI = cast<CmpInst>(I);
2539
2540 // Vectors of i1 are weird: bail out.
2541 if (CI->getType()->isVectorTy())
2542 return false;
2543
2544 // Try to optimize or fold the cmp.
2545 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2546 unsigned ResultReg = 0;
2547 switch (Predicate) {
2548 default:
2549 break;
2551 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2552 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2553 TII.get(TargetOpcode::COPY), ResultReg)
2554 .addReg(AArch64::WZR, getKillRegState(true));
2555 break;
2556 case CmpInst::FCMP_TRUE:
2557 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2558 break;
2559 }
2560
2561 if (ResultReg) {
2562 updateValueMap(I, ResultReg);
2563 return true;
2564 }
2565
2566 // Emit the cmp.
2567 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2568 return false;
2569
2570 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2571
2572 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2573 // condition codes are inverted, because they are used by CSINC.
2574 static unsigned CondCodeTable[2][2] = {
2577 };
2578 unsigned *CondCodes = nullptr;
2579 switch (Predicate) {
2580 default:
2581 break;
2582 case CmpInst::FCMP_UEQ:
2583 CondCodes = &CondCodeTable[0][0];
2584 break;
2585 case CmpInst::FCMP_ONE:
2586 CondCodes = &CondCodeTable[1][0];
2587 break;
2588 }
2589
2590 if (CondCodes) {
2591 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2593 TmpReg1)
2594 .addReg(AArch64::WZR, getKillRegState(true))
2595 .addReg(AArch64::WZR, getKillRegState(true))
2596 .addImm(CondCodes[0]);
2597 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2598 ResultReg)
2599 .addReg(TmpReg1, getKillRegState(true))
2600 .addReg(AArch64::WZR, getKillRegState(true))
2601 .addImm(CondCodes[1]);
2602
2603 updateValueMap(I, ResultReg);
2604 return true;
2605 }
2606
2607 // Now set a register based on the comparison.
2608 AArch64CC::CondCode CC = getCompareCC(Predicate);
2609 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2610 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2612 ResultReg)
2613 .addReg(AArch64::WZR, getKillRegState(true))
2614 .addReg(AArch64::WZR, getKillRegState(true))
2615 .addImm(invertedCC);
2616
2617 updateValueMap(I, ResultReg);
2618 return true;
2619}
2620
2621/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2622/// value.
2623bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2624 if (!SI->getType()->isIntegerTy(1))
2625 return false;
2626
2627 const Value *Src1Val, *Src2Val;
2628 unsigned Opc = 0;
2629 bool NeedExtraOp = false;
2630 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2631 if (CI->isOne()) {
2632 Src1Val = SI->getCondition();
2633 Src2Val = SI->getFalseValue();
2634 Opc = AArch64::ORRWrr;
2635 } else {
2636 assert(CI->isZero());
2637 Src1Val = SI->getFalseValue();
2638 Src2Val = SI->getCondition();
2639 Opc = AArch64::BICWrr;
2640 }
2641 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2642 if (CI->isOne()) {
2643 Src1Val = SI->getCondition();
2644 Src2Val = SI->getTrueValue();
2645 Opc = AArch64::ORRWrr;
2646 NeedExtraOp = true;
2647 } else {
2648 assert(CI->isZero());
2649 Src1Val = SI->getCondition();
2650 Src2Val = SI->getTrueValue();
2651 Opc = AArch64::ANDWrr;
2652 }
2653 }
2654
2655 if (!Opc)
2656 return false;
2657
2658 Register Src1Reg = getRegForValue(Src1Val);
2659 if (!Src1Reg)
2660 return false;
2661
2662 Register Src2Reg = getRegForValue(Src2Val);
2663 if (!Src2Reg)
2664 return false;
2665
2666 if (NeedExtraOp)
2667 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2668
2669 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2670 Src2Reg);
2671 updateValueMap(SI, ResultReg);
2672 return true;
2673}
2674
2675bool AArch64FastISel::selectSelect(const Instruction *I) {
2676 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2677 MVT VT;
2678 if (!isTypeSupported(I->getType(), VT))
2679 return false;
2680
2681 unsigned Opc;
2682 const TargetRegisterClass *RC;
2683 switch (VT.SimpleTy) {
2684 default:
2685 return false;
2686 case MVT::i1:
2687 case MVT::i8:
2688 case MVT::i16:
2689 case MVT::i32:
2690 Opc = AArch64::CSELWr;
2691 RC = &AArch64::GPR32RegClass;
2692 break;
2693 case MVT::i64:
2694 Opc = AArch64::CSELXr;
2695 RC = &AArch64::GPR64RegClass;
2696 break;
2697 case MVT::f32:
2698 Opc = AArch64::FCSELSrrr;
2699 RC = &AArch64::FPR32RegClass;
2700 break;
2701 case MVT::f64:
2702 Opc = AArch64::FCSELDrrr;
2703 RC = &AArch64::FPR64RegClass;
2704 break;
2705 }
2706
2707 const SelectInst *SI = cast<SelectInst>(I);
2708 const Value *Cond = SI->getCondition();
2711
2712 if (optimizeSelect(SI))
2713 return true;
2714
2715 // Try to pickup the flags, so we don't have to emit another compare.
2716 if (foldXALUIntrinsic(CC, I, Cond)) {
2717 // Fake request the condition to force emission of the XALU intrinsic.
2718 Register CondReg = getRegForValue(Cond);
2719 if (!CondReg)
2720 return false;
2721 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2722 isValueAvailable(Cond)) {
2723 const auto *Cmp = cast<CmpInst>(Cond);
2724 // Try to optimize or fold the cmp.
2725 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2726 const Value *FoldSelect = nullptr;
2727 switch (Predicate) {
2728 default:
2729 break;
2731 FoldSelect = SI->getFalseValue();
2732 break;
2733 case CmpInst::FCMP_TRUE:
2734 FoldSelect = SI->getTrueValue();
2735 break;
2736 }
2737
2738 if (FoldSelect) {
2739 Register SrcReg = getRegForValue(FoldSelect);
2740 if (!SrcReg)
2741 return false;
2742
2743 updateValueMap(I, SrcReg);
2744 return true;
2745 }
2746
2747 // Emit the cmp.
2748 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749 return false;
2750
2751 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752 CC = getCompareCC(Predicate);
2753 switch (Predicate) {
2754 default:
2755 break;
2756 case CmpInst::FCMP_UEQ:
2757 ExtraCC = AArch64CC::EQ;
2758 CC = AArch64CC::VS;
2759 break;
2760 case CmpInst::FCMP_ONE:
2761 ExtraCC = AArch64CC::MI;
2762 CC = AArch64CC::GT;
2763 break;
2764 }
2765 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766 } else {
2767 Register CondReg = getRegForValue(Cond);
2768 if (!CondReg)
2769 return false;
2770
2771 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2772 CondReg = constrainOperandRegClass(II, CondReg, 1);
2773
2774 // Emit a TST instruction (ANDS wzr, reg, #imm).
2775 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2776 AArch64::WZR)
2777 .addReg(CondReg)
2779 }
2780
2781 Register Src1Reg = getRegForValue(SI->getTrueValue());
2782 Register Src2Reg = getRegForValue(SI->getFalseValue());
2783
2784 if (!Src1Reg || !Src2Reg)
2785 return false;
2786
2787 if (ExtraCC != AArch64CC::AL)
2788 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2789
2790 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2791 updateValueMap(I, ResultReg);
2792 return true;
2793}
2794
2795bool AArch64FastISel::selectFPExt(const Instruction *I) {
2796 Value *V = I->getOperand(0);
2797 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2798 return false;
2799
2800 Register Op = getRegForValue(V);
2801 if (Op == 0)
2802 return false;
2803
2804 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2805 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2806 ResultReg).addReg(Op);
2807 updateValueMap(I, ResultReg);
2808 return true;
2809}
2810
2811bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2812 Value *V = I->getOperand(0);
2813 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2814 return false;
2815
2816 Register Op = getRegForValue(V);
2817 if (Op == 0)
2818 return false;
2819
2820 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2822 ResultReg).addReg(Op);
2823 updateValueMap(I, ResultReg);
2824 return true;
2825}
2826
2827// FPToUI and FPToSI
2828bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2829 MVT DestVT;
2830 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2831 return false;
2832
2833 Register SrcReg = getRegForValue(I->getOperand(0));
2834 if (SrcReg == 0)
2835 return false;
2836
2837 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2838 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2839 return false;
2840
2841 unsigned Opc;
2842 if (SrcVT == MVT::f64) {
2843 if (Signed)
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2845 else
2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2847 } else {
2848 if (Signed)
2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2850 else
2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2852 }
2853 Register ResultReg = createResultReg(
2854 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2856 .addReg(SrcReg);
2857 updateValueMap(I, ResultReg);
2858 return true;
2859}
2860
2861bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2862 MVT DestVT;
2863 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2864 return false;
2865 // Let regular ISEL handle FP16
2866 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2867 return false;
2868
2869 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2870 "Unexpected value type.");
2871
2872 Register SrcReg = getRegForValue(I->getOperand(0));
2873 if (!SrcReg)
2874 return false;
2875
2876 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2877
2878 // Handle sign-extension.
2879 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2880 SrcReg =
2881 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2882 if (!SrcReg)
2883 return false;
2884 }
2885
2886 unsigned Opc;
2887 if (SrcVT == MVT::i64) {
2888 if (Signed)
2889 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2890 else
2891 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2892 } else {
2893 if (Signed)
2894 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2895 else
2896 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2897 }
2898
2899 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2900 updateValueMap(I, ResultReg);
2901 return true;
2902}
2903
2904bool AArch64FastISel::fastLowerArguments() {
2905 if (!FuncInfo.CanLowerReturn)
2906 return false;
2907
2908 const Function *F = FuncInfo.Fn;
2909 if (F->isVarArg())
2910 return false;
2911
2912 CallingConv::ID CC = F->getCallingConv();
2914 return false;
2915
2916 if (Subtarget->hasCustomCallingConv())
2917 return false;
2918
2919 // Only handle simple cases of up to 8 GPR and FPR each.
2920 unsigned GPRCnt = 0;
2921 unsigned FPRCnt = 0;
2922 for (auto const &Arg : F->args()) {
2923 if (Arg.hasAttribute(Attribute::ByVal) ||
2924 Arg.hasAttribute(Attribute::InReg) ||
2925 Arg.hasAttribute(Attribute::StructRet) ||
2926 Arg.hasAttribute(Attribute::SwiftSelf) ||
2927 Arg.hasAttribute(Attribute::SwiftAsync) ||
2928 Arg.hasAttribute(Attribute::SwiftError) ||
2929 Arg.hasAttribute(Attribute::Nest))
2930 return false;
2931
2932 Type *ArgTy = Arg.getType();
2933 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2934 return false;
2935
2936 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2937 if (!ArgVT.isSimple())
2938 return false;
2939
2940 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2941 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2942 return false;
2943
2944 if (VT.isVector() &&
2945 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2946 return false;
2947
2948 if (VT >= MVT::i1 && VT <= MVT::i64)
2949 ++GPRCnt;
2950 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2951 VT.is128BitVector())
2952 ++FPRCnt;
2953 else
2954 return false;
2955
2956 if (GPRCnt > 8 || FPRCnt > 8)
2957 return false;
2958 }
2959
2960 static const MCPhysReg Registers[6][8] = {
2961 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2962 AArch64::W5, AArch64::W6, AArch64::W7 },
2963 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2964 AArch64::X5, AArch64::X6, AArch64::X7 },
2965 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2966 AArch64::H5, AArch64::H6, AArch64::H7 },
2967 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2968 AArch64::S5, AArch64::S6, AArch64::S7 },
2969 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2970 AArch64::D5, AArch64::D6, AArch64::D7 },
2971 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2972 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2973 };
2974
2975 unsigned GPRIdx = 0;
2976 unsigned FPRIdx = 0;
2977 for (auto const &Arg : F->args()) {
2978 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2979 unsigned SrcReg;
2980 const TargetRegisterClass *RC;
2981 if (VT >= MVT::i1 && VT <= MVT::i32) {
2982 SrcReg = Registers[0][GPRIdx++];
2983 RC = &AArch64::GPR32RegClass;
2984 VT = MVT::i32;
2985 } else if (VT == MVT::i64) {
2986 SrcReg = Registers[1][GPRIdx++];
2987 RC = &AArch64::GPR64RegClass;
2988 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2989 SrcReg = Registers[2][FPRIdx++];
2990 RC = &AArch64::FPR16RegClass;
2991 } else if (VT == MVT::f32) {
2992 SrcReg = Registers[3][FPRIdx++];
2993 RC = &AArch64::FPR32RegClass;
2994 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2995 SrcReg = Registers[4][FPRIdx++];
2996 RC = &AArch64::FPR64RegClass;
2997 } else if (VT.is128BitVector()) {
2998 SrcReg = Registers[5][FPRIdx++];
2999 RC = &AArch64::FPR128RegClass;
3000 } else
3001 llvm_unreachable("Unexpected value type.");
3002
3003 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3004 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3005 // Without this, EmitLiveInCopies may eliminate the livein if its only
3006 // use is a bitcast (which isn't turned into an instruction).
3007 Register ResultReg = createResultReg(RC);
3008 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3009 TII.get(TargetOpcode::COPY), ResultReg)
3010 .addReg(DstReg, getKillRegState(true));
3011 updateValueMap(&Arg, ResultReg);
3012 }
3013 return true;
3014}
3015
3016bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3017 SmallVectorImpl<MVT> &OutVTs,
3018 unsigned &NumBytes) {
3019 CallingConv::ID CC = CLI.CallConv;
3021 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3022 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3023
3024 // Get a count of how many bytes are to be pushed on the stack.
3025 NumBytes = CCInfo.getStackSize();
3026
3027 // Issue CALLSEQ_START
3028 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3030 .addImm(NumBytes).addImm(0);
3031
3032 // Process the args.
3033 for (CCValAssign &VA : ArgLocs) {
3034 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3035 MVT ArgVT = OutVTs[VA.getValNo()];
3036
3037 Register ArgReg = getRegForValue(ArgVal);
3038 if (!ArgReg)
3039 return false;
3040
3041 // Handle arg promotion: SExt, ZExt, AExt.
3042 switch (VA.getLocInfo()) {
3043 case CCValAssign::Full:
3044 break;
3045 case CCValAssign::SExt: {
3046 MVT DestVT = VA.getLocVT();
3047 MVT SrcVT = ArgVT;
3048 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3049 if (!ArgReg)
3050 return false;
3051 break;
3052 }
3053 case CCValAssign::AExt:
3054 // Intentional fall-through.
3055 case CCValAssign::ZExt: {
3056 MVT DestVT = VA.getLocVT();
3057 MVT SrcVT = ArgVT;
3058 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3059 if (!ArgReg)
3060 return false;
3061 break;
3062 }
3063 default:
3064 llvm_unreachable("Unknown arg promotion!");
3065 }
3066
3067 // Now copy/store arg to correct locations.
3068 if (VA.isRegLoc() && !VA.needsCustom()) {
3069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3070 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3071 CLI.OutRegs.push_back(VA.getLocReg());
3072 } else if (VA.needsCustom()) {
3073 // FIXME: Handle custom args.
3074 return false;
3075 } else {
3076 assert(VA.isMemLoc() && "Assuming store on stack.");
3077
3078 // Don't emit stores for undef values.
3079 if (isa<UndefValue>(ArgVal))
3080 continue;
3081
3082 // Need to store on the stack.
3083 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3084
3085 unsigned BEAlign = 0;
3086 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3087 BEAlign = 8 - ArgSize;
3088
3089 Address Addr;
3090 Addr.setKind(Address::RegBase);
3091 Addr.setReg(AArch64::SP);
3092 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3093
3094 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3095 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3096 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3097 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3098
3099 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3100 return false;
3101 }
3102 }
3103 return true;
3104}
3105
3106bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3107 CallingConv::ID CC = CLI.CallConv;
3108
3109 // Issue CALLSEQ_END
3110 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3112 .addImm(NumBytes).addImm(0);
3113
3114 // Now the return values.
3116 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3117 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3118
3119 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3120 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3121 CCValAssign &VA = RVLocs[i];
3122 MVT CopyVT = VA.getValVT();
3123 unsigned CopyReg = ResultReg + i;
3124
3125 // TODO: Handle big-endian results
3126 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3127 return false;
3128
3129 // Copy result out of their specified physreg.
3130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3131 CopyReg)
3132 .addReg(VA.getLocReg());
3133 CLI.InRegs.push_back(VA.getLocReg());
3134 }
3135
3136 CLI.ResultReg = ResultReg;
3137 CLI.NumResultRegs = RVLocs.size();
3138
3139 return true;
3140}
3141
3142bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3143 CallingConv::ID CC = CLI.CallConv;
3144 bool IsTailCall = CLI.IsTailCall;
3145 bool IsVarArg = CLI.IsVarArg;
3146 const Value *Callee = CLI.Callee;
3147 MCSymbol *Symbol = CLI.Symbol;
3148
3149 if (!Callee && !Symbol)
3150 return false;
3151
3152 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3153 // a bti instruction following the call.
3154 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3155 !Subtarget->noBTIAtReturnTwice() &&
3157 return false;
3158
3159 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3160 if (CLI.CB && CLI.CB->isIndirectCall() &&
3161 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3162 return false;
3163
3164 // Allow SelectionDAG isel to handle tail calls.
3165 if (IsTailCall)
3166 return false;
3167
3168 // FIXME: we could and should support this, but for now correctness at -O0 is
3169 // more important.
3170 if (Subtarget->isTargetILP32())
3171 return false;
3172
3173 CodeModel::Model CM = TM.getCodeModel();
3174 // Only support the small-addressing and large code models.
3175 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3176 return false;
3177
3178 // FIXME: Add large code model support for ELF.
3179 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3180 return false;
3181
3182 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3183 // attribute. Check "RtLibUseGOT" instead.
3184 if (MF->getFunction().getParent()->getRtLibUseGOT())
3185 return false;
3186
3187 // Let SDISel handle vararg functions.
3188 if (IsVarArg)
3189 return false;
3190
3191 if (Subtarget->isWindowsArm64EC())
3192 return false;
3193
3194 for (auto Flag : CLI.OutFlags)
3195 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3196 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3197 return false;
3198
3199 // Set up the argument vectors.
3200 SmallVector<MVT, 16> OutVTs;
3201 OutVTs.reserve(CLI.OutVals.size());
3202
3203 for (auto *Val : CLI.OutVals) {
3204 MVT VT;
3205 if (!isTypeLegal(Val->getType(), VT) &&
3206 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3207 return false;
3208
3209 // We don't handle vector parameters yet.
3210 if (VT.isVector() || VT.getSizeInBits() > 64)
3211 return false;
3212
3213 OutVTs.push_back(VT);
3214 }
3215
3216 Address Addr;
3217 if (Callee && !computeCallAddress(Callee, Addr))
3218 return false;
3219
3220 // The weak function target may be zero; in that case we must use indirect
3221 // addressing via a stub on windows as it may be out of range for a
3222 // PC-relative jump.
3223 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3224 Addr.getGlobalValue()->hasExternalWeakLinkage())
3225 return false;
3226
3227 // Handle the arguments now that we've gotten them.
3228 unsigned NumBytes;
3229 if (!processCallArgs(CLI, OutVTs, NumBytes))
3230 return false;
3231
3232 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3233 if (RegInfo->isAnyArgRegReserved(*MF))
3234 RegInfo->emitReservedArgRegCallError(*MF);
3235
3236 // Issue the call.
3238 if (Subtarget->useSmallAddressing()) {
3239 const MCInstrDesc &II =
3240 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3241 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3242 if (Symbol)
3243 MIB.addSym(Symbol, 0);
3244 else if (Addr.getGlobalValue())
3245 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3246 else if (Addr.getReg()) {
3248 MIB.addReg(Reg);
3249 } else
3250 return false;
3251 } else {
3252 unsigned CallReg = 0;
3253 if (Symbol) {
3254 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3255 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3256 ADRPReg)
3258
3259 CallReg = createResultReg(&AArch64::GPR64RegClass);
3260 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3261 TII.get(AArch64::LDRXui), CallReg)
3262 .addReg(ADRPReg)
3263 .addSym(Symbol,
3265 } else if (Addr.getGlobalValue())
3266 CallReg = materializeGV(Addr.getGlobalValue());
3267 else if (Addr.getReg())
3268 CallReg = Addr.getReg();
3269
3270 if (!CallReg)
3271 return false;
3272
3273 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3274 CallReg = constrainOperandRegClass(II, CallReg, 0);
3275 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3276 }
3277
3278 // Add implicit physical register uses to the call.
3279 for (auto Reg : CLI.OutRegs)
3280 MIB.addReg(Reg, RegState::Implicit);
3281
3282 // Add a register mask with the call-preserved registers.
3283 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3284 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3285
3286 CLI.Call = MIB;
3287
3288 // Finish off the call including any return values.
3289 return finishCall(CLI, NumBytes);
3290}
3291
3292bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3293 if (Alignment)
3294 return Len / Alignment->value() <= 4;
3295 else
3296 return Len < 32;
3297}
3298
3299bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3300 uint64_t Len, MaybeAlign Alignment) {
3301 // Make sure we don't bloat code by inlining very large memcpy's.
3302 if (!isMemCpySmall(Len, Alignment))
3303 return false;
3304
3305 int64_t UnscaledOffset = 0;
3306 Address OrigDest = Dest;
3307 Address OrigSrc = Src;
3308
3309 while (Len) {
3310 MVT VT;
3311 if (!Alignment || *Alignment >= 8) {
3312 if (Len >= 8)
3313 VT = MVT::i64;
3314 else if (Len >= 4)
3315 VT = MVT::i32;
3316 else if (Len >= 2)
3317 VT = MVT::i16;
3318 else {
3319 VT = MVT::i8;
3320 }
3321 } else {
3322 assert(Alignment && "Alignment is set in this branch");
3323 // Bound based on alignment.
3324 if (Len >= 4 && *Alignment == 4)
3325 VT = MVT::i32;
3326 else if (Len >= 2 && *Alignment == 2)
3327 VT = MVT::i16;
3328 else {
3329 VT = MVT::i8;
3330 }
3331 }
3332
3333 unsigned ResultReg = emitLoad(VT, VT, Src);
3334 if (!ResultReg)
3335 return false;
3336
3337 if (!emitStore(VT, ResultReg, Dest))
3338 return false;
3339
3340 int64_t Size = VT.getSizeInBits() / 8;
3341 Len -= Size;
3342 UnscaledOffset += Size;
3343
3344 // We need to recompute the unscaled offset for each iteration.
3345 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3346 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3347 }
3348
3349 return true;
3350}
3351
3352/// Check if it is possible to fold the condition from the XALU intrinsic
3353/// into the user. The condition code will only be updated on success.
3354bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3355 const Instruction *I,
3356 const Value *Cond) {
3357 if (!isa<ExtractValueInst>(Cond))
3358 return false;
3359
3360 const auto *EV = cast<ExtractValueInst>(Cond);
3361 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3362 return false;
3363
3364 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3365 MVT RetVT;
3366 const Function *Callee = II->getCalledFunction();
3367 Type *RetTy =
3368 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3369 if (!isTypeLegal(RetTy, RetVT))
3370 return false;
3371
3372 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3373 return false;
3374
3375 const Value *LHS = II->getArgOperand(0);
3376 const Value *RHS = II->getArgOperand(1);
3377
3378 // Canonicalize immediate to the RHS.
3379 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3380 std::swap(LHS, RHS);
3381
3382 // Simplify multiplies.
3383 Intrinsic::ID IID = II->getIntrinsicID();
3384 switch (IID) {
3385 default:
3386 break;
3387 case Intrinsic::smul_with_overflow:
3388 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3389 if (C->getValue() == 2)
3390 IID = Intrinsic::sadd_with_overflow;
3391 break;
3392 case Intrinsic::umul_with_overflow:
3393 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3394 if (C->getValue() == 2)
3395 IID = Intrinsic::uadd_with_overflow;
3396 break;
3397 }
3398
3399 AArch64CC::CondCode TmpCC;
3400 switch (IID) {
3401 default:
3402 return false;
3403 case Intrinsic::sadd_with_overflow:
3404 case Intrinsic::ssub_with_overflow:
3405 TmpCC = AArch64CC::VS;
3406 break;
3407 case Intrinsic::uadd_with_overflow:
3408 TmpCC = AArch64CC::HS;
3409 break;
3410 case Intrinsic::usub_with_overflow:
3411 TmpCC = AArch64CC::LO;
3412 break;
3413 case Intrinsic::smul_with_overflow:
3414 case Intrinsic::umul_with_overflow:
3415 TmpCC = AArch64CC::NE;
3416 break;
3417 }
3418
3419 // Check if both instructions are in the same basic block.
3420 if (!isValueAvailable(II))
3421 return false;
3422
3423 // Make sure nothing is in the way
3426 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3427 // We only expect extractvalue instructions between the intrinsic and the
3428 // instruction to be selected.
3429 if (!isa<ExtractValueInst>(Itr))
3430 return false;
3431
3432 // Check that the extractvalue operand comes from the intrinsic.
3433 const auto *EVI = cast<ExtractValueInst>(Itr);
3434 if (EVI->getAggregateOperand() != II)
3435 return false;
3436 }
3437
3438 CC = TmpCC;
3439 return true;
3440}
3441
3442bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3443 // FIXME: Handle more intrinsics.
3444 switch (II->getIntrinsicID()) {
3445 default: return false;
3446 case Intrinsic::frameaddress: {
3447 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3448 MFI.setFrameAddressIsTaken(true);
3449
3450 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3451 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3452 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3453 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3454 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3455 // Recursively load frame address
3456 // ldr x0, [fp]
3457 // ldr x0, [x0]
3458 // ldr x0, [x0]
3459 // ...
3460 unsigned DestReg;
3461 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3462 while (Depth--) {
3463 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3464 SrcReg, 0);
3465 assert(DestReg && "Unexpected LDR instruction emission failure.");
3466 SrcReg = DestReg;
3467 }
3468
3469 updateValueMap(II, SrcReg);
3470 return true;
3471 }
3472 case Intrinsic::sponentry: {
3473 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3474
3475 // SP = FP + Fixed Object + 16
3476 int FI = MFI.CreateFixedObject(4, 0, false);
3477 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3479 TII.get(AArch64::ADDXri), ResultReg)
3480 .addFrameIndex(FI)
3481 .addImm(0)
3482 .addImm(0);
3483
3484 updateValueMap(II, ResultReg);
3485 return true;
3486 }
3487 case Intrinsic::memcpy:
3488 case Intrinsic::memmove: {
3489 const auto *MTI = cast<MemTransferInst>(II);
3490 // Don't handle volatile.
3491 if (MTI->isVolatile())
3492 return false;
3493
3494 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3495 // we would emit dead code because we don't currently handle memmoves.
3496 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3497 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3498 // Small memcpy's are common enough that we want to do them without a call
3499 // if possible.
3500 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3501 MaybeAlign Alignment;
3502 if (MTI->getDestAlign() || MTI->getSourceAlign())
3503 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3504 MTI->getSourceAlign().valueOrOne());
3505 if (isMemCpySmall(Len, Alignment)) {
3506 Address Dest, Src;
3507 if (!computeAddress(MTI->getRawDest(), Dest) ||
3508 !computeAddress(MTI->getRawSource(), Src))
3509 return false;
3510 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3511 return true;
3512 }
3513 }
3514
3515 if (!MTI->getLength()->getType()->isIntegerTy(64))
3516 return false;
3517
3518 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3519 // Fast instruction selection doesn't support the special
3520 // address spaces.
3521 return false;
3522
3523 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3524 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3525 }
3526 case Intrinsic::memset: {
3527 const MemSetInst *MSI = cast<MemSetInst>(II);
3528 // Don't handle volatile.
3529 if (MSI->isVolatile())
3530 return false;
3531
3532 if (!MSI->getLength()->getType()->isIntegerTy(64))
3533 return false;
3534
3535 if (MSI->getDestAddressSpace() > 255)
3536 // Fast instruction selection doesn't support the special
3537 // address spaces.
3538 return false;
3539
3540 return lowerCallTo(II, "memset", II->arg_size() - 1);
3541 }
3542 case Intrinsic::sin:
3543 case Intrinsic::cos:
3544 case Intrinsic::tan:
3545 case Intrinsic::pow: {
3546 MVT RetVT;
3547 if (!isTypeLegal(II->getType(), RetVT))
3548 return false;
3549
3550 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3551 return false;
3552
3553 static const RTLIB::Libcall LibCallTable[4][2] = {
3554 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3555 {RTLIB::COS_F32, RTLIB::COS_F64},
3556 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3557 {RTLIB::POW_F32, RTLIB::POW_F64}};
3558 RTLIB::Libcall LC;
3559 bool Is64Bit = RetVT == MVT::f64;
3560 switch (II->getIntrinsicID()) {
3561 default:
3562 llvm_unreachable("Unexpected intrinsic.");
3563 case Intrinsic::sin:
3564 LC = LibCallTable[0][Is64Bit];
3565 break;
3566 case Intrinsic::cos:
3567 LC = LibCallTable[1][Is64Bit];
3568 break;
3569 case Intrinsic::tan:
3570 LC = LibCallTable[2][Is64Bit];
3571 break;
3572 case Intrinsic::pow:
3573 LC = LibCallTable[3][Is64Bit];
3574 break;
3575 }
3576
3577 ArgListTy Args;
3578 Args.reserve(II->arg_size());
3579
3580 // Populate the argument list.
3581 for (auto &Arg : II->args()) {
3582 ArgListEntry Entry;
3583 Entry.Val = Arg;
3584 Entry.Ty = Arg->getType();
3585 Args.push_back(Entry);
3586 }
3587
3588 CallLoweringInfo CLI;
3589 MCContext &Ctx = MF->getContext();
3590 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3591 TLI.getLibcallName(LC), std::move(Args));
3592 if (!lowerCallTo(CLI))
3593 return false;
3594 updateValueMap(II, CLI.ResultReg);
3595 return true;
3596 }
3597 case Intrinsic::fabs: {
3598 MVT VT;
3599 if (!isTypeLegal(II->getType(), VT))
3600 return false;
3601
3602 unsigned Opc;
3603 switch (VT.SimpleTy) {
3604 default:
3605 return false;
3606 case MVT::f32:
3607 Opc = AArch64::FABSSr;
3608 break;
3609 case MVT::f64:
3610 Opc = AArch64::FABSDr;
3611 break;
3612 }
3613 Register SrcReg = getRegForValue(II->getOperand(0));
3614 if (!SrcReg)
3615 return false;
3616 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3617 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3618 .addReg(SrcReg);
3619 updateValueMap(II, ResultReg);
3620 return true;
3621 }
3622 case Intrinsic::trap:
3623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3624 .addImm(1);
3625 return true;
3626 case Intrinsic::debugtrap:
3627 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3628 .addImm(0xF000);
3629 return true;
3630
3631 case Intrinsic::sqrt: {
3632 Type *RetTy = II->getCalledFunction()->getReturnType();
3633
3634 MVT VT;
3635 if (!isTypeLegal(RetTy, VT))
3636 return false;
3637
3638 Register Op0Reg = getRegForValue(II->getOperand(0));
3639 if (!Op0Reg)
3640 return false;
3641
3642 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3643 if (!ResultReg)
3644 return false;
3645
3646 updateValueMap(II, ResultReg);
3647 return true;
3648 }
3649 case Intrinsic::sadd_with_overflow:
3650 case Intrinsic::uadd_with_overflow:
3651 case Intrinsic::ssub_with_overflow:
3652 case Intrinsic::usub_with_overflow:
3653 case Intrinsic::smul_with_overflow:
3654 case Intrinsic::umul_with_overflow: {
3655 // This implements the basic lowering of the xalu with overflow intrinsics.
3656 const Function *Callee = II->getCalledFunction();
3657 auto *Ty = cast<StructType>(Callee->getReturnType());
3658 Type *RetTy = Ty->getTypeAtIndex(0U);
3659
3660 MVT VT;
3661 if (!isTypeLegal(RetTy, VT))
3662 return false;
3663
3664 if (VT != MVT::i32 && VT != MVT::i64)
3665 return false;
3666
3667 const Value *LHS = II->getArgOperand(0);
3668 const Value *RHS = II->getArgOperand(1);
3669 // Canonicalize immediate to the RHS.
3670 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3671 std::swap(LHS, RHS);
3672
3673 // Simplify multiplies.
3674 Intrinsic::ID IID = II->getIntrinsicID();
3675 switch (IID) {
3676 default:
3677 break;
3678 case Intrinsic::smul_with_overflow:
3679 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3680 if (C->getValue() == 2) {
3681 IID = Intrinsic::sadd_with_overflow;
3682 RHS = LHS;
3683 }
3684 break;
3685 case Intrinsic::umul_with_overflow:
3686 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3687 if (C->getValue() == 2) {
3688 IID = Intrinsic::uadd_with_overflow;
3689 RHS = LHS;
3690 }
3691 break;
3692 }
3693
3694 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3696 switch (IID) {
3697 default: llvm_unreachable("Unexpected intrinsic!");
3698 case Intrinsic::sadd_with_overflow:
3699 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3700 CC = AArch64CC::VS;
3701 break;
3702 case Intrinsic::uadd_with_overflow:
3703 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3704 CC = AArch64CC::HS;
3705 break;
3706 case Intrinsic::ssub_with_overflow:
3707 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3708 CC = AArch64CC::VS;
3709 break;
3710 case Intrinsic::usub_with_overflow:
3711 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3712 CC = AArch64CC::LO;
3713 break;
3714 case Intrinsic::smul_with_overflow: {
3715 CC = AArch64CC::NE;
3716 Register LHSReg = getRegForValue(LHS);
3717 if (!LHSReg)
3718 return false;
3719
3720 Register RHSReg = getRegForValue(RHS);
3721 if (!RHSReg)
3722 return false;
3723
3724 if (VT == MVT::i32) {
3725 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3726 Register MulSubReg =
3727 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3728 // cmp xreg, wreg, sxtw
3729 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3730 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3731 /*WantResult=*/false);
3732 MulReg = MulSubReg;
3733 } else {
3734 assert(VT == MVT::i64 && "Unexpected value type.");
3735 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3736 // reused in the next instruction.
3737 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3738 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3739 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3740 /*WantResult=*/false);
3741 }
3742 break;
3743 }
3744 case Intrinsic::umul_with_overflow: {
3745 CC = AArch64CC::NE;
3746 Register LHSReg = getRegForValue(LHS);
3747 if (!LHSReg)
3748 return false;
3749
3750 Register RHSReg = getRegForValue(RHS);
3751 if (!RHSReg)
3752 return false;
3753
3754 if (VT == MVT::i32) {
3755 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3756 // tst xreg, #0xffffffff00000000
3757 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3758 TII.get(AArch64::ANDSXri), AArch64::XZR)
3759 .addReg(MulReg)
3760 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3761 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3762 } else {
3763 assert(VT == MVT::i64 && "Unexpected value type.");
3764 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3765 // reused in the next instruction.
3766 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3767 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3768 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3769 }
3770 break;
3771 }
3772 }
3773
3774 if (MulReg) {
3775 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3776 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3777 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3778 }
3779
3780 if (!ResultReg1)
3781 return false;
3782
3783 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3784 AArch64::WZR, AArch64::WZR,
3785 getInvertedCondCode(CC));
3786 (void)ResultReg2;
3787 assert((ResultReg1 + 1) == ResultReg2 &&
3788 "Nonconsecutive result registers.");
3789 updateValueMap(II, ResultReg1, 2);
3790 return true;
3791 }
3792 case Intrinsic::aarch64_crc32b:
3793 case Intrinsic::aarch64_crc32h:
3794 case Intrinsic::aarch64_crc32w:
3795 case Intrinsic::aarch64_crc32x:
3796 case Intrinsic::aarch64_crc32cb:
3797 case Intrinsic::aarch64_crc32ch:
3798 case Intrinsic::aarch64_crc32cw:
3799 case Intrinsic::aarch64_crc32cx: {
3800 if (!Subtarget->hasCRC())
3801 return false;
3802
3803 unsigned Opc;
3804 switch (II->getIntrinsicID()) {
3805 default:
3806 llvm_unreachable("Unexpected intrinsic!");
3807 case Intrinsic::aarch64_crc32b:
3808 Opc = AArch64::CRC32Brr;
3809 break;
3810 case Intrinsic::aarch64_crc32h:
3811 Opc = AArch64::CRC32Hrr;
3812 break;
3813 case Intrinsic::aarch64_crc32w:
3814 Opc = AArch64::CRC32Wrr;
3815 break;
3816 case Intrinsic::aarch64_crc32x:
3817 Opc = AArch64::CRC32Xrr;
3818 break;
3819 case Intrinsic::aarch64_crc32cb:
3820 Opc = AArch64::CRC32CBrr;
3821 break;
3822 case Intrinsic::aarch64_crc32ch:
3823 Opc = AArch64::CRC32CHrr;
3824 break;
3825 case Intrinsic::aarch64_crc32cw:
3826 Opc = AArch64::CRC32CWrr;
3827 break;
3828 case Intrinsic::aarch64_crc32cx:
3829 Opc = AArch64::CRC32CXrr;
3830 break;
3831 }
3832
3833 Register LHSReg = getRegForValue(II->getArgOperand(0));
3834 Register RHSReg = getRegForValue(II->getArgOperand(1));
3835 if (!LHSReg || !RHSReg)
3836 return false;
3837
3838 Register ResultReg =
3839 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3840 updateValueMap(II, ResultReg);
3841 return true;
3842 }
3843 }
3844 return false;
3845}
3846
3847bool AArch64FastISel::selectRet(const Instruction *I) {
3848 const ReturnInst *Ret = cast<ReturnInst>(I);
3849 const Function &F = *I->getParent()->getParent();
3850
3851 if (!FuncInfo.CanLowerReturn)
3852 return false;
3853
3854 if (F.isVarArg())
3855 return false;
3856
3857 if (TLI.supportSwiftError() &&
3858 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3859 return false;
3860
3861 if (TLI.supportSplitCSR(FuncInfo.MF))
3862 return false;
3863
3864 // Build a list of return value registers.
3866
3867 if (Ret->getNumOperands() > 0) {
3868 CallingConv::ID CC = F.getCallingConv();
3870 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3871
3872 // Analyze operands of the call, assigning locations to each operand.
3874 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3875 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3876
3877 // Only handle a single return value for now.
3878 if (ValLocs.size() != 1)
3879 return false;
3880
3881 CCValAssign &VA = ValLocs[0];
3882 const Value *RV = Ret->getOperand(0);
3883
3884 // Don't bother handling odd stuff for now.
3885 if ((VA.getLocInfo() != CCValAssign::Full) &&
3886 (VA.getLocInfo() != CCValAssign::BCvt))
3887 return false;
3888
3889 // Only handle register returns for now.
3890 if (!VA.isRegLoc())
3891 return false;
3892
3893 Register Reg = getRegForValue(RV);
3894 if (Reg == 0)
3895 return false;
3896
3897 unsigned SrcReg = Reg + VA.getValNo();
3898 Register DestReg = VA.getLocReg();
3899 // Avoid a cross-class copy. This is very unlikely.
3900 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3901 return false;
3902
3903 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3904 if (!RVEVT.isSimple())
3905 return false;
3906
3907 // Vectors (of > 1 lane) in big endian need tricky handling.
3908 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3909 !Subtarget->isLittleEndian())
3910 return false;
3911
3912 MVT RVVT = RVEVT.getSimpleVT();
3913 if (RVVT == MVT::f128)
3914 return false;
3915
3916 MVT DestVT = VA.getValVT();
3917 // Special handling for extended integers.
3918 if (RVVT != DestVT) {
3919 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3920 return false;
3921
3922 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3923 return false;
3924
3925 bool IsZExt = Outs[0].Flags.isZExt();
3926 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3927 if (SrcReg == 0)
3928 return false;
3929 }
3930
3931 // "Callee" (i.e. value producer) zero extends pointers at function
3932 // boundary.
3933 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3934 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3935
3936 // Make the copy.
3937 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3938 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3939
3940 // Add register to return instruction.
3941 RetRegs.push_back(VA.getLocReg());
3942 }
3943
3944 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3945 TII.get(AArch64::RET_ReallyLR));
3946 for (unsigned RetReg : RetRegs)
3947 MIB.addReg(RetReg, RegState::Implicit);
3948 return true;
3949}
3950
3951bool AArch64FastISel::selectTrunc(const Instruction *I) {
3952 Type *DestTy = I->getType();
3953 Value *Op = I->getOperand(0);
3954 Type *SrcTy = Op->getType();
3955
3956 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3957 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3958 if (!SrcEVT.isSimple())
3959 return false;
3960 if (!DestEVT.isSimple())
3961 return false;
3962
3963 MVT SrcVT = SrcEVT.getSimpleVT();
3964 MVT DestVT = DestEVT.getSimpleVT();
3965
3966 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3967 SrcVT != MVT::i8)
3968 return false;
3969 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3970 DestVT != MVT::i1)
3971 return false;
3972
3973 Register SrcReg = getRegForValue(Op);
3974 if (!SrcReg)
3975 return false;
3976
3977 // If we're truncating from i64 to a smaller non-legal type then generate an
3978 // AND. Otherwise, we know the high bits are undefined and a truncate only
3979 // generate a COPY. We cannot mark the source register also as result
3980 // register, because this can incorrectly transfer the kill flag onto the
3981 // source register.
3982 unsigned ResultReg;
3983 if (SrcVT == MVT::i64) {
3984 uint64_t Mask = 0;
3985 switch (DestVT.SimpleTy) {
3986 default:
3987 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3988 return false;
3989 case MVT::i1:
3990 Mask = 0x1;
3991 break;
3992 case MVT::i8:
3993 Mask = 0xff;
3994 break;
3995 case MVT::i16:
3996 Mask = 0xffff;
3997 break;
3998 }
3999 // Issue an extract_subreg to get the lower 32-bits.
4000 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
4001 AArch64::sub_32);
4002 // Create the AND instruction which performs the actual truncation.
4003 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4004 assert(ResultReg && "Unexpected AND instruction emission failure.");
4005 } else {
4006 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4007 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4008 TII.get(TargetOpcode::COPY), ResultReg)
4009 .addReg(SrcReg);
4010 }
4011
4012 updateValueMap(I, ResultReg);
4013 return true;
4014}
4015
4016unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4017 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4018 DestVT == MVT::i64) &&
4019 "Unexpected value type.");
4020 // Handle i8 and i16 as i32.
4021 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4022 DestVT = MVT::i32;
4023
4024 if (IsZExt) {
4025 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4026 assert(ResultReg && "Unexpected AND instruction emission failure.");
4027 if (DestVT == MVT::i64) {
4028 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4029 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4030 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4032 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4033 .addImm(0)
4034 .addReg(ResultReg)
4035 .addImm(AArch64::sub_32);
4036 ResultReg = Reg64;
4037 }
4038 return ResultReg;
4039 } else {
4040 if (DestVT == MVT::i64) {
4041 // FIXME: We're SExt i1 to i64.
4042 return 0;
4043 }
4044 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4045 0, 0);
4046 }
4047}
4048
4049unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4050 unsigned Opc, ZReg;
4051 switch (RetVT.SimpleTy) {
4052 default: return 0;
4053 case MVT::i8:
4054 case MVT::i16:
4055 case MVT::i32:
4056 RetVT = MVT::i32;
4057 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4058 case MVT::i64:
4059 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4060 }
4061
4062 const TargetRegisterClass *RC =
4063 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4064 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4065}
4066
4067unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4068 if (RetVT != MVT::i64)
4069 return 0;
4070
4071 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4072 Op0, Op1, AArch64::XZR);
4073}
4074
4075unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4076 if (RetVT != MVT::i64)
4077 return 0;
4078
4079 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4080 Op0, Op1, AArch64::XZR);
4081}
4082
4083unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4084 unsigned Op1Reg) {
4085 unsigned Opc = 0;
4086 bool NeedTrunc = false;
4087 uint64_t Mask = 0;
4088 switch (RetVT.SimpleTy) {
4089 default: return 0;
4090 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4091 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4092 case MVT::i32: Opc = AArch64::LSLVWr; break;
4093 case MVT::i64: Opc = AArch64::LSLVXr; break;
4094 }
4095
4096 const TargetRegisterClass *RC =
4097 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4098 if (NeedTrunc)
4099 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4100
4101 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4102 if (NeedTrunc)
4103 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4104 return ResultReg;
4105}
4106
4107unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4108 uint64_t Shift, bool IsZExt) {
4109 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4110 "Unexpected source/return type pair.");
4111 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4112 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4113 "Unexpected source value type.");
4114 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4115 RetVT == MVT::i64) && "Unexpected return value type.");
4116
4117 bool Is64Bit = (RetVT == MVT::i64);
4118 unsigned RegSize = Is64Bit ? 64 : 32;
4119 unsigned DstBits = RetVT.getSizeInBits();
4120 unsigned SrcBits = SrcVT.getSizeInBits();
4121 const TargetRegisterClass *RC =
4122 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4123
4124 // Just emit a copy for "zero" shifts.
4125 if (Shift == 0) {
4126 if (RetVT == SrcVT) {
4127 Register ResultReg = createResultReg(RC);
4128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4129 TII.get(TargetOpcode::COPY), ResultReg)
4130 .addReg(Op0);
4131 return ResultReg;
4132 } else
4133 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4134 }
4135
4136 // Don't deal with undefined shifts.
4137 if (Shift >= DstBits)
4138 return 0;
4139
4140 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4141 // {S|U}BFM Wd, Wn, #r, #s
4142 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4143
4144 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4145 // %2 = shl i16 %1, 4
4146 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4147 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4148 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4149 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4150
4151 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4152 // %2 = shl i16 %1, 8
4153 // Wd<32+7-24,32-24> = Wn<7:0>
4154 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4155 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4156 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4157
4158 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4159 // %2 = shl i16 %1, 12
4160 // Wd<32+3-20,32-20> = Wn<3:0>
4161 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4162 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4163 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4164
4165 unsigned ImmR = RegSize - Shift;
4166 // Limit the width to the length of the source type.
4167 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4168 static const unsigned OpcTable[2][2] = {
4169 {AArch64::SBFMWri, AArch64::SBFMXri},
4170 {AArch64::UBFMWri, AArch64::UBFMXri}
4171 };
4172 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4173 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4174 Register TmpReg = MRI.createVirtualRegister(RC);
4175 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4176 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4177 .addImm(0)
4178 .addReg(Op0)
4179 .addImm(AArch64::sub_32);
4180 Op0 = TmpReg;
4181 }
4182 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4183}
4184
4185unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4186 unsigned Op1Reg) {
4187 unsigned Opc = 0;
4188 bool NeedTrunc = false;
4189 uint64_t Mask = 0;
4190 switch (RetVT.SimpleTy) {
4191 default: return 0;
4192 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4193 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4194 case MVT::i32: Opc = AArch64::LSRVWr; break;
4195 case MVT::i64: Opc = AArch64::LSRVXr; break;
4196 }
4197
4198 const TargetRegisterClass *RC =
4199 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4200 if (NeedTrunc) {
4201 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4202 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4203 }
4204 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4205 if (NeedTrunc)
4206 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4207 return ResultReg;
4208}
4209
4210unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4211 uint64_t Shift, bool IsZExt) {
4212 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4213 "Unexpected source/return type pair.");
4214 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4215 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4216 "Unexpected source value type.");
4217 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4218 RetVT == MVT::i64) && "Unexpected return value type.");
4219
4220 bool Is64Bit = (RetVT == MVT::i64);
4221 unsigned RegSize = Is64Bit ? 64 : 32;
4222 unsigned DstBits = RetVT.getSizeInBits();
4223 unsigned SrcBits = SrcVT.getSizeInBits();
4224 const TargetRegisterClass *RC =
4225 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4226
4227 // Just emit a copy for "zero" shifts.
4228 if (Shift == 0) {
4229 if (RetVT == SrcVT) {
4230 Register ResultReg = createResultReg(RC);
4231 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4232 TII.get(TargetOpcode::COPY), ResultReg)
4233 .addReg(Op0);
4234 return ResultReg;
4235 } else
4236 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4237 }
4238
4239 // Don't deal with undefined shifts.
4240 if (Shift >= DstBits)
4241 return 0;
4242
4243 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4244 // {S|U}BFM Wd, Wn, #r, #s
4245 // Wd<s-r:0> = Wn<s:r> when r <= s
4246
4247 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4248 // %2 = lshr i16 %1, 4
4249 // Wd<7-4:0> = Wn<7:4>
4250 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4251 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4252 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4253
4254 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4255 // %2 = lshr i16 %1, 8
4256 // Wd<7-7,0> = Wn<7:7>
4257 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4258 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4259 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4260
4261 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4262 // %2 = lshr i16 %1, 12
4263 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4264 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4265 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4266 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4267
4268 if (Shift >= SrcBits && IsZExt)
4269 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4270
4271 // It is not possible to fold a sign-extend into the LShr instruction. In this
4272 // case emit a sign-extend.
4273 if (!IsZExt) {
4274 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4275 if (!Op0)
4276 return 0;
4277 SrcVT = RetVT;
4278 SrcBits = SrcVT.getSizeInBits();
4279 IsZExt = true;
4280 }
4281
4282 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4283 unsigned ImmS = SrcBits - 1;
4284 static const unsigned OpcTable[2][2] = {
4285 {AArch64::SBFMWri, AArch64::SBFMXri},
4286 {AArch64::UBFMWri, AArch64::UBFMXri}
4287 };
4288 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4289 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4290 Register TmpReg = MRI.createVirtualRegister(RC);
4291 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4292 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4293 .addImm(0)
4294 .addReg(Op0)
4295 .addImm(AArch64::sub_32);
4296 Op0 = TmpReg;
4297 }
4298 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4299}
4300
4301unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4302 unsigned Op1Reg) {
4303 unsigned Opc = 0;
4304 bool NeedTrunc = false;
4305 uint64_t Mask = 0;
4306 switch (RetVT.SimpleTy) {
4307 default: return 0;
4308 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4309 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4310 case MVT::i32: Opc = AArch64::ASRVWr; break;
4311 case MVT::i64: Opc = AArch64::ASRVXr; break;
4312 }
4313
4314 const TargetRegisterClass *RC =
4315 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4316 if (NeedTrunc) {
4317 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4318 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4319 }
4320 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4321 if (NeedTrunc)
4322 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4323 return ResultReg;
4324}
4325
4326unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4327 uint64_t Shift, bool IsZExt) {
4328 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4329 "Unexpected source/return type pair.");
4330 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4331 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4332 "Unexpected source value type.");
4333 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4334 RetVT == MVT::i64) && "Unexpected return value type.");
4335
4336 bool Is64Bit = (RetVT == MVT::i64);
4337 unsigned RegSize = Is64Bit ? 64 : 32;
4338 unsigned DstBits = RetVT.getSizeInBits();
4339 unsigned SrcBits = SrcVT.getSizeInBits();
4340 const TargetRegisterClass *RC =
4341 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4342
4343 // Just emit a copy for "zero" shifts.
4344 if (Shift == 0) {
4345 if (RetVT == SrcVT) {
4346 Register ResultReg = createResultReg(RC);
4347 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4348 TII.get(TargetOpcode::COPY), ResultReg)
4349 .addReg(Op0);
4350 return ResultReg;
4351 } else
4352 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4353 }
4354
4355 // Don't deal with undefined shifts.
4356 if (Shift >= DstBits)
4357 return 0;
4358
4359 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4360 // {S|U}BFM Wd, Wn, #r, #s
4361 // Wd<s-r:0> = Wn<s:r> when r <= s
4362
4363 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4364 // %2 = ashr i16 %1, 4
4365 // Wd<7-4:0> = Wn<7:4>
4366 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4367 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4368 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4369
4370 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4371 // %2 = ashr i16 %1, 8
4372 // Wd<7-7,0> = Wn<7:7>
4373 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4374 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4375 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4376
4377 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4378 // %2 = ashr i16 %1, 12
4379 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4380 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4381 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4382 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4383
4384 if (Shift >= SrcBits && IsZExt)
4385 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4386
4387 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4388 unsigned ImmS = SrcBits - 1;
4389 static const unsigned OpcTable[2][2] = {
4390 {AArch64::SBFMWri, AArch64::SBFMXri},
4391 {AArch64::UBFMWri, AArch64::UBFMXri}
4392 };
4393 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4394 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4395 Register TmpReg = MRI.createVirtualRegister(RC);
4396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4397 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4398 .addImm(0)
4399 .addReg(Op0)
4400 .addImm(AArch64::sub_32);
4401 Op0 = TmpReg;
4402 }
4403 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4404}
4405
4406unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4407 bool IsZExt) {
4408 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4409
4410 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4411 // DestVT are odd things, so test to make sure that they are both types we can
4412 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4413 // bail out to SelectionDAG.
4414 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4415 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4416 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4417 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4418 return 0;
4419
4420 unsigned Opc;
4421 unsigned Imm = 0;
4422
4423 switch (SrcVT.SimpleTy) {
4424 default:
4425 return 0;
4426 case MVT::i1:
4427 return emiti1Ext(SrcReg, DestVT, IsZExt);
4428 case MVT::i8:
4429 if (DestVT == MVT::i64)
4430 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4431 else
4432 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4433 Imm = 7;
4434 break;
4435 case MVT::i16:
4436 if (DestVT == MVT::i64)
4437 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4438 else
4439 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4440 Imm = 15;
4441 break;
4442 case MVT::i32:
4443 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4444 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4445 Imm = 31;
4446 break;
4447 }
4448
4449 // Handle i8 and i16 as i32.
4450 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4451 DestVT = MVT::i32;
4452 else if (DestVT == MVT::i64) {
4453 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4455 TII.get(AArch64::SUBREG_TO_REG), Src64)
4456 .addImm(0)
4457 .addReg(SrcReg)
4458 .addImm(AArch64::sub_32);
4459 SrcReg = Src64;
4460 }
4461
4462 const TargetRegisterClass *RC =
4463 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4464 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4465}
4466
4467static bool isZExtLoad(const MachineInstr *LI) {
4468 switch (LI->getOpcode()) {
4469 default:
4470 return false;
4471 case AArch64::LDURBBi:
4472 case AArch64::LDURHHi:
4473 case AArch64::LDURWi:
4474 case AArch64::LDRBBui:
4475 case AArch64::LDRHHui:
4476 case AArch64::LDRWui:
4477 case AArch64::LDRBBroX:
4478 case AArch64::LDRHHroX:
4479 case AArch64::LDRWroX:
4480 case AArch64::LDRBBroW:
4481 case AArch64::LDRHHroW:
4482 case AArch64::LDRWroW:
4483 return true;
4484 }
4485}
4486
4487static bool isSExtLoad(const MachineInstr *LI) {
4488 switch (LI->getOpcode()) {
4489 default:
4490 return false;
4491 case AArch64::LDURSBWi:
4492 case AArch64::LDURSHWi:
4493 case AArch64::LDURSBXi:
4494 case AArch64::LDURSHXi:
4495 case AArch64::LDURSWi:
4496 case AArch64::LDRSBWui:
4497 case AArch64::LDRSHWui:
4498 case AArch64::LDRSBXui:
4499 case AArch64::LDRSHXui:
4500 case AArch64::LDRSWui:
4501 case AArch64::LDRSBWroX:
4502 case AArch64::LDRSHWroX:
4503 case AArch64::LDRSBXroX:
4504 case AArch64::LDRSHXroX:
4505 case AArch64::LDRSWroX:
4506 case AArch64::LDRSBWroW:
4507 case AArch64::LDRSHWroW:
4508 case AArch64::LDRSBXroW:
4509 case AArch64::LDRSHXroW:
4510 case AArch64::LDRSWroW:
4511 return true;
4512 }
4513}
4514
4515bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4516 MVT SrcVT) {
4517 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4518 if (!LI || !LI->hasOneUse())
4519 return false;
4520
4521 // Check if the load instruction has already been selected.
4522 Register Reg = lookUpRegForValue(LI);
4523 if (!Reg)
4524 return false;
4525
4526 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4527 if (!MI)
4528 return false;
4529
4530 // Check if the correct load instruction has been emitted - SelectionDAG might
4531 // have emitted a zero-extending load, but we need a sign-extending load.
4532 bool IsZExt = isa<ZExtInst>(I);
4533 const auto *LoadMI = MI;
4534 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4535 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4536 Register LoadReg = MI->getOperand(1).getReg();
4537 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4538 assert(LoadMI && "Expected valid instruction");
4539 }
4540 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4541 return false;
4542
4543 // Nothing to be done.
4544 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4545 updateValueMap(I, Reg);
4546 return true;
4547 }
4548
4549 if (IsZExt) {
4550 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4551 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4552 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4553 .addImm(0)
4554 .addReg(Reg, getKillRegState(true))
4555 .addImm(AArch64::sub_32);
4556 Reg = Reg64;
4557 } else {
4558 assert((MI->getOpcode() == TargetOpcode::COPY &&
4559 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4560 "Expected copy instruction");
4561 Reg = MI->getOperand(1).getReg();
4563 removeDeadCode(I, std::next(I));
4564 }
4565 updateValueMap(I, Reg);
4566 return true;
4567}
4568
4569bool AArch64FastISel::selectIntExt(const Instruction *I) {
4570 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4571 "Unexpected integer extend instruction.");
4572 MVT RetVT;
4573 MVT SrcVT;
4574 if (!isTypeSupported(I->getType(), RetVT))
4575 return false;
4576
4577 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4578 return false;
4579
4580 // Try to optimize already sign-/zero-extended values from load instructions.
4581 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4582 return true;
4583
4584 Register SrcReg = getRegForValue(I->getOperand(0));
4585 if (!SrcReg)
4586 return false;
4587
4588 // Try to optimize already sign-/zero-extended values from function arguments.
4589 bool IsZExt = isa<ZExtInst>(I);
4590 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4591 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4592 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4593 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4594 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4595 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4596 .addImm(0)
4597 .addReg(SrcReg)
4598 .addImm(AArch64::sub_32);
4599 SrcReg = ResultReg;
4600 }
4601
4602 updateValueMap(I, SrcReg);
4603 return true;
4604 }
4605 }
4606
4607 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4608 if (!ResultReg)
4609 return false;
4610
4611 updateValueMap(I, ResultReg);
4612 return true;
4613}
4614
4615bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4616 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4617 if (!DestEVT.isSimple())
4618 return false;
4619
4620 MVT DestVT = DestEVT.getSimpleVT();
4621 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4622 return false;
4623
4624 unsigned DivOpc;
4625 bool Is64bit = (DestVT == MVT::i64);
4626 switch (ISDOpcode) {
4627 default:
4628 return false;
4629 case ISD::SREM:
4630 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4631 break;
4632 case ISD::UREM:
4633 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4634 break;
4635 }
4636 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4637 Register Src0Reg = getRegForValue(I->getOperand(0));
4638 if (!Src0Reg)
4639 return false;
4640
4641 Register Src1Reg = getRegForValue(I->getOperand(1));
4642 if (!Src1Reg)
4643 return false;
4644
4645 const TargetRegisterClass *RC =
4646 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4647 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4648 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4649 // The remainder is computed as numerator - (quotient * denominator) using the
4650 // MSUB instruction.
4651 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4652 updateValueMap(I, ResultReg);
4653 return true;
4654}
4655
4656bool AArch64FastISel::selectMul(const Instruction *I) {
4657 MVT VT;
4658 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4659 return false;
4660
4661 if (VT.isVector())
4662 return selectBinaryOp(I, ISD::MUL);
4663
4664 const Value *Src0 = I->getOperand(0);
4665 const Value *Src1 = I->getOperand(1);
4666 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4667 if (C->getValue().isPowerOf2())
4668 std::swap(Src0, Src1);
4669
4670 // Try to simplify to a shift instruction.
4671 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4672 if (C->getValue().isPowerOf2()) {
4673 uint64_t ShiftVal = C->getValue().logBase2();
4674 MVT SrcVT = VT;
4675 bool IsZExt = true;
4676 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4677 if (!isIntExtFree(ZExt)) {
4678 MVT VT;
4679 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4680 SrcVT = VT;
4681 IsZExt = true;
4682 Src0 = ZExt->getOperand(0);
4683 }
4684 }
4685 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4686 if (!isIntExtFree(SExt)) {
4687 MVT VT;
4688 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4689 SrcVT = VT;
4690 IsZExt = false;
4691 Src0 = SExt->getOperand(0);
4692 }
4693 }
4694 }
4695
4696 Register Src0Reg = getRegForValue(Src0);
4697 if (!Src0Reg)
4698 return false;
4699
4700 unsigned ResultReg =
4701 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4702
4703 if (ResultReg) {
4704 updateValueMap(I, ResultReg);
4705 return true;
4706 }
4707 }
4708
4709 Register Src0Reg = getRegForValue(I->getOperand(0));
4710 if (!Src0Reg)
4711 return false;
4712
4713 Register Src1Reg = getRegForValue(I->getOperand(1));
4714 if (!Src1Reg)
4715 return false;
4716
4717 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4718
4719 if (!ResultReg)
4720 return false;
4721
4722 updateValueMap(I, ResultReg);
4723 return true;
4724}
4725
4726bool AArch64FastISel::selectShift(const Instruction *I) {
4727 MVT RetVT;
4728 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4729 return false;
4730
4731 if (RetVT.isVector())
4732 return selectOperator(I, I->getOpcode());
4733
4734 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4735 unsigned ResultReg = 0;
4736 uint64_t ShiftVal = C->getZExtValue();
4737 MVT SrcVT = RetVT;
4738 bool IsZExt = I->getOpcode() != Instruction::AShr;
4739 const Value *Op0 = I->getOperand(0);
4740 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4741 if (!isIntExtFree(ZExt)) {
4742 MVT TmpVT;
4743 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4744 SrcVT = TmpVT;
4745 IsZExt = true;
4746 Op0 = ZExt->getOperand(0);
4747 }
4748 }
4749 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4750 if (!isIntExtFree(SExt)) {
4751 MVT TmpVT;
4752 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4753 SrcVT = TmpVT;
4754 IsZExt = false;
4755 Op0 = SExt->getOperand(0);
4756 }
4757 }
4758 }
4759
4760 Register Op0Reg = getRegForValue(Op0);
4761 if (!Op0Reg)
4762 return false;
4763
4764 switch (I->getOpcode()) {
4765 default: llvm_unreachable("Unexpected instruction.");
4766 case Instruction::Shl:
4767 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4768 break;
4769 case Instruction::AShr:
4770 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4771 break;
4772 case Instruction::LShr:
4773 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4774 break;
4775 }
4776 if (!ResultReg)
4777 return false;
4778
4779 updateValueMap(I, ResultReg);
4780 return true;
4781 }
4782
4783 Register Op0Reg = getRegForValue(I->getOperand(0));
4784 if (!Op0Reg)
4785 return false;
4786
4787 Register Op1Reg = getRegForValue(I->getOperand(1));
4788 if (!Op1Reg)
4789 return false;
4790
4791 unsigned ResultReg = 0;
4792 switch (I->getOpcode()) {
4793 default: llvm_unreachable("Unexpected instruction.");
4794 case Instruction::Shl:
4795 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4796 break;
4797 case Instruction::AShr:
4798 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4799 break;
4800 case Instruction::LShr:
4801 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4802 break;
4803 }
4804
4805 if (!ResultReg)
4806 return false;
4807
4808 updateValueMap(I, ResultReg);
4809 return true;
4810}
4811
4812bool AArch64FastISel::selectBitCast(const Instruction *I) {
4813 MVT RetVT, SrcVT;
4814
4815 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4816 return false;
4817 if (!isTypeLegal(I->getType(), RetVT))
4818 return false;
4819
4820 unsigned Opc;
4821 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4822 Opc = AArch64::FMOVWSr;
4823 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4824 Opc = AArch64::FMOVXDr;
4825 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4826 Opc = AArch64::FMOVSWr;
4827 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4828 Opc = AArch64::FMOVDXr;
4829 else
4830 return false;
4831
4832 const TargetRegisterClass *RC = nullptr;
4833 switch (RetVT.SimpleTy) {
4834 default: llvm_unreachable("Unexpected value type.");
4835 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4836 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4837 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4838 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4839 }
4840 Register Op0Reg = getRegForValue(I->getOperand(0));
4841 if (!Op0Reg)
4842 return false;
4843
4844 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4845 if (!ResultReg)
4846 return false;
4847
4848 updateValueMap(I, ResultReg);
4849 return true;
4850}
4851
4852bool AArch64FastISel::selectFRem(const Instruction *I) {
4853 MVT RetVT;
4854 if (!isTypeLegal(I->getType(), RetVT))
4855 return false;
4856
4857 RTLIB::Libcall LC;
4858 switch (RetVT.SimpleTy) {
4859 default:
4860 return false;
4861 case MVT::f32:
4862 LC = RTLIB::REM_F32;
4863 break;
4864 case MVT::f64:
4865 LC = RTLIB::REM_F64;
4866 break;
4867 }
4868
4869 ArgListTy Args;
4870 Args.reserve(I->getNumOperands());
4871
4872 // Populate the argument list.
4873 for (auto &Arg : I->operands()) {
4874 ArgListEntry Entry;
4875 Entry.Val = Arg;
4876 Entry.Ty = Arg->getType();
4877 Args.push_back(Entry);
4878 }
4879
4880 CallLoweringInfo CLI;
4881 MCContext &Ctx = MF->getContext();
4882 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4883 TLI.getLibcallName(LC), std::move(Args));
4884 if (!lowerCallTo(CLI))
4885 return false;
4886 updateValueMap(I, CLI.ResultReg);
4887 return true;
4888}
4889
4890bool AArch64FastISel::selectSDiv(const Instruction *I) {
4891 MVT VT;
4892 if (!isTypeLegal(I->getType(), VT))
4893 return false;
4894
4895 if (!isa<ConstantInt>(I->getOperand(1)))
4896 return selectBinaryOp(I, ISD::SDIV);
4897
4898 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4899 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4900 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4901 return selectBinaryOp(I, ISD::SDIV);
4902
4903 unsigned Lg2 = C.countr_zero();
4904 Register Src0Reg = getRegForValue(I->getOperand(0));
4905 if (!Src0Reg)
4906 return false;
4907
4908 if (cast<BinaryOperator>(I)->isExact()) {
4909 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4910 if (!ResultReg)
4911 return false;
4912 updateValueMap(I, ResultReg);
4913 return true;
4914 }
4915
4916 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4917 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4918 if (!AddReg)
4919 return false;
4920
4921 // (Src0 < 0) ? Pow2 - 1 : 0;
4922 if (!emitICmp_ri(VT, Src0Reg, 0))
4923 return false;
4924
4925 unsigned SelectOpc;
4926 const TargetRegisterClass *RC;
4927 if (VT == MVT::i64) {
4928 SelectOpc = AArch64::CSELXr;
4929 RC = &AArch64::GPR64RegClass;
4930 } else {
4931 SelectOpc = AArch64::CSELWr;
4932 RC = &AArch64::GPR32RegClass;
4933 }
4934 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4936 if (!SelectReg)
4937 return false;
4938
4939 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4940 // negate the result.
4941 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4942 unsigned ResultReg;
4943 if (C.isNegative())
4944 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4945 AArch64_AM::ASR, Lg2);
4946 else
4947 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4948
4949 if (!ResultReg)
4950 return false;
4951
4952 updateValueMap(I, ResultReg);
4953 return true;
4954}
4955
4956/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4957/// have to duplicate it for AArch64, because otherwise we would fail during the
4958/// sign-extend emission.
4959unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4960 Register IdxN = getRegForValue(Idx);
4961 if (IdxN == 0)
4962 // Unhandled operand. Halt "fast" selection and bail.
4963 return 0;
4964
4965 // If the index is smaller or larger than intptr_t, truncate or extend it.
4966 MVT PtrVT = TLI.getPointerTy(DL);
4967 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4968 if (IdxVT.bitsLT(PtrVT)) {
4969 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4970 } else if (IdxVT.bitsGT(PtrVT))
4971 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4972 return IdxN;
4973}
4974
4975/// This is mostly a copy of the existing FastISel GEP code, but we have to
4976/// duplicate it for AArch64, because otherwise we would bail out even for
4977/// simple cases. This is because the standard fastEmit functions don't cover
4978/// MUL at all and ADD is lowered very inefficientily.
4979bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4980 if (Subtarget->isTargetILP32())
4981 return false;
4982
4983 Register N = getRegForValue(I->getOperand(0));
4984 if (!N)
4985 return false;
4986
4987 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4988 // into a single N = N + TotalOffset.
4989 uint64_t TotalOffs = 0;
4990 MVT VT = TLI.getPointerTy(DL);
4992 GTI != E; ++GTI) {
4993 const Value *Idx = GTI.getOperand();
4994 if (auto *StTy = GTI.getStructTypeOrNull()) {
4995 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4996 // N = N + Offset
4997 if (Field)
4998 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4999 } else {
5000 // If this is a constant subscript, handle it quickly.
5001 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5002 if (CI->isZero())
5003 continue;
5004 // N = N + Offset
5005 TotalOffs += GTI.getSequentialElementStride(DL) *
5006 cast<ConstantInt>(CI)->getSExtValue();
5007 continue;
5008 }
5009 if (TotalOffs) {
5010 N = emitAdd_ri_(VT, N, TotalOffs);
5011 if (!N)
5012 return false;
5013 TotalOffs = 0;
5014 }
5015
5016 // N = N + Idx * ElementSize;
5017 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5018 unsigned IdxN = getRegForGEPIndex(Idx);
5019 if (!IdxN)
5020 return false;
5021
5022 if (ElementSize != 1) {
5023 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5024 if (!C)
5025 return false;
5026 IdxN = emitMul_rr(VT, IdxN, C);
5027 if (!IdxN)
5028 return false;
5029 }
5030 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5031 if (!N)
5032 return false;
5033 }
5034 }
5035 if (TotalOffs) {
5036 N = emitAdd_ri_(VT, N, TotalOffs);
5037 if (!N)
5038 return false;
5039 }
5040 updateValueMap(I, N);
5041 return true;
5042}
5043
5044bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5045 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5046 "cmpxchg survived AtomicExpand at optlevel > -O0");
5047
5048 auto *RetPairTy = cast<StructType>(I->getType());
5049 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5050 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5051 "cmpxchg has a non-i1 status result");
5052
5053 MVT VT;
5054 if (!isTypeLegal(RetTy, VT))
5055 return false;
5056
5057 const TargetRegisterClass *ResRC;
5058 unsigned Opc, CmpOpc;
5059 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5060 // extractvalue selection doesn't support that.
5061 if (VT == MVT::i32) {
5062 Opc = AArch64::CMP_SWAP_32;
5063 CmpOpc = AArch64::SUBSWrs;
5064 ResRC = &AArch64::GPR32RegClass;
5065 } else if (VT == MVT::i64) {
5066 Opc = AArch64::CMP_SWAP_64;
5067 CmpOpc = AArch64::SUBSXrs;
5068 ResRC = &AArch64::GPR64RegClass;
5069 } else {
5070 return false;
5071 }
5072
5073 const MCInstrDesc &II = TII.get(Opc);
5074
5075 const Register AddrReg = constrainOperandRegClass(
5076 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5077 const Register DesiredReg = constrainOperandRegClass(
5078 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5079 const Register NewReg = constrainOperandRegClass(
5080 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5081
5082 const Register ResultReg1 = createResultReg(ResRC);
5083 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5084 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5085
5086 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5088 .addDef(ResultReg1)
5089 .addDef(ScratchReg)
5090 .addUse(AddrReg)
5091 .addUse(DesiredReg)
5092 .addUse(NewReg);
5093
5094 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5095 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5096 .addUse(ResultReg1)
5097 .addUse(DesiredReg)
5098 .addImm(0);
5099
5100 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5101 .addDef(ResultReg2)
5102 .addUse(AArch64::WZR)
5103 .addUse(AArch64::WZR)
5105
5106 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5107 updateValueMap(I, ResultReg1, 2);
5108 return true;
5109}
5110
5111bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5112 if (TLI.fallBackToDAGISel(*I))
5113 return false;
5114 switch (I->getOpcode()) {
5115 default:
5116 break;
5117 case Instruction::Add:
5118 case Instruction::Sub:
5119 return selectAddSub(I);
5120 case Instruction::Mul:
5121 return selectMul(I);
5122 case Instruction::SDiv:
5123 return selectSDiv(I);
5124 case Instruction::SRem:
5125 if (!selectBinaryOp(I, ISD::SREM))
5126 return selectRem(I, ISD::SREM);
5127 return true;
5128 case Instruction::URem:
5129 if (!selectBinaryOp(I, ISD::UREM))
5130 return selectRem(I, ISD::UREM);
5131 return true;
5132 case Instruction::Shl:
5133 case Instruction::LShr:
5134 case Instruction::AShr:
5135 return selectShift(I);
5136 case Instruction::And:
5137 case Instruction::Or:
5138 case Instruction::Xor:
5139 return selectLogicalOp(I);
5140 case Instruction::Br:
5141 return selectBranch(I);
5142 case Instruction::IndirectBr:
5143 return selectIndirectBr(I);
5144 case Instruction::BitCast:
5146 return selectBitCast(I);
5147 return true;
5148 case Instruction::FPToSI:
5149 if (!selectCast(I, ISD::FP_TO_SINT))
5150 return selectFPToInt(I, /*Signed=*/true);
5151 return true;
5152 case Instruction::FPToUI:
5153 return selectFPToInt(I, /*Signed=*/false);
5154 case Instruction::ZExt:
5155 case Instruction::SExt:
5156 return selectIntExt(I);
5157 case Instruction::Trunc:
5158 if (!selectCast(I, ISD::TRUNCATE))
5159 return selectTrunc(I);
5160 return true;
5161 case Instruction::FPExt:
5162 return selectFPExt(I);
5163 case Instruction::FPTrunc:
5164 return selectFPTrunc(I);
5165 case Instruction::SIToFP:
5166 if (!selectCast(I, ISD::SINT_TO_FP))
5167 return selectIntToFP(I, /*Signed=*/true);
5168 return true;
5169 case Instruction::UIToFP:
5170 return selectIntToFP(I, /*Signed=*/false);
5171 case Instruction::Load:
5172 return selectLoad(I);
5173 case Instruction::Store:
5174 return selectStore(I);
5175 case Instruction::FCmp:
5176 case Instruction::ICmp:
5177 return selectCmp(I);
5178 case Instruction::Select:
5179 return selectSelect(I);
5180 case Instruction::Ret:
5181 return selectRet(I);
5182 case Instruction::FRem:
5183 return selectFRem(I);
5184 case Instruction::GetElementPtr:
5185 return selectGetElementPtr(I);
5186 case Instruction::AtomicCmpXchg:
5187 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5188 }
5189
5190 // fall-back to target-independent instruction selection.
5191 return selectOperator(I, I->getOpcode());
5192}
5193
5195 const TargetLibraryInfo *LibInfo) {
5196
5197 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5198 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5199 CallerAttrs.hasStreamingInterfaceOrBody() ||
5200 CallerAttrs.hasStreamingCompatibleInterface() ||
5201 CallerAttrs.hasAgnosticZAInterface())
5202 return nullptr;
5203 return new AArch64FastISel(FuncInfo, LibInfo);
5204}
unsigned const MachineRegisterInfo * MRI
static bool isIntExtFree(const Instruction *I)
Check if the sign-/zero-extend will be a noop.
static bool isSExtLoad(const MachineInstr *LI)
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred)
static bool isMulPowOf2(const Value *I)
Check if the multiply is by a power-of-2 constant.
static unsigned getImplicitScaleFactor(MVT VT)
Determine the implicit scale factor that is applied by a memory operation for a given value type.
static bool isZExtLoad(const MachineInstr *LI)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec)
Emit a load-pair instruction for frame-destroy.
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec)
Emit a store-pair instruction for frame-setup.
unsigned RegSize
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Pre allocate WWM Registers
This file defines the SmallVector class.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
static const unsigned FramePtr
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
APInt bitcastToAPInt() const
Definition: APFloat.h:1346
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
an instruction to allocate memory on the stack
Definition: Instructions.h:63
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:99
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:178
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:690
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:699
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:675
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
bool isUnsigned() const
Definition: InstrTypes.h:934
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const APFloat & getValueAPF() const
Definition: Constants.h:314
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:321
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:318
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:163
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
bool selectGetElementPtr(const User *I)
Definition: FastISel.cpp:530
virtual unsigned fastMaterializeFloatZero(const ConstantFP *CF)
Emit the floating-point constant +0.0 in a register using target- specific logic.
Definition: FastISel.h:480
virtual bool fastLowerIntrinsicCall(const IntrinsicInst *II)
This method is called by target-independent code to do target- specific intrinsic lowering.
Definition: FastISel.cpp:1948
Register getRegForGEPIndex(MVT PtrVT, const Value *Idx)
This is a wrapper around getRegForValue that also takes care of truncating or sign-extending the give...
Definition: FastISel.cpp:383
virtual unsigned fastMaterializeConstant(const Constant *C)
Emit a constant in a register using target-specific logic, such as constant pool loads.
Definition: FastISel.h:473
virtual bool fastLowerCall(CallLoweringInfo &CLI)
This method is called by target-independent code to do target- specific call lowering.
Definition: FastISel.cpp:1946
virtual bool fastLowerArguments()
This method is called by target-independent code to do target- specific argument lowering.
Definition: FastISel.cpp:1944
virtual bool fastSelectInstruction(const Instruction *I)=0
This method is called by target-independent code when the normal FastISel process fails to select an ...
bool selectBitCast(const User *I)
Definition: FastISel.cpp:1516
virtual unsigned fastMaterializeAlloca(const AllocaInst *C)
Emit an alloca address in a register using target-specific logic.
Definition: FastISel.h:476
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
Indirect Branch Instruction.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Context object for machine code objects.
Definition: MCContext.h:83
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:572
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Return a value (possibly void), from a function.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
bool hasZAState() const
bool hasZT0State() const
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void reserve(size_type N)
Definition: SmallVector.h:663
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
Provides information about what library functions are available for the current target.
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:56
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
unsigned getKillRegState(bool B)
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117