X86ISelLoweringCall.cpp (LLVM 18.0.0git)
1//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
20#include "X86TargetMachine.h"
21#include "X86TargetObjectFile.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29
30#define DEBUG_TYPE "x86-isel"
31
32using namespace llvm;
33
34STATISTIC(NumTailCalls, "Number of tail calls");
35
36/// Call this when the user attempts to do something unsupported, like
37/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38/// report_fatal_error, so calling code should attempt to recover without
39/// crashing.
40static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41 const char *Msg) {
42  MachineFunction &MF = DAG.getMachineFunction();
43  DAG.getContext()->diagnose(
44      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
45}
46
47/// Returns true if a CC can dynamically exclude a register from the list of
48/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
49/// the return registers.
50static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
51  switch (CC) {
52  default:
53    return false;
54  case CallingConv::X86_RegCall:
55  case CallingConv::PreserveMost:
56  case CallingConv::PreserveAll:
57    return true;
58 }
59}
60
61/// Returns true if a CC can dynamically exclude a register from the list of
62/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
63/// the parameters.
64static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
65  return CC == CallingConv::X86_RegCall;
66}
67
68static std::pair<MVT, unsigned>
69handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
70                                 const X86Subtarget &Subtarget) {
71 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
72 // convention is one that uses k registers.
73 if (NumElts == 2)
74 return {MVT::v2i64, 1};
75 if (NumElts == 4)
76 return {MVT::v4i32, 1};
77  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
78      CC != CallingConv::Intel_OCL_BI)
79    return {MVT::v8i16, 1};
80  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
81      CC != CallingConv::Intel_OCL_BI)
82    return {MVT::v16i8, 1};
83 // v32i1 passes in ymm unless we have BWI and the calling convention is
84 // regcall.
85 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86 return {MVT::v32i8, 1};
87 // Split v64i1 vectors if we don't have v64i8 available.
88 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89 if (Subtarget.useAVX512Regs())
90 return {MVT::v64i8, 1};
91 return {MVT::v32i8, 2};
92 }
93
94 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96 NumElts > 64)
97 return {MVT::i8, NumElts};
98
98
99  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
100}
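// Editorial example (derived from the cases above): under the C calling
// convention with AVX512, a v16i1 mask is passed as a single v16i8 value in
// an XMM register, whereas under X86_RegCall or Intel_OCL_BI the function
// returns {MVT::INVALID_SIMPLE_VALUE_TYPE, 0} and the mask stays in a
// k-register via the default legalization path.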
101
102MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
103                                                     CallingConv::ID CC,
104                                                     EVT VT) const {
105 if (VT.isVector()) {
106 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107 unsigned NumElts = VT.getVectorNumElements();
108
109 MVT RegisterVT;
110 unsigned NumRegisters;
111 std::tie(RegisterVT, NumRegisters) =
112 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114 return RegisterVT;
115 }
116
117 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118 return MVT::v8f16;
119 }
120
121 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
122 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123 !Subtarget.hasX87())
124 return MVT::i32;
125
126  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
127    return getRegisterTypeForCallingConv(Context, CC,
128                                         VT.changeVectorElementType(MVT::f16));
129
130  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
131}
132
133unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
134                                                          CallingConv::ID CC,
135                                                          EVT VT) const {
136 if (VT.isVector()) {
137 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
138 unsigned NumElts = VT.getVectorNumElements();
139
140 MVT RegisterVT;
141 unsigned NumRegisters;
142 std::tie(RegisterVT, NumRegisters) =
143 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
144 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
145 return NumRegisters;
146 }
147
148 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
149 return 1;
150 }
151
152 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
153 // x87 is disabled.
154 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
155 if (VT == MVT::f64)
156 return 2;
157 if (VT == MVT::f80)
158 return 3;
159 }
160
161  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
162    return getNumRegistersForCallingConv(Context, CC,
163                                         VT.changeVectorElementType(MVT::f16));
164
165  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
166}
167
168unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
169    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
170 unsigned &NumIntermediates, MVT &RegisterVT) const {
171 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
172 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
173 Subtarget.hasAVX512() &&
174      ((!isPowerOf2_32(VT.getVectorNumElements()) ||
175        (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
176 VT.getVectorNumElements() > 64)) {
177 RegisterVT = MVT::i8;
178 IntermediateVT = MVT::i1;
179 NumIntermediates = VT.getVectorNumElements();
180 return NumIntermediates;
181 }
182
183 // Split v64i1 vectors if we don't have v64i8 available.
184 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
185      CC != CallingConv::X86_RegCall) {
186    RegisterVT = MVT::v32i8;
187 IntermediateVT = MVT::v32i1;
188 NumIntermediates = 2;
189 return 2;
190 }
191
192 // Split vNbf16 vectors according to vNf16.
193 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
194 VT = VT.changeVectorElementType(MVT::f16);
195
196  return TargetLowering::getVectorTypeBreakdownForCallingConv(
197      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
198}
199
200EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
201                                          LLVMContext& Context,
202 EVT VT) const {
203 if (!VT.isVector())
204 return MVT::i8;
205
206 if (Subtarget.hasAVX512()) {
207 // Figure out what this type will be legalized to.
208 EVT LegalVT = VT;
209 while (getTypeAction(Context, LegalVT) != TypeLegal)
210 LegalVT = getTypeToTransformTo(Context, LegalVT);
211
212 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
213 if (LegalVT.getSimpleVT().is512BitVector())
214 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
215
216 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
217 // If we legalized to less than a 512-bit vector, then we will use a vXi1
218 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
219 // vXi16/vXi8.
220 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
221 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
222 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
223 }
224 }
225
226  return VT.changeVectorElementTypeToInteger();
227}
228
229/// Helper for getByValTypeAlignment to determine
230/// the desired ByVal argument alignment.
231static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
232 if (MaxAlign == 16)
233 return;
234 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
235 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
236 MaxAlign = Align(16);
237 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
238 Align EltAlign;
239 getMaxByValAlign(ATy->getElementType(), EltAlign);
240 if (EltAlign > MaxAlign)
241 MaxAlign = EltAlign;
242 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
243 for (auto *EltTy : STy->elements()) {
244 Align EltAlign;
245 getMaxByValAlign(EltTy, EltAlign);
246 if (EltAlign > MaxAlign)
247 MaxAlign = EltAlign;
248 if (MaxAlign == 16)
249 break;
250 }
251 }
252}
253
254/// Return the desired alignment for ByVal aggregate
255/// function arguments in the caller parameter area. For X86, aggregates
256/// that contain SSE vectors are placed at 16-byte boundaries while the rest
257/// are at 4-byte boundaries.
258uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
259                                                  const DataLayout &DL) const {
260 if (Subtarget.is64Bit()) {
261 // Max of 8 and alignment of type.
262 Align TyAlign = DL.getABITypeAlign(Ty);
263 if (TyAlign > 8)
264 return TyAlign.value();
265 return 8;
266 }
267
268 Align Alignment(4);
269 if (Subtarget.hasSSE1())
270 getMaxByValAlign(Ty, Alignment);
271 return Alignment.value();
272}
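// Editorial example: for a byval struct containing a 128-bit vector member
// (e.g. __m128), getMaxByValAlign() raises the alignment to 16 on 32-bit SSE
// targets; a struct of plain ints keeps the default 4-byte alignment, and on
// 64-bit targets the result is max(8, ABI alignment of the type).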
273
274/// It returns EVT::Other if the type should be determined using generic
275/// target-independent logic.
276/// For vector ops we check that the overall size isn't larger than our
277/// preferred vector width.
278EVT X86TargetLowering::getOptimalMemOpType(
279    const MemOp &Op, const AttributeList &FuncAttributes) const {
280 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
281 if (Op.size() >= 16 &&
282 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
283 // FIXME: Check if unaligned 64-byte accesses are slow.
284 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
285 (Subtarget.getPreferVectorWidth() >= 512)) {
286 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
287 }
288 // FIXME: Check if unaligned 32-byte accesses are slow.
289 if (Op.size() >= 32 && Subtarget.hasAVX() &&
290 Subtarget.useLight256BitInstructions()) {
291 // Although this isn't a well-supported type for AVX1, we'll let
292 // legalization and shuffle lowering produce the optimal codegen. If we
293 // choose an optimal type with a vector element larger than a byte,
294 // getMemsetStores() may create an intermediate splat (using an integer
295 // multiply) before we splat as a vector.
296 return MVT::v32i8;
297 }
298 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
299 return MVT::v16i8;
300 // TODO: Can SSE1 handle a byte vector?
301 // If we have SSE1 registers we should be able to use them.
302 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
303 (Subtarget.getPreferVectorWidth() >= 128))
304 return MVT::v4f32;
305 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
306 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
307 // Do not use f64 to lower memcpy if source is string constant. It's
308 // better to use i32 to avoid the loads.
309 // Also, do not use f64 to lower memset unless this is a memset of zeros.
310 // The gymnastics of splatting a byte value into an XMM register and then
311 // only using 8-byte stores (because this is a CPU with slow unaligned
312 // 16-byte accesses) makes that a loser.
313 return MVT::f64;
314 }
315 }
316 // This is a compromise. If we reach here, unaligned accesses may be slow on
317 // this target. However, creating smaller, aligned accesses could be even
318 // slower and would certainly be a lot more code.
319 if (Subtarget.is64Bit() && Op.size() >= 8)
320 return MVT::i64;
321 return MVT::i32;
322}
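// Editorial example: a 32-byte aligned memset on an AVX target with fast
// 256-bit ops is lowered with MVT::v32i8 stores; with only SSE2 and a
// preferred vector width >= 128 it uses MVT::v16i8; and when implicit FP is
// disallowed, a 64-bit target falls back to MVT::i64 chunks.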
323
324bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
325  if (VT == MVT::f32)
326 return Subtarget.hasSSE1();
327 if (VT == MVT::f64)
328 return Subtarget.hasSSE2();
329 return true;
330}
331
332static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
333 return (8 * Alignment.value()) % SizeInBits == 0;
334}
335
336bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
337  if (isBitAligned(Alignment, VT.getSizeInBits()))
338 return true;
339 switch (VT.getSizeInBits()) {
340 default:
341 // 8-byte and under are always assumed to be fast.
342 return true;
343 case 128:
344 return !Subtarget.isUnalignedMem16Slow();
345 case 256:
346 return !Subtarget.isUnalignedMem32Slow();
347 // TODO: What about AVX-512 (512-bit) accesses?
348 }
349}
350
351bool X86TargetLowering::allowsMisalignedMemoryAccesses(
352    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
353 unsigned *Fast) const {
354 if (Fast)
355 *Fast = isMemoryAccessFast(VT, Alignment);
356 // NonTemporal vector memory ops must be aligned.
357 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
358 // NT loads can only be vector aligned, so if its less aligned than the
359 // minimum vector size (which we can split the vector down to), we might as
360 // well use a regular unaligned vector load.
361 // We don't have any NT loads pre-SSE41.
362 if (!!(Flags & MachineMemOperand::MOLoad))
363 return (Alignment < 16 || !Subtarget.hasSSE41());
364 return false;
365 }
366 // Misaligned accesses of any size are always allowed.
367 return true;
368}
369
370bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
371                                           const DataLayout &DL, EVT VT,
372                                           unsigned AddrSpace, Align Alignment,
373                                           MachineMemOperand::Flags Flags,
374                                           unsigned *Fast) const {
375 if (Fast)
376 *Fast = isMemoryAccessFast(VT, Alignment);
377 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
378 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
379 /*Fast=*/nullptr))
380 return true;
381 // NonTemporal vector memory ops are special, and must be aligned.
382 if (!isBitAligned(Alignment, VT.getSizeInBits()))
383 return false;
384 switch (VT.getSizeInBits()) {
385 case 128:
386 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
387 return true;
388 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
389 return true;
390 return false;
391 case 256:
392 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
393 return true;
394 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
395 return true;
396 return false;
397 case 512:
398 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
399 return true;
400 return false;
401 default:
402 return false; // Don't have NonTemporal vector memory ops of this size.
403 }
404 }
405 return true;
406}
407
408/// Return the entry encoding for a jump table in the
409/// current function. The returned value is a member of the
410/// MachineJumpTableInfo::JTEntryKind enum.
411unsigned X86TargetLowering::getJumpTableEncoding() const {
412  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
413  // symbol.
414  if (isPositionIndependent() && Subtarget.isPICStyleGOT())
415    return MachineJumpTableInfo::EK_Custom32;
416  if (isPositionIndependent() &&
417      getTargetMachine().getCodeModel() == CodeModel::Large)
418    return MachineJumpTableInfo::EK_LabelDifference64;
419
420  // Otherwise, use the normal jump table encoding heuristics.
421  return TargetLowering::getJumpTableEncoding();
422}
423
424bool X86TargetLowering::splitValueIntoRegisterParts(
425 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
426 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
427 bool IsABIRegCopy = CC.has_value();
428 EVT ValueVT = Val.getValueType();
429 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
430 unsigned ValueBits = ValueVT.getSizeInBits();
431 unsigned PartBits = PartVT.getSizeInBits();
432 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
433 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
434 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
435 Parts[0] = Val;
436 return true;
437 }
438 return false;
439}
440
441SDValue X86TargetLowering::joinRegisterPartsIntoValue(
442 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
443 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
444 bool IsABIRegCopy = CC.has_value();
445 if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) {
446 unsigned ValueBits = ValueVT.getSizeInBits();
447 unsigned PartBits = PartVT.getSizeInBits();
448 SDValue Val = Parts[0];
449
450 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
451 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
452 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
453 return Val;
454 }
455 return SDValue();
456}
457
458bool X86TargetLowering::useSoftFloat() const {
459  return Subtarget.useSoftFloat();
460}
461
462void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
463                                              ArgListTy &Args) const {
464
465 // Only relabel X86-32 for C / Stdcall CCs.
466 if (Subtarget.is64Bit())
467 return;
468  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
469    return;
470 unsigned ParamRegs = 0;
471 if (auto *M = MF->getFunction().getParent())
472 ParamRegs = M->getNumberRegisterParameters();
473
474 // Mark the first N int arguments as having reg
475 for (auto &Arg : Args) {
476 Type *T = Arg.Ty;
477 if (T->isIntOrPtrTy())
478 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
479 unsigned numRegs = 1;
480 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
481 numRegs = 2;
482 if (ParamRegs < numRegs)
483 return;
484 ParamRegs -= numRegs;
485 Arg.IsInReg = true;
486 }
487 }
488}
489
490const MCExpr *
491X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
492                                             const MachineBasicBlock *MBB,
493                                             unsigned uid,MCContext &Ctx) const{
494  assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
495  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
496  // entries.
497  return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF,
498                                 Ctx);
499}
500
501/// Returns relocation base for the given PIC jumptable.
502SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
503                                                    SelectionDAG &DAG) const {
504 if (!Subtarget.is64Bit())
505 // This doesn't have SDLoc associated with it, but is not really the
506 // same as a Register.
507 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
508                       getPointerTy(DAG.getDataLayout()));
509  return Table;
510}
511
512/// This returns the relocation base for the given PIC jumptable,
513/// the same as getPICJumpTableRelocBase, but as an MCExpr.
514const MCExpr *X86TargetLowering::
515getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
516 MCContext &Ctx) const {
517 // X86-64 uses RIP relative addressing based on the jump table label.
518 if (Subtarget.isPICStyleRIPRel() ||
519 (Subtarget.is64Bit() &&
520 getTargetMachine().getCodeModel() == CodeModel::Large))
521    return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx, true), Ctx);
522
523 // Otherwise, the reference is relative to the PIC base.
524 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
525}
526
527std::pair<const TargetRegisterClass *, uint8_t>
528X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
529                                           MVT VT) const {
530 const TargetRegisterClass *RRC = nullptr;
531 uint8_t Cost = 1;
532 switch (VT.SimpleTy) {
533  default:
534    return TargetLowering::findRepresentativeClass(TRI, VT);
535 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
536 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
537 break;
538 case MVT::x86mmx:
539 RRC = &X86::VR64RegClass;
540 break;
541 case MVT::f32: case MVT::f64:
542 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
543 case MVT::v4f32: case MVT::v2f64:
544 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
545 case MVT::v8f32: case MVT::v4f64:
546 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
547 case MVT::v16f32: case MVT::v8f64:
548 RRC = &X86::VR128XRegClass;
549 break;
550 }
551 return std::make_pair(RRC, Cost);
552}
553
554unsigned X86TargetLowering::getAddressSpace() const {
555 if (Subtarget.is64Bit())
556 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
557 return 256;
558}
559
560static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
561 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
562 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
563}
564
564
565static Constant* SegmentOffset(IRBuilderBase &IRB,
566                               int Offset, unsigned AddressSpace) {
567  return ConstantExpr::getIntToPtr(
568      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
569      IRB.getPtrTy(AddressSpace));
570}
571
572Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
571
573 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
574 // tcbhead_t; use it instead of the usual global variable (see
575 // sysdeps/{i386,x86_64}/nptl/tls.h)
576 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
577 unsigned AddressSpace = getAddressSpace();
578
579 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
580 if (Subtarget.isTargetFuchsia())
581 return SegmentOffset(IRB, 0x10, AddressSpace);
582
583 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
584 // Specially, some users may customize the base reg and offset.
585 int Offset = M->getStackProtectorGuardOffset();
586 // If we don't set -stack-protector-guard-offset value:
587 // %fs:0x28, unless we're using a Kernel code model, in which case
588 // it's %gs:0x28. gs:0x14 on i386.
589 if (Offset == INT_MAX)
590 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
591
592 StringRef GuardReg = M->getStackProtectorGuardReg();
593    if (GuardReg == "fs")
594      AddressSpace = X86AS::FS;
595    else if (GuardReg == "gs")
596      AddressSpace = X86AS::GS;
597
598    // Use the symbol guard if the user specified one.
599 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
600 if (!GuardSymb.empty()) {
601 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
602 if (!GV) {
603 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
604 : Type::getInt32Ty(M->getContext());
605 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
606                                nullptr, GuardSymb, nullptr,
607                                GlobalValue::NotThreadLocal, AddressSpace);
608 if (!Subtarget.isTargetDarwin())
609 GV->setDSOLocal(M->getDirectAccessExternalData());
610 }
611 return GV;
612 }
613
614 return SegmentOffset(IRB, Offset, AddressSpace);
615 }
616  return TargetLowering::getIRStackGuard(IRB);
617}
618
619void X86TargetLowering::insertSSPDeclarations(Module &M) const {
620  // MSVC CRT provides functionalities for stack protection.
621  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
622      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
623    // MSVC CRT has a global variable holding security cookie.
624 M.getOrInsertGlobal("__security_cookie",
625 PointerType::getUnqual(M.getContext()));
626
627 // MSVC CRT has a function to validate security cookie.
628 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
629 "__security_check_cookie", Type::getVoidTy(M.getContext()),
630 PointerType::getUnqual(M.getContext()));
631 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
632 F->setCallingConv(CallingConv::X86_FastCall);
633 F->addParamAttr(0, Attribute::AttrKind::InReg);
634 }
635 return;
636 }
637
638 StringRef GuardMode = M.getStackProtectorGuard();
639
640 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
641  if ((GuardMode == "tls" || GuardMode.empty()) &&
642      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
643    return;
644  TargetLowering::insertSSPDeclarations(M);
645}
646
647Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
648  // MSVC CRT has a global variable holding security cookie.
649  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
650      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
651    return M.getGlobalVariable("__security_cookie");
652  }
653  return TargetLowering::getSDagStackGuard(M);
654}
655
656Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
657  // MSVC CRT has a function to validate security cookie.
658  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
659      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
660    return M.getFunction("__security_check_cookie");
661  }
662  return TargetLowering::getSSPStackGuardCheck(M);
663}
664
665Value *
666X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
667 // Android provides a fixed TLS slot for the SafeStack pointer. See the
668 // definition of TLS_SLOT_SAFESTACK in
669 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
670 if (Subtarget.isTargetAndroid()) {
671 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
672 // %gs:0x24 on i386
673 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
674 return SegmentOffset(IRB, Offset, getAddressSpace());
675 }
676
677 // Fuchsia is similar.
678 if (Subtarget.isTargetFuchsia()) {
679 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
680 return SegmentOffset(IRB, 0x18, getAddressSpace());
681 }
682
683  return TargetLowering::getSafeStackPointerLocation(IRB);
684}
685
686//===----------------------------------------------------------------------===//
687// Return Value Calling Convention Implementation
688//===----------------------------------------------------------------------===//
689
690bool X86TargetLowering::CanLowerReturn(
691 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
692 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
693  SmallVector<CCValAssign, 16> RVLocs;
694  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
695 return CCInfo.CheckReturn(Outs, RetCC_X86);
696}
697
698const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
699 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
700 return ScratchRegs;
701}
702
703ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
704 // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
705 // tests at the moment, which is not what we expected.
706 static const MCPhysReg RCRegs[] = {X86::MXCSR};
707 return RCRegs;
708}
709
710/// Lowers masks values (v*i1) to the local register values
711/// \returns DAG node after lowering to register type
712static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
713 const SDLoc &DL, SelectionDAG &DAG) {
714 EVT ValVT = ValArg.getValueType();
715
716 if (ValVT == MVT::v1i1)
717 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
718 DAG.getIntPtrConstant(0, DL));
719
720 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
721 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
722 // Two stage lowering might be required
723 // bitcast: v8i1 -> i8 / v16i1 -> i16
724 // anyextend: i8 -> i32 / i16 -> i32
725 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
726 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
727 if (ValLoc == MVT::i32)
728 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
729 return ValToCopy;
730 }
731
732 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
733 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
734 // One stage lowering is required
735 // bitcast: v32i1 -> i32 / v64i1 -> i64
736 return DAG.getBitcast(ValLoc, ValArg);
737 }
738
739 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
740}
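// Editorial example: returning a v16i1 mask in a 32-bit location goes through
// the two-stage path above (bitcast v16i1 -> i16, then ANY_EXTEND i16 -> i32),
// while a v32i1 mask returned in a 32-bit location needs only the single
// bitcast v32i1 -> i32.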
741
742/// Breaks v64i1 value into two registers and adds the new node to the DAG
743static void Passv64i1ArgInRegs(
744    const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
745 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
746 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
747 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
748 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
749 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
750 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
751 "The value should reside in two registers");
752
753 // Before splitting the value we cast it to i64
754 Arg = DAG.getBitcast(MVT::i64, Arg);
755
756 // Splitting the value into two i32 types
757 SDValue Lo, Hi;
758 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
759
760 // Attach the two i32 types into corresponding registers
761 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
762 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
763}
764
765SDValue
766X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
767                               bool isVarArg,
768                               const SmallVectorImpl<ISD::OutputArg> &Outs,
769                               const SmallVectorImpl<SDValue> &OutVals,
770                               const SDLoc &dl, SelectionDAG &DAG) const {
771  MachineFunction &MF = DAG.getMachineFunction();
772  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
773
774 // In some cases we need to disable registers from the default CSR list.
775 // For example, when they are used as return registers (preserve_* and X86's
776 // regcall) or for argument passing (X86's regcall).
777 bool ShouldDisableCalleeSavedRegister =
778 shouldDisableRetRegFromCSR(CallConv) ||
779 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
780
781 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
782 report_fatal_error("X86 interrupts may not return any value");
783
784  SmallVector<CCValAssign, 16> RVLocs;
785  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
786 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
787
788  SmallVector<std::pair<Register, SDValue>, 4> RetVals;
789  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
790 ++I, ++OutsIndex) {
791 CCValAssign &VA = RVLocs[I];
792 assert(VA.isRegLoc() && "Can only return in registers!");
793
794 // Add the register to the CalleeSaveDisableRegs list.
795 if (ShouldDisableCalleeSavedRegister)
796      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
797
798 SDValue ValToCopy = OutVals[OutsIndex];
799 EVT ValVT = ValToCopy.getValueType();
800
801 // Promote values to the appropriate types.
802 if (VA.getLocInfo() == CCValAssign::SExt)
803 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
804 else if (VA.getLocInfo() == CCValAssign::ZExt)
805 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
806 else if (VA.getLocInfo() == CCValAssign::AExt) {
807 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
808 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
809 else
810 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
811 }
812 else if (VA.getLocInfo() == CCValAssign::BCvt)
813 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
814
815    assert(VA.getLocInfo() != CCValAssign::FPExt &&
816           "Unexpected FP-extend for return value.");
817
818 // Report an error if we have attempted to return a value via an XMM
819 // register and SSE was disabled.
820 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
821 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
822 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
823 } else if (!Subtarget.hasSSE2() &&
824 X86::FR64XRegClass.contains(VA.getLocReg()) &&
825 ValVT == MVT::f64) {
826 // When returning a double via an XMM register, report an error if SSE2 is
827 // not enabled.
828 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
829 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
830 }
831
832 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
833 // the RET instruction and handled by the FP Stackifier.
834 if (VA.getLocReg() == X86::FP0 ||
835 VA.getLocReg() == X86::FP1) {
836 // If this is a copy from an xmm register to ST(0), use an FPExtend to
837 // change the value to the FP stack register class.
838      if (isScalarFPTypeInSSEReg(VA.getValVT()))
839        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
840 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
841 // Don't emit a copytoreg.
842 continue;
843 }
844
845 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
846 // which is returned in RAX / RDX.
847 if (Subtarget.is64Bit()) {
848 if (ValVT == MVT::x86mmx) {
849 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
850 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
851 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
852 ValToCopy);
853 // If we don't have SSE2 available, convert to v4f32 so the generated
854 // register is legal.
855 if (!Subtarget.hasSSE2())
856 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
857 }
858 }
859 }
860
861 if (VA.needsCustom()) {
862 assert(VA.getValVT() == MVT::v64i1 &&
863 "Currently the only custom case is when we split v64i1 to 2 regs");
864
865 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
866 Subtarget);
867
868 // Add the second register to the CalleeSaveDisableRegs list.
869 if (ShouldDisableCalleeSavedRegister)
870 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
871 } else {
872 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
873 }
874 }
875
876 SDValue Glue;
877  SmallVector<SDValue, 6> RetOps;
878  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
879 // Operand #1 = Bytes To Pop
880 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
881 MVT::i32));
882
883 // Copy the result values into the output registers.
884 for (auto &RetVal : RetVals) {
885 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
886 RetOps.push_back(RetVal.second);
887 continue; // Don't emit a copytoreg.
888 }
889
890 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
891 Glue = Chain.getValue(1);
892 RetOps.push_back(
893 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
894 }
895
896 // Swift calling convention does not require we copy the sret argument
897 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
898
899 // All x86 ABIs require that for returning structs by value we copy
900 // the sret argument into %rax/%eax (depending on ABI) for the return.
901 // We saved the argument into a virtual register in the entry block,
902 // so now we copy the value out and into %rax/%eax.
903 //
904 // Checking Function.hasStructRetAttr() here is insufficient because the IR
905 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
906 // false, then an sret argument may be implicitly inserted in the SelDAG. In
907 // either case FuncInfo->setSRetReturnReg() will have been called.
908 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
909 // When we have both sret and another return value, we should use the
910 // original Chain stored in RetOps[0], instead of the current Chain updated
911 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
912
913 // For the case of sret and another return value, we have
914 // Chain_0 at the function entry
915 // Chain_1 = getCopyToReg(Chain_0) in the above loop
916 // If we use Chain_1 in getCopyFromReg, we will have
917 // Val = getCopyFromReg(Chain_1)
918 // Chain_2 = getCopyToReg(Chain_1, Val) from below
919
920 // getCopyToReg(Chain_0) will be glued together with
921 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
922 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
923 // Data dependency from Unit B to Unit A due to usage of Val in
924 // getCopyToReg(Chain_1, Val)
925 // Chain dependency from Unit A to Unit B
926
927 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
928 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
929                                   getPointerTy(MF.getDataLayout()));
930
931 Register RetValReg
932 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
933 X86::RAX : X86::EAX;
934 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
935 Glue = Chain.getValue(1);
936
937 // RAX/EAX now acts like a return value.
938 RetOps.push_back(
939 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
940
941 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
942 // this however for preserve_most/preserve_all to minimize the number of
943 // callee-saved registers for these CCs.
944 if (ShouldDisableCalleeSavedRegister &&
945 CallConv != CallingConv::PreserveAll &&
946 CallConv != CallingConv::PreserveMost)
947      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
948  }
949
950 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
951 const MCPhysReg *I =
952 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
953 if (I) {
954 for (; *I; ++I) {
955 if (X86::GR64RegClass.contains(*I))
956 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
957 else
958 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
959 }
960 }
961
962 RetOps[0] = Chain; // Update chain.
963
964 // Add the glue if we have it.
965 if (Glue.getNode())
966 RetOps.push_back(Glue);
967
969 if (CallConv == CallingConv::X86_INTR)
970 opcode = X86ISD::IRET;
971 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
972}
973
974bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
975 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
976 return false;
977
978 SDValue TCChain = Chain;
979 SDNode *Copy = *N->use_begin();
980 if (Copy->getOpcode() == ISD::CopyToReg) {
981 // If the copy has a glue operand, we conservatively assume it isn't safe to
982 // perform a tail call.
983 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
984 return false;
985 TCChain = Copy->getOperand(0);
986 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
987 return false;
988
989 bool HasRet = false;
990 for (const SDNode *U : Copy->uses()) {
991 if (U->getOpcode() != X86ISD::RET_GLUE)
992 return false;
993 // If we are returning more than one value, we can definitely
994 // not make a tail call see PR19530
995 if (U->getNumOperands() > 4)
996 return false;
997 if (U->getNumOperands() == 4 &&
998 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
999 return false;
1000 HasRet = true;
1001 }
1002
1003 if (!HasRet)
1004 return false;
1005
1006 Chain = TCChain;
1007 return true;
1008}
1009
1010EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
1011 ISD::NodeType ExtendKind) const {
1012 MVT ReturnMVT = MVT::i32;
1013
1014 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
1015 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
1016 // The ABI does not require i1, i8 or i16 to be extended.
1017 //
1018 // On Darwin, there is code in the wild relying on Clang's old behaviour of
1019 // always extending i8/i16 return values, so keep doing that for now.
1020 // (PR26665).
1021 ReturnMVT = MVT::i8;
1022 }
1023
1024 EVT MinVT = getRegisterType(Context, ReturnMVT);
1025 return VT.bitsLT(MinVT) ? MinVT : VT;
1026}
1027
1028/// Reads two 32 bit registers and creates a 64 bit mask value.
1029/// \param VA The current 32 bit value that needs to be assigned.
1030/// \param NextVA The next 32 bit value that needs to be assigned.
1031/// \param Root The parent DAG node.
1032/// \param [in,out] InGlue Represents SDvalue in the parent DAG node for
1033/// glue purposes. In the case the DAG is already using
1034/// physical register instead of virtual, we should glue
1035/// our new SDValue to InGlue SDvalue.
1036/// \return a new SDvalue of size 64bit.
1037static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1038                                SDValue &Root, SelectionDAG &DAG,
1039 const SDLoc &DL, const X86Subtarget &Subtarget,
1040 SDValue *InGlue = nullptr) {
1041 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1042 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1043 assert(VA.getValVT() == MVT::v64i1 &&
1044 "Expecting first location of 64 bit width type");
1045 assert(NextVA.getValVT() == VA.getValVT() &&
1046 "The locations should have the same type");
1047 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1048 "The values should reside in two registers");
1049
1050 SDValue Lo, Hi;
1051 SDValue ArgValueLo, ArgValueHi;
1052
1053  MachineFunction &MF = DAG.getMachineFunction();
1054  const TargetRegisterClass *RC = &X86::GR32RegClass;
1055
1056 // Read a 32 bit value from the registers.
1057 if (nullptr == InGlue) {
1058 // When no physical register is present,
1059 // create an intermediate virtual register.
1060 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1061 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1062 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1063 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1064 } else {
1065 // When a physical register is available read the value from it and glue
1066 // the reads together.
1067 ArgValueLo =
1068 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1069 *InGlue = ArgValueLo.getValue(2);
1070 ArgValueHi =
1071 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1072 *InGlue = ArgValueHi.getValue(2);
1073 }
1074
1075 // Convert the i32 type into v32i1 type.
1076 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1077
1078 // Convert the i32 type into v32i1 type.
1079 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1080
1081 // Concatenate the two values together.
1082 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1083}
1084
1085/// The function will lower a register of various sizes (8/16/32/64)
1086/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1087/// \returns a DAG node contains the operand after lowering to mask type.
1088static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1089 const EVT &ValLoc, const SDLoc &DL,
1090 SelectionDAG &DAG) {
1091 SDValue ValReturned = ValArg;
1092
1093 if (ValVT == MVT::v1i1)
1094 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1095
1096 if (ValVT == MVT::v64i1) {
1097 // In 32 bit machine, this case is handled by getv64i1Argument
1098 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1099 // In 64 bit machine, There is no need to truncate the value only bitcast
1100 } else {
1101 MVT MaskLenVT;
1102 switch (ValVT.getSimpleVT().SimpleTy) {
1103 case MVT::v8i1:
1104 MaskLenVT = MVT::i8;
1105 break;
1106 case MVT::v16i1:
1107 MaskLenVT = MVT::i16;
1108 break;
1109 case MVT::v32i1:
1110 MaskLenVT = MVT::i32;
1111 break;
1112 default:
1113 llvm_unreachable("Expecting a vector of i1 types");
1114 }
1115
1116 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1117 }
1118 return DAG.getBitcast(ValVT, ValReturned);
1119}
1120
1121/// Lower the result values of a call into the
1122/// appropriate copies out of appropriate physical registers.
1123///
1124SDValue X86TargetLowering::LowerCallResult(
1125 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1126 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1127    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1128    uint32_t *RegMask) const {
1129
1130 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1131 // Assign locations to each value returned by this call.
1132  SmallVector<CCValAssign, 16> RVLocs;
1133  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1134 *DAG.getContext());
1135 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1136
1137 // Copy all of the result registers out of their specified physreg.
1138 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1139 ++I, ++InsIndex) {
1140 CCValAssign &VA = RVLocs[I];
1141 EVT CopyVT = VA.getLocVT();
1142
1143 // In some calling conventions we need to remove the used registers
1144 // from the register mask.
1145 if (RegMask) {
1146 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1147 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1148 }
1149
1150 // Report an error if there was an attempt to return FP values via XMM
1151 // registers.
1152 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1153 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1154 if (VA.getLocReg() == X86::XMM1)
1155 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1156 else
1157 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1158 } else if (!Subtarget.hasSSE2() &&
1159 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1160 CopyVT == MVT::f64) {
1161 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1162 if (VA.getLocReg() == X86::XMM1)
1163 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1164 else
1165 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1166 }
1167
1168 // If we prefer to use the value in xmm registers, copy it out as f80 and
1169 // use a truncate to move it from fp stack reg to xmm reg.
1170 bool RoundAfterCopy = false;
1171 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1172        isScalarFPTypeInSSEReg(VA.getValVT())) {
1173      if (!Subtarget.hasX87())
1174 report_fatal_error("X87 register return with X87 disabled");
1175 CopyVT = MVT::f80;
1176 RoundAfterCopy = (CopyVT != VA.getLocVT());
1177 }
1178
1179 SDValue Val;
1180 if (VA.needsCustom()) {
1181 assert(VA.getValVT() == MVT::v64i1 &&
1182 "Currently the only custom case is when we split v64i1 to 2 regs");
1183 Val =
1184 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1185 } else {
1186 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1187 .getValue(1);
1188 Val = Chain.getValue(0);
1189 InGlue = Chain.getValue(2);
1190 }
1191
1192 if (RoundAfterCopy)
1193 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1194 // This truncation won't change the value.
1195 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1196
1197 if (VA.isExtInLoc()) {
1198 if (VA.getValVT().isVector() &&
1199 VA.getValVT().getScalarType() == MVT::i1 &&
1200 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1201 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1202 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1203 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1204 } else
1205 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1206 }
1207
1208 if (VA.getLocInfo() == CCValAssign::BCvt)
1209 Val = DAG.getBitcast(VA.getValVT(), Val);
1210
1211 InVals.push_back(Val);
1212 }
1213
1214 return Chain;
1215}
1216
1217//===----------------------------------------------------------------------===//
1218// C & StdCall & Fast Calling Convention implementation
1219//===----------------------------------------------------------------------===//
1220// StdCall calling convention seems to be standard for many Windows' API
1221// routines and around. It differs from C calling convention just a little:
1222// callee should clean up the stack, not caller. Symbols should be also
1223// decorated in some fancy way :) It doesn't support any vector arguments.
1224// For info on fast calling convention see Fast Calling Convention (tail call)
1225// implementation LowerX86_32FastCCCallTo.
1226
1227/// Determines whether Args, either a set of outgoing arguments to a call, or a
1228/// set of incoming args of a call, contains an sret pointer that the callee
1229/// pops
1230template <typename T>
1231static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1232 const X86Subtarget &Subtarget) {
1233 // Not C++20 (yet), so no concepts available.
1234 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1235 std::is_same_v<T, ISD::InputArg>,
1236 "requires ISD::OutputArg or ISD::InputArg");
1237
1238 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1239 // for most compilations.
1240 if (!Subtarget.is32Bit())
1241 return false;
1242
1243 if (Args.empty())
1244 return false;
1245
1246 // Most calls do not have an sret argument, check the arg next.
1247 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1248 if (!Flags.isSRet() || Flags.isInReg())
1249 return false;
1250
1251 // The MSVCabi does not pop the sret.
1252 if (Subtarget.getTargetTriple().isOSMSVCRT())
1253 return false;
1254
1255 // MCUs don't pop the sret
1256 if (Subtarget.isTargetMCU())
1257 return false;
1258
1259 // Callee pops argument
1260 return true;
1261}
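// Editorial note: on 32-bit Linux/ELF targets the callee pops the hidden sret
// pointer (4 bytes), so hasCalleePopSRet() returns true there, while
// MSVC-style and MCU targets leave the sret argument for the caller to pop.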
1262
1263/// Make a copy of an aggregate at address specified by "Src" to address
1264/// "Dst" with size and alignment information specified by the specific
1265/// parameter attribute. The copy will be passed as a byval function parameter.
1266static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1267                                         SDValue Chain, ISD::ArgFlagsTy Flags,
1268 SelectionDAG &DAG, const SDLoc &dl) {
1269 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1270
1271 return DAG.getMemcpy(
1272 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1273 /*isVolatile*/ false, /*AlwaysInline=*/true,
1274 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
1275}
1276
1277/// Return true if the calling convention is one that we can guarantee TCO for.
1278static bool canGuaranteeTCO(CallingConv::ID CC) {
1279  return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1280          CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1281          CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1282}
1283
1284/// Return true if we might ever do TCO for calls with this calling convention.
1285static bool mayTailCallThisCC(CallingConv::ID CC) {
1286  switch (CC) {
1287  // C calling conventions:
1288  case CallingConv::C:
1289  case CallingConv::Win64:
1290  case CallingConv::X86_64_SysV:
1291  // Callee pop conventions:
1292  case CallingConv::X86_ThisCall:
1293  case CallingConv::X86_StdCall:
1294  case CallingConv::X86_VectorCall:
1295  case CallingConv::X86_FastCall:
1296  // Swift:
1297  case CallingConv::Swift:
1298 return true;
1299 default:
1300 return canGuaranteeTCO(CC);
1301 }
1302}
1303
1304/// Return true if the function is being made into a tailcall target by
1305/// changing its ABI.
1306static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1307 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1308         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1309}
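// Editorial example: a fastcc call compiled with -tailcallopt
// (GuaranteedTailCallOpt) is forced into the tail-call ABI by
// shouldGuaranteeTCO(); tailcc and swifttailcc get the guarantee even without
// the flag.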
1310
1311bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1312 if (!CI->isTailCall())
1313 return false;
1314
1315 CallingConv::ID CalleeCC = CI->getCallingConv();
1316 if (!mayTailCallThisCC(CalleeCC))
1317 return false;
1318
1319 return true;
1320}
1321
1322SDValue
1323X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1324                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1325                                    const SDLoc &dl, SelectionDAG &DAG,
1326 const CCValAssign &VA,
1327 MachineFrameInfo &MFI, unsigned i) const {
1328 // Create the nodes corresponding to a load from this parameter slot.
1329 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1330 bool AlwaysUseMutable = shouldGuaranteeTCO(
1331 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1332 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1333 EVT ValVT;
1334 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1335
1336 // If value is passed by pointer we have address passed instead of the value
1337 // itself. No need to extend if the mask value and location share the same
1338 // absolute size.
1339 bool ExtendedInMem =
1340 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1341      VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1342
1343 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1344 ValVT = VA.getLocVT();
1345 else
1346 ValVT = VA.getValVT();
1347
1348 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1349 // changed with more analysis.
1350 // In case of tail call optimization mark all arguments mutable. Since they
1351 // could be overwritten by lowering of arguments in case of a tail call.
1352 if (Flags.isByVal()) {
1353 unsigned Bytes = Flags.getByValSize();
1354 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1355
1356 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1357 // can be improved with deeper analysis.
1358 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1359 /*isAliased=*/true);
1360 return DAG.getFrameIndex(FI, PtrVT);
1361 }
1362
1363 EVT ArgVT = Ins[i].ArgVT;
1364
1365 // If this is a vector that has been split into multiple parts, don't elide
1366 // the copy. The layout on the stack may not match the packed in-memory
1367 // layout.
1368 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1369
1370 // This is an argument in memory. We might be able to perform copy elision.
1371 // If the argument is passed directly in memory without any extension, then we
1372 // can perform copy elision. Large vector types, for example, may be passed
1373 // indirectly by pointer.
1374 if (Flags.isCopyElisionCandidate() &&
1375 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1376 !ScalarizedVector) {
1377 SDValue PartAddr;
1378 if (Ins[i].PartOffset == 0) {
1379 // If this is a one-part value or the first part of a multi-part value,
1380 // create a stack object for the entire argument value type and return a
1381 // load from our portion of it. This assumes that if the first part of an
1382 // argument is in memory, the rest will also be in memory.
1383 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1384 /*IsImmutable=*/false);
1385 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1386 return DAG.getLoad(
1387 ValVT, dl, Chain, PartAddr,
1388          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1389    }
1390
1391 // This is not the first piece of an argument in memory. See if there is
1392 // already a fixed stack object including this offset. If so, assume it
1393 // was created by the PartOffset == 0 branch above and create a load from
1394 // the appropriate offset into it.
1395 int64_t PartBegin = VA.getLocMemOffset();
1396 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1397 int FI = MFI.getObjectIndexBegin();
1398 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1399 int64_t ObjBegin = MFI.getObjectOffset(FI);
1400 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1401 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1402 break;
1403 }
1404 if (MFI.isFixedObjectIndex(FI)) {
1405 SDValue Addr =
1406 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1407 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1408 return DAG.getLoad(ValVT, dl, Chain, Addr,
1409                         MachinePointerInfo::getFixedStack(
1410                             DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1411 }
1412 }
1413
1414 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1415 VA.getLocMemOffset(), isImmutable);
1416
1417 // Set SExt or ZExt flag.
1418 if (VA.getLocInfo() == CCValAssign::ZExt) {
1419 MFI.setObjectZExt(FI, true);
1420 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1421 MFI.setObjectSExt(FI, true);
1422 }
1423
1424 MaybeAlign Alignment;
1425 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1426 ValVT != MVT::f80)
1427 Alignment = MaybeAlign(4);
1428 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1429 SDValue Val = DAG.getLoad(
1430 ValVT, dl, Chain, FIN,
1431      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1432      Alignment);
1433 return ExtendedInMem
1434 ? (VA.getValVT().isVector()
1435 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1436 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1437 : Val;
1438}
1439
1440// FIXME: Get this from tablegen.
1441static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1442                                                const X86Subtarget &Subtarget) {
1443 assert(Subtarget.is64Bit());
1444
1445 if (Subtarget.isCallingConvWin64(CallConv)) {
1446 static const MCPhysReg GPR64ArgRegsWin64[] = {
1447 X86::RCX, X86::RDX, X86::R8, X86::R9
1448 };
1449 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1450 }
1451
1452 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1453 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1454 };
1455 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1456}
1457
1458// FIXME: Get this from tablegen.
1459static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1460                                                CallingConv::ID CallConv,
1461 const X86Subtarget &Subtarget) {
1462 assert(Subtarget.is64Bit());
1463 if (Subtarget.isCallingConvWin64(CallConv)) {
1464 // The XMM registers which might contain var arg parameters are shadowed
1465 // in their paired GPR. So we only need to save the GPR to their home
1466 // slots.
1467 // TODO: __vectorcall will change this.
1468 return std::nullopt;
1469 }
1470
1471 bool isSoftFloat = Subtarget.useSoftFloat();
1472 if (isSoftFloat || !Subtarget.hasSSE1())
1473 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1474 // registers.
1475 return std::nullopt;
1476
1477 static const MCPhysReg XMMArgRegs64Bit[] = {
1478 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1479 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1480 };
1481 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1482}
1483
1484#ifndef NDEBUG
1485static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1486  return llvm::is_sorted(
1487 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1488 return A.getValNo() < B.getValNo();
1489 });
1490}
1491#endif
1492
1493namespace {
1494/// This is a helper class for lowering variable arguments parameters.
1495class VarArgsLoweringHelper {
1496public:
1497 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1498 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1499 CallingConv::ID CallConv, CCState &CCInfo)
1500 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1501 TheMachineFunction(DAG.getMachineFunction()),
1502 TheFunction(TheMachineFunction.getFunction()),
1503 FrameInfo(TheMachineFunction.getFrameInfo()),
1504 FrameLowering(*Subtarget.getFrameLowering()),
1505 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1506 CCInfo(CCInfo) {}
1507
1508 // Lower variable arguments parameters.
1509 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1510
1511private:
1512 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1513
1514 void forwardMustTailParameters(SDValue &Chain);
1515
1516 bool is64Bit() const { return Subtarget.is64Bit(); }
1517 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1518
1519 X86MachineFunctionInfo *FuncInfo;
1520 const SDLoc &DL;
1521 SelectionDAG &DAG;
1522 const X86Subtarget &Subtarget;
1523 MachineFunction &TheMachineFunction;
1524 const Function &TheFunction;
1525 MachineFrameInfo &FrameInfo;
1526 const TargetFrameLowering &FrameLowering;
1527 const TargetLowering &TargLowering;
1528 CallingConv::ID CallConv;
1529 CCState &CCInfo;
1530};
1531} // namespace
1532
1533void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1534 SDValue &Chain, unsigned StackSize) {
1535 // If the function takes variable number of arguments, make a frame index for
1536 // the start of the first vararg value... for expansion of llvm.va_start. We
1537 // can skip this if there are no va_start calls.
1538 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1539 CallConv != CallingConv::X86_ThisCall)) {
1540 FuncInfo->setVarArgsFrameIndex(
1541 FrameInfo.CreateFixedObject(1, StackSize, true));
1542 }
1543
1544 // 64-bit calling conventions support varargs and register parameters, so we
1545 // have to do extra work to spill them in the prologue.
1546 if (is64Bit()) {
1547 // Find the first unallocated argument registers.
1548 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1549 ArrayRef<MCPhysReg> ArgXMMs =
1550 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1551 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1552 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1553
1554 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1555 "SSE register cannot be used when SSE is disabled!");
1556
1557 if (isWin64()) {
1558 // Get to the caller-allocated home save location. Add 8 to account
1559 // for the return address.
1560 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1561 FuncInfo->setRegSaveFrameIndex(
1562 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1563 // Fixup to set vararg frame on shadow area (4 x i64).
1564 if (NumIntRegs < 4)
1565 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1566 } else {
1567 // For X86-64, if there are vararg parameters that are passed via
1568 // registers, then we must store them to their spots on the stack so
1569 // they may be loaded by dereferencing the result of va_next.
1570 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1571 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1572 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1573 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1574 }
1575
1576    SmallVector<SDValue, 6>
1577        LiveGPRs; // list of SDValue for GPR registers keeping live input value
1578 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1579 // keeping live input value
1580 SDValue ALVal; // if applicable keeps SDValue for %al register
1581
1582 // Gather all the live in physical registers.
1583 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1584 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1585 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1586 }
1587 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1588 if (!AvailableXmms.empty()) {
1589 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1590 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1591 for (MCPhysReg Reg : AvailableXmms) {
1592 // FastRegisterAllocator spills virtual registers at basic
1593 // block boundary. That leads to usages of xmm registers
1594 // outside of check for %al. Pass physical registers to
1595        // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1596 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1597 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1598 }
1599 }
1600
1601 // Store the integer parameter registers.
1602 SmallVector<SDValue, 8> MemOps;
1603 SDValue RSFIN =
1604 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1605 TargLowering.getPointerTy(DAG.getDataLayout()));
1606 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1607 for (SDValue Val : LiveGPRs) {
1608 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1609 TargLowering.getPointerTy(DAG.getDataLayout()),
1610 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1611 SDValue Store =
1612 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1613 MachinePointerInfo::getFixedStack(
1614 DAG.getMachineFunction(),
1615 FuncInfo->getRegSaveFrameIndex(), Offset));
1616 MemOps.push_back(Store);
1617 Offset += 8;
1618 }
1619
1620 // Now store the XMM (fp + vector) parameter registers.
1621 if (!LiveXMMRegs.empty()) {
1622 SmallVector<SDValue, 12> SaveXMMOps;
1623 SaveXMMOps.push_back(Chain);
1624 SaveXMMOps.push_back(ALVal);
1625 SaveXMMOps.push_back(RSFIN);
1626 SaveXMMOps.push_back(
1627 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1628 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1629 MachineMemOperand *StoreMMO =
1630 DAG.getMachineFunction().getMachineMemOperand(
1631 MachinePointerInfo::getFixedStack(
1632 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1633 Offset),
1634 MachineMemOperand::MOStore, 128, Align(16));
1635 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1636 DL, DAG.getVTList(MVT::Other),
1637 SaveXMMOps, MVT::i8, StoreMMO));
1638 }
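// Rough sketch of what the node built above becomes (consistent with the
// comment about the %al check earlier, but the exact expansion is an
// assumption about a later pass): VASTART_SAVE_XMM_REGS is expanded after
// instruction selection into a test of %al followed by a run of XMM stores,
// so the spills into the register save area only execute when the caller
// actually passed vector/FP varargs.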
1639
1640 if (!MemOps.empty())
1641 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1642 }
1643}
1644
1645void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1646 // Find the largest legal vector type.
1647 MVT VecVT = MVT::Other;
1648 // FIXME: Only some x86_32 calling conventions support AVX512.
1649 if (Subtarget.useAVX512Regs() &&
1650 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1651 CallConv == CallingConv::Intel_OCL_BI)))
1652 VecVT = MVT::v16f32;
1653 else if (Subtarget.hasAVX())
1654 VecVT = MVT::v8f32;
1655 else if (Subtarget.hasSSE2())
1656 VecVT = MVT::v4f32;
1657
1658 // We forward some GPRs and some vector types.
1659 SmallVector<MVT, 2> RegParmTypes;
1660 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1661 RegParmTypes.push_back(IntVT);
1662 if (VecVT != MVT::Other)
1663 RegParmTypes.push_back(VecVT);
1664
1665 // Compute the set of forwarded registers. The rest are scratch.
1666 SmallVectorImpl<ForwardedRegister> &Forwards =
1667 FuncInfo->getForwardedMustTailRegParms();
1668 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1669
1670 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1671 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1672 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1673 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1674 }
1675
1676 // Copy all forwards from physical to virtual registers.
1677 for (ForwardedRegister &FR : Forwards) {
1678 // FIXME: Can we use a less constrained schedule?
1679 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1680 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1681 TargLowering.getRegClassFor(FR.VT));
1682 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1683 }
1684}
1685
1686void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1687 unsigned StackSize) {
1688 // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
1689 // If necessary, it will be set to the correct value later.
1690 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1691 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1692
1693 if (FrameInfo.hasVAStart())
1694 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1695
1696 if (FrameInfo.hasMustTailInVarArgFunc())
1697 forwardMustTailParameters(Chain);
1698}
1699
1700SDValue X86TargetLowering::LowerFormalArguments(
1701 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1702 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1703 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1704 MachineFunction &MF = DAG.getMachineFunction();
1705 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1706
1707 const Function &F = MF.getFunction();
1708 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1709 F.getName() == "main")
1710 FuncInfo->setForceFramePointer(true);
1711
1712 MachineFrameInfo &MFI = MF.getFrameInfo();
1713 bool Is64Bit = Subtarget.is64Bit();
1714 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1715
1716 assert(
1717 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1718 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1719
1720 // Assign locations to all of the incoming arguments.
1721 SmallVector<CCValAssign, 16> ArgLocs;
1722 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1723
1724 // Allocate shadow area for Win64.
1725 if (IsWin64)
1726 CCInfo.AllocateStack(32, Align(8));
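// Background note (Win64 ABI fact): the 32 bytes reserved here are the four
// 8-byte "home" slots the caller must provide for RCX/RDX/R8/R9, so
// stack-relative argument offsets begin at +32 even when every integer
// argument arrives in a register.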
1727
1728 CCInfo.AnalyzeArguments(Ins, CC_X86);
1729
1730 // In vectorcall calling convention a second pass is required for the HVA
1731 // types.
1732 if (CallingConv::X86_VectorCall == CallConv) {
1733 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1734 }
1735
1736 // The next loop assumes that the locations are in the same order as the
1737 // input arguments.
1738 assert(isSortedByValueNo(ArgLocs) &&
1739 "Argument Location list must be sorted before lowering");
1740
1741 SDValue ArgValue;
1742 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1743 ++I, ++InsIndex) {
1744 assert(InsIndex < Ins.size() && "Invalid Ins index");
1745 CCValAssign &VA = ArgLocs[I];
1746
1747 if (VA.isRegLoc()) {
1748 EVT RegVT = VA.getLocVT();
1749 if (VA.needsCustom()) {
1750 assert(
1751 VA.getValVT() == MVT::v64i1 &&
1752 "Currently the only custom case is when we split v64i1 to 2 regs");
1753
1754 // v64i1 values, in regcall calling convention, that are
1755 // compiled to 32 bit arch, are split up into two registers.
1756 ArgValue =
1757 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1758 } else {
1759 const TargetRegisterClass *RC;
1760 if (RegVT == MVT::i8)
1761 RC = &X86::GR8RegClass;
1762 else if (RegVT == MVT::i16)
1763 RC = &X86::GR16RegClass;
1764 else if (RegVT == MVT::i32)
1765 RC = &X86::GR32RegClass;
1766 else if (Is64Bit && RegVT == MVT::i64)
1767 RC = &X86::GR64RegClass;
1768 else if (RegVT == MVT::f16)
1769 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1770 else if (RegVT == MVT::f32)
1771 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1772 else if (RegVT == MVT::f64)
1773 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1774 else if (RegVT == MVT::f80)
1775 RC = &X86::RFP80RegClass;
1776 else if (RegVT == MVT::f128)
1777 RC = &X86::VR128RegClass;
1778 else if (RegVT.is512BitVector())
1779 RC = &X86::VR512RegClass;
1780 else if (RegVT.is256BitVector())
1781 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1782 else if (RegVT.is128BitVector())
1783 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1784 else if (RegVT == MVT::x86mmx)
1785 RC = &X86::VR64RegClass;
1786 else if (RegVT == MVT::v1i1)
1787 RC = &X86::VK1RegClass;
1788 else if (RegVT == MVT::v8i1)
1789 RC = &X86::VK8RegClass;
1790 else if (RegVT == MVT::v16i1)
1791 RC = &X86::VK16RegClass;
1792 else if (RegVT == MVT::v32i1)
1793 RC = &X86::VK32RegClass;
1794 else if (RegVT == MVT::v64i1)
1795 RC = &X86::VK64RegClass;
1796 else
1797 llvm_unreachable("Unknown argument type!");
1798
1799 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1800 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1801 }
1802
1803 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1804 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1805 // right size.
1806 if (VA.getLocInfo() == CCValAssign::SExt)
1807 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1808 DAG.getValueType(VA.getValVT()));
1809 else if (VA.getLocInfo() == CCValAssign::ZExt)
1810 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1811 DAG.getValueType(VA.getValVT()));
1812 else if (VA.getLocInfo() == CCValAssign::BCvt)
1813 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1814
1815 if (VA.isExtInLoc()) {
1816 // Handle MMX values passed in XMM regs.
1817 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1818 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1819 else if (VA.getValVT().isVector() &&
1820 VA.getValVT().getScalarType() == MVT::i1 &&
1821 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1822 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1823 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1824 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1825 } else
1826 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1827 }
1828 } else {
1829 assert(VA.isMemLoc());
1830 ArgValue =
1831 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1832 }
1833
1834 // If value is passed via pointer - do a load.
1835 if (VA.getLocInfo() == CCValAssign::Indirect &&
1836 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1837 ArgValue =
1838 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1839 }
1840
1841 InVals.push_back(ArgValue);
1842 }
1843
1844 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1845 if (Ins[I].Flags.isSwiftAsync()) {
1846 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1847 if (Subtarget.is64Bit())
1848 X86FI->setHasSwiftAsyncContext(true);
1849 else {
1850 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
1851 X86FI->setSwiftAsyncContextFrameIdx(FI);
1852 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
1853 DAG.getFrameIndex(FI, MVT::i32),
1854 MachinePointerInfo::getFixedStack(MF, FI));
1855 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1856 }
1857 }
1858
1859 // Swift calling convention does not require we copy the sret argument
1860 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1861 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1862 continue;
1863
1864 // All x86 ABIs require that for returning structs by value we copy the
1865 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1866 // the argument into a virtual register so that we can access it from the
1867 // return points.
1868 if (Ins[I].Flags.isSRet()) {
1869 assert(!FuncInfo->getSRetReturnReg() &&
1870 "SRet return has already been set");
1871 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1872 Register Reg =
1873 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1874 FuncInfo->setSRetReturnReg(Reg);
1875 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1876 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1877 break;
1878 }
1879 }
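// Background note (x86 ABI fact, no extra lowering implied): callers of an
// sret function may rely on the hidden struct-return pointer coming back in
// %rax (64-bit) or %eax (32-bit), which is why the incoming sret argument is
// stashed in SRetReturnReg here and copied out again when the return is
// lowered.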
1880
1881 unsigned StackSize = CCInfo.getStackSize();
1882 // Align stack specially for tail calls.
1883 if (shouldGuaranteeTCO(CallConv,
1884 MF.getTarget().Options.GuaranteedTailCallOpt))
1885 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1886
1887 if (IsVarArg)
1888 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1889 .lowerVarArgsParameters(Chain, StackSize);
1890
1891 // Some CCs need callee pop.
1892 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1893 MF.getTarget().Options.GuaranteedTailCallOpt)) {
1894 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1895 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1896 // X86 interrupts must pop the error code (and the alignment padding) if
1897 // present.
1898 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1899 } else {
1900 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1901 // If this is an sret function, the return should pop the hidden pointer.
1902 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1903 FuncInfo->setBytesToPopOnReturn(4);
1904 }
1905
1906 if (!Is64Bit) {
1907 // RegSaveFrameIndex is X86-64 only.
1908 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1909 }
1910
1911 FuncInfo->setArgumentStackSize(StackSize);
1912
1913 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1914 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1915 if (Personality == EHPersonality::CoreCLR) {
1916 assert(Is64Bit);
1917 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1918 // that we'd prefer this slot be allocated towards the bottom of the frame
1919 // (i.e. near the stack pointer after allocating the frame). Every
1920 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1921 // offset from the bottom of this and each funclet's frame must be the
1922 // same, so the size of funclets' (mostly empty) frames is dictated by
1923 // how far this slot is from the bottom (since they allocate just enough
1924 // space to accommodate holding this slot at the correct offset).
1925 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1926 EHInfo->PSPSymFrameIdx = PSPSymFI;
1927 }
1928 }
1929
1930 if (shouldDisableArgRegFromCSR(CallConv) ||
1931 F.hasFnAttribute("no_caller_saved_registers")) {
1932 MachineRegisterInfo &MRI = MF.getRegInfo();
1933 for (std::pair<Register, Register> Pair : MRI.liveins())
1934 MRI.disableCalleeSavedRegister(Pair.first);
1935 }
1936
1937 return Chain;
1938}
1939
1940SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1941 SDValue Arg, const SDLoc &dl,
1942 SelectionDAG &DAG,
1943 const CCValAssign &VA,
1944 ISD::ArgFlagsTy Flags,
1945 bool isByVal) const {
1946 unsigned LocMemOffset = VA.getLocMemOffset();
1947 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1948 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1949 StackPtr, PtrOff);
1950 if (isByVal)
1951 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1952
1953 MaybeAlign Alignment;
1954 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1955 Arg.getSimpleValueType() != MVT::f80)
1956 Alignment = MaybeAlign(4);
1957 return DAG.getStore(
1958 Chain, dl, Arg, PtrOff,
1959 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1960 Alignment);
1961}
1962
1963/// Emit a load of return address if tail call
1964/// optimization is performed and it is required.
1965SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1966 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1967 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1968 // Adjust the Return address stack slot.
1969 EVT VT = getPointerTy(DAG.getDataLayout());
1970 OutRetAddr = getReturnAddressFrameIndex(DAG);
1971
1972 // Load the "old" Return address.
1973 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1974 return SDValue(OutRetAddr.getNode(), 1);
1975}
1976
1977/// Emit a store of the return address if tail call
1978/// optimization is performed and it is required (FPDiff!=0).
1979 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1980 SDValue Chain, SDValue RetAddrFrIdx,
1981 EVT PtrVT, unsigned SlotSize,
1982 int FPDiff, const SDLoc &dl) {
1983 // Store the return address to the appropriate stack slot.
1984 if (!FPDiff) return Chain;
1985 // Calculate the new stack slot for the return address.
1986 int NewReturnAddrFI =
1987 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1988 false);
1989 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1990 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1991 MachinePointerInfo::getFixedStack(
1992 DAG.getMachineFunction(), NewReturnAddrFI));
1993 return Chain;
1994}
1995
1996 /// Returns a vector_shuffle mask for a movs{s|d} or movd
1997 /// operation of the specified width.
1998SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1999 SDValue V1, SDValue V2) const {
2000 unsigned NumElems = VT.getVectorNumElements();
2001 SmallVector<int, 8> Mask;
2002 Mask.push_back(NumElems);
2003 for (unsigned i = 1; i != NumElems; ++i)
2004 Mask.push_back(i);
2005 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2006}
2007
2008SDValue
2009X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2010 SmallVectorImpl<SDValue> &InVals) const {
2011 SelectionDAG &DAG = CLI.DAG;
2012 SDLoc &dl = CLI.DL;
2013 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2014 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2015 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2016 SDValue Chain = CLI.Chain;
2017 SDValue Callee = CLI.Callee;
2018 CallingConv::ID CallConv = CLI.CallConv;
2019 bool &isTailCall = CLI.IsTailCall;
2020 bool isVarArg = CLI.IsVarArg;
2021 const auto *CB = CLI.CB;
2022
2023 MachineFunction &MF = DAG.getMachineFunction();
2024 bool Is64Bit = Subtarget.is64Bit();
2025 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2026 bool IsSibcall = false;
2027 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2028 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2029 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2030 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2031 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2032 CB->hasFnAttr("no_caller_saved_registers"));
2033 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2034 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2035 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2036 const Module *M = MF.getMMI().getModule();
2037 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2038
2039 MachineFunction::CallSiteInfo CSInfo;
2040 if (CallConv == CallingConv::X86_INTR)
2041 report_fatal_error("X86 interrupts may not be called directly");
2042
2043 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2044 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2045 // If we are using a GOT, disable tail calls to external symbols with
2046 // default visibility. Tail calling such a symbol requires using a GOT
2047 // relocation, which forces early binding of the symbol. This breaks code
2048 // that requires lazy function symbol resolution. Using musttail or
2049 // GuaranteedTailCallOpt will override this.
2050 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2051 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2052 G->getGlobal()->hasDefaultVisibility()))
2053 isTailCall = false;
2054 }
2055
2056 if (isTailCall && !IsMustTail) {
2057 // Check if it's really possible to do a tail call.
2058 isTailCall = IsEligibleForTailCallOptimization(
2059 Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
2060 Ins, DAG);
2061
2062 // Sibcalls are automatically detected tailcalls which do not require
2063 // ABI changes.
2064 if (!IsGuaranteeTCO && isTailCall)
2065 IsSibcall = true;
2066
2067 if (isTailCall)
2068 ++NumTailCalls;
2069 }
2070
2071 if (IsMustTail && !isTailCall)
2072 report_fatal_error("failed to perform tail call elimination on a call "
2073 "site marked musttail");
2074
2075 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2076 "Var args not supported with calling convention fastcc, ghc or hipe");
2077
2078 // Analyze operands of the call, assigning locations to each operand.
2079 SmallVector<CCValAssign, 16> ArgLocs;
2080 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2081
2082 // Allocate shadow area for Win64.
2083 if (IsWin64)
2084 CCInfo.AllocateStack(32, Align(8));
2085
2086 CCInfo.AnalyzeArguments(Outs, CC_X86);
2087
2088 // In vectorcall calling convention a second pass is required for the HVA
2089 // types.
2090 if (CallingConv::X86_VectorCall == CallConv) {
2091 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2092 }
2093
2094 // Get a count of how many bytes are to be pushed on the stack.
2095 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2096 if (IsSibcall)
2097 // This is a sibcall. The memory operands are already in place in the
2098 // caller's own incoming argument area, so no new stack space is needed.
2099 NumBytes = 0;
2100 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2101 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2102
2103 int FPDiff = 0;
2104 if (isTailCall &&
2105 shouldGuaranteeTCO(CallConv,
2106 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2107 // Lower arguments at fp - stackoffset + fpdiff.
2108 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2109
2110 FPDiff = NumBytesCallerPushed - NumBytes;
2111
2112 // Set the delta of movement of the returnaddr stackslot.
2113 // But only set if delta is greater than previous delta.
2114 if (FPDiff < X86Info->getTCReturnAddrDelta())
2115 X86Info->setTCReturnAddrDelta(FPDiff);
2116 }
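// Worked example (illustrative numbers): if the caller was built to pop 16
// bytes of its own incoming arguments but this callee needs 32 bytes of
// argument space, FPDiff is 16 - 32 = -16, so the argument area grows by 16
// bytes and the return address slot is moved down accordingly before the
// tail jump.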
2117
2118 unsigned NumBytesToPush = NumBytes;
2119 unsigned NumBytesToPop = NumBytes;
2120
2121 // If we have an inalloca argument, all stack space has already been allocated
2122 // for us and be right at the top of the stack. We don't support multiple
2123 // arguments passed in memory when using inalloca.
2124 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2125 NumBytesToPush = 0;
2126 if (!ArgLocs.back().isMemLoc())
2127 report_fatal_error("cannot use inalloca attribute on a register "
2128 "parameter");
2129 if (ArgLocs.back().getLocMemOffset() != 0)
2130 report_fatal_error("any parameter with the inalloca attribute must be "
2131 "the only memory argument");
2132 } else if (CLI.IsPreallocated) {
2133 assert(ArgLocs.back().isMemLoc() &&
2134 "cannot use preallocated attribute on a register "
2135 "parameter");
2136 SmallVector<size_t, 4> PreallocatedOffsets;
2137 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2138 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2139 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2140 }
2141 }
2142 auto *MFI = MF.getInfo<X86MachineFunctionInfo>();
2143 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2144 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2145 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2146 NumBytesToPush = 0;
2147 }
2148
2149 if (!IsSibcall && !IsMustTail)
2150 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2151 NumBytes - NumBytesToPush, dl);
2152
2153 SDValue RetAddrFrIdx;
2154 // Load return address for tail calls.
2155 if (isTailCall && FPDiff)
2156 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2157 Is64Bit, FPDiff, dl);
2158
2159 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2160 SmallVector<SDValue, 8> MemOpChains;
2161 SDValue StackPtr;
2162
2163 // The next loop assumes that the locations are in the same order as the
2164 // input arguments.
2165 assert(isSortedByValueNo(ArgLocs) &&
2166 "Argument Location list must be sorted before lowering");
2167
2168 // Walk the register/memloc assignments, inserting copies/loads. In the case
2169 // of tail call optimization, arguments are handled later.
2170 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2171 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2172 ++I, ++OutIndex) {
2173 assert(OutIndex < Outs.size() && "Invalid Out index");
2174 // Skip inalloca/preallocated arguments, they have already been written.
2175 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2176 if (Flags.isInAlloca() || Flags.isPreallocated())
2177 continue;
2178
2179 CCValAssign &VA = ArgLocs[I];
2180 EVT RegVT = VA.getLocVT();
2181 SDValue Arg = OutVals[OutIndex];
2182 bool isByVal = Flags.isByVal();
2183
2184 // Promote the value if needed.
2185 switch (VA.getLocInfo()) {
2186 default: llvm_unreachable("Unknown loc info!");
2187 case CCValAssign::Full: break;
2188 case CCValAssign::SExt:
2189 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2190 break;
2191 case CCValAssign::ZExt:
2192 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2193 break;
2194 case CCValAssign::AExt:
2195 if (Arg.getValueType().isVector() &&
2196 Arg.getValueType().getVectorElementType() == MVT::i1)
2197 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2198 else if (RegVT.is128BitVector()) {
2199 // Special case: passing MMX values in XMM registers.
2200 Arg = DAG.getBitcast(MVT::i64, Arg);
2201 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2202 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2203 } else
2204 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2205 break;
2206 case CCValAssign::BCvt:
2207 Arg = DAG.getBitcast(RegVT, Arg);
2208 break;
2209 case CCValAssign::Indirect: {
2210 if (isByVal) {
2211 // Memcpy the argument to a temporary stack slot to prevent
2212 // the caller from seeing any modifications the callee may make
2213 // as guaranteed by the `byval` attribute.
2214 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2215 Flags.getByValSize(),
2216 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2217 SDValue StackSlot =
2218 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2219 Chain =
2220 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2221 // From now on treat this as a regular pointer
2222 Arg = StackSlot;
2223 isByVal = false;
2224 } else {
2225 // Store the argument.
2226 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2227 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2228 Chain = DAG.getStore(
2229 Chain, dl, Arg, SpillSlot,
2230 MachinePointerInfo::getFixedStack(MF, FI));
2231 Arg = SpillSlot;
2232 }
2233 break;
2234 }
2235 }
2236
2237 if (VA.needsCustom()) {
2238 assert(VA.getValVT() == MVT::v64i1 &&
2239 "Currently the only custom case is when we split v64i1 to 2 regs");
2240 // Split v64i1 value into two registers
2241 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2242 } else if (VA.isRegLoc()) {
2243 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2244 const TargetOptions &Options = DAG.getTarget().Options;
2245 if (Options.EmitCallSiteInfo)
2246 CSInfo.emplace_back(VA.getLocReg(), I);
2247 if (isVarArg && IsWin64) {
2248 // The Win64 ABI requires an argument XMM reg to be copied to the corresponding
2249 // shadow reg if the callee is a varargs function.
2250 Register ShadowReg;
2251 switch (VA.getLocReg()) {
2252 case X86::XMM0: ShadowReg = X86::RCX; break;
2253 case X86::XMM1: ShadowReg = X86::RDX; break;
2254 case X86::XMM2: ShadowReg = X86::R8; break;
2255 case X86::XMM3: ShadowReg = X86::R9; break;
2256 }
2257 if (ShadowReg)
2258 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2259 }
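// Illustrative example: for a varargs call that places a double in XMM1,
// the switch above also queues the same value for RDX, because a Win64
// varargs callee reads every argument back out of the GPR home slots via
// va_arg regardless of how the caller materialized it.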
2260 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2261 assert(VA.isMemLoc());
2262 if (!StackPtr.getNode())
2263 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2264 getPointerTy(DAG.getDataLayout()));
2265 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2266 dl, DAG, VA, Flags, isByVal));
2267 }
2268 }
2269
2270 if (!MemOpChains.empty())
2271 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2272
2273 if (Subtarget.isPICStyleGOT()) {
2274 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2275 // GOT pointer (except regcall).
2276 if (!isTailCall) {
2277 // An indirect call with the RegCall calling convention may use up all the
2278 // general registers, so it is not suitable to bind EBX as the register for the
2279 // GOT address; just let the register allocator handle it.
2280 if (CallConv != CallingConv::X86_RegCall)
2281 RegsToPass.push_back(std::make_pair(
2282 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2283 getPointerTy(DAG.getDataLayout()))));
2284 } else {
2285 // If we are tail calling and generating PIC/GOT style code load the
2286 // address of the callee into ECX. The value in ecx is used as target of
2287 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2288 // for tail calls on PIC/GOT architectures. Normally we would just put the
2289 // address of GOT into ebx and then call target@PLT. But for tail calls
2290 // ebx would be restored (since ebx is callee saved) before jumping to the
2291 // target@PLT.
2292
2293 // Note: The actual moving to ECX is done further down.
2294 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2295 if (G && !G->getGlobal()->hasLocalLinkage() &&
2296 G->getGlobal()->hasDefaultVisibility())
2297 Callee = LowerGlobalAddress(Callee, DAG);
2298 else if (isa<ExternalSymbolSDNode>(Callee))
2299 Callee = LowerExternalSymbol(Callee, DAG);
2300 }
2301 }
2302
2303 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2304 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2305 // From AMD64 ABI document:
2306 // For calls that may call functions that use varargs or stdargs
2307 // (prototype-less calls or calls to functions containing ellipsis (...) in
2308 // the declaration) %al is used as hidden argument to specify the number
2309 // of SSE registers used. The contents of %al do not need to match exactly
2310 // the number of registers, but must be an upper bound on the number of SSE
2311 // registers used and must be in the range 0 - 8 inclusive.
2312
2313 // Count the number of XMM registers allocated.
2314 static const MCPhysReg XMMArgRegs[] = {
2315 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2316 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2317 };
2318 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2319 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2320 && "SSE registers cannot be used when SSE is disabled");
2321 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2322 DAG.getConstant(NumXMMRegs, dl,
2323 MVT::i8)));
2324 }
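// Illustrative example: a call such as printf("%f %f", a, b) with two
// doubles in XMM0/XMM1 reaches this point with NumXMMRegs == 2, so the
// copy-to-reg emitted below ends up as (roughly) "movb $2, %al" immediately
// before the call instruction.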
2325
2326 if (isVarArg && IsMustTail) {
2327 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2328 for (const auto &F : Forwards) {
2329 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2330 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2331 }
2332 }
2333
2334 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2335 // don't need this because the eligibility check rejects calls that require
2336 // shuffling arguments passed in memory.
2337 if (!IsSibcall && isTailCall) {
2338 // Force all the incoming stack arguments to be loaded from the stack
2339 // before any new outgoing arguments are stored to the stack, because the
2340 // outgoing stack slots may alias the incoming argument stack slots, and
2341 // the alias isn't otherwise explicit. This is slightly more conservative
2342 // than necessary, because it means that each store effectively depends
2343 // on every argument instead of just those arguments it would clobber.
2344 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2345
2346 SmallVector<SDValue, 8> MemOpChains2;
2347 SDValue FIN;
2348 int FI = 0;
2349 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2350 ++I, ++OutsIndex) {
2351 CCValAssign &VA = ArgLocs[I];
2352
2353 if (VA.isRegLoc()) {
2354 if (VA.needsCustom()) {
2355 assert((CallConv == CallingConv::X86_RegCall) &&
2356 "Expecting custom case only in regcall calling convention");
2357 // This means that we are in the special case where one argument was
2358 // passed through two register locations; skip the next location.
2359 ++I;
2360 }
2361
2362 continue;
2363 }
2364
2365 assert(VA.isMemLoc());
2366 SDValue Arg = OutVals[OutsIndex];
2367 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2368 // Skip inalloca/preallocated arguments. They don't require any work.
2369 if (Flags.isInAlloca() || Flags.isPreallocated())
2370 continue;
2371 // Create frame index.
2372 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2373 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2374 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2375 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2376
2377 if (Flags.isByVal()) {
2378 // Copy relative to framepointer.
2379 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2380 if (!StackPtr.getNode())
2381 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2382 getPointerTy(DAG.getDataLayout()));
2383 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2384 StackPtr, Source);
2385
2386 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2387 ArgChain,
2388 Flags, DAG, dl));
2389 } else {
2390 // Store relative to framepointer.
2391 MemOpChains2.push_back(DAG.getStore(
2392 ArgChain, dl, Arg, FIN,
2393 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2394 }
2395 }
2396
2397 if (!MemOpChains2.empty())
2398 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2399
2400 // Store the return address to the appropriate stack slot.
2401 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2402 getPointerTy(DAG.getDataLayout()),
2403 RegInfo->getSlotSize(), FPDiff, dl);
2404 }
2405
2406 // Build a sequence of copy-to-reg nodes chained together with token chain
2407 // and glue operands which copy the outgoing args into registers.
2408 SDValue InGlue;
2409 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2410 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2411 RegsToPass[i].second, InGlue);
2412 InGlue = Chain.getValue(1);
2413 }
2414
2415 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2416 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2417 // In the 64-bit large code model, we have to make all calls
2418 // through a register, since the call instruction's 32-bit
2419 // pc-relative offset may not be large enough to hold the whole
2420 // address.
2421 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2422 Callee->getOpcode() == ISD::ExternalSymbol) {
2423 // Lower direct calls to global addresses and external symbols. Setting
2424 // ForCall to true here has the effect of removing WrapperRIP when possible
2425 // to allow direct calls to be selected without first materializing the
2426 // address into a register.
2427 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2428 } else if (Subtarget.isTarget64BitILP32() &&
2429 Callee.getValueType() == MVT::i32) {
2430 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2431 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2432 }
2433
2434 // Returns a chain & a glue for retval copy to use.
2435 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2436 SmallVector<SDValue, 8> Ops;
2437
2438 if (!IsSibcall && isTailCall && !IsMustTail) {
2439 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2440 InGlue = Chain.getValue(1);
2441 }
2442
2443 Ops.push_back(Chain);
2444 Ops.push_back(Callee);
2445
2446 if (isTailCall)
2447 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2448
2449 // Add argument registers to the end of the list so that they are known live
2450 // into the call.
2451 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2452 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2453 RegsToPass[i].second.getValueType()));
2454
2455 // Add a register mask operand representing the call-preserved registers.
2456 const uint32_t *Mask = [&]() {
2457 auto AdaptedCC = CallConv;
2458 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2459 // use X86_INTR calling convention because it has the same CSR mask
2460 // (same preserved registers).
2461 if (HasNCSR)
2462 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2463 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2464 // to use the CSR_NoRegs_RegMask.
2465 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2466 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2467 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2468 }();
2469 assert(Mask && "Missing call preserved mask for calling convention");
2470
2471 // If this is an invoke in a 32-bit function using a funclet-based
2472 // personality, assume the function clobbers all registers. If an exception
2473 // is thrown, the runtime will not restore CSRs.
2474 // FIXME: Model this more precisely so that we can register allocate across
2475 // the normal edge and spill and fill across the exceptional edge.
2476 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2477 const Function &CallerFn = MF.getFunction();
2478 EHPersonality Pers =
2479 CallerFn.hasPersonalityFn()
2480 ? classifyEHPersonality(CallerFn.getPersonalityFn())
2481 : EHPersonality::Unknown;
2482 if (isFuncletEHPersonality(Pers))
2483 Mask = RegInfo->getNoPreservedMask();
2484 }
2485
2486 // Define a new register mask from the existing mask.
2487 uint32_t *RegMask = nullptr;
2488
2489 // In some calling conventions we need to remove the used physical registers
2490 // from the reg mask. Create a new RegMask for such calling conventions.
2491 // RegMask for calling conventions that disable only return registers (e.g.
2492 // preserve_most) will be modified later in LowerCallResult.
2493 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2494 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2495 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2496
2497 // Allocate a new Reg Mask and copy Mask.
2498 RegMask = MF.allocateRegMask();
2499 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2500 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2501
2502 // Make sure all sub registers of the argument registers are reset
2503 // in the RegMask.
2504 if (ShouldDisableArgRegs) {
2505 for (auto const &RegPair : RegsToPass)
2506 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2507 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2508 }
2509
2510 // Create the RegMask Operand according to our updated mask.
2511 Ops.push_back(DAG.getRegisterMask(RegMask));
2512 } else {
2513 // Create the RegMask Operand according to the static mask.
2514 Ops.push_back(DAG.getRegisterMask(Mask));
2515 }
2516
2517 if (InGlue.getNode())
2518 Ops.push_back(InGlue);
2519
2520 if (isTailCall) {
2521 // We used to do:
2522 //// If this is the first return lowered for this function, add the regs
2523 //// to the liveout set for the function.
2524 // This isn't right, although it's probably harmless on x86; liveouts
2525 // should be computed from returns not tail calls. Consider a void
2526 // function making a tail call to a function returning int.
2527 MF.getFrameInfo().setHasTailCall();
2528 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
2529
2530 if (IsCFICall)
2531 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2532
2533 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2534 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2535 return Ret;
2536 }
2537
2538 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2539 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2540 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2541 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2542 // expanded to the call, directly followed by a special marker sequence and
2543 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2544 assert(!isTailCall &&
2545 "tail calls cannot be marked with clang.arc.attachedcall");
2546 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2547
2548 // Add a target global address for the retainRV/claimRV runtime function
2549 // just before the call target.
2550 auto ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2551 auto PtrVT = getPointerTy(DAG.getDataLayout());
2552 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2553 Ops.insert(Ops.begin() + 1, GA);
2554 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2555 } else {
2556 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2557 }
2558
2559 if (IsCFICall)
2560 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2561
2562 InGlue = Chain.getValue(1);
2563 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2564 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2565
2566 // Save heapallocsite metadata.
2567 if (CLI.CB)
2568 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2569 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2570
2571 // Create the CALLSEQ_END node.
2572 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2573 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2574 DAG.getTarget().Options.GuaranteedTailCallOpt))
2575 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2576 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2577 // If this call passes a struct-return pointer, the callee
2578 // pops that struct pointer.
2579 NumBytesForCalleeToPop = 4;
2580
2581 // Returns a glue for retval copy to use.
2582 if (!IsSibcall) {
2583 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2584 InGlue, dl);
2585 InGlue = Chain.getValue(1);
2586 }
2587
2588 // Handle result values, copying them out of physregs into vregs that we
2589 // return.
2590 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2591 InVals, RegMask);
2592}
2593
2594//===----------------------------------------------------------------------===//
2595// Fast Calling Convention (tail call) implementation
2596//===----------------------------------------------------------------------===//
2597
2598 // Like stdcall, the callee cleans up the arguments, except that ECX is
2599 // reserved for storing the tail-called function's address. Only 2 registers are
2600// free for argument passing (inreg). Tail call optimization is performed
2601// provided:
2602// * tailcallopt is enabled
2603// * caller/callee are fastcc
2604// On X86_64 architecture with GOT-style position independent code only local
2605// (within module) calls are supported at the moment.
2606 // To keep the stack aligned according to the platform ABI, the function
2607 // GetAlignedArgumentStackSize ensures that the argument delta is always a
2608 // multiple of the stack alignment. (Dynamic linkers need this; Darwin's dyld for example.)
2609 // If a tail-called callee has more arguments than the caller, the
2610// caller needs to make sure that there is room to move the RETADDR to. This is
2611// achieved by reserving an area the size of the argument delta right after the
2612// original RETADDR, but before the saved framepointer or the spilled registers
2613// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2614// stack layout:
2615// arg1
2616// arg2
2617// RETADDR
2618// [ new RETADDR
2619// move area ]
2620// (possible EBP)
2621// ESI
2622// EDI
2623// local1 ..
2624
2625 /// Align the stack size, e.g. to 16n + 12 for a 16-byte alignment
2626 /// requirement (the pushed return address fills the remaining slot).
2627unsigned
2628X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2629 SelectionDAG &DAG) const {
2630 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2631 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2632 assert(StackSize % SlotSize == 0 &&
2633 "StackSize must be a multiple of SlotSize");
2634 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2635}
2636
2637/// Return true if the given stack call argument is already available in the
2638 /// same (relative) position of the caller's incoming argument stack.
2639static
2640 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2641 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2642 const X86InstrInfo *TII, const CCValAssign &VA) {
2643 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2644
2645 for (;;) {
2646 // Look through nodes that don't alter the bits of the incoming value.
2647 unsigned Op = Arg.getOpcode();
2648 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2649 Op == ISD::AssertZext) {
2650 Arg = Arg.getOperand(0);
2651 continue;
2652 }
2653 if (Op == ISD::TRUNCATE) {
2654 const SDValue &TruncInput = Arg.getOperand(0);
2655 if (TruncInput.getOpcode() == ISD::AssertZext &&
2656 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2657 Arg.getValueType()) {
2658 Arg = TruncInput.getOperand(0);
2659 continue;
2660 }
2661 }
2662 break;
2663 }
2664
2665 int FI = INT_MAX;
2666 if (Arg.getOpcode() == ISD::CopyFromReg) {
2667 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2668 if (!VR.isVirtual())
2669 return false;
2670 MachineInstr *Def = MRI->getVRegDef(VR);
2671 if (!Def)
2672 return false;
2673 if (!Flags.isByVal()) {
2674 if (!TII->isLoadFromStackSlot(*Def, FI))
2675 return false;
2676 } else {
2677 unsigned Opcode = Def->getOpcode();
2678 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2679 Opcode == X86::LEA64_32r) &&
2680 Def->getOperand(1).isFI()) {
2681 FI = Def->getOperand(1).getIndex();
2682 Bytes = Flags.getByValSize();
2683 } else
2684 return false;
2685 }
2686 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2687 if (Flags.isByVal())
2688 // ByVal argument is passed in as a pointer but it's now being
2689 // dereferenced. e.g.
2690 // define @foo(%struct.X* %A) {
2691 // tail call @bar(%struct.X* byval %A)
2692 // }
2693 return false;
2694 SDValue Ptr = Ld->getBasePtr();
2695 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2696 if (!FINode)
2697 return false;
2698 FI = FINode->getIndex();
2699 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2700 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2701 FI = FINode->getIndex();
2702 Bytes = Flags.getByValSize();
2703 } else
2704 return false;
2705
2706 assert(FI != INT_MAX);
2707 if (!MFI.isFixedObjectIndex(FI))
2708 return false;
2709
2710 if (Offset != MFI.getObjectOffset(FI))
2711 return false;
2712
2713 // If this is not byval, check that the argument stack object is immutable.
2714 // inalloca and argument copy elision can create mutable argument stack
2715 // objects. Byval objects can be mutated, but a byval call intends to pass the
2716 // mutated memory.
2717 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2718 return false;
2719
2720 if (VA.getLocVT().getFixedSizeInBits() >
2721 Arg.getValueSizeInBits().getFixedValue()) {
2722 // If the argument location is wider than the argument type, check that any
2723 // extension flags match.
2724 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2725 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2726 return false;
2727 }
2728 }
2729
2730 return Bytes == MFI.getObjectSize(FI);
2731}
2732
2733/// Check whether the call is eligible for tail call optimization. Targets
2734/// that want to do tail call optimization should implement this function.
2735bool X86TargetLowering::IsEligibleForTailCallOptimization(
2736 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
2737 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
2738 const SmallVectorImpl<SDValue> &OutVals,
2739 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
2740 if (!mayTailCallThisCC(CalleeCC))
2741 return false;
2742
2743 // If -tailcallopt is specified, make fastcc functions tail-callable.
2744 MachineFunction &MF = DAG.getMachineFunction();
2745 const Function &CallerF = MF.getFunction();
2746
2747 // If the function return type is x86_fp80 and the callee return type is not,
2748 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2749 // perform a tailcall optimization here.
2750 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
2751 return false;
2752
2753 CallingConv::ID CallerCC = CallerF.getCallingConv();
2754 bool CCMatch = CallerCC == CalleeCC;
2755 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2756 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2757 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2758 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2759
2760 // Win64 functions have extra shadow space for argument homing. Don't do the
2761 // sibcall if the caller and callee have mismatched expectations for this
2762 // space.
2763 if (IsCalleeWin64 != IsCallerWin64)
2764 return false;
2765
2766 if (IsGuaranteeTCO) {
2767 if (canGuaranteeTCO(CalleeCC) && CCMatch)
2768 return true;
2769 return false;
2770 }
2771
2772 // Look for obvious safe cases to perform tail call optimization that do not
2773 // require ABI changes. This is what gcc calls sibcall.
2774
2775 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2776 // emit a special epilogue.
2777 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2778 if (RegInfo->hasStackRealignment(MF))
2779 return false;
2780
2781 // Also avoid sibcall optimization if we're an sret return fn and the callee
2782 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2783 // insufficient.
2784 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2785 // For a compatible tail call the callee must return our sret pointer. So it
2786 // needs to be (a) an sret function itself and (b) we pass our sret as its
2787 // sret. Condition #b is harder to determine.
2788 return false;
2789 } else if (IsCalleePopSRet)
2790 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2791 // expect that.
2792 return false;
2793
2794 // Do not sibcall optimize vararg calls unless all arguments are passed via
2795 // registers.
2796 LLVMContext &C = *DAG.getContext();
2797 if (isVarArg && !Outs.empty()) {
2798 // Optimizing for varargs on Win64 is unlikely to be safe without
2799 // additional testing.
2800 if (IsCalleeWin64 || IsCallerWin64)
2801 return false;
2802
2803 SmallVector<CCValAssign, 16> ArgLocs;
2804 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2805 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2806 for (const auto &VA : ArgLocs)
2807 if (!VA.isRegLoc())
2808 return false;
2809 }
2810
2811 // If the call result is in ST0 / ST1, it needs to be popped off the x87
2812 // stack. Therefore, if it's not used by the call it is not safe to optimize
2813 // this into a sibcall.
2814 bool Unused = false;
2815 for (const auto &In : Ins) {
2816 if (!In.Used) {
2817 Unused = true;
2818 break;
2819 }
2820 }
2821 if (Unused) {
2822 SmallVector<CCValAssign, 16> RVLocs;
2823 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
2824 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2825 for (const auto &VA : RVLocs) {
2826 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2827 return false;
2828 }
2829 }
2830
2831 // Check that the call results are passed in the same way.
2832 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2833 RetCC_X86, RetCC_X86))
2834 return false;
2835 // The callee has to preserve all registers the caller needs to preserve.
2836 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2837 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2838 if (!CCMatch) {
2839 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2840 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2841 return false;
2842 }
2843
2844 unsigned StackArgsSize = 0;
2845
2846 // If the callee takes no arguments then go on to check the results of the
2847 // call.
2848 if (!Outs.empty()) {
2849 // Check if stack adjustment is needed. For now, do not do this if any
2850 // argument is passed on the stack.
2851 SmallVector<CCValAssign, 16> ArgLocs;
2852 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2853
2854 // Allocate shadow area for Win64
2855 if (IsCalleeWin64)
2856 CCInfo.AllocateStack(32, Align(8));
2857
2858 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2859 StackArgsSize = CCInfo.getStackSize();
2860
2861 if (CCInfo.getStackSize()) {
2862 // Check if the arguments are already laid out in the right way as
2863 // the caller's fixed stack objects.
2864 MachineFrameInfo &MFI = MF.getFrameInfo();
2865 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2866 const X86InstrInfo *TII = Subtarget.getInstrInfo();
2867 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2868 const CCValAssign &VA = ArgLocs[I];
2869 SDValue Arg = OutVals[I];
2870 ISD::ArgFlagsTy Flags = Outs[I].Flags;
2871 if (VA.getLocInfo() == CCValAssign::Indirect)
2872 return false;
2873 if (!VA.isRegLoc()) {
2874 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2875 TII, VA))
2876 return false;
2877 }
2878 }
2879 }
2880
2881 bool PositionIndependent = isPositionIndependent();
2882 // If the tailcall address may be in a register, then make sure it's
2883 // possible to register allocate for it. In 32-bit, the call address can
2884 // only target EAX, EDX, or ECX since the tail call must be scheduled after
2885 // callee-saved registers are restored. These happen to be the same
2886 // registers used to pass 'inreg' arguments so watch out for those.
2887 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2888 !isa<ExternalSymbolSDNode>(Callee)) ||
2889 PositionIndependent)) {
2890 unsigned NumInRegs = 0;
2891 // In PIC we need an extra register to formulate the address computation
2892 // for the callee.
2893 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2894
2895 for (const auto &VA : ArgLocs) {
2896 if (!VA.isRegLoc())
2897 continue;
2898 Register Reg = VA.getLocReg();
2899 switch (Reg) {
2900 default: break;
2901 case X86::EAX: case X86::EDX: case X86::ECX:
2902 if (++NumInRegs == MaxInRegs)
2903 return false;
2904 break;
2905 }
2906 }
2907 }
2908
2909 const MachineRegisterInfo &MRI = MF.getRegInfo();
2910 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2911 return false;
2912 }
2913
2914 bool CalleeWillPop =
2915 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2916 MF.getTarget().Options.GuaranteedTailCallOpt);
2917
2918 if (unsigned BytesToPop =
2919 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
2920 // If we have bytes to pop, the callee must pop them.
2921 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2922 if (!CalleePopMatches)
2923 return false;
2924 } else if (CalleeWillPop && StackArgsSize > 0) {
2925 // If we don't have bytes to pop, make sure the callee doesn't pop any.
2926 return false;
2927 }
2928
2929 return true;
2930}
2931
2932/// Determines whether the callee is required to pop its own arguments.
2933/// Callee pop is necessary to support tail calls.
2934 bool X86::isCalleePop(CallingConv::ID CallingConv,
2935 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2936 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2937 // can guarantee TCO.
2938 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2939 return true;
2940
2941 switch (CallingConv) {
2942 default:
2943 return false;
2944 case CallingConv::X86_StdCall:
2945 case CallingConv::X86_FastCall:
2946 case CallingConv::X86_ThisCall:
2947 case CallingConv::X86_VectorCall:
2948 return !is64Bit;
2949 }
2950}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
return RetTy
uint64_t Addr
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
static LVOptions Options
Definition: LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
This file defines ARC utility functions which are used by various parts of the compiler.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
static constexpr uint32_t RegMask
Definition: aarch32.h:221
static constexpr uint32_t Opcode
Definition: aarch32.h:200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
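A small, self-contained sketch of ArrayRef::size and ArrayRef::slice (values illustrative):

    #include "llvm/ADT/ArrayRef.h"
    static void arrayRefExample() {
      int Storage[] = {10, 20, 30, 40};
      llvm::ArrayRef<int> A(Storage);           // non-owning view over the array
      llvm::ArrayRef<int> Mid = A.slice(1, 2);  // drop the first 1 element, keep 2 -> {20, 30}
      size_t N = A.size();                      // 4
      (void)Mid; (void)N;
    }
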
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
CCState - This class holds information needed while lowering arguments and return values.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
void convertToReg(unsigned RegNo)
bool isExtInLoc() const
int64_t getLocMemOffset() const
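A hedged fragment showing how CCState and CCValAssign are typically used when lowering a call. It assumes the usual in-tree LowerCall context (DAG, MF, CallConv, IsVarArg and Outs in scope) and is a sketch, not a copy of this file's code:

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeCallOperands(Outs, CC_X86);   // fills ArgLocs with one location per argument
    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isRegLoc()) {
        // the argument travels in VA.getLocReg(), possibly extended per VA.getLocInfo()
      } else if (VA.isMemLoc()) {
        // the argument is stored to the stack at VA.getLocMemOffset()
      }
    }
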
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1507
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2042
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
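A self-contained sketch combining ConstantInt::get and ConstantExpr::getIntToPtr to build an integer-to-pointer constant in a non-default address space; the offset and address-space number are illustrative, not values taken from this file:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/Casting.h"
    static void intToPtrConstantExample() {
      llvm::LLVMContext Ctx;
      llvm::Constant *Off = llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 0x28);
      llvm::Constant *P = llvm::ConstantExpr::getIntToPtr(
          Off, llvm::PointerType::get(Ctx, /*AddressSpace=*/257));
      uint64_t V = llvm::cast<llvm::ConstantInt>(Off)->getZExtValue();  // 0x28
      (void)P; (void)V;
    }
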
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
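A self-contained sketch of DataLayout::getTypeAllocSize using the default layout rules (empty layout string; real targets supply their own data-layout string):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    static void dataLayoutExample() {
      llvm::LLVMContext Ctx;
      llvm::DataLayout DL("");  // default layout rules
      uint64_t Bytes =
          DL.getTypeAllocSize(llvm::Type::getInt64Ty(Ctx)).getFixedValue();  // 8
      (void)Bytes;
    }
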
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:846
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1867
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:666
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
void setDSOLocal(bool Local)
Definition: GlobalValue.h:299
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
LLVMContext & getContext() const
Definition: IRBuilder.h:176
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:563
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:346
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:389
Metadata node.
Definition: Metadata.h:1037
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
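A hedged sketch of creating frame objects with MachineFrameInfo. It assumes an in-tree backend context where a MachineFunction MF is in scope; the sizes and offsets are illustrative:

    MachineFrameInfo &MFI = MF.getFrameInfo();
    // a 16-byte, 16-byte-aligned spill slot
    int SpillFI = MFI.CreateStackObject(16, Align(16), /*isSpillSlot=*/true);
    // an incoming stack argument 8 bytes above the entry SP that must not be modified
    int ArgFI = MFI.CreateFixedObject(8, /*SPOffset=*/8, /*IsImmutable=*/true);
    int64_t SpillSize = MFI.getObjectSize(SpillFI);  // 16
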
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Representation of each machine instruction.
Definition: MachineInstr.h:68
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
const Module * getModule() const
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
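A small sketch of the Register wrapper (index 0 is illustrative):

    #include "llvm/CodeGen/Register.h"
    static void registerExample() {
      llvm::Register R = llvm::Register::index2VirtReg(0);  // first virtual register number
      bool IsVirt = R.isVirtual();                          // true
      (void)IsVirt;
    }
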
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
void setCFIType(uint32_t Type)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:720
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo)
Set CallSiteInfo to be associated with Node.
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:771
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:674
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:797
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
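A hedged fragment showing the CALLSEQ_START/CALLSEQ_END bracketing that every lowered call is wrapped in. It assumes an in-tree LowerCall-style context (DAG, dl, Chain, InGlue and NumBytes in scope) and is a sketch, not this file's exact code:

    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, /*OutSize=*/0, dl);
    // ... register copies / stores of the outgoing arguments and the call node go here ...
    Chain = DAG.getCALLSEQ_END(Chain,
                               DAG.getIntPtrConstant(NumBytes, dl, /*isTarget=*/true),
                               DAG.getIntPtrConstant(0, dl, /*isTarget=*/true),
                               InGlue, dl);
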
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
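A self-contained sketch of the SmallVector operations listed above:

    #include "llvm/ADT/SmallVector.h"
    static void smallVectorExample() {
      llvm::SmallVector<unsigned, 4> Regs;  // inline storage for 4 elements, heap beyond that
      Regs.push_back(1u);
      Regs.emplace_back(2u);
      Regs.insert(Regs.begin(), 0u);        // Regs == {0, 1, 2}
      bool E = Regs.empty();                // false
      (void)E;
    }
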
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
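A one-line sketch of StringRef (the literal is illustrative):

    #include "llvm/ADT/StringRef.h"
    static void stringRefExample() {
      llvm::StringRef Name("memcpy");  // non-owning view; the bytes must outlive the StringRef
      bool E = Name.empty();           // false
      (void)E;
    }
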
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isAndroidVersionLT(unsigned Major) const
Definition: Triple.h:730
bool isAndroid() const
Tests whether the target is Android.
Definition: Triple.h:728
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:626
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit).
Definition: Triple.h:517
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:662
bool isOSFuchsia() const
Definition: Triple.h:547
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:593
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:608
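A self-contained sketch of the Triple queries listed above (the triple string is illustrative):

    #include "llvm/TargetParser/Triple.h"
    static void tripleExample() {
      llvm::Triple T("x86_64-unknown-linux-gnu");
      bool Darwin = T.isOSDarwin();   // false
      bool Glibc = T.isOSGlibc();     // true for a *-linux-gnu triple
      bool Android = T.isAndroid();   // false
      (void)Darwin; (void)Glibc; (void)Android;
    }
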
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:160
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
void setArgumentStackSize(unsigned size)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
unsigned getSlotSize() const
bool hasSSE1() const
Definition: X86Subtarget.h:200
bool useLight256BitInstructions() const
Definition: X86Subtarget.h:271
bool isPICStyleGOT() const
Definition: X86Subtarget.h:341
bool isTargetMCU() const
Definition: X86Subtarget.h:310
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:313
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:180
bool isTargetDarwin() const
Definition: X86Subtarget.h:293
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:291
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:129
bool useAVX512Regs() const
Definition: X86Subtarget.h:266
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:350
bool hasAVX512() const
Definition: X86Subtarget.h:208
bool hasSSE41() const
Definition: X86Subtarget.h:204
bool hasSSE2() const
Definition: X86Subtarget.h:201
bool isTargetFuchsia() const
Definition: X86Subtarget.h:311
bool isPICStyleRIPRel() const
Definition: X86Subtarget.h:342
bool isTargetCygMing() const
Definition: X86Subtarget.h:333
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:139
bool hasAVX() const
Definition: X86Subtarget.h:206
unsigned getPreferVectorWidth() const
Definition: X86Subtarget.h:239
bool isTargetAndroid() const
Definition: X86Subtarget.h:306
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:131
bool hasAVX2() const
Definition: X86Subtarget.h:207
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
bool useSoftFloat() const override
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
Return the target-specific address of the unsafe stack pointer; on some targets it is stored at a fixed offset in a non-standard address space.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:188
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:148
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition: CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:170
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:119
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:96
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
Definition: CallingConv.h:160
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
Definition: CallingConv.h:144
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:156
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameter transfer optimization.
Definition: CallingConv.h:200
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:100
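A self-contained sketch of attaching one of these calling-convention IDs to a function at the IR level; the module and function names are illustrative:

    #include "llvm/IR/CallingConv.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    static void callingConvExample() {
      llvm::LLVMContext Ctx;
      llvm::Module M("demo", Ctx);
      llvm::FunctionType *FT =
          llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), /*isVarArg=*/false);
      llvm::Function *F =
          llvm::Function::Create(FT, llvm::Function::ExternalLinkage, "f", M);
      F->setCallingConv(llvm::CallingConv::X86_RegCall);
      bool IsRegCall = F->getCallingConv() == llvm::CallingConv::X86_RegCall;  // true
      (void)IsRegCall;
    }
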
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ FS
Definition: X86.h:206
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
@ RET_GLUE
Return with a glue operand.
@ IRET
Return from interrupt. Operand 0 is the number of bytes to pop.
@ CALL
These operations represent an abstract X86 call instruction, which includes a bunch of information.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ TC_RETURN
Tail call return.
@ NT_CALL
Same as call except it adds the NoTrack prefix.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
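For example, under the 32-bit stdcall convention the callee pops its non-variadic arguments, while the default C convention leaves that to the caller. A hedged sketch of querying this, assuming the declaration of this helper in the llvm::X86 namespace in X86ISelLowering.h:

    bool CalleePops = llvm::X86::isCalleePop(llvm::CallingConv::X86_StdCall,
                                             /*is64Bit=*/false,
                                             /*IsVarArg=*/false,
                                             /*GuaranteeTCO=*/false);  // expected: true
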
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition: ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition: ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
AddressSpace
Definition: NVPTXBaseInfo.h:21
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2042
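A self-contained sketch of append_range:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    static void appendRangeExample() {
      llvm::SmallVector<int, 8> Dst = {1, 2};
      const int More[] = {3, 4, 5};
      llvm::append_range(Dst, More);  // Dst == {1, 2, 3, 4, 5}
    }
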
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
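For example, since isPowerOf2_32 is constexpr it can be checked at compile time:

    #include "llvm/Support/MathExtras.h"
    static_assert(llvm::isPowerOf2_32(64), "64 is a power of two");
    static_assert(!llvm::isPowerOf2_32(0), "zero is not a power of two > 0");
    static_assert(!llvm::isPowerOf2_32(48), "48 is not a power of two");
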
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1906
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
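A self-contained sketch tying Align and alignTo together (values illustrative):

    #include "llvm/Support/Alignment.h"
    static void alignExample() {
      llvm::Align A(16);                       // must be a non-zero power of two
      uint64_t Raw = A.value();                // 16
      uint64_t Padded = llvm::alignTo(13, A);  // 16: smallest multiple of 16 that holds 13 bytes
      (void)Raw; (void)Padded;
    }
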
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:373
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:283
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:333
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:196
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:206
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:160
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:201
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:319
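A self-contained sketch of building and querying an EVT, here the v64i1 mask type that this file splits across registers for some calling conventions:

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/LLVMContext.h"
    static void evtExample() {
      llvm::LLVMContext Ctx;
      llvm::EVT V = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i1, 64);  // v64i1
      unsigned N = V.getVectorNumElements();                         // 64
      bool Vec = V.isVector();                                       // true
      (void)N; (void)Vec;
    }
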
Describes a register that needs to be forwarded from the prologue to a musttail call.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals