//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include <string>

using namespace llvm;

static cl::opt<bool> EnableBranchHint("enable-branch-hint",
                                      cl::desc("Enable branch hint."),
                                      cl::init(false), cl::Hidden);
static cl::opt<unsigned> BranchHintProbabilityThreshold(
    "branch-hint-probability-threshold",
    cl::desc("The probability threshold of enabling branch hint."),
    cl::init(50), cl::Hidden);
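// Illustrative invocation of the (hidden) flags above; exact surrounding
// options will vary with the build:
//   llc -enable-branch-hint -branch-hint-probability-threshold=60 foo.ll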

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  MCOperand LowerMachineOperand(const MachineInstr *MI,
                                const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};
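// Typical usage (a minimal sketch): construct the scope on the stack so that
// padding is suppressed for everything emitted while it is live.
//
//   {
//     NoAutoPaddingScope NoPadScope(*OutStreamer);
//     // ... emit instructions that must stay contiguous ...
//   } // the previous auto-padding behavior is restored here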

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

void X86AsmPrinter::StackMapShadowTracker::count(const MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}
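// Worked example (illustrative): a stackmap requests an 8-byte shadow. If only
// 5 bytes of real instructions follow it before the next stackmap or label,
// emitShadowPadding() emits 3 bytes of nops, so runtime patching of the shadow
// region cannot overwrite unrelated code.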

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(asmprinter.OutContext), MF(mf), TM(mf.getTarget()),
      MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  uint16_t Specifier = X86::S_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
    // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    Specifier = X86::S_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, X86::S_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    Specifier = uint16_t(X86::S_COFF_SECREL);
    break;
  case X86II::MO_TLSGD:
    Specifier = X86::S_TLSGD;
    break;
  case X86II::MO_TLSLD:
    Specifier = X86::S_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    Specifier = X86::S_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    Specifier = X86::S_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    Specifier = X86::S_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    Specifier = X86::S_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    Specifier = X86::S_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    Specifier = X86::S_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    Specifier = X86::S_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    Specifier = X86::S_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    Specifier = X86::S_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    Specifier = X86::S_GOT;
    break;
  case X86II::MO_GOTOFF:
    Specifier = X86::S_GOTOFF;
    break;
  case X86II::MO_PLT:
    Specifier = X86::S_PLT;
    break;
  case X86II::MO_ABS8:
    Specifier = X86::S_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, Specifier, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
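// Example (illustrative): a global-address operand "foo" carrying target flag
// MO_GOTPCREL and an offset of 8 lowers to the expression foo@GOTPCREL+8.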

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                              const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return MCOperand();
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return MCOperand();
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto Op = LowerMachineOperand(MI, MO); Op.isValid())
      OutMI.addOperand(Op);

  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    MCRegister DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize them as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations of the ZF bit, so make
    // sure it won't be used later.
    const MachineOperand *FlagDef =
        MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = getSubtarget().is64Bit();
  bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
  MCContext &Ctx = OutStreamer->getContext();

  uint16_t Specifier;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    Specifier = X86::S_TLSGD;
    break;
  case X86::TLS_base_addr32:
    Specifier = X86::S_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    Specifier = X86::S_TLSLD;
    break;
  case X86::TLS_desc32:
  case X86::TLS_desc64:
    Specifier = X86::S_TLSDESC;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), Specifier, Ctx);

  // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when rustc no longer relies on the hack
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getTargetOptions()->X86RelaxRelocations;

  if (Specifier == X86::S_TLSDESC) {
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), X86::S_TLSCALL,
        Ctx);
    EmitAndCountInstruction(
        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addReg(Is64Bits ? X86::RIP : X86::EBX)
            .addImm(1)
            .addReg(0)
            .addExpr(Sym)
            .addReg(0));
    EmitAndCountInstruction(
        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addImm(1)
            .addReg(0)
            .addExpr(Expr)
            .addReg(0));
  } else if (Is64Bits) {
    bool NeedsPadding = Specifier == X86::S_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, X86::S_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr, X86::S_PLT, Ctx)));
    }
  } else {
    if (Specifier == X86::S_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(TlsGetAddr, X86::S_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr, X86::S_PLT, Ctx)));
    }
  }
}
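// For reference, the general-dynamic (GD) form emitted above for LP64 with a
// direct call is the canonical, linker-relaxable sequence (illustrative):
//
//   data16 leaq x@tlsgd(%rip), %rdi
//   data16 data16 rex64 callq __tls_get_addr@PLT
//
// The prefix bytes pad the sequence to the fixed size the linker expects when
// relaxing GD to IE/LE.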

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15-bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  } else if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target.
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
    break;
  case X86::XCHG16ar:
    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
                       *Subtarget);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.emitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       *Subtarget);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}

/// Emit the optimal amount of multi-byte nops on X86.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}
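// Worked example (illustrative): on a generic 64-bit target MaxNopLength is
// 10, so emitX86Nops(OS, 13, ST) first emits the 10-byte NOOPW form (with a
// CS segment override), then a 3-byte NOOPL to cover the remainder.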

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
    maybeEmitNopAfterCallForWindowsEH(&MI);
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto Op = MCIL.LowerMachineOperand(&FaultingMI, MO); Op.isValid())
      MI.addOperand(Op);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op = MCSymbolRefExpr::create(fentry, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction");

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
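  // The emitted check looks roughly like this (illustrative; assumes the call
  // target is in %rax, so %r10d is the scratch register):
  //
  //   movl $<negated type>, %r10d
  //   addl -(PrefixNops + 4)(%rax), %r10d  # type hash stored before target
  //   je   .Lpass
  // .Ltrap:
  //   ud2                                  # recorded in the KCFI trap section
  // .Lpass: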
  const Register AddrReg = MI.getOperand(0).getReg();
  const uint32_t Type = MI.getOperand(1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Trap);
  OutStreamer->emitLabel(Pass);
}

void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  // FIXME: Make this work on non-ELF.
  if (!TM.getTargetTriple().isOSBinFormatELF()) {
    report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
    return;
  }

  const auto &Reg = MI.getOperand(0).getReg();
  ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());

  uint64_t ShadowBase;
  int MappingScale;
  bool OrShadowOffset;
  getAddressSanitizerParams(TM.getTargetTriple(), 64, AccessInfo.CompileKernel,
                            &ShadowBase, &MappingScale, &OrShadowOffset);

  StringRef Name = AccessInfo.IsWrite ? "store" : "load";
  StringRef Op = OrShadowOffset ? "or" : "add";
  std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
                         Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
                         TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
                            .str();
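  // For example (illustrative): a 4-byte store probed through %rdi on a
  // typical add-offset shadow mapping produces a call to
  // __asan_check_store_add_4_RDI.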
  if (OrShadowOffset)
    report_fatal_error(
        "OrShadowOffset is not supported with optimized callbacks");

  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(
              OutContext.getOrCreateSymbol(SymName), OutContext)));
}

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto NextMI = std::find_if(std::next(MI.getIterator()),
                             MI.getParent()->end().getInstrIterator(),
                             [](auto &II) { return !II.isMetaInstruction(); });

  SmallString<256> Code;
  unsigned MinSize = MI.getOperand(0).getImm();

  if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) {
    // Lower the next MachineInstr to find its byte size.
    // If the next instruction is inline assembly, we skip lowering it for now,
    // and assume we should always generate NOPs.
    MCInst MCI;
    MCIL.Lower(&*NextMI, MCI);

    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
  }

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
      // rely specifically on this pattern to be able to patch a function.
      // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else {
      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
                                           MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          "Lowering patchpoint with thunks not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x0f");

  // The default C calling convention will place two arguments into %rdi and
  // %rsi -- so we only work with those.
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I));
        Op.isValid()) {
      assert(Op.isReg() && "Only support arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray custom event end.");

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
  OutStreamer->AddComment("# XRay Typed Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x14");

  // An x86-64 convention may place three arguments into %rcx, %rdx, and %r8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I));
        Op.isValid()) {
      // TODO: Is register only support adequate?
      assert(Op.isReg() && "Only supports arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray typed event end.");

  // Record the sled version.
  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute("patchable-function-entry")) {
    unsigned Num;
    if (F.getFnAttribute("patchable-function-entry")
            .getValueAsString()
            .getAsInteger(10, Num))
      return;
    emitX86Nops(*OutStreamer, Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>  // 6 bytes
  //   call <relative offset, 32-bits>  // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
}

void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   ret  # or equivalent instruction
  //   # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : drop_begin(MI.operands()))
    if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid())
      Ret.addOperand(Op);
  OutStreamer->emitInstruction(Ret, getSubtargetInfo());
  emitX86Nops(*OutStreamer, 10, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  MCInst TC;
  TC.setOpcode(convertTailJumpOpcode(MI.getOperand(0).getImm()));
  // Drop the tail jump opcode.
  auto TCOperands = drop_begin(MI.operands());
  bool IsConditional = TC.getOpcode() == X86::JCC_1;
  MCSymbol *FallthroughLabel;
  if (IsConditional) {
    // Rewrite:
    //   je target
    //
    // To:
    //   jne .fallthrough
    //   .p2align 1, ...
    // .Lxray_sled_N:
    //   SLED_CODE
    //   jmp target
    // .fallthrough:
    FallthroughLabel = OutContext.createTempSymbol();
    EmitToStreamer(
        *OutStreamer,
        MCInstBuilder(X86::JCC_1)
            .addExpr(MCSymbolRefExpr::create(FallthroughLabel, OutContext))
            .addImm(X86::GetOppositeBranchCondition(
                static_cast<X86::CondCode>(MI.getOperand(2).getImm()))));
    TC.setOpcode(X86::JMP_1);
    // Drop the condition code.
    TCOperands = drop_end(TCOperands);
  }

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  OutStreamer->emitLabel(Target);
  recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment("TAILCALL");
  for (auto &MO : TCOperands)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid())
      TC.addOperand(Op);
  OutStreamer->emitInstruction(TC, getSubtargetInfo());

  if (IsConditional)
    OutStreamer->emitLabel(FallthroughLabel);
}

static unsigned getSrcIdx(const MachineInstr *MI, unsigned SrcIdx) {
  if (X86II::isKMasked(MI->getDesc().TSFlags)) {
    // Skip mask operand.
    ++SrcIdx;
    if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
      // Skip passthru operand.
      ++SrcIdx;
    }
  }
  return SrcIdx;
}
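// Operand layout example (illustrative): for a merge-masked AVX512 op the
// operands are (dst, passthru, mask, src...), so getSrcIdx(MI, 1) returns 3;
// for a zero-masked op they are (dst, mask, src...) and it returns 2. In both
// cases the write mask sits immediately before the first source operand.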

static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI,
                                 unsigned SrcOpIdx) {
  const MachineOperand &DstOp = MI->getOperand(0);
  CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg());

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (X86II::isKMasked(MI->getDesc().TSFlags)) {
    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOpIdx - 1);
    StringRef Mask = X86ATTInstPrinter::getRegisterName(WriteMaskOp.getReg());
    CS << " {%" << Mask << "}";
    if (!X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
      CS << " {z}";
    }
  }
}

static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
                             StringRef Src2Name, ArrayRef<int> Mask) {
  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
}
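// Example (illustrative): Mask = {0, 1, SM_SentinelZero, 3} with Src1Name ==
// Src2Name == "xmm0" prints "xmm0[0,1],zero,xmm0[3]".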

static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
  std::string Comment;

  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
  StringRef Src1Name = SrcOp1.isReg()
                           ? X86ATTInstPrinter::getRegisterName(SrcOp1.getReg())
                           : "mem";
  StringRef Src2Name = SrcOp2.isReg()
                           ? X86ATTInstPrinter::getRegisterName(SrcOp2.getReg())
                           : "mem";

  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcOp1Idx);
  CS << " = ";
  printShuffleMask(CS, Src1Name, Src2Name, Mask);

  return Comment;
}

static void printConstant(const APInt &Val, raw_ostream &CS,
                          bool PrintZero = false) {
  if (Val.getBitWidth() <= 64) {
    CS << (PrintZero ? 0ULL : Val.getZExtValue());
  } else {
    // print multi-word constant as (w0,w1)
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << (PrintZero ? 0ULL : Val.getRawData()[i]);
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS,
                          bool PrintZero = false) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
  if (PrintZero)
    APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0);
  else
    Flt.toString(Str, 0, 0);
  CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
                          raw_ostream &CS, bool PrintZero = false) {
  if (isa<UndefValue>(COp)) {
    CS << "u";
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
    if (auto VTy = dyn_cast<FixedVectorType>(CI->getType())) {
      for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
        if (I != 0)
          CS << ',';
        printConstant(CI->getValue(), CS, PrintZero);
      }
    } else
      printConstant(CI->getValue(), CS, PrintZero);
  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
    if (auto VTy = dyn_cast<FixedVectorType>(CF->getType())) {
      unsigned EltBits = VTy->getScalarSizeInBits();
      unsigned E = std::min(BitWidth / EltBits, VTy->getNumElements());
      if ((BitWidth % EltBits) == 0) {
        for (unsigned I = 0; I != E; ++I) {
          if (I != 0)
            CS << ",";
          printConstant(CF->getValueAPF(), CS, PrintZero);
        }
      } else {
        CS << "?";
      }
    } else
      printConstant(CF->getValueAPF(), CS, PrintZero);
  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
    Type *EltTy = CDS->getElementType();
    bool IsInteger = EltTy->isIntegerTy();
    bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, (unsigned)CDS->getNumElements());
    if ((BitWidth % EltBits) == 0) {
      for (unsigned I = 0; I != E; ++I) {
        if (I != 0)
          CS << ",";
        if (IsInteger)
          printConstant(CDS->getElementAsAPInt(I), CS, PrintZero);
        else if (IsFP)
          printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero);
        else
          CS << "?";
      }
    } else {
      CS << "?";
    }
  } else if (auto *CV = dyn_cast<ConstantVector>(COp)) {
    unsigned EltBits = CV->getType()->getScalarSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands());
    if ((BitWidth % EltBits) == 0) {
      for (unsigned I = 0; I != E; ++I) {
        if (I != 0)
          CS << ",";
        printConstant(CV->getOperand(I), EltBits, CS, PrintZero);
      }
    } else {
      CS << "?";
    }
  } else {
    CS << "?";
  }
}
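// Example (illustrative): a <4 x i32> constant {1, 2, 3, 4} with BitWidth 128
// prints as "1,2,3,4"; elements whose value can't be recovered print as "?".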

static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
                               int SclWidth, int VecWidth,
                               const char *ShuffleComment) {
  unsigned SrcIdx = getSrcIdx(MI, 1);

  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcIdx);
  CS << " = ";

  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    CS << "[";
    printConstant(C, SclWidth, CS);
    for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
      CS << ",";
      printConstant(C, SclWidth, CS, true);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
    return; // early-out
  }

  // We didn't find a constant load, fallback to a shuffle mask decode.
  CS << ShuffleComment;
  OutStreamer.AddComment(CS.str());
}

static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
                           int Repeats, int BitWidth) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    std::string Comment;
    raw_string_ostream CS(Comment);
    printDstRegisterName(CS, MI, SrcIdx);
    CS << " = [";
    for (int l = 0; l != Repeats; ++l) {
      if (l != 0)
        CS << ",";
      printConstant(C, BitWidth, CS);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
  }
}

static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                        int SrcEltBits, int DstEltBits, bool IsSext) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  auto *C = X86::getConstantFromPool(*MI, SrcIdx);
  if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
    if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
      int NumElts = CDS->getNumElements();
      std::string Comment;
      raw_string_ostream CS(Comment);
      printDstRegisterName(CS, MI, SrcIdx);
      CS << " = [";
      for (int i = 0; i != NumElts; ++i) {
        if (i != 0)
          CS << ",";
        if (CDS->getElementType()->isIntegerTy()) {
          APInt Elt = CDS->getElementAsAPInt(i);
          Elt = IsSext ? Elt.sext(DstEltBits) : Elt.zext(DstEltBits);
          printConstant(Elt, CS);
        } else
          CS << "?";
      }
      CS << "]";
      OutStreamer.AddComment(CS.str());
      return true;
    }
  }

  return false;
}

static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                            int SrcEltBits, int DstEltBits) {
  printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, true);
}

static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                            int SrcEltBits, int DstEltBits) {
  if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, false))
    return;

  // We didn't find a constant load, fallback to a shuffle mask decode.
  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, getSrcIdx(MI, 1));
  CS << " = ";

  SmallVector<int> Mask;
  unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
  assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 &&
         "Illegal extension ratio");
  DecodeZeroExtendMask(SrcEltBits, DstEltBits, Width / DstEltBits, false, Mask);
  printShuffleMask(CS, "mem", "", Mask);

  OutStreamer.AddComment(CS.str());
}
1716
1717void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1718 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1719 assert((getSubtarget().isOSWindows() || getSubtarget().isUEFI()) &&
1720 "SEH_ instruction Windows and UEFI only");
1721
1722 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1723 if (EmitFPOData) {
1724 X86TargetStreamer *XTS =
1725 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1726 switch (MI->getOpcode()) {
1727 case X86::SEH_PushReg:
1728 XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1729 break;
1730 case X86::SEH_StackAlloc:
1731 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1732 break;
1733 case X86::SEH_StackAlign:
1734 XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1735 break;
1736 case X86::SEH_SetFrame:
1737 assert(MI->getOperand(1).getImm() == 0 &&
1738 ".cv_fpo_setframe takes no offset");
1739 XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1740 break;
1741 case X86::SEH_EndPrologue:
1742 XTS->emitFPOEndPrologue();
1743 break;
1744 case X86::SEH_SaveReg:
1745 case X86::SEH_SaveXMM:
1746 case X86::SEH_PushFrame:
1747 llvm_unreachable("SEH_ directive incompatible with FPO");
1748 break;
1749 default:
1750 llvm_unreachable("expected SEH_ instruction");
1751 }
1752 return;
1753 }
1754
1755 // Otherwise, use the .seh_ directives for all other Windows platforms.
1756 switch (MI->getOpcode()) {
1757 case X86::SEH_PushReg:
1758 OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
1759 break;
1760
1761 case X86::SEH_SaveReg:
1762 OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
1763 MI->getOperand(1).getImm());
1764 break;
1765
1766 case X86::SEH_SaveXMM:
1767 OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
1768 MI->getOperand(1).getImm());
1769 break;
1770
1771 case X86::SEH_StackAlloc:
1772 OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
1773 break;
1774
1775 case X86::SEH_SetFrame:
1776 OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
1777 MI->getOperand(1).getImm());
1778 break;
1779
1780 case X86::SEH_PushFrame:
1781 OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
1782 break;
1783
1784 case X86::SEH_EndPrologue:
1785 OutStreamer->emitWinCFIEndProlog();
1786 break;
1787
1788 case X86::SEH_BeginEpilogue:
1789 OutStreamer->emitWinCFIBeginEpilogue();
1790 break;
1791
1792 case X86::SEH_EndEpilogue:
1793 OutStreamer->emitWinCFIEndEpilogue();
1794 break;
1795
1796 case X86::SEH_UnwindV2Start:
1797 OutStreamer->emitWinCFIUnwindV2Start();
1798 break;
1799
1800 case X86::SEH_UnwindVersion:
1801 OutStreamer->emitWinCFIUnwindVersion(MI->getOperand(0).getImm());
1802 break;
1803
1804 case X86::SEH_SplitChained:
1805 OutStreamer->emitWinCFISplitChained();
1806 break;
1807
1808 default:
1809 llvm_unreachable("expected SEH_ instruction");
1810 }
1811}
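// For example (an illustrative prologue, assuming WinCFI is active): the
// pseudo sequence
//   SEH_PushReg rbp
//   SEH_StackAlloc 32
//   SEH_EndPrologue
// is lowered by the switch above to
//   .seh_pushreg %rbp
//   .seh_stackalloc 32
//   .seh_endprologue
// or, when emitting CodeView FPO data on 32-bit x86, to the corresponding
// .cv_fpo_* directives.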
1812
1813static void addConstantComments(const MachineInstr *MI,
1814 MCStreamer &OutStreamer) {
1815 switch (MI->getOpcode()) {
1816 // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1817 // a constant shuffle mask. We won't be able to do this at the MC layer
1818 // because the mask isn't an immediate.
1819 case X86::PSHUFBrm:
1820 case X86::VPSHUFBrm:
1821 case X86::VPSHUFBYrm:
1822 case X86::VPSHUFBZ128rm:
1823 case X86::VPSHUFBZ128rmk:
1824 case X86::VPSHUFBZ128rmkz:
1825 case X86::VPSHUFBZ256rm:
1826 case X86::VPSHUFBZ256rmk:
1827 case X86::VPSHUFBZ256rmkz:
1828 case X86::VPSHUFBZrm:
1829 case X86::VPSHUFBZrmk:
1830 case X86::VPSHUFBZrmkz: {
1831 unsigned SrcIdx = getSrcIdx(MI, 1);
1832 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1833 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1834 SmallVector<int, 64> Mask;
1835 DecodePSHUFBMask(C, Width, Mask);
1836 if (!Mask.empty())
1837 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1838 }
1839 break;
1840 }
1841
1842 case X86::VPERMILPSrm:
1843 case X86::VPERMILPSYrm:
1844 case X86::VPERMILPSZ128rm:
1845 case X86::VPERMILPSZ128rmk:
1846 case X86::VPERMILPSZ128rmkz:
1847 case X86::VPERMILPSZ256rm:
1848 case X86::VPERMILPSZ256rmk:
1849 case X86::VPERMILPSZ256rmkz:
1850 case X86::VPERMILPSZrm:
1851 case X86::VPERMILPSZrmk:
1852 case X86::VPERMILPSZrmkz: {
1853 unsigned SrcIdx = getSrcIdx(MI, 1);
1854 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1855 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1856 SmallVector<int, 16> Mask;
1857 DecodeVPERMILPMask(C, 32, Width, Mask);
1858 if (!Mask.empty())
1859 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1860 }
1861 break;
1862 }
1863 case X86::VPERMILPDrm:
1864 case X86::VPERMILPDYrm:
1865 case X86::VPERMILPDZ128rm:
1866 case X86::VPERMILPDZ128rmk:
1867 case X86::VPERMILPDZ128rmkz:
1868 case X86::VPERMILPDZ256rm:
1869 case X86::VPERMILPDZ256rmk:
1870 case X86::VPERMILPDZ256rmkz:
1871 case X86::VPERMILPDZrm:
1872 case X86::VPERMILPDZrmk:
1873 case X86::VPERMILPDZrmkz: {
1874 unsigned SrcIdx = getSrcIdx(MI, 1);
1875 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1876 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1877 SmallVector<int, 16> Mask;
1878 DecodeVPERMILPMask(C, 64, Width, Mask);
1879 if (!Mask.empty())
1880 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1881 }
1882 break;
1883 }
1884
1885 case X86::VPERMIL2PDrm:
1886 case X86::VPERMIL2PSrm:
1887 case X86::VPERMIL2PDYrm:
1888 case X86::VPERMIL2PSYrm: {
1889 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
1890 "Unexpected number of operands!");
1891
1892 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
1893 if (!CtrlOp.isImm())
1894 break;
1895
1896 unsigned ElSize;
1897 switch (MI->getOpcode()) {
1898 default: llvm_unreachable("Invalid opcode");
1899 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
1900 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
1901 }
1902
1903 if (auto *C = X86::getConstantFromPool(*MI, 3)) {
1904 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1905 SmallVector<int, 16> Mask;
1906 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
1907 if (!Mask.empty())
1908 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
1909 }
1910 break;
1911 }
1912
1913 case X86::VPPERMrrm: {
1914 if (auto *C = X86::getConstantFromPool(*MI, 3)) {
1915 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1916 SmallVector<int, 16> Mask;
1917 DecodeVPPERMMask(C, Width, Mask);
1918 if (!Mask.empty())
1919 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
1920 }
1921 break;
1922 }
1923
1924 case X86::MMX_MOVQ64rm: {
1925 if (auto *C = X86::getConstantFromPool(*MI, 1)) {
1926 std::string Comment;
1927 raw_string_ostream CS(Comment);
1928 const MachineOperand &DstOp = MI->getOperand(0);
1929 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
1930 if (auto *CF = dyn_cast<ConstantFP>(C)) {
1931 CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
1932 OutStreamer.AddComment(CS.str());
1933 }
1934 }
1935 break;
1936 }
1937
1938#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \
1939 case X86::Prefix##Instr##Suffix##rm##Postfix:
1940
1941#define CASE_AVX512_ARITH_RM(Instr) \
1942 INSTR_CASE(V, Instr, Z128, ) \
1943 INSTR_CASE(V, Instr, Z128, k) \
1944 INSTR_CASE(V, Instr, Z128, kz) \
1945 INSTR_CASE(V, Instr, Z256, ) \
1946 INSTR_CASE(V, Instr, Z256, k) \
1947 INSTR_CASE(V, Instr, Z256, kz) \
1948 INSTR_CASE(V, Instr, Z, ) \
1949 INSTR_CASE(V, Instr, Z, k) \
1950 INSTR_CASE(V, Instr, Z, kz)
1951
1952#define CASE_ARITH_RM(Instr) \
1953 INSTR_CASE(, Instr, , ) /* SSE */ \
1954 INSTR_CASE(V, Instr, , ) /* AVX-128 */ \
1955 INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \
1956 INSTR_CASE(V, Instr, Z128, ) \
1957 INSTR_CASE(V, Instr, Z128, k) \
1958 INSTR_CASE(V, Instr, Z128, kz) \
1959 INSTR_CASE(V, Instr, Z256, ) \
1960 INSTR_CASE(V, Instr, Z256, k) \
1961 INSTR_CASE(V, Instr, Z256, kz) \
1962 INSTR_CASE(V, Instr, Z, ) \
1963 INSTR_CASE(V, Instr, Z, k) \
1964 INSTR_CASE(V, Instr, Z, kz)
1965
1966 // TODO: Add additional instructions when useful.
1967 CASE_ARITH_RM(PMADDUBSW)
1968 CASE_ARITH_RM(PMADDWD)
1969 CASE_ARITH_RM(PMULDQ)
1970 CASE_ARITH_RM(PMULUDQ)
1971 CASE_ARITH_RM(PMULLD)
1972 CASE_AVX512_ARITH_RM(PMULLQ)
1973 CASE_ARITH_RM(PMULLW)
1974 CASE_ARITH_RM(PMULHW)
1975 CASE_ARITH_RM(PMULHUW)
1976 CASE_ARITH_RM(PMULHRSW) {
1977 unsigned SrcIdx = getSrcIdx(MI, 1);
1978 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1979 std::string Comment;
1980 raw_string_ostream CS(Comment);
1981 unsigned VectorWidth =
1982 X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1983 CS << "[";
1984 printConstant(C, VectorWidth, CS);
1985 CS << "]";
1986 OutStreamer.AddComment(CS.str());
1987 }
1988 break;
1989 }
1990
1991#define MASK_AVX512_CASE(Instr) \
1992 case Instr: \
1993 case Instr##k: \
1994 case Instr##kz:
1995
1996 case X86::MOVSDrm:
1997 case X86::VMOVSDrm:
1998 MASK_AVX512_CASE(X86::VMOVSDZrm)
1999 case X86::MOVSDrm_alt:
2000 case X86::VMOVSDrm_alt:
2001 case X86::VMOVSDZrm_alt:
2002 case X86::MOVQI2PQIrm:
2003 case X86::VMOVQI2PQIrm:
2004 case X86::VMOVQI2PQIZrm:
2005 printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
2006 break;
2007
2008 MASK_AVX512_CASE(X86::VMOVSHZrm)
2009 case X86::VMOVSHZrm_alt:
2010 printZeroUpperMove(MI, OutStreamer, 16, 128,
2011 "mem[0],zero,zero,zero,zero,zero,zero,zero");
2012 break;
2013
2014 case X86::MOVSSrm:
2015 case X86::VMOVSSrm:
2016 MASK_AVX512_CASE(X86::VMOVSSZrm)
2017 case X86::MOVSSrm_alt:
2018 case X86::VMOVSSrm_alt:
2019 case X86::VMOVSSZrm_alt:
2020 case X86::MOVDI2PDIrm:
2021 case X86::VMOVDI2PDIrm:
2022 case X86::VMOVDI2PDIZrm:
2023 printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero");
2024 break;
2025
2026#define MOV_CASE(Prefix, Suffix) \
2027 case X86::Prefix##MOVAPD##Suffix##rm: \
2028 case X86::Prefix##MOVAPS##Suffix##rm: \
2029 case X86::Prefix##MOVUPD##Suffix##rm: \
2030 case X86::Prefix##MOVUPS##Suffix##rm: \
2031 case X86::Prefix##MOVDQA##Suffix##rm: \
2032 case X86::Prefix##MOVDQU##Suffix##rm:
2033
2034#define MOV_AVX512_CASE(Suffix, Postfix) \
2035 case X86::VMOVDQA64##Suffix##rm##Postfix: \
2036 case X86::VMOVDQA32##Suffix##rm##Postfix: \
2037 case X86::VMOVDQU64##Suffix##rm##Postfix: \
2038 case X86::VMOVDQU32##Suffix##rm##Postfix: \
2039 case X86::VMOVDQU16##Suffix##rm##Postfix: \
2040 case X86::VMOVDQU8##Suffix##rm##Postfix: \
2041 case X86::VMOVAPS##Suffix##rm##Postfix: \
2042 case X86::VMOVAPD##Suffix##rm##Postfix: \
2043 case X86::VMOVUPS##Suffix##rm##Postfix: \
2044 case X86::VMOVUPD##Suffix##rm##Postfix:
2045
2046#define CASE_128_MOV_RM() \
2047 MOV_CASE(, ) /* SSE */ \
2048 MOV_CASE(V, ) /* AVX-128 */ \
2049 MOV_AVX512_CASE(Z128, ) \
2050 MOV_AVX512_CASE(Z128, k) \
2051 MOV_AVX512_CASE(Z128, kz)
2052
2053#define CASE_256_MOV_RM() \
2054 MOV_CASE(V, Y) /* AVX-256 */ \
2055 MOV_AVX512_CASE(Z256, ) \
2056 MOV_AVX512_CASE(Z256, k) \
2057 MOV_AVX512_CASE(Z256, kz) \
2058
2059#define CASE_512_MOV_RM() \
2060 MOV_AVX512_CASE(Z, ) \
2061 MOV_AVX512_CASE(Z, k) \
2062 MOV_AVX512_CASE(Z, kz) \
2063
2064 // For loads from a constant pool to a vector register, print the constant
2065 // loaded.
2066 CASE_128_MOV_RM()
2067 printBroadcast(MI, OutStreamer, 1, 128);
2068 break;
2069 CASE_256_MOV_RM()
2070 printBroadcast(MI, OutStreamer, 1, 256);
2071 break;
2072 CASE_512_MOV_RM()
2073 printBroadcast(MI, OutStreamer, 1, 512);
2074 break;
2075 case X86::VBROADCASTF128rm:
2076 case X86::VBROADCASTI128rm:
2077 MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
2078 MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
2079 MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
2080 MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
2081 printBroadcast(MI, OutStreamer, 2, 128);
2082 break;
2083 MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
2084 MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
2085 MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
2086 MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
2087 printBroadcast(MI, OutStreamer, 4, 128);
2088 break;
2089 MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
2090 MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
2091 MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
2092 MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
2093 printBroadcast(MI, OutStreamer, 2, 256);
2094 break;
2095
2096 // For broadcast loads from a constant pool to a vector register, repeatedly
2097 // print the constant loaded.
2098 case X86::MOVDDUPrm:
2099 case X86::VMOVDDUPrm:
2100 MASK_AVX512_CASE(X86::VMOVDDUPZ128rm)
2101 case X86::VPBROADCASTQrm:
2102 MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm)
2103 printBroadcast(MI, OutStreamer, 2, 64);
2104 break;
2105 case X86::VBROADCASTSDYrm:
2106 MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm)
2107 case X86::VPBROADCASTQYrm:
2108 MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm)
2109 printBroadcast(MI, OutStreamer, 4, 64);
2110 break;
2111 MASK_AVX512_CASE(X86::VBROADCASTSDZrm)
2112 MASK_AVX512_CASE(X86::VPBROADCASTQZrm)
2113 printBroadcast(MI, OutStreamer, 8, 64);
2114 break;
2115 case X86::VBROADCASTSSrm:
2116 MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm)
2117 case X86::VPBROADCASTDrm:
2118 MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm)
2119 printBroadcast(MI, OutStreamer, 4, 32);
2120 break;
2121 case X86::VBROADCASTSSYrm:
2122 MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm)
2123 case X86::VPBROADCASTDYrm:
2124 MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm)
2125 printBroadcast(MI, OutStreamer, 8, 32);
2126 break;
2127 MASK_AVX512_CASE(X86::VBROADCASTSSZrm)
2128 MASK_AVX512_CASE(X86::VPBROADCASTDZrm)
2129 printBroadcast(MI, OutStreamer, 16, 32);
2130 break;
2131 case X86::VPBROADCASTWrm:
2132 MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm)
2133 printBroadcast(MI, OutStreamer, 8, 16);
2134 break;
2135 case X86::VPBROADCASTWYrm:
2136 MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm)
2137 printBroadcast(MI, OutStreamer, 16, 16);
2138 break;
2139 MASK_AVX512_CASE(X86::VPBROADCASTWZrm)
2140 printBroadcast(MI, OutStreamer, 32, 16);
2141 break;
2142 case X86::VPBROADCASTBrm:
2143 MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm)
2144 printBroadcast(MI, OutStreamer, 16, 8);
2145 break;
2146 case X86::VPBROADCASTBYrm:
2147 MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm)
2148 printBroadcast(MI, OutStreamer, 32, 8);
2149 break;
2150 MASK_AVX512_CASE(X86::VPBROADCASTBZrm)
2151 printBroadcast(MI, OutStreamer, 64, 8);
2152 break;
2153
2154#define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \
2155 case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix:
2156
2157#define CASE_MOVX_RM(Ext, Type) \
2158 MOVX_CASE(, Ext, Type, , ) \
2159 MOVX_CASE(V, Ext, Type, , ) \
2160 MOVX_CASE(V, Ext, Type, Y, ) \
2161 MOVX_CASE(V, Ext, Type, Z128, ) \
2162 MOVX_CASE(V, Ext, Type, Z128, k ) \
2163 MOVX_CASE(V, Ext, Type, Z128, kz ) \
2164 MOVX_CASE(V, Ext, Type, Z256, ) \
2165 MOVX_CASE(V, Ext, Type, Z256, k ) \
2166 MOVX_CASE(V, Ext, Type, Z256, kz ) \
2167 MOVX_CASE(V, Ext, Type, Z, ) \
2168 MOVX_CASE(V, Ext, Type, Z, k ) \
2169 MOVX_CASE(V, Ext, Type, Z, kz )
2170
2171 CASE_MOVX_RM(SX, BD)
2172 printSignExtend(MI, OutStreamer, 8, 32);
2173 break;
2174 CASE_MOVX_RM(SX, BQ)
2175 printSignExtend(MI, OutStreamer, 8, 64);
2176 break;
2177 CASE_MOVX_RM(SX, BW)
2178 printSignExtend(MI, OutStreamer, 8, 16);
2179 break;
2180 CASE_MOVX_RM(SX, DQ)
2181 printSignExtend(MI, OutStreamer, 32, 64);
2182 break;
2183 CASE_MOVX_RM(SX, WD)
2184 printSignExtend(MI, OutStreamer, 16, 32);
2185 break;
2186 CASE_MOVX_RM(SX, WQ)
2187 printSignExtend(MI, OutStreamer, 16, 64);
2188 break;
2189
2190 CASE_MOVX_RM(ZX, BD)
2191 printZeroExtend(MI, OutStreamer, 8, 32);
2192 break;
2193 CASE_MOVX_RM(ZX, BQ)
2194 printZeroExtend(MI, OutStreamer, 8, 64);
2195 break;
2196 CASE_MOVX_RM(ZX, BW)
2197 printZeroExtend(MI, OutStreamer, 8, 16);
2198 break;
2199 CASE_MOVX_RM(ZX, DQ)
2200 printZeroExtend(MI, OutStreamer, 32, 64);
2201 break;
2202 CASE_MOVX_RM(ZX, WD)
2203 printZeroExtend(MI, OutStreamer, 16, 32);
2204 break;
2205 CASE_MOVX_RM(ZX, WQ)
2206 printZeroExtend(MI, OutStreamer, 16, 64);
2207 break;
2208 }
2209}
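// As an illustration (register and constant values assumed, not taken from a
// real compilation): a full-width constant-pool load matched by
// CASE_128_MOV_RM() above might be annotated as
//   vmovaps .LCPI0_0(%rip), %xmm0        # xmm0 = [1,2,3,4]
// while a VPBROADCASTDrm of a 32-bit pool entry repeats the element:
//   vpbroadcastd .LCPI0_1(%rip), %xmm0   # xmm0 = [7,7,7,7]
// with the bracketed text produced by printBroadcast/printConstant above.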
2210
2211// Does the given operand refer to a DLLIMPORT function?
2212bool isImportedFunction(const MachineOperand &MO) {
2213 return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_DLLIMPORT);
2214}
2215
2216// Is the given instruction a call to a CFGuard function?
2217bool isCallToCFGuardFunction(const MachineInstr *MI) {
2218 assert(MI->getOpcode() == X86::TAILJMPm64_REX ||
2219 MI->getOpcode() == X86::CALL64m);
2220 const MachineOperand &MO = MI->getOperand(3);
2221 return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_NO_FLAG) &&
2222 isCFGuardFunction(MO.getGlobal());
2223}
2224
2225// Does the containing block for the given instruction contain any jump table
2226// info (indicating that the block is a dispatch for a jump table)?
2227bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI) {
2228 const MachineBasicBlock &MBB = *MI->getParent();
2229 for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I)
2230 if (I->isJumpTableDebugInfo())
2231 return true;
2232
2233 return false;
2234}
2235
2236void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2237 // FIXME: Enable feature predicate checks once all the tests pass.
2238 // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2239 // Subtarget->getFeatureBits());
2240
2241 X86MCInstLower MCInstLowering(*MF, *this);
2242 const X86RegisterInfo *RI =
2243 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2244
2245 if (MI->getOpcode() == X86::OR64rm) {
2246 for (auto &Opd : MI->operands()) {
2247 if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2248 "swift_async_extendedFramePointerFlags") {
2249 ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2250 }
2251 }
2252 }
2253
2254 // Add comments for values loaded from constant pool.
2255 if (OutStreamer->isVerboseAsm())
2256 addConstantComments(MI, *OutStreamer);
2257
2258 // Add a comment about EVEX compression
2259 if (TM.Options.MCOptions.ShowMCEncoding) {
2260 if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
2261 OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
2262 else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2263 OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2264 else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
2265 OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
2266 }
2267
2268 // We use this to suppress NOP padding for Windows EH.
2269 bool IsTailJump = false;
2270
2271 switch (MI->getOpcode()) {
2272 case TargetOpcode::DBG_VALUE:
2273 llvm_unreachable("Should be handled target independently");
2274
2275 case X86::EH_RETURN:
2276 case X86::EH_RETURN64: {
2277 // Lower these as normal, but add some comments.
2278 Register Reg = MI->getOperand(0).getReg();
2279 OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2280 X86ATTInstPrinter::getRegisterName(Reg));
2281 break;
2282 }
2283 case X86::CLEANUPRET: {
2284 // Lower these as normal, but add some comments.
2285 OutStreamer->AddComment("CLEANUPRET");
2286 break;
2287 }
2288
2289 case X86::CATCHRET: {
2290 // Lower these as normal, but add some comments.
2291 OutStreamer->AddComment("CATCHRET");
2292 break;
2293 }
2294
2295 case X86::ENDBR32:
2296 case X86::ENDBR64: {
2297 // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2298 // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2299 // non-empty. If MI is the initial ENDBR, place the
2300 // __patchable_function_entries label after ENDBR.
2301 if (CurrentPatchableFunctionEntrySym &&
2302 CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2303 MI == &MF->front().front()) {
2304 MCInst Inst;
2305 MCInstLowering.Lower(MI, Inst);
2306 EmitAndCountInstruction(Inst);
2307 CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2308 OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2309 return;
2310 }
2311 break;
2312 }
2313
2314 case X86::TAILJMPd64:
2315 if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2316 EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2317
2318 if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
2319 emitLabelAndRecordForImportCallOptimization(
2320 IMAGE_RETPOLINE_AMD64_IMPORT_BR);
2321 }
2322
2323 // Lower this as normal, but add a comment.
2324 OutStreamer->AddComment("TAILCALL");
2325 IsTailJump = true;
2326 break;
2327
2328 case X86::TAILJMPr:
2329 case X86::TAILJMPm:
2330 case X86::TAILJMPd:
2331 case X86::TAILJMPd_CC:
2332 case X86::TAILJMPr64:
2333 case X86::TAILJMPm64:
2334 case X86::TAILJMPd64_CC:
2335 if (EnableImportCallOptimization)
2336 report_fatal_error("Unexpected TAILJMP instruction was emitted when "
2337 "import call optimization was enabled");
2338
2339 // Lower these as normal, but add some comments.
2340 OutStreamer->AddComment("TAILCALL");
2341 IsTailJump = true;
2342 break;
2343
2344 case X86::TAILJMPm64_REX:
2345 if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2346 emitLabelAndRecordForImportCallOptimization(
2347 IMAGE_RETPOLINE_AMD64_CFG_BR_REX);
2348 }
2349
2350 OutStreamer->AddComment("TAILCALL");
2351 IsTailJump = true;
2352 break;
2353
2354 case X86::TAILJMPr64_REX: {
2355 if (EnableImportCallOptimization) {
2356 assert(MI->getOperand(0).getReg() == X86::RAX &&
2357 "Indirect tail calls with impcall enabled must go through RAX (as "
2358 "enforced by TCRETURNImpCallri64)");
2359 emitLabelAndRecordForImportCallOptimization(
2360 IMAGE_RETPOLINE_AMD64_INDIR_BR);
2361 }
2362
2363 OutStreamer->AddComment("TAILCALL");
2364 IsTailJump = true;
2365 break;
2366 }
2367
2368 case X86::JMP64r:
2369 if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) {
2370 uint16_t EncodedReg =
2371 this->getSubtarget().getRegisterInfo()->getEncodingValue(
2372 MI->getOperand(0).getReg().asMCReg());
2373 emitLabelAndRecordForImportCallOptimization(
2374 (ImportCallKind)(IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST +
2375 EncodedReg));
2376 }
2377 break;
2378
2379 case X86::JMP16r:
2380 case X86::JMP16m:
2381 case X86::JMP32r:
2382 case X86::JMP32m:
2383 case X86::JMP64m:
2384 if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI))
2385 report_fatal_error(
2386 "Unexpected JMP instruction was emitted for a jump-table when import "
2387 "call optimization was enabled");
2388 break;
2389
2390 case X86::TLS_addr32:
2391 case X86::TLS_addr64:
2392 case X86::TLS_addrX32:
2393 case X86::TLS_base_addr32:
2394 case X86::TLS_base_addr64:
2395 case X86::TLS_base_addrX32:
2396 case X86::TLS_desc32:
2397 case X86::TLS_desc64:
2398 return LowerTlsAddr(MCInstLowering, *MI);
2399
2400 case X86::MOVPC32r: {
2401 // This is a pseudo op for a two instruction sequence with a label, which
2402 // looks like:
2403 // call "L1$pb"
2404 // "L1$pb":
2405 // popl %esi
2406
2407 // Emit the call.
2408 MCSymbol *PICBase = MF->getPICBaseSymbol();
2409 // FIXME: We would like an efficient form for this, so we don't have to do a
2410 // lot of extra uniquing.
2411 EmitAndCountInstruction(
2412 MCInstBuilder(X86::CALLpcrel32)
2413 .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2414
2415 const X86FrameLowering *FrameLowering =
2416 MF->getSubtarget<X86Subtarget>().getFrameLowering();
2417 bool hasFP = FrameLowering->hasFP(*MF);
2418
2419 // TODO: This is needed only if we require precise CFA.
2420 bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2421 !OutStreamer->getDwarfFrameInfos().back().End;
2422
2423 int stackGrowth = -RI->getSlotSize();
2424
2425 if (HasActiveDwarfFrame && !hasFP) {
2426 OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2427 MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
2428 }
2429
2430 // Emit the label.
2431 OutStreamer->emitLabel(PICBase);
2432
2433 // popl $reg
2434 EmitAndCountInstruction(
2435 MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2436
2437 if (HasActiveDwarfFrame && !hasFP) {
2438 OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2439 }
2440 return;
2441 }
2442
2443 case X86::ADD32ri: {
2444 // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2445 if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2446 break;
2447
2448 // Okay, we have something like:
2449 // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2450
2451 // For this, we want to print something like:
2452 // MYGLOBAL + (. - PICBASE)
2453 // However, we can't generate a ".", so just emit a new label here and refer
2454 // to it.
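    // The net effect (with illustrative label names) is:
    //   .Ltmp0:
    //     addl $_MYGLOBAL+(.Ltmp0-L0$pb), %eax
    // where .Ltmp0 is the DotSym emitted below and L0$pb is this function's
    // PIC base label.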
2455 MCSymbol *DotSym = OutContext.createTempSymbol();
2456 OutStreamer->emitLabel(DotSym);
2457
2458 // Now that we have emitted the label, lower the complex operand expression.
2459 MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2460
2461 const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2462 const MCExpr *PICBase =
2463 MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2464 DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2465
2466 DotExpr = MCBinaryExpr::createAdd(
2467 MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2468
2469 EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2470 .addReg(MI->getOperand(0).getReg())
2471 .addReg(MI->getOperand(1).getReg())
2472 .addExpr(DotExpr));
2473 return;
2474 }
2475 case TargetOpcode::STATEPOINT:
2476 return LowerSTATEPOINT(*MI, MCInstLowering);
2477
2478 case TargetOpcode::FAULTING_OP:
2479 return LowerFAULTING_OP(*MI, MCInstLowering);
2480
2481 case TargetOpcode::FENTRY_CALL:
2482 return LowerFENTRY_CALL(*MI, MCInstLowering);
2483
2484 case TargetOpcode::PATCHABLE_OP:
2485 return LowerPATCHABLE_OP(*MI, MCInstLowering);
2486
2487 case TargetOpcode::STACKMAP:
2488 return LowerSTACKMAP(*MI);
2489
2490 case TargetOpcode::PATCHPOINT:
2491 return LowerPATCHPOINT(*MI, MCInstLowering);
2492
2493 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2494 return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2495
2496 case TargetOpcode::PATCHABLE_RET:
2497 return LowerPATCHABLE_RET(*MI, MCInstLowering);
2498
2499 case TargetOpcode::PATCHABLE_TAIL_CALL:
2500 return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2501
2502 case TargetOpcode::PATCHABLE_EVENT_CALL:
2503 return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2504
2505 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2506 return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2507
2508 case X86::MORESTACK_RET:
2509 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2510 return;
2511
2512 case X86::KCFI_CHECK:
2513 return LowerKCFI_CHECK(*MI);
2514
2515 case X86::ASAN_CHECK_MEMACCESS:
2516 return LowerASAN_CHECK_MEMACCESS(*MI);
2517
2518 case X86::MORESTACK_RET_RESTORE_R10:
2519 // Return, then restore R10.
2520 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2521 EmitAndCountInstruction(
2522 MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2523 return;
2524
2525 case X86::SEH_PushReg:
2526 case X86::SEH_SaveReg:
2527 case X86::SEH_SaveXMM:
2528 case X86::SEH_StackAlloc:
2529 case X86::SEH_StackAlign:
2530 case X86::SEH_SetFrame:
2531 case X86::SEH_PushFrame:
2532 case X86::SEH_EndPrologue:
2533 case X86::SEH_EndEpilogue:
2534 case X86::SEH_UnwindV2Start:
2535 case X86::SEH_UnwindVersion:
2536 case X86::SEH_SplitChained:
2537 EmitSEHInstruction(MI);
2538 return;
2539
2540 case X86::SEH_BeginEpilogue: {
2541 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2542 EmitSEHInstruction(MI);
2543 return;
2544 }
2545 case X86::UBSAN_UD1:
2546 EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2547 .addReg(X86::EAX)
2548 .addReg(X86::EAX)
2549 .addImm(1)
2550 .addReg(X86::NoRegister)
2551 .addImm(MI->getOperand(0).getImm())
2552 .addReg(X86::NoRegister));
2553 return;
2554 case X86::CALL64pcrel32:
2555 if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2556 EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2557
2558 if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
2559 emitLabelAndRecordForImportCallOptimization(
2560 IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
2561
2562 MCInst TmpInst;
2563 MCInstLowering.Lower(MI, TmpInst);
2564
2565 // For Import Call Optimization to work, we need the call instruction
2566 // to have a REX prefix, and a 5-byte nop after the call instruction.
2567 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
2568 emitCallInstruction(TmpInst);
2569 emitNop(*OutStreamer, 5, Subtarget);
2570 maybeEmitNopAfterCallForWindowsEH(MI);
2571 return;
2572 }
2573
2574 break;
2575
2576 case X86::CALL64r:
2577 if (EnableImportCallOptimization) {
2578 assert(MI->getOperand(0).getReg() == X86::RAX &&
2579 "Indirect calls with impcall enabled must go through RAX (as "
2580 "enforced by CALL64r_ImpCall)");
2581
2582 emitLabelAndRecordForImportCallOptimization(
2583 IMAGE_RETPOLINE_AMD64_INDIR_CALL);
2584 MCInst TmpInst;
2585 MCInstLowering.Lower(MI, TmpInst);
2586 emitCallInstruction(TmpInst);
2587
2588 // For Import Call Optimization to work, we need a 3-byte nop after the
2589 // call instruction.
2590 emitNop(*OutStreamer, 3, Subtarget);
2591 maybeEmitNopAfterCallForWindowsEH(MI);
2592 return;
2593 }
2594 break;
2595
2596 case X86::CALL64m:
2597 if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2598 emitLabelAndRecordForImportCallOptimization(
2599 IMAGE_RETPOLINE_AMD64_CFG_CALL);
2600 }
2601 break;
2602
2603 case X86::JCC_1:
2604 // Two instruction prefixes (2EH for branch not-taken and 3EH for branch
2605 // taken) are used as branch hints. Here we add the branch-taken prefix to
2606 // jump instructions whose taken probability exceeds the threshold.
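  // For example (illustrative encoding bytes): a likely-taken JNE is emitted
  // behind a standalone DS prefix, so the decoder sees "3E 75 xx" instead of
  // "75 xx"; branches at or below the threshold are left unprefixed.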
2607 if (getSubtarget().hasBranchHint() && EnableBranchHint) {
2608 const MachineBranchProbabilityInfo *MBPI =
2609 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
2610 MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
2611 BranchProbability EdgeProb =
2612 MBPI->getEdgeProbability(MI->getParent(), DestBB);
2613 BranchProbability Threshold(BranchHintProbabilityThreshold, 100);
2614 if (EdgeProb > Threshold)
2615 EmitAndCountInstruction(MCInstBuilder(X86::DS_PREFIX));
2616 }
2617 break;
2618 }
2619
2620 MCInst TmpInst;
2621 MCInstLowering.Lower(MI, TmpInst);
2622
2623 if (MI->isCall()) {
2624 emitCallInstruction(TmpInst);
2625 // Since tail calls transfer control without leaving a stack frame, there
2626 // is never a need for NOP padding after tail calls.
2627 if (!IsTailJump)
2628 maybeEmitNopAfterCallForWindowsEH(MI);
2629 return;
2630 }
2631
2632 EmitAndCountInstruction(TmpInst);
2633}
2634
2635void X86AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
2636 const MCSubtargetInfo *EndInfo,
2637 const MachineInstr *MI) {
2638 if (MI) {
2639 // If unwinding inline asm ends on a call, WinEH may require insertion of
2640 // a nop.
2641 unsigned ExtraInfo = MI->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
2642 if (ExtraInfo & InlineAsm::Extra_MayUnwind)
2643 maybeEmitNopAfterCallForWindowsEH(MI);
2644 }
2645}
2646
2647void X86AsmPrinter::emitCallInstruction(const llvm::MCInst &MCI) {
2648 // Stackmap shadows cannot include branch targets, so we can count the bytes
2649 // in a call towards the shadow, but must ensure that no thread returns
2650 // into the stackmap shadow. The only way to achieve this is if the call
2651 // is at the end of the shadow.
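  // Illustrative example (sizes assumed): if the stackmap requires an 8-byte
  // shadow, 2 bytes have been emitted since it, and this call encodes to 5
  // bytes, then count() below brings the tracked size to 7,
  // emitShadowPadding() emits a single 1-byte nop *before* the call, and the
  // call's final byte closes the shadow.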
2652
2653 // Count the size of the call towards the shadow.
2654 SMShadowTracker.count(MCI, getSubtargetInfo(), CodeEmitter.get());
2655 // Then flush the shadow so that we fill with nops before the call, not
2656 // after it.
2657 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2658 // Then emit the call
2659 OutStreamer->emitInstruction(MCI, getSubtargetInfo());
2660}
2661
2662// Determines whether a NOP is required after a CALL, so that Windows EH
2663// IP2State tables have the correct information.
2664//
2665// On most Windows platforms (AMD64, ARM64, ARM32, IA64, but *not* x86-32),
2666// exception handling works by looking up instruction pointers in lookup
2667// tables. These lookup tables are stored in .xdata sections in executables.
2668 // One element of the lookup tables is the "IP2State" table (Instruction
2669 // Pointer to State).
2670//
2671// If a function has any instructions that require cleanup during exception
2672// unwinding, then it will have an IP2State table. Each entry in the IP2State
2673// table describes a range of bytes in the function's instruction stream, and
2674// associates an "EH state number" with that range of instructions. A value of
2675// -1 means "the null state", which does not require any code to execute.
2676// A value other than -1 is an index into the State table.
2677//
2678// The entries in the IP2State table contain byte offsets within the instruction
2679// stream of the function. The Windows ABI requires that these offsets are
2680// aligned to instruction boundaries; they are not permitted to point to a byte
2681// that is not the first byte of an instruction.
2682//
2683// Unfortunately, CALL instructions present a problem during unwinding. CALL
2684// instructions push the address of the instruction after the CALL instruction,
2685// so that execution can resume after the CALL. If the CALL is the last
2686// instruction within an IP2State region, then the return address (on the stack)
2687// points to the *next* IP2State region. This means that the unwinder will
2688// use the wrong cleanup funclet during unwinding.
2689//
2690// To fix this problem, the Windows AMD64 ABI requires that CALL instructions
2691// are never placed at the end of an IP2State region. Stated equivalently, the
2692// end of a CALL instruction cannot be aligned to an IP2State boundary. If a
2693// CALL instruction would occur at the end of an IP2State region, then the
2694// compiler must insert a NOP instruction after the CALL. The NOP instruction
2695// is placed in the same EH region as the CALL instruction, so that the return
2696// address points to the NOP and the unwinder will locate the correct region.
2697//
2698// NOP padding is only necessary on Windows AMD64 targets. On ARM64 and ARM32,
2699// instructions have a fixed size so the unwinder knows how to "back up" by
2700// one instruction.
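//
// For example (illustrative assembly; label and callee names assumed):
//
//   call _DoCleanupWork     ; last instruction in EH state #2's byte range
//   nop                     ; keeps the return address inside state #2
// .Lstate3_begin:           ; IP2State transition to state #3
//
// Without the NOP, the return address pushed by the CALL would equal
// .Lstate3_begin, and the unwinder would attribute the frame to state #3.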
2701//
2702// Interaction with Import Call Optimization (ICO):
2703//
2704// Import Call Optimization (ICO) is a compiler + OS feature on Windows which
2705// improves the performance and security of DLL imports. ICO relies on using a
2706// specific CALL idiom that can be replaced by the OS DLL loader. This removes
2707// a load and indirect CALL and replaces it with a single direct CALL.
2708//
2709// To achieve this, ICO also inserts NOPs after the CALL instruction. If the
2710// end of the CALL is aligned with an EH state transition, we *also* insert
2711// a single-byte NOP. **Both forms of NOPs must be preserved.** They cannot
2712// be combined into a single larger NOP; nor can the second NOP be removed.
2713//
2714// This is necessary because, if ICO is active and the call site is modified
2715// by the loader, the loader will end up overwriting the NOPs that were inserted
2716// for ICO. That means that those NOPs cannot be used for the correct
2717// termination of the exception handling region (the IP2State transition),
2718// so we still need an additional NOP instruction. The NOPs cannot be combined
2719// into a longer NOP (which is ordinarily desirable) because then ICO would
2720// split one instruction, producing a malformed instruction after the ICO call.
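//
// Concretely (an illustrative sketch, not exact emitted assembly), the ICO
// path for CALL64pcrel32 in emitInstruction above produces:
//
//   .Limpcall0:              ; label recorded for the import-call section
//     rex64 call imported_fn ; REX-prefixed call the loader may rewrite
//     (5-byte nop)           ; slack reserved for the loader
//     nop                    ; the extra 1-byte NOP, kept separate, emitted
//                            ; only when an EH state ends after the call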
2721void X86AsmPrinter::maybeEmitNopAfterCallForWindowsEH(const MachineInstr *MI) {
2722 // We only need to insert NOPs after CALLs when targeting Windows on AMD64.
2723 // (Don't let the name fool you: Itanium refers to table-based exception
2724 // handling, not the Itanium architecture.)
2725 if (MAI->getExceptionHandlingType() != ExceptionHandling::WinEH ||
2726 MAI->getWinEHEncodingType() != WinEH::EncodingType::Itanium) {
2727 return;
2728 }
2729
2730 bool HasEHPersonality = MF->getWinEHFuncInfo() != nullptr;
2731
2732 // Set up MBB iterator, initially positioned on the same MBB as MI.
2733 MachineFunction::const_iterator MFI(MI->getParent());
2734 MachineFunction::const_iterator MFE(MF->end());
2735
2736 // Set up instruction iterator, positioned immediately *after* MI.
2737 MachineBasicBlock::const_iterator MBBI(MI);
2738 MachineBasicBlock::const_iterator MBBE = MI->getParent()->end();
2739 ++MBBI; // Step over MI
2740
2741 // This loop iterates MBBs
2742 for (;;) {
2743 // This loop iterates instructions
2744 for (; MBBI != MBBE; ++MBBI) {
2745 // Check the instruction that follows this CALL.
2746 const MachineInstr &NextMI = *MBBI;
2747
2748 // If there is an EH_LABEL after this CALL, then there is an EH state
2749 // transition after this CALL. This is exactly the situation which
2750 // requires NOP padding.
2751 if (NextMI.isEHLabel()) {
2752 if (HasEHPersonality) {
2753 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2754 return;
2755 }
2756 // We actually want to continue, in case there is an SEH_BeginEpilogue
2757 // instruction after the EH_LABEL. In some situations, IR is produced
2758 // that contains EH_LABEL pseudo-instructions, even when we are not
2759 // generating IP2State tables. We still need to insert a NOP before
2760 // SEH_BeginEpilogue in that case.
2761 continue;
2762 }
2763
2764 // Somewhat similarly, if the CALL is the last instruction before the
2765 // SEH prologue, then we also need a NOP. This is necessary because the
2766 // Windows stack unwinder will not invoke a function's exception handler
2767 // if the instruction pointer is in the function prologue or epilogue.
2768 //
2769 // We always emit a NOP before SEH_BeginEpilogue, even if there is no
2770 // personality function (unwind info) for this frame. This is the same
2771 // behavior as MSVC.
2772 if (NextMI.getOpcode() == X86::SEH_BeginEpilogue) {
2773 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2774 return;
2775 }
2776
2777 if (!NextMI.isPseudo() && !NextMI.isMetaInstruction()) {
2778 // We found a real instruction. During the CALL, the return IP will
2779 // point to this instruction. Since this instruction has the same EH
2780 // state as the call itself (because there is no intervening EH_LABEL),
2781 // the IP2State table will be accurate; there is no need to insert a
2782 // NOP.
2783 return;
2784 }
2785
2786 // The next instruction is a pseudo-op. Ignore it and keep searching.
2787 // Because these instructions do not generate any machine code, they
2788 // cannot prevent the IP2State table from pointing at the wrong
2789 // instruction during a CALL.
2790 }
2791
2792 // We've reached the end of this MBB. Find the next MBB in program order.
2793 // MBB order should be finalized by this point, so falling across MBBs is
2794 // expected.
2795 ++MFI;
2796 if (MFI == MFE) {
2797 // No more blocks; we've reached the end of the function. This should
2798 // only happen with no-return functions, but double-check to be sure.
2799 if (HasEHPersonality) {
2800 // If the CALL has no successors, then it is a noreturn function.
2801 // Insert an INT3 instead of a NOP. This accomplishes the same purpose,
2802 // but is clearer to read. Also, analysis tools will understand
2803 // that they should not continue disassembling after the CALL (unless
2804 // there are other branches to that label).
2805 if (MI->getParent()->succ_empty())
2806 EmitAndCountInstruction(MCInstBuilder(X86::INT3));
2807 else
2808 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2809 }
2810 return;
2811 }
2812
2813 // Set up iterator to scan the next basic block.
2814 const MachineBasicBlock *NextMBB = &*MFI;
2815 MBBI = NextMBB->instr_begin();
2816 MBBE = NextMBB->instr_end();
2817 }
2818}
2819
2820void X86AsmPrinter::emitLabelAndRecordForImportCallOptimization(
2821 ImportCallKind Kind) {
2822 assert(EnableImportCallOptimization);
2823
2824 MCSymbol *CallSiteSymbol = MMI->getContext().createNamedTempSymbol("impcall");
2825 OutStreamer->emitLabel(CallSiteSymbol);
2826
2827 SectionToImportedFunctionCalls[OutStreamer->getCurrentSectionOnly()]
2828 .push_back({CallSiteSymbol, Kind});
2829}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void printShuffleMask(raw_ostream &Out, Type *Ty, ArrayRef< int > Mask)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
print mir2vec MIR2Vec Vocabulary Printer Pass
Definition MIR2Vec.cpp:593
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static MCSymbol * GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
static MCOperand LowerSymbolOperand(const MachineInstr *MI, const MachineOperand &MO, const MCSymbol *Symbol, AsmPrinter &AP)
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, const X86Subtarget *Subtarget)
Emit the optimal amount of multi-byte nops on X86.
static unsigned getRetOpcode(const X86Subtarget &Subtarget)
static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, int SrcEltBits, int DstEltBits)
static unsigned convertTailJumpOpcode(unsigned Opcode)
static unsigned getSrcIdx(const MachineInstr *MI, unsigned SrcIdx)
static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, int Repeats, int BitWidth)
static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, int SrcEltBits, int DstEltBits, bool IsSext)
static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, int SclWidth, int VecWidth, const char *ShuffleComment)
#define MASK_AVX512_CASE(Instr)
#define CASE_ARITH_RM(Instr)
static void addConstantComments(const MachineInstr *MI, MCStreamer &OutStreamer)
#define CASE_256_MOV_RM()
#define CASE_AVX512_ARITH_RM(Instr)
bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI)
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, const X86Subtarget *Subtarget)
Emit the largest nop instruction smaller than or equal to NumBytes bytes.
static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, unsigned SrcOpIdx)
#define CASE_MOVX_RM(Ext, Type)
bool isImportedFunction(const MachineOperand &MO)
static cl::opt< bool > EnableBranchHint("enable-branch-hint", cl::desc("Enable branch hint."), cl::init(false), cl::Hidden)
static void printConstant(const APInt &Val, raw_ostream &CS, bool PrintZero=false)
static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, int SrcEltBits, int DstEltBits)
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, unsigned SrcOp2Idx, ArrayRef< int > Mask)
bool isCallToCFGuardFunction(const MachineInstr *MI)
#define CASE_512_MOV_RM()
static cl::opt< unsigned > BranchHintProbabilityThreshold("branch-hint-probability-threshold", cl::desc("The probability threshold of enabling branch hint."), cl::init(50), cl::Hidden)
#define CASE_128_MOV_RM()
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1541
const fltSemantics & getSemantics() const
Definition APFloat.h:1520
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1142
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1504
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:570
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This class is intended to be used as a driving class for all asm writers.
Definition AsmPrinter.h:91
MCSymbol * getSymbol(const GlobalValue *GV) const
MCSymbol * CurrentFnBegin
Definition AsmPrinter.h:220
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
virtual MCSymbol * GetCPISymbol(unsigned CPID) const
Return the symbol for the specified constant pool entry.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
MCSymbol * GetJTISymbol(unsigned JTID, bool isLinkerPrivate=false) const
Return the symbol for the specified jump table entry.
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * getSymbolPreferLocal(const GlobalValue &GV) const
Similar to getSymbol() but preferred for references.
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition AsmPrinter.h:112
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
MCSymbol * createTempSymbol(const Twine &Name) const
MCSymbol * CurrentPatchableFunctionEntrySym
The symbol for the entry in __patchable_function_entires.
Definition AsmPrinter.h:124
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
MCSymbol * GetBlockAddressSymbol(const BlockAddress *BA) const
Return the MCSymbol used to satisfy BlockAddress uses of the specified basic block.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
This is an important base class in LLVM.
Definition Constant.h:43
Register getReg() const
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasInternalLinkage() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool doesSetDirectiveSuppressReloc() const
Definition MCAsmInfo.h:593
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
MCCodeEmitter - Generic instruction encoding interface.
virtual void encodeInstruction(const MCInst &Inst, SmallVectorImpl< char > &CB, SmallVectorImpl< MCFixup > &Fixups, const MCSubtargetInfo &STI) const =0
Encode the given Inst to bytes and append to CB.
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCTargetOptions * getTargetOptions() const
Definition MCContext.h:420
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCInstBuilder & addReg(MCRegister Reg)
Add a new register operand.
MCInstBuilder & addExpr(const MCExpr *Val)
Add a new MCExpr operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void setFlags(unsigned F)
Definition MCInst.h:204
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
void setOpcode(unsigned Op)
Definition MCInst.h:201
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
Streaming machine code generation interface.
Definition MCStreamer.h:220
virtual void emitWinCFIUnwindVersion(uint8_t Version, SMLoc Loc=SMLoc())
virtual void emitWinCFIPushReg(MCRegister Register, SMLoc Loc=SMLoc())
virtual void emitBinaryData(StringRef Data)
Functionally identical to EmitBytes.
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
virtual void emitWinCFIUnwindV2Start(SMLoc Loc=SMLoc())
virtual void emitWinCFIEndEpilogue(SMLoc Loc=SMLoc())
virtual void emitWinCFIPushFrame(bool Code, SMLoc Loc=SMLoc())
virtual void emitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc=SMLoc())
MCContext & getContext() const
Definition MCStreamer.h:314
virtual void AddComment(const Twine &T, bool EOL=true)
Add a textual comment.
Definition MCStreamer.h:387
virtual void emitWinCFIBeginEpilogue(SMLoc Loc=SMLoc())
virtual void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc())
Emit a label for Symbol into the current section.
MCTargetStreamer * getTargetStreamer()
Definition MCStreamer.h:324
virtual void emitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc=SMLoc())
virtual void emitWinCFIEndProlog(SMLoc Loc=SMLoc())
virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, unsigned MaxBytesToEmit=0)
Emit nops until the byte alignment ByteAlignment is reached.
virtual void emitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc=SMLoc())
virtual void emitWinCFISplitChained(SMLoc Loc=SMLoc())
virtual void emitWinCFIAllocStack(unsigned Size, SMLoc Loc=SMLoc())
MCSection * getCurrentSectionOnly() const
Definition MCStreamer.h:421
virtual void emitBytes(StringRef Data)
Emit the bytes in Data into the output.
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::const_iterator const_iterator
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mop_range operands()
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
bool isEHLabel() const
bool isMetaInstruction(QueryType Type=IgnoreBundle) const
Return true if this instruction doesn't produce any output in the form of executable instructions.
StubValueTy & getGVStubEntry(MCSymbol *Sym)
PointerIntPair< MCSymbol *, 1, bool > StubValueTy
MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation for MachO targets.
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateMCSymbol(MCSymbol *Sym, unsigned TargetFlags=0)
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
const BlockAddress * getBlockAddress() const
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
MCSymbol * getMCSymbol() const
@ MO_Immediate
Immediate operand.
@ MO_ConstantPoolIndex
Address of indexed Constant in Constant Pool.
@ MO_MCSymbol
MCSymbol reference (for debug/eh info)
@ MO_GlobalAddress
Address of a global value.
@ MO_RegisterMask
Mask of preserved registers.
@ MO_BlockAddress
Address of a basic block.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_JumpTableIndex
Address of indexed Jump Table for switch.
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
LLVM_ABI void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition Mangler.cpp:121
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition Pass.cpp:140
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
PointerTy getPointer() const
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static const char * getRegisterName(MCRegister Reg)
void emitInstruction(const MachineInstr *MI) override
Targets should implement this to emit instructions.
const X86Subtarget & getSubtarget() const
X86AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo, const MachineInstr *MI) override
Let the target do anything it needs to do after emitting inlineasm.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
unsigned getSlotSize() const
bool isTargetWindowsMSVC() const
bool useIndirectThunkCalls() const
virtual bool emitFPOPushReg(MCRegister Reg, SMLoc L={})
virtual bool emitFPOEndPrologue(SMLoc L={})
virtual bool emitFPOStackAlign(unsigned Align, SMLoc L={})
virtual bool emitFPOSetFrame(MCRegister Reg, SMLoc L={})
virtual bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L={})
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Itanium
Windows CE ARM, PowerPC, SH3, SH4.
Definition MCAsmInfo.h:49
bool isKMergeMasked(uint64_t TSFlags)
@ MO_TLSLD
MO_TLSLD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
@ MO_DARWIN_NONLAZY_PIC_BASE
MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates that the reference is actually...
@ MO_GOT_ABSOLUTE_ADDRESS
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
@ MO_NTPOFF
MO_NTPOFF - On a symbol operand this indicates that the immediate is the negative thread-pointer offs...
@ MO_DARWIN_NONLAZY
MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the reference is actually to the "...
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
@ MO_TPOFF
MO_TPOFF - On a symbol operand this indicates that the immediate is the thread-pointer offset for the...
@ MO_TLVP_PIC_BASE
MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate is some TLS offset from the ...
@ MO_GOT
MO_GOT - On a symbol operand this indicates that the immediate is the offset to the GOT entry for the...
@ MO_ABS8
MO_ABS8 - On a symbol operand this indicates that the symbol is known to be an absolute symbol in ran...
@ MO_PLT
MO_PLT - On a symbol operand this indicates that the immediate is offset to the PLT entry of symbol n...
@ MO_TLSGD
MO_TLSGD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
@ MO_TLVP
MO_TLVP - On a symbol operand this indicates that the immediate is some TLS offset.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
@ MO_SECREL
MO_SECREL - On a symbol operand this indicates that the immediate is the offset from beginning of sec...
@ MO_DTPOFF
MO_DTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
@ MO_TLSLDM
MO_TLSLDM - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
bool isKMasked(uint64_t TSFlags)
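A hedged sketch of querying AVX-512 masking from an instruction's TSFlags; Desc standing for the instruction's MCInstrDesc is an assumption:

  uint64_t TSFlags = Desc.TSFlags;
  bool Masked      = llvm::X86II::isKMasked(TSFlags);
  bool MergeMasked = llvm::X86II::isKMergeMasked(TSFlags);
  // MergeMasked implies Masked: the destination keeps its old elements
  // in lanes whose k-register mask bit is clear.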
bool isX86_64ExtendedReg(MCRegister Reg)
bool optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI)
@ AddrNumOperands
Definition X86BaseInfo.h:36
bool optimizeMOV(MCInst &MI, bool In64BitMode)
Simplify things like MOV32rm to MOV32o32a.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g. turning COND_E to COND_NE.
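For example (a sketch; the enumerators come from the X86 backend):

  X86::CondCode CC  = X86::COND_E;                         // je
  X86::CondCode Inv = X86::GetOppositeBranchCondition(CC); // COND_NE (jne)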
bool optimizeMOVSX(MCInst &MI)
bool optimizeVPCMPWithImmediateOneOrSix(MCInst &MI)
bool optimizeShiftRotateWithImmediateOne(MCInst &MI)
bool optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc)
uint16_t Specifier
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
bool optimizeINCDEC(MCInst &MI, bool In64BitMode)
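Each of these X86::optimize* helpers rewrites an MCInst in place to a shorter encoding and returns true on success, so a caller can simply try them in turn. A sketch under that assumption; the ordering is illustrative, not the exact sequence X86MCInstLower uses:

  void shrink(llvm::MCInst &MI, bool In64BitMode) {
    if (llvm::X86::optimizeShiftRotateWithImmediateOne(MI))
      return;
    if (llvm::X86::optimizeVPCMPWithImmediateOneOrSix(MI))
      return;
    if (llvm::X86::optimizeMOVSX(MI))
      return;
    if (llvm::X86::optimizeINCDEC(MI, In64BitMode))
      return;
    if (llvm::X86::optimizeMOV(MI, In64BitMode))
      return;
    llvm::X86::optimizeToFixedRegisterOrShortImmediateForm(MI);
  }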
unsigned getVectorRegisterWidth(const MCOperandInfo &Info)
Get the width of the vector register operand.
@ S_GOTPCREL_NORELAX
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, unsigned NumDstElts, bool IsAnyExtend, SmallVectorImpl< int > &ShuffleMask)
Decode a zero extension instruction as a shuffle mask.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
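A small sketch; Ops and Sum are hypothetical:

  llvm::SmallVector<int, 4> Ops = {7, 1, 2, 3};
  int Sum = 0;
  for (int Op : llvm::drop_begin(Ops)) // skips Ops[0]; visits 1, 2, 3
    Sum += Op;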
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument cast to the specified type, or null if the argument is not an instance of that type.
Definition Casting.h:643
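A minimal sketch, assuming V is an llvm::Value*; constOrZero is a hypothetical helper:

  #include "llvm/IR/Constants.h"

  // Returns the zero-extended value if V is a ConstantInt, else 0.
  uint64_t constOrZero(const llvm::Value *V) {
    if (const auto *CI = llvm::dyn_cast<llvm::ConstantInt>(V))
      return CI->getZExtValue();
    return 0;
  }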
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
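For example (a sketch using GPR enumerators from the X86 backend):

  MCRegister R32 = llvm::getX86SubSuperRegister(X86::RAX, 32); // X86::EAX
  MCRegister R8  = llvm::getX86SubSuperRegister(X86::RAX, 8);  // X86::AL
  MCRegister R8H =
      llvm::getX86SubSuperRegister(X86::RAX, 8, /*High=*/true); // X86::AH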
LLVM_ABI bool isCFGuardFunction(const GlobalValue *GV)
Definition CFGuard.cpp:319
@ WinEH
Windows Exception Handling.
Definition CodeGen.h:58
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
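A minimal sketch; a StringRef/Twine overload exists alongside the Error overload shown above, and the condition is illustrative:

  if (!Subtarget->is64Bit())
    llvm::report_fatal_error("this lowering requires a 64-bit target");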
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
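A one-line sketch; NumGlobalRefs is hypothetical:

  if (llvm::isa<llvm::GlobalValue>(V)) // type test only, no cast performed
    ++NumGlobalRefs;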
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
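For example (the operand index is illustrative):

  llvm::errs() << "cannot lower operand #" << 2 << "\n";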
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
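Continuing the hypothetical Ops/Sum sketch from drop_begin above:

  for (int Op : llvm::drop_end(Ops, 2)) // visits all but the last two
    Sum += Op;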
void DecodeVPPERMMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPPERM mask from a raw array of constants such as from BUILD_VECTOR.
DWARFExpression::Operation Op
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
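A small sketch, assuming llvm/ADT/APInt.h and llvm/ADT/StringExtras.h are included:

  llvm::APInt X(16, 0xBEEF);
  std::string Hex = llvm::toString(X, 16, /*Signed=*/false);
  // Hex == "BEEF" (UpperCase defaults to true)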
constexpr unsigned BitWidth
void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, bool IsKasan, uint64_t *ShadowBase, int *MappingScale, bool *OrShadowOffset)
@ SM_SentinelUndef
@ SM_SentinelZero
void DecodePSHUFBMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a PSHUFB mask from a raw array of constants such as from BUILD_VECTOR.
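A hedged sketch with an illustrative 16-lane control vector:

  llvm::SmallVector<uint64_t, 16> Raw(16, 0);
  Raw[0] = 0x80; // bit 7 set: lane 0 becomes zero
  Raw[1] = 5;    // lane 1 reads source byte 5
  llvm::APInt UndefElts(16, 0); // no undef lanes
  llvm::SmallVector<int, 16> Mask;
  llvm::DecodePSHUFBMask(Raw, UndefElts, Mask);
  // Mask[0] == llvm::SM_SentinelZero, Mask[1] == 5; other lanes read byte 0.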
#define N
void changeAndComment(bool b)
NoAutoPaddingScope(MCStreamer &OS)
const bool OldAllowAutoPadding