LLVM  4.0.0
X86InstrInfo.cpp
1 //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the X86 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86InstrInfo.h"
15 #include "X86.h"
16 #include "X86InstrBuilder.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/StackMaps.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/MC/MCAsmInfo.h"
34 #include "llvm/MC/MCExpr.h"
35 #include "llvm/MC/MCInst.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "x86-instr-info"
45 
46 #define GET_INSTRINFO_CTOR_DTOR
47 #include "X86GenInstrInfo.inc"
48 
49 static cl::opt<bool>
50 NoFusing("disable-spill-fusing",
51  cl::desc("Disable fusing of spill code into instructions"));
52 static cl::opt<bool>
53 PrintFailedFusing("print-failed-fuse-candidates",
54  cl::desc("Print instructions that the allocator wants to"
55  " fuse, but the X86 backend currently can't"),
56  cl::Hidden);
57 static cl::opt<bool>
58 ReMatPICStubLoad("remat-pic-stub-load",
59  cl::desc("Re-materialize load from stub in PIC mode"),
60  cl::init(false), cl::Hidden);
61 static cl::opt<unsigned>
62 PartialRegUpdateClearance("partial-reg-update-clearance",
63  cl::desc("Clearance between two register writes "
64  "for inserting XOR to avoid partial "
65  "register update"),
66  cl::init(64), cl::Hidden);
67 static cl::opt<unsigned>
68 UndefRegClearance("undef-reg-clearance",
69  cl::desc("How many idle instructions we would like before "
70  "certain undef register reads"),
71  cl::init(128), cl::Hidden);
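// [Editorial note, not part of the original source] The cl::opt declarations
// above register global command-line flags, so any tool that links in the X86
// backend picks them up. A usage sketch, assuming a stock llc build (the flag
// names come straight from the declarations above; the input file is made up):
//
//   llc -mtriple=x86_64-unknown-linux-gnu \
//       -partial-reg-update-clearance=96 -remat-pic-stub-load \
//       input.ll -o input.s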
72 
73 enum {
74  // Select which memory operand is being unfolded.
75  // (stored in bits 0 - 3)
76  TB_INDEX_0    = 0,
77  TB_INDEX_1    = 1,
78  TB_INDEX_2    = 2,
79  TB_INDEX_3    = 3,
80  TB_INDEX_4    = 4,
81  TB_INDEX_MASK = 0xf,
82 
83  // Do not insert the reverse map (MemOp -> RegOp) into the table.
84  // This may be needed because there is a many -> one mapping.
85  TB_NO_REVERSE = 1 << 4,
86 
87  // Do not insert the forward map (RegOp -> MemOp) into the table.
88  // This is needed for Native Client, which prohibits branch
89  // instructions from using a memory operand.
90  TB_NO_FORWARD = 1 << 5,
91 
92  TB_FOLDED_LOAD = 1 << 6,
93  TB_FOLDED_STORE = 1 << 7,
94 
95  // Minimum alignment required for load/store.
96  // Used for RegOp->MemOp conversion.
97  // (stored in bits 8 - 15)
98  TB_ALIGN_SHIFT = 8,
99  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
100  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
101  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
102  TB_ALIGN_64    =   64 << TB_ALIGN_SHIFT,
103  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT
104 };
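// [Editorial sketch, not part of the original source] The flag word of a fold
// table entry packs three fields: the unfolded operand index in bits 0-3, the
// TB_NO_*/TB_FOLDED_* bits in bits 4-7, and the minimum alignment (as a byte
// count) in bits 8-15. Two compile-time checks make that layout concrete,
// using the enumerators defined above:
//
//   static_assert(TB_INDEX_MASK == 0xf,
//                 "operand index lives in bits 0 - 3");
//   static_assert((TB_ALIGN_16 >> TB_ALIGN_SHIFT) == 16,
//                 "alignment is stored as a byte count in bits 8 - 15");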
105 
106 struct X86MemoryFoldTableEntry {
107  uint16_t RegOp;
108  uint16_t MemOp;
109  uint16_t Flags;
110 };
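// [Editorial note, not part of the original source] Each row of the fold
// tables below is one X86MemoryFoldTableEntry. Reading a representative
// 2-address entry such as
//   { X86::ADD32rr, X86::ADD32mr, 0 },
// RegOp is the register form, MemOp is the equivalent form that takes a
// memory operand, and a Flags value of 0 means there is no extra alignment
// requirement and both the forward (RegOp -> MemOp) and reverse
// (MemOp -> RegOp) mappings are inserted.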
111 
112 // Pin the vtable to this file.
113 void X86InstrInfo::anchor() {}
114 
115 X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
116  : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
117  : X86::ADJCALLSTACKDOWN32),
118  (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
119  : X86::ADJCALLSTACKUP32),
120  X86::CATCHRET,
121  (STI.is64Bit() ? X86::RETQ : X86::RETL)),
122  Subtarget(STI), RI(STI.getTargetTriple()) {
123 
124  static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
125  { X86::ADC32ri, X86::ADC32mi, 0 },
126  { X86::ADC32ri8, X86::ADC32mi8, 0 },
127  { X86::ADC32rr, X86::ADC32mr, 0 },
128  { X86::ADC64ri32, X86::ADC64mi32, 0 },
129  { X86::ADC64ri8, X86::ADC64mi8, 0 },
130  { X86::ADC64rr, X86::ADC64mr, 0 },
131  { X86::ADD16ri, X86::ADD16mi, 0 },
132  { X86::ADD16ri8, X86::ADD16mi8, 0 },
133  { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
134  { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
135  { X86::ADD16rr, X86::ADD16mr, 0 },
136  { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
137  { X86::ADD32ri, X86::ADD32mi, 0 },
138  { X86::ADD32ri8, X86::ADD32mi8, 0 },
139  { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
140  { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
141  { X86::ADD32rr, X86::ADD32mr, 0 },
142  { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
143  { X86::ADD64ri32, X86::ADD64mi32, 0 },
144  { X86::ADD64ri8, X86::ADD64mi8, 0 },
145  { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
146  { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
147  { X86::ADD64rr, X86::ADD64mr, 0 },
148  { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
149  { X86::ADD8ri, X86::ADD8mi, 0 },
150  { X86::ADD8rr, X86::ADD8mr, 0 },
151  { X86::AND16ri, X86::AND16mi, 0 },
152  { X86::AND16ri8, X86::AND16mi8, 0 },
153  { X86::AND16rr, X86::AND16mr, 0 },
154  { X86::AND32ri, X86::AND32mi, 0 },
155  { X86::AND32ri8, X86::AND32mi8, 0 },
156  { X86::AND32rr, X86::AND32mr, 0 },
157  { X86::AND64ri32, X86::AND64mi32, 0 },
158  { X86::AND64ri8, X86::AND64mi8, 0 },
159  { X86::AND64rr, X86::AND64mr, 0 },
160  { X86::AND8ri, X86::AND8mi, 0 },
161  { X86::AND8rr, X86::AND8mr, 0 },
162  { X86::DEC16r, X86::DEC16m, 0 },
163  { X86::DEC32r, X86::DEC32m, 0 },
164  { X86::DEC64r, X86::DEC64m, 0 },
165  { X86::DEC8r, X86::DEC8m, 0 },
166  { X86::INC16r, X86::INC16m, 0 },
167  { X86::INC32r, X86::INC32m, 0 },
168  { X86::INC64r, X86::INC64m, 0 },
169  { X86::INC8r, X86::INC8m, 0 },
170  { X86::NEG16r, X86::NEG16m, 0 },
171  { X86::NEG32r, X86::NEG32m, 0 },
172  { X86::NEG64r, X86::NEG64m, 0 },
173  { X86::NEG8r, X86::NEG8m, 0 },
174  { X86::NOT16r, X86::NOT16m, 0 },
175  { X86::NOT32r, X86::NOT32m, 0 },
176  { X86::NOT64r, X86::NOT64m, 0 },
177  { X86::NOT8r, X86::NOT8m, 0 },
178  { X86::OR16ri, X86::OR16mi, 0 },
179  { X86::OR16ri8, X86::OR16mi8, 0 },
180  { X86::OR16rr, X86::OR16mr, 0 },
181  { X86::OR32ri, X86::OR32mi, 0 },
182  { X86::OR32ri8, X86::OR32mi8, 0 },
183  { X86::OR32rr, X86::OR32mr, 0 },
184  { X86::OR64ri32, X86::OR64mi32, 0 },
185  { X86::OR64ri8, X86::OR64mi8, 0 },
186  { X86::OR64rr, X86::OR64mr, 0 },
187  { X86::OR8ri, X86::OR8mi, 0 },
188  { X86::OR8rr, X86::OR8mr, 0 },
189  { X86::ROL16r1, X86::ROL16m1, 0 },
190  { X86::ROL16rCL, X86::ROL16mCL, 0 },
191  { X86::ROL16ri, X86::ROL16mi, 0 },
192  { X86::ROL32r1, X86::ROL32m1, 0 },
193  { X86::ROL32rCL, X86::ROL32mCL, 0 },
194  { X86::ROL32ri, X86::ROL32mi, 0 },
195  { X86::ROL64r1, X86::ROL64m1, 0 },
196  { X86::ROL64rCL, X86::ROL64mCL, 0 },
197  { X86::ROL64ri, X86::ROL64mi, 0 },
198  { X86::ROL8r1, X86::ROL8m1, 0 },
199  { X86::ROL8rCL, X86::ROL8mCL, 0 },
200  { X86::ROL8ri, X86::ROL8mi, 0 },
201  { X86::ROR16r1, X86::ROR16m1, 0 },
202  { X86::ROR16rCL, X86::ROR16mCL, 0 },
203  { X86::ROR16ri, X86::ROR16mi, 0 },
204  { X86::ROR32r1, X86::ROR32m1, 0 },
205  { X86::ROR32rCL, X86::ROR32mCL, 0 },
206  { X86::ROR32ri, X86::ROR32mi, 0 },
207  { X86::ROR64r1, X86::ROR64m1, 0 },
208  { X86::ROR64rCL, X86::ROR64mCL, 0 },
209  { X86::ROR64ri, X86::ROR64mi, 0 },
210  { X86::ROR8r1, X86::ROR8m1, 0 },
211  { X86::ROR8rCL, X86::ROR8mCL, 0 },
212  { X86::ROR8ri, X86::ROR8mi, 0 },
213  { X86::SAR16r1, X86::SAR16m1, 0 },
214  { X86::SAR16rCL, X86::SAR16mCL, 0 },
215  { X86::SAR16ri, X86::SAR16mi, 0 },
216  { X86::SAR32r1, X86::SAR32m1, 0 },
217  { X86::SAR32rCL, X86::SAR32mCL, 0 },
218  { X86::SAR32ri, X86::SAR32mi, 0 },
219  { X86::SAR64r1, X86::SAR64m1, 0 },
220  { X86::SAR64rCL, X86::SAR64mCL, 0 },
221  { X86::SAR64ri, X86::SAR64mi, 0 },
222  { X86::SAR8r1, X86::SAR8m1, 0 },
223  { X86::SAR8rCL, X86::SAR8mCL, 0 },
224  { X86::SAR8ri, X86::SAR8mi, 0 },
225  { X86::SBB32ri, X86::SBB32mi, 0 },
226  { X86::SBB32ri8, X86::SBB32mi8, 0 },
227  { X86::SBB32rr, X86::SBB32mr, 0 },
228  { X86::SBB64ri32, X86::SBB64mi32, 0 },
229  { X86::SBB64ri8, X86::SBB64mi8, 0 },
230  { X86::SBB64rr, X86::SBB64mr, 0 },
231  { X86::SHL16r1, X86::SHL16m1, 0 },
232  { X86::SHL16rCL, X86::SHL16mCL, 0 },
233  { X86::SHL16ri, X86::SHL16mi, 0 },
234  { X86::SHL32r1, X86::SHL32m1, 0 },
235  { X86::SHL32rCL, X86::SHL32mCL, 0 },
236  { X86::SHL32ri, X86::SHL32mi, 0 },
237  { X86::SHL64r1, X86::SHL64m1, 0 },
238  { X86::SHL64rCL, X86::SHL64mCL, 0 },
239  { X86::SHL64ri, X86::SHL64mi, 0 },
240  { X86::SHL8r1, X86::SHL8m1, 0 },
241  { X86::SHL8rCL, X86::SHL8mCL, 0 },
242  { X86::SHL8ri, X86::SHL8mi, 0 },
243  { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
244  { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
245  { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
246  { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
247  { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
248  { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
249  { X86::SHR16r1, X86::SHR16m1, 0 },
250  { X86::SHR16rCL, X86::SHR16mCL, 0 },
251  { X86::SHR16ri, X86::SHR16mi, 0 },
252  { X86::SHR32r1, X86::SHR32m1, 0 },
253  { X86::SHR32rCL, X86::SHR32mCL, 0 },
254  { X86::SHR32ri, X86::SHR32mi, 0 },
255  { X86::SHR64r1, X86::SHR64m1, 0 },
256  { X86::SHR64rCL, X86::SHR64mCL, 0 },
257  { X86::SHR64ri, X86::SHR64mi, 0 },
258  { X86::SHR8r1, X86::SHR8m1, 0 },
259  { X86::SHR8rCL, X86::SHR8mCL, 0 },
260  { X86::SHR8ri, X86::SHR8mi, 0 },
261  { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
262  { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
263  { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
264  { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
265  { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
266  { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
267  { X86::SUB16ri, X86::SUB16mi, 0 },
268  { X86::SUB16ri8, X86::SUB16mi8, 0 },
269  { X86::SUB16rr, X86::SUB16mr, 0 },
270  { X86::SUB32ri, X86::SUB32mi, 0 },
271  { X86::SUB32ri8, X86::SUB32mi8, 0 },
272  { X86::SUB32rr, X86::SUB32mr, 0 },
273  { X86::SUB64ri32, X86::SUB64mi32, 0 },
274  { X86::SUB64ri8, X86::SUB64mi8, 0 },
275  { X86::SUB64rr, X86::SUB64mr, 0 },
276  { X86::SUB8ri, X86::SUB8mi, 0 },
277  { X86::SUB8rr, X86::SUB8mr, 0 },
278  { X86::XOR16ri, X86::XOR16mi, 0 },
279  { X86::XOR16ri8, X86::XOR16mi8, 0 },
280  { X86::XOR16rr, X86::XOR16mr, 0 },
281  { X86::XOR32ri, X86::XOR32mi, 0 },
282  { X86::XOR32ri8, X86::XOR32mi8, 0 },
283  { X86::XOR32rr, X86::XOR32mr, 0 },
284  { X86::XOR64ri32, X86::XOR64mi32, 0 },
285  { X86::XOR64ri8, X86::XOR64mi8, 0 },
286  { X86::XOR64rr, X86::XOR64mr, 0 },
287  { X86::XOR8ri, X86::XOR8mi, 0 },
288  { X86::XOR8rr, X86::XOR8mr, 0 }
289  };
290 
291  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
292  AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
293  Entry.RegOp, Entry.MemOp,
294  // Index 0, folded load and store, no alignment requirement.
295  Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
296  }
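// [Editorial sketch, not part of the original source] Every 2-address entry is
// registered with operand 0 folded as both a load and a store: the spill slot
// is read, operated on, and written back in place. For an entry whose own
// Flags are 0 the value stored in the table is therefore:
//
//   unsigned Flags = TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE; // == 0xC0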
297 
298  static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
299  { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
300  { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
301  { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
302  { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
303  { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
304  { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
305  { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
306  { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
307  { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
308  { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
309  { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
310  { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
311  { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
312  { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
313  { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
314  { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
315  { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
316  { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
317  { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
318  { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
319  { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
320  { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
321  { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
322  { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
323  { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
324  { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
325  { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
326  { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
327  { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
328  { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
329  { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
330  { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
331  { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
332  { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
333  { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
334  { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
335  { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
336  { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
337  { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
338  { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
339  { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
340  { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
341  { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
342  { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
343  { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
344  { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
345  { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
346  { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
347  { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
348  { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
349  { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
350  { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
351  { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
352  { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
353  { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
354  { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
355  { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
356  { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
357  { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
358  { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
359  { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
360  { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
361  { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
362  { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
363  { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
364  { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
365  { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
366  { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
367  { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
368  { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
369  { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
370  { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
371  { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
372  { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
373  { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
374  { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
375  { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
376  { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
377  { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
378  { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
379  { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
380  { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
381 
382  // AVX 128-bit versions of foldable instructions
383  { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
384  { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
385  { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
386  { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
387  { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
388  { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE },
389  { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
390  { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
391  { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
392  { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
393  { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
394  { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
395  { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE },
396  { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE },
397 
398  // AVX 256-bit foldable instructions
399  { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
400  { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
401  { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
402  { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
403  { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE },
404  { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
405  { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
406 
407  // AVX-512 foldable instructions
408  { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE },
409  { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE },
410  { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE },
411  { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE },
412  { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE },
413  { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE },
414  { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
415  { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
416  { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
417  { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
418  { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
419  { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
420  { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
421  { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
422  { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
423  { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
424  { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
425  { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
426  { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
427  { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
428  { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
429  { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
430  { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
431  { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE },
432  { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE },
433  { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE },
434  { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE },
435  { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE },
436  { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE },
437  { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE },
438  { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE },
439  { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE },
440  { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE },
441  { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE },
442  { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE },
443 
444  // AVX-512 foldable instructions (256-bit versions)
445  { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE },
446  { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE },
447  { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE },
448  { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE },
449  { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
450  { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
451  { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
452  { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
453  { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
454  { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
455  { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
456  { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
457  { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
458  { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
459  { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE },
460  { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE },
461  { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE },
462  { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE },
463  { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE },
464  { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE },
465  { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE },
466  { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE },
467  { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE },
468 
469  // AVX-512 foldable instructions (128-bit versions)
470  { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
471  { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
472  { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
473  { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
474  { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
475  { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
476  { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
477  { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
478  { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
479  { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
480 
481  // F16C foldable instructions
482  { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE },
483  { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
484  };
485 
486  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
487  AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
488  Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
489  }
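// [Editorial note, not part of the original source] Table0 entries also fold
// operand 0, but each row is load-only or store-only. TB_FOLDED_LOAD rows
// replace a register source with a memory read, e.g.
//   { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
// turns a call through a spilled register into a call through its spill slot,
// while TB_FOLDED_STORE rows replace a register destination with a memory
// write, e.g. MOV32rr with a spilled destination becomes MOV32mr and stores
// straight to the slot.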
490 
491  static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
492  { X86::BSF16rr, X86::BSF16rm, 0 },
493  { X86::BSF32rr, X86::BSF32rm, 0 },
494  { X86::BSF64rr, X86::BSF64rm, 0 },
495  { X86::BSR16rr, X86::BSR16rm, 0 },
496  { X86::BSR32rr, X86::BSR32rm, 0 },
497  { X86::BSR64rr, X86::BSR64rm, 0 },
498  { X86::CMP16rr, X86::CMP16rm, 0 },
499  { X86::CMP32rr, X86::CMP32rm, 0 },
500  { X86::CMP64rr, X86::CMP64rm, 0 },
501  { X86::CMP8rr, X86::CMP8rm, 0 },
502  { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
503  { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
504  { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
505  { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
506  { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
507  { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
508  { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
509  { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
510  { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
511  { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
512  { X86::IMUL16rri, X86::IMUL16rmi, 0 },
513  { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
514  { X86::IMUL32rri, X86::IMUL32rmi, 0 },
515  { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
516  { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
517  { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
518  { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE },
519  { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE },
520  { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE },
521  { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE },
522  { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE },
523  { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE },
524  { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE },
525  { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
526  { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
527  { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
528  { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
529  { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE },
530  { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
531  { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
532  { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE },
533  { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE },
534  { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE },
535  { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE },
536  { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE },
537  { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE },
538  { X86::MOV16rr, X86::MOV16rm, 0 },
539  { X86::MOV32rr, X86::MOV32rm, 0 },
540  { X86::MOV64rr, X86::MOV64rm, 0 },
541  { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
542  { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
543  { X86::MOV8rr, X86::MOV8rm, 0 },
544  { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
545  { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
546  { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
547  { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
548  { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
549  { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
550  { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
551  { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
552  { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
553  { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
554  { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
555  { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
556  { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
557  { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
558  { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
559  { X86::MOVUPDrr, X86::MOVUPDrm, 0 },
560  { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
561  { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
562  { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
563  { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
564  { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
565  { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
566  { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
567  { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
568  { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
569  { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
570  { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
571  { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
572  { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
573  { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 },
574  { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
575  { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
576  { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE },
577  { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE },
578  { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE },
579  { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE },
580  { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE },
581  { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE },
582  { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE },
583  { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE },
584  { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE },
585  { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE },
586  { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
587  { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
588  { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
589  { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
590  { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
591  { X86::RCPSSr, X86::RCPSSm, 0 },
592  { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE },
593  { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
594  { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
595  { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
596  { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
597  { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
598  { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
599  { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE },
600  { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
601  { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
602  { X86::SQRTSDr, X86::SQRTSDm, 0 },
603  { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE },
604  { X86::SQRTSSr, X86::SQRTSSm, 0 },
605  { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE },
606  { X86::TEST16rr, X86::TEST16rm, 0 },
607  { X86::TEST32rr, X86::TEST32rm, 0 },
608  { X86::TEST64rr, X86::TEST64rm, 0 },
609  { X86::TEST8rr, X86::TEST8rm, 0 },
610  // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
611  { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
612  { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
613 
614  // MMX version of foldable instructions
615  { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
616  { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
617  { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
618  { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
619  { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
620  { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
621  { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
622  { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
623  { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
624  { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
625 
626  // 3DNow! version of foldable instructions
627  { X86::PF2IDrr, X86::PF2IDrm, 0 },
628  { X86::PF2IWrr, X86::PF2IWrm, 0 },
629  { X86::PFRCPrr, X86::PFRCPrm, 0 },
630  { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
631  { X86::PI2FDrr, X86::PI2FDrm, 0 },
632  { X86::PI2FWrr, X86::PI2FWrm, 0 },
633  { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
634 
635  // AVX 128-bit versions of foldable instructions
636  { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE },
637  { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE },
638  { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE },
639  { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE },
640  { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
641  { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE },
642  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
643  { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE },
644  { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
645  { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE },
646  { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
647  { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE },
648  { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE },
649  { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE },
650  { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE },
651  { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE },
652  { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE },
653  { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
654  { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 },
655  { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 },
656  { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
657  { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE },
658  { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 },
659  { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
660  { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
661  { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
662  { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
663  { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
664  { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
665  { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
666  { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
667  { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
668  { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 },
669  { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
670  { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
671  { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
672  { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
673  { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE },
674  { X86::VPABSBrr, X86::VPABSBrm, 0 },
675  { X86::VPABSDrr, X86::VPABSDrm, 0 },
676  { X86::VPABSWrr, X86::VPABSWrm, 0 },
677  { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
678  { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 },
679  { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
680  { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 },
681  { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 },
682  { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
683  { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
684  { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE },
685  { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE },
686  { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE },
687  { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE },
688  { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE },
689  { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE },
690  { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE },
691  { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE },
692  { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE },
693  { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE },
694  { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE },
695  { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE },
696  { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
697  { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
698  { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
699  { X86::VPTESTrr, X86::VPTESTrm, 0 },
700  { X86::VRCPPSr, X86::VRCPPSm, 0 },
701  { X86::VROUNDPDr, X86::VROUNDPDm, 0 },
702  { X86::VROUNDPSr, X86::VROUNDPSm, 0 },
703  { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
704  { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
705  { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
706  { X86::VTESTPDrr, X86::VTESTPDrm, 0 },
707  { X86::VTESTPSrr, X86::VTESTPSrm, 0 },
708  { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
709  { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
710 
711  // AVX 256-bit foldable instructions
712  { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE },
713  { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
714  { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
715  { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
716  { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
717  { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE },
718  { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
719  { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
720  { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
721  { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
722  { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
723  { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
724  { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 },
725  { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
726  { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
727  { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
728  { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
729  { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
730  { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
731  { X86::VPTESTYrr, X86::VPTESTYrm, 0 },
732  { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
733  { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
734  { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
735  { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
736  { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
737  { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
738  { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 },
739  { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 },
740 
741  // AVX2 foldable instructions
742 
743  // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the
744  // VBROADCASTS{SD}rm memory instructions were available from AVX1.
745  // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction
746  // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions
747  // so they don't need an equivalent limitation.
748  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
749  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
750  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
751  { X86::VPABSBYrr, X86::VPABSBYrm, 0 },
752  { X86::VPABSDYrr, X86::VPABSDYrm, 0 },
753  { X86::VPABSWYrr, X86::VPABSWYrm, 0 },
754  { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE },
755  { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE },
756  { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE },
757  { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE },
758  { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE },
759  { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE },
760  { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE },
761  { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE },
762  { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
763  { X86::VPERMQYri, X86::VPERMQYmi, 0 },
764  { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE },
765  { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE },
766  { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
767  { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
768  { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
769  { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE },
770  { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE },
771  { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE },
772  { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
773  { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
774  { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
775  { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE },
776  { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
777  { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
778  { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
779 
780  // XOP foldable instructions
781  { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
782  { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 },
783  { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 },
784  { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 },
785  { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 },
786  { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 },
787  { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 },
788  { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 },
789  { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 },
790  { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 },
791  { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 },
792  { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 },
793  { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 },
794  { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 },
795  { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 },
796  { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 },
797  { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 },
798  { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 },
799  { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 },
800  { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 },
801  { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 },
802  { X86::VPROTBri, X86::VPROTBmi, 0 },
803  { X86::VPROTBrr, X86::VPROTBmr, 0 },
804  { X86::VPROTDri, X86::VPROTDmi, 0 },
805  { X86::VPROTDrr, X86::VPROTDmr, 0 },
806  { X86::VPROTQri, X86::VPROTQmi, 0 },
807  { X86::VPROTQrr, X86::VPROTQmr, 0 },
808  { X86::VPROTWri, X86::VPROTWmi, 0 },
809  { X86::VPROTWrr, X86::VPROTWmr, 0 },
810  { X86::VPSHABrr, X86::VPSHABmr, 0 },
811  { X86::VPSHADrr, X86::VPSHADmr, 0 },
812  { X86::VPSHAQrr, X86::VPSHAQmr, 0 },
813  { X86::VPSHAWrr, X86::VPSHAWmr, 0 },
814  { X86::VPSHLBrr, X86::VPSHLBmr, 0 },
815  { X86::VPSHLDrr, X86::VPSHLDmr, 0 },
816  { X86::VPSHLQrr, X86::VPSHLQmr, 0 },
817  { X86::VPSHLWrr, X86::VPSHLWmr, 0 },
818 
819  // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
820  { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
821  { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
822  { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
823  { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
824  { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
825  { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
826  { X86::BLCI32rr, X86::BLCI32rm, 0 },
827  { X86::BLCI64rr, X86::BLCI64rm, 0 },
828  { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
829  { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
830  { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
831  { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
832  { X86::BLCS32rr, X86::BLCS32rm, 0 },
833  { X86::BLCS64rr, X86::BLCS64rm, 0 },
834  { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
835  { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
836  { X86::BLSI32rr, X86::BLSI32rm, 0 },
837  { X86::BLSI64rr, X86::BLSI64rm, 0 },
838  { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
839  { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
840  { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
841  { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
842  { X86::BLSR32rr, X86::BLSR32rm, 0 },
843  { X86::BLSR64rr, X86::BLSR64rm, 0 },
844  { X86::BZHI32rr, X86::BZHI32rm, 0 },
845  { X86::BZHI64rr, X86::BZHI64rm, 0 },
846  { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
847  { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
848  { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
849  { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
850  { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
851  { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
852  { X86::RORX32ri, X86::RORX32mi, 0 },
853  { X86::RORX64ri, X86::RORX64mi, 0 },
854  { X86::SARX32rr, X86::SARX32rm, 0 },
855  { X86::SARX64rr, X86::SARX64rm, 0 },
856  { X86::SHRX32rr, X86::SHRX32rm, 0 },
857  { X86::SHRX64rr, X86::SHRX64rm, 0 },
858  { X86::SHLX32rr, X86::SHLX32rm, 0 },
859  { X86::SHLX64rr, X86::SHLX64rm, 0 },
860  { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
861  { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
862  { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
863  { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
864  { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
865  { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
866  { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
867 
868  // AVX-512 foldable instructions
869  { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
870  { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
871  { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
872  { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
873  { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
874  { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
875  { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
876  { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
877  { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
878  { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
879  { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
880  { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
881  { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
882  { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
883  { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
884  { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
885  { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
886  { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
887  { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
888  { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
889  { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
890  { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
891  { X86::VPERMQZri, X86::VPERMQZmi, 0 },
892  { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
893  { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE },
894  { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
895  { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 },
896  { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 },
897  { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 },
898  { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 },
899  { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE },
900  { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 },
901  { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 },
902  { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 },
903  { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 },
904  { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
905  { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
906  { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
907 
908  // AVX-512 foldable instructions (256-bit versions)
909  { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
910  { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
911  { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
912  { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
913  { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
914  { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
915  { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
916  { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
917  { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
918  { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
919  { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
920  { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
921  { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
922  { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
923  { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
924  { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
925  { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
926  { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
927  { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
928  { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE },
929  { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
930  { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 },
931  { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 },
932  { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE },
933  { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE },
934  { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE },
935  { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 },
936  { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 },
937  { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 },
938  { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE },
939  { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
940  { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
941  { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
942 
943  // AVX-512 foldable instructions (128-bit versions)
944  { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
945  { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
946  { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
947  { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
948  { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
949  { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
950  { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
951  { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
952  { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
953  { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
954  { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
955  { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
956  { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
957  { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
958  { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
959  { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE },
960  { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE },
961  { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE },
962  { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE },
963  { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE },
964  { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE },
965  { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE },
966  { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE },
967  { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE },
968  { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE },
969  { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE },
970  { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
971  { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
972  { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
973 
974  // F16C foldable instructions
975  { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
976  { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
977 
978  // AES foldable instructions
979  { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
980  { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
981  { X86::VAESIMCrr, X86::VAESIMCrm, 0 },
982  { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
983  };
984 
985  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
986  AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
987  Entry.RegOp, Entry.MemOp,
988  // Index 1, folded load
989  Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
990  }
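// [Editorial sketch, not part of the original source] Table1 covers
// instructions whose single source, operand 1, can be turned into a load.
// Taking the entry
//   { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
// a convert whose source register was spilled can read the float directly
// from its stack slot. Each entry above is registered with
//   Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD   // == 0x41 when Entry.Flags is 0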
991 
992  static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
993  { X86::ADC32rr, X86::ADC32rm, 0 },
994  { X86::ADC64rr, X86::ADC64rm, 0 },
995  { X86::ADD16rr, X86::ADD16rm, 0 },
996  { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
997  { X86::ADD32rr, X86::ADD32rm, 0 },
998  { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
999  { X86::ADD64rr, X86::ADD64rm, 0 },
1000  { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
1001  { X86::ADD8rr, X86::ADD8rm, 0 },
1002  { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
1003  { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
1004  { X86::ADDSDrr, X86::ADDSDrm, 0 },
1005  { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE },
1006  { X86::ADDSSrr, X86::ADDSSrm, 0 },
1007  { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE },
1008  { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
1009  { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
1010  { X86::AND16rr, X86::AND16rm, 0 },
1011  { X86::AND32rr, X86::AND32rm, 0 },
1012  { X86::AND64rr, X86::AND64rm, 0 },
1013  { X86::AND8rr, X86::AND8rm, 0 },
1014  { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
1015  { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
1016  { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
1017  { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
1018  { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
1019  { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
1020  { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
1021  { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
1022  { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
1023  { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
1024  { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
1025  { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
1026  { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
1027  { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
1028  { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
1029  { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
1030  { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
1031  { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
1032  { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
1033  { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
1034  { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
1035  { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
1036  { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
1037  { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
1038  { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
1039  { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
1040  { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
1041  { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
1042  { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
1043  { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
1044  { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
1045  { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
1046  { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
1047  { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
1048  { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
1049  { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
1050  { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
1051  { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
1052  { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
1053  { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
1054  { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
1055  { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
1056  { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
1057  { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
1058  { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
1059  { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
1060  { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
1061  { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
1062  { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
1063  { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
1064  { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
1065  { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
1066  { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
1067  { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
1068  { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
1069  { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
1070  { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
1071  { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
1072  { X86::CMPSDrr, X86::CMPSDrm, 0 },
1073  { X86::CMPSSrr, X86::CMPSSrm, 0 },
1074  { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
1075  { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
1076  { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
1077  { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
1078  { X86::DIVSDrr, X86::DIVSDrm, 0 },
1079  { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE },
1080  { X86::DIVSSrr, X86::DIVSSrm, 0 },
1081  { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE },
1082  { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
1083  { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
1084  { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
1085  { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
1086  { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
1087  { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
1088  { X86::IMUL16rr, X86::IMUL16rm, 0 },
1089  { X86::IMUL32rr, X86::IMUL32rm, 0 },
1090  { X86::IMUL64rr, X86::IMUL64rm, 0 },
1091  { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE },
1092  { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE },
1093  { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE },
1094  { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
1095  { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
1096  { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
1097  { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
1098  { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE },
1099  { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
1100  { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 },
1101  { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
1102  { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 },
1103  { X86::MAXSDrr, X86::MAXSDrm, 0 },
1104  { X86::MAXCSDrr, X86::MAXCSDrm, 0 },
1105  { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE },
1106  { X86::MAXSSrr, X86::MAXSSrm, 0 },
1107  { X86::MAXCSSrr, X86::MAXCSSrm, 0 },
1108  { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE },
1109  { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
1110  { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 },
1111  { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
1112  { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 },
1113  { X86::MINSDrr, X86::MINSDrm, 0 },
1114  { X86::MINCSDrr, X86::MINCSDrm, 0 },
1115  { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE },
1116  { X86::MINSSrr, X86::MINSSrm, 0 },
1117  { X86::MINCSSrr, X86::MINCSSrm, 0 },
1118  { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE },
1119  { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
1120  { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
1121  { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
1122  { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
1123  { X86::MULSDrr, X86::MULSDrm, 0 },
1124  { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE },
1125  { X86::MULSSrr, X86::MULSSrm, 0 },
1126  { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE },
1127  { X86::OR16rr, X86::OR16rm, 0 },
1128  { X86::OR32rr, X86::OR32rm, 0 },
1129  { X86::OR64rr, X86::OR64rm, 0 },
1130  { X86::OR8rr, X86::OR8rm, 0 },
1131  { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
1132  { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
1133  { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
1134  { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
1135  { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
1136  { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
1137  { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
1138  { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
1139  { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
1140  { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
1141  { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
1142  { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
1143  { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
1144  { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
1145  { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 },
1146  { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
1147  { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
1148  { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
1149  { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
1150  { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 },
1151  { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
1152  { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 },
1153  { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
1154  { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
1155  { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
1156  { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
1157  { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
1158  { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
1159  { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
1160  { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
1161  { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
1162  { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
1163  { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
1164  { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
1165  { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
1166  { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
1167  { X86::PINSRBrr, X86::PINSRBrm, 0 },
1168  { X86::PINSRDrr, X86::PINSRDrm, 0 },
1169  { X86::PINSRQrr, X86::PINSRQrm, 0 },
1170  { X86::PINSRWrri, X86::PINSRWrmi, 0 },
1171  { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
1172  { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
1173  { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
1174  { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
1175  { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
1176  { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
1177  { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
1178  { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
1179  { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
1180  { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
1181  { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
1182  { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
1183  { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
1184  { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
1185  { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
1186  { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
1187  { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
1188  { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
1189  { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
1190  { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
1191  { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
1192  { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
1193  { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
1194  { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
1195  { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 },
1196  { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 },
1197  { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 },
1198  { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
1199  { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
1200  { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
1201  { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
1202  { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
1203  { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
1204  { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
1205  { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
1206  { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
1207  { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
1208  { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 },
1209  { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
1210  { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
1211  { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 },
1212  { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 },
1213  { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
1214  { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
1215  { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
1216  { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
1217  { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
1218  { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
1219  { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
1220  { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
1221  { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
1222  { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
1223  { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE },
1224  { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE },
1225  { X86::SBB32rr, X86::SBB32rm, 0 },
1226  { X86::SBB64rr, X86::SBB64rm, 0 },
1227  { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
1228  { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
1229  { X86::SUB16rr, X86::SUB16rm, 0 },
1230  { X86::SUB32rr, X86::SUB32rm, 0 },
1231  { X86::SUB64rr, X86::SUB64rm, 0 },
1232  { X86::SUB8rr, X86::SUB8rm, 0 },
1233  { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
1234  { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
1235  { X86::SUBSDrr, X86::SUBSDrm, 0 },
1236  { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
1237  { X86::SUBSSrr, X86::SUBSSrm, 0 },
1238  { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
1239  // FIXME: TEST*rr -> swapped operand of TEST*mr.
1240  { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
1241  { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
1242  { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
1243  { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
1244  { X86::XOR16rr, X86::XOR16rm, 0 },
1245  { X86::XOR32rr, X86::XOR32rm, 0 },
1246  { X86::XOR64rr, X86::XOR64rm, 0 },
1247  { X86::XOR8rr, X86::XOR8rm, 0 },
1248  { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
1249  { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
1250 
1251  // MMX version of foldable instructions
1252  { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
1253  { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
1254  { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
1255  { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
1256  { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
1257  { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
1258  { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
1259  { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
1260  { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
1261  { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
1262  { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
1263  { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
1264  { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
1265  { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
1266  { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
1267  { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
1268  { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
1269  { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
1270  { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
1271  { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
1272  { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
1273  { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
1274  { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
1275  { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
1276  { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
1277  { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
1278  { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
1279  { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
1280  { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
1281  { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
1282  { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
1283  { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
1284  { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
1285  { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
1286  { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
1287  { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
1288  { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
1289  { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
1290  { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
1291  { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
1292  { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
1293  { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
1294  { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
1295  { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
1296  { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
1297  { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
1298  { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
1299  { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
1300  { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
1301  { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
1302  { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
1303  { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
1304  { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
1305  { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
1306  { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
1307  { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
1308  { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
1309  { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
1310  { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
1311  { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
1312  { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
1313  { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
1314  { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
1315  { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
1316  { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
1317  { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
1318  { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
1319  { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
1320  { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
1321  { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
1322 
1323  // 3DNow! version of foldable instructions
1324  { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
1325  { X86::PFACCrr, X86::PFACCrm, 0 },
1326  { X86::PFADDrr, X86::PFADDrm, 0 },
1327  { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
1328  { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
1329  { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
1330  { X86::PFMAXrr, X86::PFMAXrm, 0 },
1331  { X86::PFMINrr, X86::PFMINrm, 0 },
1332  { X86::PFMULrr, X86::PFMULrm, 0 },
1333  { X86::PFNACCrr, X86::PFNACCrm, 0 },
1334  { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
1335  { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
1336  { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
1337  { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
1338  { X86::PFSUBrr, X86::PFSUBrm, 0 },
1339  { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
1340  { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
1341 
1342  // AVX 128-bit versions of foldable instructions
1343  { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
1344  { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, TB_NO_REVERSE },
1345  { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
1346  { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
1347  { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
1348  { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
1349  { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
1350  { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
1351  { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
1352  { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
1353  { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
1354  { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_NO_REVERSE },
1355  { X86::VADDPDrr, X86::VADDPDrm, 0 },
1356  { X86::VADDPSrr, X86::VADDPSrm, 0 },
1357  { X86::VADDSDrr, X86::VADDSDrm, 0 },
1358  { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
1359  { X86::VADDSSrr, X86::VADDSSrm, 0 },
1360  { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE },
1361  { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
1362  { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
1363  { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
1364  { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
1365  { X86::VANDPDrr, X86::VANDPDrm, 0 },
1366  { X86::VANDPSrr, X86::VANDPSrm, 0 },
1367  { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
1368  { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
1369  { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
1370  { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
1371  { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
1372  { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
1373  { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
1374  { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
1375  { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
1376  { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
1377  { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
1378  { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
1379  { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
1380  { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE },
1381  { X86::VDPPDrri, X86::VDPPDrmi, 0 },
1382  { X86::VDPPSrri, X86::VDPPSrmi, 0 },
1383  { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
1384  { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
1385  { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
1386  { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
1387  { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE },
1388  { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE },
1389  { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
1390  { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
1391  { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
1392  { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
1393  { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
1394  { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
1395  { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
1396  { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
1397  { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
1398  { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE },
1399  { X86::VMINCPDrr, X86::VMINCPDrm, 0 },
1400  { X86::VMINCPSrr, X86::VMINCPSrm, 0 },
1401  { X86::VMINCSDrr, X86::VMINCSDrm, 0 },
1402  { X86::VMINCSSrr, X86::VMINCSSrm, 0 },
1403  { X86::VMINPDrr, X86::VMINPDrm, 0 },
1404  { X86::VMINPSrr, X86::VMINPSrm, 0 },
1405  { X86::VMINSDrr, X86::VMINSDrm, 0 },
1406  { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
1407  { X86::VMINSSrr, X86::VMINSSrm, 0 },
1408  { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE },
1409  { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
1410  { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
1411  { X86::VMULPDrr, X86::VMULPDrm, 0 },
1412  { X86::VMULPSrr, X86::VMULPSrm, 0 },
1413  { X86::VMULSDrr, X86::VMULSDrm, 0 },
1414  { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
1415  { X86::VMULSSrr, X86::VMULSSrm, 0 },
1416  { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE },
1417  { X86::VORPDrr, X86::VORPDrm, 0 },
1418  { X86::VORPSrr, X86::VORPSrm, 0 },
1419  { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
1420  { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
1421  { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
1422  { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
1423  { X86::VPADDBrr, X86::VPADDBrm, 0 },
1424  { X86::VPADDDrr, X86::VPADDDrm, 0 },
1425  { X86::VPADDQrr, X86::VPADDQrm, 0 },
1426  { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
1427  { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
1428  { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
1429  { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
1430  { X86::VPADDWrr, X86::VPADDWrm, 0 },
1431  { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 },
1432  { X86::VPANDNrr, X86::VPANDNrm, 0 },
1433  { X86::VPANDrr, X86::VPANDrm, 0 },
1434  { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
1435  { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
1436  { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
1437  { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
1438  { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 },
1439  { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
1440  { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
1441  { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
1442  { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
1443  { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
1444  { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
1445  { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
1446  { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
1447  { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
1448  { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
1449  { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
1450  { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
1451  { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
1452  { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
1453  { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
1454  { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
1455  { X86::VPINSRBrr, X86::VPINSRBrm, 0 },
1456  { X86::VPINSRDrr, X86::VPINSRDrm, 0 },
1457  { X86::VPINSRQrr, X86::VPINSRQrm, 0 },
1458  { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
1459  { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
1460  { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
1461  { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
1462  { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
1463  { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
1464  { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
1465  { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
1466  { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
1467  { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
1468  { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
1469  { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
1470  { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
1471  { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
1472  { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
1473  { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
1474  { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
1475  { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
1476  { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
1477  { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
1478  { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
1479  { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
1480  { X86::VPORrr, X86::VPORrm, 0 },
1481  { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
1482  { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
1483  { X86::VPSIGNBrr128, X86::VPSIGNBrm128, 0 },
1484  { X86::VPSIGNWrr128, X86::VPSIGNWrm128, 0 },
1485  { X86::VPSIGNDrr128, X86::VPSIGNDrm128, 0 },
1486  { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
1487  { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
1488  { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
1489  { X86::VPSRADrr, X86::VPSRADrm, 0 },
1490  { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
1491  { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
1492  { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
1493  { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
1494  { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
1495  { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
1496  { X86::VPSUBQrr, X86::VPSUBQrm, 0 },
1497  { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
1498  { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
1499  { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 },
1500  { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 },
1501  { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
1502  { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
1503  { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
1504  { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
1505  { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
1506  { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
1507  { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
1508  { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
1509  { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
1510  { X86::VPXORrr, X86::VPXORrm, 0 },
1511  { X86::VRCPSSr, X86::VRCPSSm, 0 },
1512  { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
1513  { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
1514  { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
1515  { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
1516  { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE },
1517  { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
1518  { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE },
1519  { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
1520  { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
1521  { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
1522  { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
1523  { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
1524  { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE },
1525  { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
1526  { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
1527  { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
1528  { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
1529  { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
1530  { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE },
1531  { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
1532  { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
1533  { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
1534  { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
1535  { X86::VXORPDrr, X86::VXORPDrm, 0 },
1536  { X86::VXORPSrr, X86::VXORPSrm, 0 },
1537 
1538  // AVX 256-bit foldable instructions
1539  { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
1540  { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
1541  { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
1542  { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
1543  { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
1544  { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
1545  { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
1546  { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
1547  { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
1548  { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
1549  { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
1550  { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
1551  { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
1552  { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
1553  { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
1554  { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
1555  { X86::VDPPSYrri, X86::VDPPSYrmi, 0 },
1556  { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
1557  { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
1558  { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
1559  { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
1560  { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
1561  { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 },
1562  { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
1563  { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
1564  { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
1565  { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 },
1566  { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
1567  { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
1568  { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
1569  { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
1570  { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
1571  { X86::VORPDYrr, X86::VORPDYrm, 0 },
1572  { X86::VORPSYrr, X86::VORPSYrm, 0 },
1573  { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
1574  { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
1575  { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
1576  { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
1577  { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
1578  { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
1579  { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
1580  { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
1581  { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
1582  { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
1583  { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
1584  { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
1585  { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
1586 
1587  // AVX2 foldable instructions
1588  { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
1589  { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
1590  { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
1591  { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
1592  { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
1593  { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
1594  { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
1595  { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
1596  { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
1597  { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
1598  { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
1599  { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
1600  { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
1601  { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 },
1602  { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
1603  { X86::VPANDYrr, X86::VPANDYrm, 0 },
1604  { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
1605  { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
1606  { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
1607  { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
1608  { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
1609  { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
1610  { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
1611  { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
1612  { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
1613  { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
1614  { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
1615  { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
1616  { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
1617  { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
1618  { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
1619  { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
1620  { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
1621  { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
1622  { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
1623  { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
1624  { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
1625  { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
1626  { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
1627  { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
1628  { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
1629  { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
1630  { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
1631  { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
1632  { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
1633  { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
1634  { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
1635  { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
1636  { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
1637  { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
1638  { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
1639  { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
1640  { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
1641  { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
1642  { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
1643  { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
1644  { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
1645  { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
1646  { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
1647  { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
1648  { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
1649  { X86::VPORYrr, X86::VPORYrm, 0 },
1650  { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
1651  { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
1652  { X86::VPSIGNBYrr256, X86::VPSIGNBYrm256, 0 },
1653  { X86::VPSIGNWYrr256, X86::VPSIGNWYrm256, 0 },
1654  { X86::VPSIGNDYrr256, X86::VPSIGNDYrm256, 0 },
1655  { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
1656  { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
1657  { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
1658  { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
1659  { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
1660  { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
1661  { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
1662  { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
1663  { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
1664  { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
1665  { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
1666  { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
1667  { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
1668  { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
1669  { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
1670  { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
1671  { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
1672  { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
1673  { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
1674  { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
1675  { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 },
1676  { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
1677  { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
1678  { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 },
1679  { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 },
1680  { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
1681  { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
1682  { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
1683  { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
1684  { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
1685  { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
1686  { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
1687  { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
1688  { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
1689  { X86::VPXORYrr, X86::VPXORYrm, 0 },
1690 
1691  // FMA4 foldable patterns
1692  { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
1693  { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE },
1694  { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
1695  { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE },
1696  { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
1697  { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
1698  { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE },
1699  { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE },
1700  { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
1701  { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE },
1702  { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
1703  { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE },
1704  { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
1705  { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
1706  { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE },
1707  { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE },
1708  { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
1709  { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
1710  { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
1711  { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
1712  { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
1713  { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
1714  { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE },
1715  { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE },
1716  { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
1717  { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE },
1718  { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
1719  { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE },
1720  { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
1721  { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
1722  { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE },
1723  { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE },
1724  { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
1725  { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
1726  { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE },
1727  { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE },
1728  { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
1729  { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
1730  { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE },
1731  { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE },
1732 
1733  // XOP foldable instructions
1734  { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
1735  { X86::VPCMOVrrrY, X86::VPCMOVrmrY, 0 },
1736  { X86::VPCOMBri, X86::VPCOMBmi, 0 },
1737  { X86::VPCOMDri, X86::VPCOMDmi, 0 },
1738  { X86::VPCOMQri, X86::VPCOMQmi, 0 },
1739  { X86::VPCOMWri, X86::VPCOMWmi, 0 },
1740  { X86::VPCOMUBri, X86::VPCOMUBmi, 0 },
1741  { X86::VPCOMUDri, X86::VPCOMUDmi, 0 },
1742  { X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
1743  { X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
1744  { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
1745  { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 },
1746  { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
1747  { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 },
1748  { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
1749  { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
1750  { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
1751  { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 },
1752  { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 },
1753  { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 },
1754  { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 },
1755  { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 },
1756  { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 },
1757  { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 },
1758  { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 },
1759  { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 },
1760  { X86::VPPERMrrr, X86::VPPERMrmr, 0 },
1761  { X86::VPROTBrr, X86::VPROTBrm, 0 },
1762  { X86::VPROTDrr, X86::VPROTDrm, 0 },
1763  { X86::VPROTQrr, X86::VPROTQrm, 0 },
1764  { X86::VPROTWrr, X86::VPROTWrm, 0 },
1765  { X86::VPSHABrr, X86::VPSHABrm, 0 },
1766  { X86::VPSHADrr, X86::VPSHADrm, 0 },
1767  { X86::VPSHAQrr, X86::VPSHAQrm, 0 },
1768  { X86::VPSHAWrr, X86::VPSHAWrm, 0 },
1769  { X86::VPSHLBrr, X86::VPSHLBrm, 0 },
1770  { X86::VPSHLDrr, X86::VPSHLDrm, 0 },
1771  { X86::VPSHLQrr, X86::VPSHLQrm, 0 },
1772  { X86::VPSHLWrr, X86::VPSHLWrm, 0 },
1773 
1774  // BMI/BMI2 foldable instructions
1775  { X86::ANDN32rr, X86::ANDN32rm, 0 },
1776  { X86::ANDN64rr, X86::ANDN64rm, 0 },
1777  { X86::MULX32rr, X86::MULX32rm, 0 },
1778  { X86::MULX64rr, X86::MULX64rm, 0 },
1779  { X86::PDEP32rr, X86::PDEP32rm, 0 },
1780  { X86::PDEP64rr, X86::PDEP64rm, 0 },
1781  { X86::PEXT32rr, X86::PEXT32rm, 0 },
1782  { X86::PEXT64rr, X86::PEXT64rm, 0 },
1783 
1784  // ADX foldable instructions
1785  { X86::ADCX32rr, X86::ADCX32rm, 0 },
1786  { X86::ADCX64rr, X86::ADCX64rm, 0 },
1787  { X86::ADOX32rr, X86::ADOX32rm, 0 },
1788  { X86::ADOX64rr, X86::ADOX64rm, 0 },
1789 
1790  // AVX-512 foldable instructions
1791  { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
1792  { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
1793  { X86::VADDSDZrr, X86::VADDSDZrm, 0 },
1794  { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
1795  { X86::VADDSSZrr, X86::VADDSSZrm, 0 },
1796  { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
1797  { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
1798  { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
1799  { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
1800  { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
1801  { X86::VANDPDZrr, X86::VANDPDZrm, 0 },
1802  { X86::VANDPSZrr, X86::VANDPSZrm, 0 },
1803  { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
1804  { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
1805  { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
1806  { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
1807  { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
1808  { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
1809  { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
1810  { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
1811  { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
1812  { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
1813  { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
1814  { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
1815  { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
1816  { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
1817  { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 },
1818  { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 },
1819  { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 },
1820  { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 },
1821  { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 },
1822  { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 },
1823  { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 },
1824  { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 },
1825  { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
1826  { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
1827  { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
1828  { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
1829  { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
1830  { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
1831  { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
1832  { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
1833  { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
1834  { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
1835  { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
1836  { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
1837  { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
1838  { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
1839  { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
1840  { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
1841  { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
1842  { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
1843  { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
1844  { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
1845  { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
1846  { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
1847  { X86::VMULSDZrr, X86::VMULSDZrm, 0 },
1848  { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
1849  { X86::VMULSSZrr, X86::VMULSSZrm, 0 },
1850  { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
1851  { X86::VORPDZrr, X86::VORPDZrm, 0 },
1852  { X86::VORPSZrr, X86::VORPSZrm, 0 },
1853  { X86::VPADDBZrr, X86::VPADDBZrm, 0 },
1854  { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
1855  { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
1856  { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 },
1857  { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 },
1858  { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 },
1859  { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 },
1860  { X86::VPADDWZrr, X86::VPADDWZrm, 0 },
1861  { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 },
1862  { X86::VPANDDZrr, X86::VPANDDZrm, 0 },
1863  { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
1864  { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
1865  { X86::VPANDQZrr, X86::VPANDQZrm, 0 },
1866  { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
1867  { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
1868  { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
1869  { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 },
1870  { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 },
1871  { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 },
1872  { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 },
1873  { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 },
1874  { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 },
1875  { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 },
1876  { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 },
1877  { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 },
1878  { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 },
1879  { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 },
1880  { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 },
1881  { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 },
1882  { X86::VPERMBZrr, X86::VPERMBZrm, 0 },
1883  { X86::VPERMDZrr, X86::VPERMDZrm, 0 },
1884  { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 },
1885  { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 },
1886  { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 },
1887  { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
1888  { X86::VPERMQZrr, X86::VPERMQZrm, 0 },
1889  { X86::VPERMWZrr, X86::VPERMWZrm, 0 },
1890  { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
1891  { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
1892  { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
1893  { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
1894  { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
1895  { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
1896  { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
1897  { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
1898  { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
1899  { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
1900  { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
1901  { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
1902  { X86::VPORDZrr, X86::VPORDZrm, 0 },
1903  { X86::VPORQZrr, X86::VPORQZrm, 0 },
1904  { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
1905  { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
1906  { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
1907  { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
1908  { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
1909  { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
1910  { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
1911  { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
1912  { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
1913  { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 },
1914  { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 },
1915  { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 },
1916  { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 },
1917  { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 },
1918  { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 },
1919  { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 },
1920  { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 },
1921  { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 },
1922  { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 },
1923  { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 },
1924  { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 },
1925  { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 },
1926  { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
1927  { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
1928  { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
1929  { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
1930  { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
1931  { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
1932  { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
1933  { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
1934  { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
1935  { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
1936  { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 },
1937  { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 },
1938  { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 },
1939  { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 },
1940  { X86::VXORPDZrr, X86::VXORPDZrm, 0 },
1941  { X86::VXORPSZrr, X86::VXORPSZrm, 0 },
1942 
1943  // AVX-512{F,VL} foldable instructions
1944  { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
1945  { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
1946  { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
1947  { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
1948  { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 },
1949  { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 },
1950  { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 },
1951  { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 },
1952  { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 },
1953  { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 },
1954  { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 },
1955  { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 },
1956  { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 },
1957  { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
1958  { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
1959  { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
1960  { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
1961  { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
1962  { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
1963  { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
1964  { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
1965  { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
1966  { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
1967  { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 },
1968  { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
1969  { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
1970  { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
1971  { X86::VINSERTF32x4Z256rr, X86::VINSERTF32x4Z256rm, 0 },
1972  { X86::VINSERTF64x2Z256rr, X86::VINSERTF64x2Z256rm, 0 },
1973  { X86::VINSERTI32x4Z256rr, X86::VINSERTI32x4Z256rm, 0 },
1974  { X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0 },
1975  { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
1976  { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
1977  { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
1978  { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
1979  { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
1980  { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
1981  { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
1982  { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
1983  { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
1984  { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
1985  { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
1986  { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
1987  { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
1988  { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
1989  { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
1990  { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
1991  { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 },
1992  { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
1993  { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
1994  { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
1995  { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 },
1996  { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
1997  { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
1998  { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
1999  { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
2000  { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
2001  { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
2002  { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 },
2003  { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 },
2004  { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 },
2005  { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 },
2006  { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 },
2007  { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 },
2008  { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 },
2009  { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 },
2010  { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 },
2011  { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 },
2012  { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 },
2013  { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 },
2014  { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 },
2015  { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 },
2016  { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 },
2017  { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 },
2018  { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 },
2019  { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 },
2020  { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 },
2021  { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 },
2022  { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
2023  { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
2024  { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
2025  { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
2026  { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
2027  { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
2028  { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 },
2029  { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 },
2030  { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 },
2031  { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 },
2032  { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 },
2033  { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 },
2034  { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 },
2035  { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 },
2036  { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 },
2037  { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 },
2038  { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 },
2039  { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 },
2040  { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 },
2041  { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 },
2042  { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 },
2043  { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 },
2044  { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 },
2045  { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 },
2046  { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 },
2047  { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 },
2048  { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 },
2049  { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 },
2050  { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 },
2051  { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 },
2052  { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 },
2053  { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 },
2054  { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 },
2055  { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 },
2056  { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 },
2057  { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 },
2058  { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 },
2059  { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 },
2060  { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 },
2061  { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 },
2062  { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 },
2063  { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 },
2064  { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 },
2065  { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 },
2066  { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 },
2067  { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 },
2068  { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 },
2069  { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 },
2070  { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
2071  { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
2072  { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
2073  { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
2074  { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
2075  { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
2076  { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
2077  { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
2078  { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
2079  { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
2080  { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
2081  { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
2082  { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 },
2083  { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 },
2084  { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 },
2085  { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 },
2086  { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 },
2087  { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 },
2088  { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 },
2089  { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 },
2090  { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 },
2091  { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 },
2092  { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 },
2093  { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 },
2094  { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 },
2095  { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 },
2096  { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 },
2097  { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 },
2098  { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 },
2099  { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 },
2100  { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 },
2101  { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 },
2102  { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 },
2103  { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 },
2104  { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 },
2105  { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 },
2106  { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 },
2107  { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 },
2108  { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 },
2109  { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 },
2110  { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 },
2111  { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 },
2112  { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
2113  { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
2114  { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
2115  { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
2116  { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
2117  { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
2118  { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
2119  { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 },
2120  { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 },
2121  { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 },
2122  { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 },
2123  { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 },
2124  { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 },
2125  { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 },
2126  { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 },
2127  { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 },
2128  { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
2129  { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
2130  { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
2131 
2132  // AVX-512 masked foldable instructions
2133  { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
2134  { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
2135  { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
2136  { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
2137  { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
2138  { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE },
2139  { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
2140  { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 },
2141  { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 },
2142  { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 },
2143  { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 },
2144  { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE },
2145  { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 },
2146  { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 },
2147  { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 },
2148  { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 },
2149  { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
2150  { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
2151  { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
2152 
2153  // AVX-512VL 256-bit masked foldable instructions
2154  { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
2155  { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
2156  { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
2157  { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
2158  { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
2159  { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE },
2160  { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
2161  { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 },
2162  { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 },
2163  { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE },
2164  { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE },
2165  { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE },
2166  { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 },
2167  { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 },
2168  { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 },
2169  { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE },
2170  { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
2171  { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
2172  { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
2173 
2174  // AVX-512VL 128-bit masked foldable instructions
2175  { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
2176  { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
2177  { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
2178  { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE },
2179  { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE },
2180  { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE },
2181  { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE },
2182  { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE },
2183  { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE },
2184  { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE },
2185  { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE },
2186  { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE },
2187  { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE },
2188  { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE },
2189  { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
2190  { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
2191  { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
2192 
2193  // AES foldable instructions
2194  { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
2195  { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
2196  { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
2197  { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
2198  { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
2199  { X86::VAESDECrr, X86::VAESDECrm, 0 },
2200  { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
2201  { X86::VAESENCrr, X86::VAESENCrm, 0 },
2202 
2203  // SHA foldable instructions
2204  { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
2205  { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
2206  { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
2207  { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
2208  { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
2209  { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
2210  { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
2211  };
2212 
2213  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
2214  AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
2215  Entry.RegOp, Entry.MemOp,
2216  // Index 2, folded load
2217  Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
2218  }
2219 
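  // The entries in MemoryFoldTable3 below fold the third source operand;
  // they are presumably registered with TB_INDEX_3 | TB_FOLDED_LOAD by a
  // loop following the table, mirroring the TB_INDEX_2 loop above.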
2220  static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
2221  // FMA4 foldable patterns
2222  { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
2223  { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE },
2224  { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
2225  { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE },
2226  { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
2227  { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
2228  { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE },
2229  { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE },
2230  { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
2231  { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE },
2232  { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
2233  { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE },
2234  { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
2235  { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
2236  { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE },
2237  { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE },
2238  { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
2239  { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
2240  { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
2241  { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
2242  { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
2243  { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
2244  { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE },
2245  { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE },
2246  { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
2247  { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE },
2248  { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
2249  { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE },
2250  { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
2251  { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
2252  { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE },
2253  { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE },
2254  { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
2255  { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
2256  { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE },
2257  { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE },
2258  { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
2259  { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
2260  { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE },
2261  { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE },
2262 
2263  // XOP foldable instructions
2264  { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
2265  { X86::VPCMOVrrrY, X86::VPCMOVrrmY, 0 },
2266  { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
2267  { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
2268  { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
2269  { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
2270  { X86::VPPERMrrr, X86::VPPERMrrm, 0 },
2271 
2272  // AVX-512 instructions with 3 source operands.
2273  { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
2274  { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
2275  { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
2276  { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
2277  { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
2278  { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
2279  { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
2280  { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
2281  { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
2282  { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
2283  { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
2284  { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
2285  { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
2286  { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
2287  { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 },
2288  { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 },
2289  { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 },
2290  { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 },
2291  { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 },
2292  { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 },
2293  { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
2294  { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
2295  { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
2296 
2297  // AVX-512VL 256-bit instructions with 3 source operands.
2298  { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 },
2299  { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 },
2300  { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 },
2301  { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 },
2302  { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 },
2303  { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 },
2304  { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 },
2305  { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 },
2306  { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 },
2307  { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 },
2308  { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 },
2309  { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
2310  { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
2311  { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
2312 
2313  // AVX-512VL 128-bit instructions with 3 source operands.
2314  { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 },
2315  { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 },
2316  { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 },
2317  { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 },
2318  { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 },
2319  { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 },
2320  { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 },
2321  { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 },
2322  { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 },
2323  { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 },
2324  { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 },
2325  { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
2326  { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
2327  { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
2328 
2329  // AVX-512 masked instructions
2330  { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
2331  { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
2332  { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
2333  { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
2334  { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
2335  { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 },
2336  { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 },
2337  { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
2338  { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
2339  { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
2340  { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
2341  { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
2342  { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
2343  { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
2344  { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
2345  { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
2346  { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
2347  { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
2348  { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
2349  { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
2350  { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
2351  { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
2352  { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
2353  { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
2354  { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
2355  { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
2356  { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
2357  { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
2358  { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
2359  { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
2360  { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
2361  { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
2362  { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
2363  { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 },
2364  { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 },
2365  { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 },
2366  { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 },
2367  { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 },
2368  { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 },
2369  { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 },
2370  { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
2371  { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
2372  { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
2373  { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
2374  { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
2375  { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
2376  { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 },
2377  { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 },
2378  { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 },
2379  { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 },
2380  { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
2381  { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
2382  { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
2383  { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
2384  { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
2385  { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
2386  { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
2387  { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
2388  { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
2389  { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 },
2390  { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 },
2391  { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 },
2392  { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 },
2393  { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 },
2394  { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 },
2395  { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 },
2396  { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 },
2397  { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 },
2398  { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 },
2399  { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 },
2400  { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 },
2401  { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
2402  { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
2403  { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
2404  { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
2405  { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
2406  { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
2407  { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
2408  { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
2409  { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 },
2410  { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 },
2411  { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 },
2412 
2413  // AVX-512{F,VL} masked arithmetic instructions 256-bit
2414  { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
2415  { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
2416  { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
2417  { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 },
2418  { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 },
2419  { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 },
2420  { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 },
2421  { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
2422  { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
2423  { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
2424  { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
2425  { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
2426  { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
2427  { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
2428  { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
2429  { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
2430  { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
2431  { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
2432  { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
2433  { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
2434  { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
2435  { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
2436  { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
2437  { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
2438  { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
2439  { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
2440  { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
2441  { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
2442  { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
2443  { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 },
2444  { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 },
2445  { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 },
2446  { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 },
2447  { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 },
2448  { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 },
2449  { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 },
2450  { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
2451  { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
2452  { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
2453  { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
2454  { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
2455  { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
2456  { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 },
2457  { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 },
2458  { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 },
2459  { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 },
2460  { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
2461  { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
2462  { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
2463  { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
2464  { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
2465  { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
2466  { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
2467  { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
2468  { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
2469  { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 },
2470  { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 },
2471  { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 },
2472  { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 },
2473  { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 },
2474  { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 },
2475  { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 },
2476  { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 },
2477  { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 },
2478  { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 },
2479  { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 },
2480  { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 },
2481  { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
2482  { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
2483  { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
2484  { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
2485  { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
2486  { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
2487  { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 },
2488  { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 },
2489  { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 },
2490  { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 },
2491  { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 },
2492 
2493  // AVX-512{F,VL} masked arithmetic instructions 128-bit
2494  { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
2495  { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
2496  { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
2497  { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 },
2498  { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 },
2499  { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 },
2500  { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 },
2501  { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 },
2502  { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
2503  { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
2504  { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
2505  { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
2506  { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
2507  { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
2508  { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
2509  { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
2510  { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
2511  { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
2512  { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
2513  { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
2514  { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
2515  { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
2516  { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
2517  { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
2518  { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
2519  { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 },
2520  { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 },
2521  { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 },
2522  { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 },
2523  { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 },
2524  { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 },
2525  { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 },
2526  { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
2527  { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
2528  { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
2529  { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
2530  { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
2531  { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
2532  { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
2533  { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
2534  { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
2535  { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
2536  { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
2537  { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
2538  { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
2539  { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
2540  { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
2541  { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 },
2542  { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 },
2543  { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 },
2544  { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 },
2545  { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 },
2546  { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 },
2547  { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 },
2548  { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 },
2549  { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 },
2550  { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 },
2551  { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 },
2552  { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 },
2553  { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
2554  { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
2555  { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
2556  { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
2557  { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
2558  { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
2559  { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 },
2560  { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 },
2561  { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 },
2562  { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
2563  { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
2564 
2565  // AVX-512 masked foldable instructions
2566  { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
2567  { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
2568  { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
2569  { X86::VPERMQZrik, X86::VPERMQZmik, 0 },
2570  { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
2571  { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE },
2572  { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
2573  { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 },
2574  { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 },
2575  { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 },
2576  { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 },
2577  { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE },
2578  { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 },
2579  { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 },
2580  { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 },
2581  { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 },
2582  { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
2583  { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
2584  { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
2585 
2586  // AVX-512VL 256-bit masked foldable instructions
2587  { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
2588  { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
2589  { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
2590  { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
2591  { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
2592  { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE },
2593  { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
2594  { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 },
2595  { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 },
2596  { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE },
2597  { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE },
2598  { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE },
2599  { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 },
2600  { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 },
2601  { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 },
2602  { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE },
2603  { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
2604  { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
2605  { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
2606 
2607  // AVX-512VL 128-bit masked foldable instructions
2608  { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
2609  { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
2610  { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
2611  { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE },
2612  { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE },
2613  { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE },
2614  { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE },
2615  { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE },
2616  { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE },
2617  { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE },
2618  { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE },
2619  { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE },
2620  { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE },
2621  { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE },
2622  { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
2623  { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
2624  { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
2625  };
2626 
2627  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
2628  AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
2629  Entry.RegOp, Entry.MemOp,
2630  // Index 3, folded load
2631  Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
2632  }
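 // The FMA3 folding entries are not spelled out in the static table above;
 // they are generated from X86InstrFMA3Info in the loop below, using the same
 // TB_INDEX_3 | TB_FOLDED_LOAD flags (plus TB_NO_REVERSE for intrinsic forms).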
2633  auto I = X86InstrFMA3Info::rm_begin();
2634  auto E = X86InstrFMA3Info::rm_end();
2635  for (; I != E; ++I) {
2636  if (!I.getGroup()->isKMasked()) {
2637  // Intrinsic forms need to pass TB_NO_REVERSE.
2638  if (I.getGroup()->isIntrinsic()) {
2639  AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
2640  I.getRegOpcode(), I.getMemOpcode(),
2641  TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE);
2642  } else {
2643  AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
2644  I.getRegOpcode(), I.getMemOpcode(),
2645  TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD);
2646  }
2647  }
2648  }
2649 
2650  static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
2651  // AVX-512 foldable masked instructions
2652  { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
2653  { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
2654  { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
2655  { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
2656  { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
2657  { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 },
2658  { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 },
2659  { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
2660  { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
2661  { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
2662  { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
2663  { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
2664  { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
2665  { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
2666  { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
2667  { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
2668  { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
2669  { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
2670  { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
2671  { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
2672  { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
2673  { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
2674  { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
2675  { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
2676  { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
2677  { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
2678  { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
2679  { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
2680  { X86::VORPDZrrk, X86::VORPDZrmk, 0 },
2681  { X86::VORPSZrrk, X86::VORPSZrmk, 0 },
2682  { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
2683  { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
2684  { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
2685  { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 },
2686  { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 },
2687  { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 },
2688  { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 },
2689  { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 },
2690  { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 },
2691  { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 },
2692  { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
2693  { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
2694  { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
2695  { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
2696  { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
2697  { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
2698  { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 },
2699  { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 },
2700  { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 },
2701  { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 },
2702  { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 },
2703  { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 },
2704  { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 },
2705  { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 },
2706  { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 },
2707  { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 },
2708  { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 },
2709  { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 },
2710  { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 },
2711  { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 },
2712  { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
2713  { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
2714  { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
2715  { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
2716  { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
2717  { X86::VPORDZrrk, X86::VPORDZrmk, 0 },
2718  { X86::VPORQZrrk, X86::VPORQZrmk, 0 },
2719  { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
2720  { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
2721  { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
2722  { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
2723  { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 },
2724  { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 },
2725  { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
2726  { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
2727  { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
2728  { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
2729  { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 },
2730  { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 },
2731  { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 },
2732  { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 },
2733  { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 },
2734  { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 },
2735  { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 },
2736  { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
2737  { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
2738  { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
2739  { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
2740  { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
2741  { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
2742  { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
2743  { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
2744  { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 },
2745  { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 },
2746  { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 },
2747 
2748  // AVX-512{F,VL} foldable masked instructions 256-bit
2749  { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
2750  { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
2751  { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
2752  { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 },
2753  { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 },
2754  { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 },
2755  { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 },
2756  { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
2757  { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
2758  { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
2759  { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
2760  { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
2761  { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
2762  { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 },
2763  { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
2764  { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
2765  { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
2766  { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
2767  { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
2768  { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
2769  { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
2770  { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
2771  { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
2772  { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
2773  { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
2774  { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
2775  { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
2776  { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
2777  { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
2778  { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 },
2779  { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 },
2780  { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 },
2781  { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 },
2782  { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 },
2783  { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 },
2784  { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 },
2785  { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
2786  { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
2787  { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
2788  { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
2789  { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
2790  { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
2791  { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 },
2792  { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 },
2793  { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 },
2794  { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 },
2795  { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 },
2796  { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 },
2797  { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 },
2798  { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 },
2799  { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 },
2800  { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 },
2801  { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 },
2802  { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 },
2803  { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 },
2804  { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 },
2805  { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
2806  { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
2807  { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
2808  { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
2809  { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
2810  { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
2811  { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
2812  { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
2813  { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
2814  { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
2815  { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
2816  { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 },
2817  { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 },
2818  { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 },
2819  { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 },
2820  { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 },
2821  { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 },
2822  { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 },
2823  { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 },
2824  { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 },
2825  { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 },
2826  { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 },
2827  { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 },
2828  { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 },
2829  { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 },
2830  { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
2831  { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
2832  { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
2833  { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
2834  { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
2835  { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
2836  { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 },
2837  { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 },
2838  { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 },
2839  { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 },
2840  { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 },
2841 
2842  // AVX-512{F,VL} foldable instructions 128-bit
2843  { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
2844  { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
2845  { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
2846  { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 },
2847  { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 },
2848  { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 },
2849  { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 },
2850  { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
2851  { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
2852  { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
2853  { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
2854  { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
2855  { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
2856  { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
2857  { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
2858  { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
2859  { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
2860  { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
2861  { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
2862  { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
2863  { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
2864  { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
2865  { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
2866  { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
2867  { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
2868  { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 },
2869  { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 },
2870  { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 },
2871  { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 },
2872  { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 },
2873  { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 },
2874  { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 },
2875  { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
2876  { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
2877  { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
2878  { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
2879  { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
2880  { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
2881  { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 },
2882  { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 },
2883  { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 },
2884  { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 },
2885  { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 },
2886  { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 },
2887  { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 },
2888  { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 },
2889  { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 },
2890  { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 },
2891  { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
2892  { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
2893  { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
2894  { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
2895  { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
2896  { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
2897  { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
2898  { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
2899  { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
2900  { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
2901  { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
2902  { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 },
2903  { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 },
2904  { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 },
2905  { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 },
2906  { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 },
2907  { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 },
2908  { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 },
2909  { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 },
2910  { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 },
2911  { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 },
2912  { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 },
2913  { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 },
2914  { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 },
2915  { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 },
2916  { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
2917  { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
2918  { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
2919  { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
2920  { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
2921  { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
2922  { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 },
2923  { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 },
2924  { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 },
2925  { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
2926  { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
2927 
2928  // 512-bit three source instructions with zero masking.
2929  { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
2930  { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
2931  { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
2932  { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 },
2933  { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 },
2934  { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 },
2935  { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 },
2936  { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 },
2937  { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 },
2938  { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
2939  { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
2940  { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
2941  { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
2942  { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
2943 
2944  // 256-bit three source instructions with zero masking.
2945  { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
2946  { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
2947  { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
2948  { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 },
2949  { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 },
2950  { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 },
2951  { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 },
2952  { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 },
2953  { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 },
2954  { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
2955  { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
2956  { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
2957  { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
2958  { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
2959 
2960  // 128-bit three source instructions with zero masking.
2961  { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
2962  { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
2963  { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
2964  { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 },
2965  { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 },
2966  { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 },
2967  { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 },
2968  { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 },
2969  { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 },
2970  { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
2971  { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
2972  { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
2973  { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
2974  { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
2975  };
2976 
2977  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
2978  AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
2979  Entry.RegOp, Entry.MemOp,
2980  // Index 4, folded load
2981  Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
2982  }
2983  for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) {
2984  if (I.getGroup()->isKMasked()) {
2985  // Intrinsics need to pass TB_NO_REVERSE.
2986  if (I.getGroup()->isIntrinsic()) {
2987  AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
2988  I.getRegOpcode(), I.getMemOpcode(),
2989  TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE);
2990  } else {
2991  AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
2992  I.getRegOpcode(), I.getMemOpcode(),
2993  TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD);
2994  }
2995  }
2996  }
2997 }
2998 
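 // AddTableEntry records one folding pair in two maps: RegOp -> (MemOp, Flags)
 // for folding a register form into a memory form, and MemOp -> (RegOp, Flags)
 // for unfolding, unless TB_NO_FORWARD or TB_NO_REVERSE suppresses a direction.
 // A minimal lookup sketch (hypothetical variable names, not code in this file):
 //   auto It = RegOp2MemOpTable3.find(MI.getOpcode());
 //   if (It != RegOp2MemOpTable3.end()) {
 //     unsigned MemOpc = It->second.first;   // memory form of the opcode
 //     uint16_t Flags  = It->second.second;  // TB_* folding constraints
 //   }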
2999 void
3000 X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
3001  MemOp2RegOpTableType &M2RTable,
3002  uint16_t RegOp, uint16_t MemOp, uint16_t Flags) {
3003  if ((Flags & TB_NO_FORWARD) == 0) {
3004  assert(!R2MTable.count(RegOp) && "Duplicate entry!");
3005  R2MTable[RegOp] = std::make_pair(MemOp, Flags);
3006  }
3007  if ((Flags & TB_NO_REVERSE) == 0) {
3008  assert(!M2RTable.count(MemOp) &&
3009  "Duplicated entries in unfolding maps?");
3010  M2RTable[MemOp] = std::make_pair(RegOp, Flags);
3011  }
3012 }
3013 
3014 bool
3015 X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
3016  unsigned &SrcReg, unsigned &DstReg,
3017  unsigned &SubIdx) const {
3018  switch (MI.getOpcode()) {
3019  default: break;
3020  case X86::MOVSX16rr8:
3021  case X86::MOVZX16rr8:
3022  case X86::MOVSX32rr8:
3023  case X86::MOVZX32rr8:
3024  case X86::MOVSX64rr8:
3025  if (!Subtarget.is64Bit())
3026  // It's not always legal to reference the low 8-bit of the larger
3027  // register in 32-bit mode.
3028  return false;
3029  case X86::MOVSX32rr16:
3030  case X86::MOVZX32rr16:
3031  case X86::MOVSX64rr16:
3032  case X86::MOVSX64rr32: {
3033  if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
3034  // Be conservative.
3035  return false;
3036  SrcReg = MI.getOperand(1).getReg();
3037  DstReg = MI.getOperand(0).getReg();
3038  switch (MI.getOpcode()) {
3039  default: llvm_unreachable("Unreachable!");
3040  case X86::MOVSX16rr8:
3041  case X86::MOVZX16rr8:
3042  case X86::MOVSX32rr8:
3043  case X86::MOVZX32rr8:
3044  case X86::MOVSX64rr8:
3045  SubIdx = X86::sub_8bit;
3046  break;
3047  case X86::MOVSX32rr16:
3048  case X86::MOVZX32rr16:
3049  case X86::MOVSX64rr16:
3050  SubIdx = X86::sub_16bit;
3051  break;
3052  case X86::MOVSX64rr32:
3053  SubIdx = X86::sub_32bit;
3054  break;
3055  }
3056  return true;
3057  }
3058  }
3059  return false;
3060 }
3061 
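 // getSPAdjust returns the stack-pointer adjustment, in bytes, that MI performs
 // as part of a call sequence: the rounded-up frame-setup amount for
 // ADJCALLSTACKDOWN, the negated amount for ADJCALLSTACKUP (for a call, taken
 // from the ADJCALLSTACKUP that follows it), and the operand size for PUSHes.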
3062 int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
3063  const MachineFunction *MF = MI.getParent()->getParent();
3064  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
3065 
3066  if (MI.getOpcode() == getCallFrameSetupOpcode() ||
3067  MI.getOpcode() == getCallFrameDestroyOpcode()) {
3068  unsigned StackAlign = TFI->getStackAlignment();
3069  int SPAdj =
3070  (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign;
3071 
3072  SPAdj -= MI.getOperand(1).getImm();
3073 
3074  if (MI.getOpcode() == getCallFrameSetupOpcode())
3075  return SPAdj;
3076  else
3077  return -SPAdj;
3078  }
3079 
3080  // To know whether a call adjusts the stack, we need information
3081  // that is bound to the following ADJCALLSTACKUP pseudo.
3082  // Look for the next ADJCALLSTACKUP that follows the call.
3083  if (MI.isCall()) {
3084  const MachineBasicBlock *MBB = MI.getParent();
3085  auto I = ++MachineBasicBlock::const_iterator(MI);
3086  for (auto E = MBB->end(); I != E; ++I) {
3087  if (I->getOpcode() == getCallFrameDestroyOpcode() ||
3088  I->isCall())
3089  break;
3090  }
3091 
3092  // If we could not find a frame destroy opcode, then it has already
3093  // been simplified, so we don't care.
3094  if (I->getOpcode() != getCallFrameDestroyOpcode())
3095  return 0;
3096 
3097  return -(I->getOperand(1).getImm());
3098  }
3099 
3100  // Currently handle only PUSHes we can reasonably expect to see
3101  // in call sequences
3102  switch (MI.getOpcode()) {
3103  default:
3104  return 0;
3105  case X86::PUSH32i8:
3106  case X86::PUSH32r:
3107  case X86::PUSH32rmm:
3108  case X86::PUSH32rmr:
3109  case X86::PUSHi32:
3110  return 4;
3111  case X86::PUSH64i8:
3112  case X86::PUSH64r:
3113  case X86::PUSH64rmm:
3114  case X86::PUSH64rmr:
3115  case X86::PUSH64i32:
3116  return 8;
3117  }
3118 }
3119 
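 // An X86 memory reference is described by five consecutive operands starting
 // at index Op: base (AddrBaseReg), scale (AddrScaleAmt), index (AddrIndexReg),
 // displacement (AddrDisp) and segment. A plain spill-slot access uses a frame
 // index as the base with scale 1, no index register and zero displacement,
 // which is exactly the shape isFrameOperand checks for.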
3120 /// Return true and the FrameIndex if the specified
3121 /// operand and the following operands form a reference to the stack frame.
3122 bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
3123  int &FrameIndex) const {
3124  if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
3125  MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
3126  MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
3127  MI.getOperand(Op + X86::AddrDisp).isImm() &&
3128  MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
3129  MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
3130  MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
3131  FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
3132  return true;
3133  }
3134  return false;
3135 }
3136 
3137 static bool isFrameLoadOpcode(int Opcode) {
3138  switch (Opcode) {
3139  default:
3140  return false;
3141  case X86::MOV8rm:
3142  case X86::MOV16rm:
3143  case X86::MOV32rm:
3144  case X86::MOV64rm:
3145  case X86::LD_Fp64m:
3146  case X86::MOVSSrm:
3147  case X86::MOVSDrm:
3148  case X86::MOVAPSrm:
3149  case X86::MOVUPSrm:
3150  case X86::MOVAPDrm:
3151  case X86::MOVUPDrm:
3152  case X86::MOVDQArm:
3153  case X86::MOVDQUrm:
3154  case X86::VMOVSSrm:
3155  case X86::VMOVSDrm:
3156  case X86::VMOVAPSrm:
3157  case X86::VMOVUPSrm:
3158  case X86::VMOVAPDrm:
3159  case X86::VMOVUPDrm:
3160  case X86::VMOVDQArm:
3161  case X86::VMOVDQUrm:
3162  case X86::VMOVUPSYrm:
3163  case X86::VMOVAPSYrm:
3164  case X86::VMOVUPDYrm:
3165  case X86::VMOVAPDYrm:
3166  case X86::VMOVDQUYrm:
3167  case X86::VMOVDQAYrm:
3168  case X86::MMX_MOVD64rm:
3169  case X86::MMX_MOVQ64rm:
3170  case X86::VMOVSSZrm:
3171  case X86::VMOVSDZrm:
3172  case X86::VMOVAPSZrm:
3173  case X86::VMOVAPSZ128rm:
3174  case X86::VMOVAPSZ256rm:
3175  case X86::VMOVAPSZ128rm_NOVLX:
3176  case X86::VMOVAPSZ256rm_NOVLX:
3177  case X86::VMOVUPSZrm:
3178  case X86::VMOVUPSZ128rm:
3179  case X86::VMOVUPSZ256rm:
3180  case X86::VMOVUPSZ128rm_NOVLX:
3181  case X86::VMOVUPSZ256rm_NOVLX:
3182  case X86::VMOVAPDZrm:
3183  case X86::VMOVAPDZ128rm:
3184  case X86::VMOVAPDZ256rm:
3185  case X86::VMOVUPDZrm:
3186  case X86::VMOVUPDZ128rm:
3187  case X86::VMOVUPDZ256rm:
3188  case X86::VMOVDQA32Zrm:
3189  case X86::VMOVDQA32Z128rm:
3190  case X86::VMOVDQA32Z256rm:
3191  case X86::VMOVDQU32Zrm:
3192  case X86::VMOVDQU32Z128rm:
3193  case X86::VMOVDQU32Z256rm:
3194  case X86::VMOVDQA64Zrm:
3195  case X86::VMOVDQA64Z128rm:
3196  case X86::VMOVDQA64Z256rm:
3197  case X86::VMOVDQU64Zrm:
3198  case X86::VMOVDQU64Z128rm:
3199  case X86::VMOVDQU64Z256rm:
3200  case X86::VMOVDQU8Zrm:
3201  case X86::VMOVDQU8Z128rm:
3202  case X86::VMOVDQU8Z256rm:
3203  case X86::VMOVDQU16Zrm:
3204  case X86::VMOVDQU16Z128rm:
3205  case X86::VMOVDQU16Z256rm:
3206  case X86::KMOVBkm:
3207  case X86::KMOVWkm:
3208  case X86::KMOVDkm:
3209  case X86::KMOVQkm:
3210  return true;
3211  }
3212 }
3213 
3214 static bool isFrameStoreOpcode(int Opcode) {
3215  switch (Opcode) {
3216  default: break;
3217  case X86::MOV8mr:
3218  case X86::MOV16mr:
3219  case X86::MOV32mr:
3220  case X86::MOV64mr:
3221  case X86::ST_FpP64m:
3222  case X86::MOVSSmr:
3223  case X86::MOVSDmr:
3224  case X86::MOVAPSmr:
3225  case X86::MOVUPSmr:
3226  case X86::MOVAPDmr:
3227  case X86::MOVUPDmr:
3228  case X86::MOVDQAmr:
3229  case X86::MOVDQUmr:
3230  case X86::VMOVSSmr:
3231  case X86::VMOVSDmr:
3232  case X86::VMOVAPSmr:
3233  case X86::VMOVUPSmr:
3234  case X86::VMOVAPDmr:
3235  case X86::VMOVUPDmr:
3236  case X86::VMOVDQAmr:
3237  case X86::VMOVDQUmr:
3238  case X86::VMOVUPSYmr:
3239  case X86::VMOVAPSYmr:
3240  case X86::VMOVUPDYmr:
3241  case X86::VMOVAPDYmr:
3242  case X86::VMOVDQUYmr:
3243  case X86::VMOVDQAYmr:
3244  case X86::VMOVSSZmr:
3245  case X86::VMOVSDZmr:
3246  case X86::VMOVUPSZmr:
3247  case X86::VMOVUPSZ128mr:
3248  case X86::VMOVUPSZ256mr:
3249  case X86::VMOVUPSZ128mr_NOVLX:
3250  case X86::VMOVUPSZ256mr_NOVLX:
3251  case X86::VMOVAPSZmr:
3252  case X86::VMOVAPSZ128mr:
3253  case X86::VMOVAPSZ256mr:
3254  case X86::VMOVAPSZ128mr_NOVLX:
3255  case X86::VMOVAPSZ256mr_NOVLX:
3256  case X86::VMOVUPDZmr:
3257  case X86::VMOVUPDZ128mr:
3258  case X86::VMOVUPDZ256mr:
3259  case X86::VMOVAPDZmr:
3260  case X86::VMOVAPDZ128mr:
3261  case X86::VMOVAPDZ256mr:
3262  case X86::VMOVDQA32Zmr:
3263  case X86::VMOVDQA32Z128mr:
3264  case X86::VMOVDQA32Z256mr:
3265  case X86::VMOVDQU32Zmr:
3266  case X86::VMOVDQU32Z128mr:
3267  case X86::VMOVDQU32Z256mr:
3268  case X86::VMOVDQA64Zmr:
3269  case X86::VMOVDQA64Z128mr:
3270  case X86::VMOVDQA64Z256mr:
3271  case X86::VMOVDQU64Zmr:
3272  case X86::VMOVDQU64Z128mr:
3273  case X86::VMOVDQU64Z256mr:
3274  case X86::VMOVDQU8Zmr:
3275  case X86::VMOVDQU8Z128mr:
3276  case X86::VMOVDQU8Z256mr:
3277  case X86::VMOVDQU16Zmr:
3278  case X86::VMOVDQU16Z128mr:
3279  case X86::VMOVDQU16Z256mr:
3280  case X86::MMX_MOVD64mr:
3281  case X86::MMX_MOVQ64mr:
3282  case X86::MMX_MOVNTQmr:
3283  case X86::KMOVBmk:
3284  case X86::KMOVWmk:
3285  case X86::KMOVDmk:
3286  case X86::KMOVQmk:
3287  return true;
3288  }
3289  return false;
3290 }
3291 
3292 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
3293  int &FrameIndex) const {
3294  if (isFrameLoadOpcode(MI.getOpcode()))
3295  if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
3296  return MI.getOperand(0).getReg();
3297  return 0;
3298 }
3299 
3300 unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
3301  int &FrameIndex) const {
3302  if (isFrameLoadOpcode(MI.getOpcode())) {
3303  unsigned Reg;
3304  if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
3305  return Reg;
3306  // Check for post-frame index elimination operations
3307  const MachineMemOperand *Dummy;
3308  return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
3309  }
3310  return 0;
3311 }
3312 
3313 unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
3314  int &FrameIndex) const {
3315  if (isFrameStoreOpcode(MI.getOpcode()))
3316  if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
3317  isFrameOperand(MI, 0, FrameIndex))
3318  return MI.getOperand(X86::AddrNumOperands).getReg();
3319  return 0;
3320 }
3321 
3322 unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
3323  int &FrameIndex) const {
3324  if (isFrameStoreOpcode(MI.getOpcode())) {
3325  unsigned Reg;
3326  if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
3327  return Reg;
3328  // Check for post-frame index elimination operations
3329  const MachineMemOperand *Dummy;
3330  return hasStoreToStackSlot(MI, Dummy, FrameIndex);
3331  }
3332  return 0;
3333 }
3334 
3335 /// Return true if the register is a PIC base, i.e., defined by X86::MOVPC32r.
3336 static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
3337  // Don't waste compile time scanning use-def chains of physregs.
3338  if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
3339  return false;
3340  bool isPICBase = false;
3341  for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
3342  E = MRI.def_instr_end(); I != E; ++I) {
3343  MachineInstr *DefMI = &*I;
3344  if (DefMI->getOpcode() != X86::MOVPC32r)
3345  return false;
3346  assert(!isPICBase && "More than one PIC base?");
3347  isPICBase = true;
3348  }
3349  return isPICBase;
3350 }
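 // A load is treated as trivially rematerializable below only when its address
 // cannot change: no index register, and a base that is either absent,
 // RIP-relative, or the function's PIC base. LEAs of immediates, frame indices
 // and globals (and of the PIC base) are accepted the same way.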
3351 
3352 bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
3353  AliasAnalysis *AA) const {
3354  switch (MI.getOpcode()) {
3355  default: break;
3356  case X86::MOV8rm:
3357  case X86::MOV8rm_NOREX:
3358  case X86::MOV16rm:
3359  case X86::MOV32rm:
3360  case X86::MOV64rm:
3361  case X86::LD_Fp64m:
3362  case X86::MOVSSrm:
3363  case X86::MOVSDrm:
3364  case X86::MOVAPSrm:
3365  case X86::MOVUPSrm:
3366  case X86::MOVAPDrm:
3367  case X86::MOVUPDrm:
3368  case X86::MOVDQArm:
3369  case X86::MOVDQUrm:
3370  case X86::VMOVSSrm:
3371  case X86::VMOVSDrm:
3372  case X86::VMOVAPSrm:
3373  case X86::VMOVUPSrm:
3374  case X86::VMOVAPDrm:
3375  case X86::VMOVUPDrm:
3376  case X86::VMOVDQArm:
3377  case X86::VMOVDQUrm:
3378  case X86::VMOVAPSYrm:
3379  case X86::VMOVUPSYrm:
3380  case X86::VMOVAPDYrm:
3381  case X86::VMOVUPDYrm:
3382  case X86::VMOVDQAYrm:
3383  case X86::VMOVDQUYrm:
3384  case X86::MMX_MOVD64rm:
3385  case X86::MMX_MOVQ64rm:
3386  // AVX-512
3387  case X86::VMOVSSZrm:
3388  case X86::VMOVSDZrm:
3389  case X86::VMOVAPDZ128rm:
3390  case X86::VMOVAPDZ256rm:
3391  case X86::VMOVAPDZrm:
3392  case X86::VMOVAPSZ128rm:
3393  case X86::VMOVAPSZ256rm:
3394  case X86::VMOVAPSZ128rm_NOVLX:
3395  case X86::VMOVAPSZ256rm_NOVLX:
3396  case X86::VMOVAPSZrm:
3397  case X86::VMOVDQA32Z128rm:
3398  case X86::VMOVDQA32Z256rm:
3399  case X86::VMOVDQA32Zrm:
3400  case X86::VMOVDQA64Z128rm:
3401  case X86::VMOVDQA64Z256rm:
3402  case X86::VMOVDQA64Zrm:
3403  case X86::VMOVDQU16Z128rm:
3404  case X86::VMOVDQU16Z256rm:
3405  case X86::VMOVDQU16Zrm:
3406  case X86::VMOVDQU32Z128rm:
3407  case X86::VMOVDQU32Z256rm:
3408  case X86::VMOVDQU32Zrm:
3409  case X86::VMOVDQU64Z128rm:
3410  case X86::VMOVDQU64Z256rm:
3411  case X86::VMOVDQU64Zrm:
3412  case X86::VMOVDQU8Z128rm:
3413  case X86::VMOVDQU8Z256rm:
3414  case X86::VMOVDQU8Zrm:
3415  case X86::VMOVUPDZ128rm:
3416  case X86::VMOVUPDZ256rm:
3417  case X86::VMOVUPDZrm:
3418  case X86::VMOVUPSZ128rm:
3419  case X86::VMOVUPSZ256rm:
3420  case X86::VMOVUPSZ128rm_NOVLX:
3421  case X86::VMOVUPSZ256rm_NOVLX:
3422  case X86::VMOVUPSZrm: {
3423  // Loads from constant pools are trivially rematerializable.
3424  if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
3425  MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
3426  MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
3427  MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
3428  MI.isDereferenceableInvariantLoad(AA)) {
3429  unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
3430  if (BaseReg == 0 || BaseReg == X86::RIP)
3431  return true;
3432  // Allow re-materialization of PIC load.
3433  if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
3434  return false;
3435  const MachineFunction &MF = *MI.getParent()->getParent();
3436  const MachineRegisterInfo &MRI = MF.getRegInfo();
3437  return regIsPICBase(BaseReg, MRI);
3438  }
3439  return false;
3440  }
3441 
3442  case X86::LEA32r:
3443  case X86::LEA64r: {
3444  if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
3445  MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
3446  MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
3447  !MI.getOperand(1 + X86::AddrDisp).isReg()) {
3448  // lea fi#, lea GV, etc. are all rematerializable.
3449  if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
3450  return true;
3451  unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
3452  if (BaseReg == 0)
3453  return true;
3454  // Allow re-materialization of lea PICBase + x.
3455  const MachineFunction &MF = *MI.getParent()->getParent();
3456  const MachineRegisterInfo &MRI = MF.getRegInfo();
3457  return regIsPICBase(BaseReg, MRI);
3458  }
3459  return false;
3460  }
3461  }
3462 
3463  // All other instructions marked M_REMATERIALIZABLE are always trivially
3464  // rematerializable.
3465  return true;
3466 }
3467 
3468 bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
3469  MachineBasicBlock::iterator I) const {
3470  MachineBasicBlock::iterator E = MBB.end();
3471 
3472  // For compile time consideration, if we are not able to determine the
3473  // safety after visiting 4 instructions in each direction, we will assume
3474  // it's not safe.
3475  MachineBasicBlock::iterator Iter = I;
3476  for (unsigned i = 0; Iter != E && i < 4; ++i) {
3477  bool SeenDef = false;
3478  for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
3479  MachineOperand &MO = Iter->getOperand(j);
3480  if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
3481  SeenDef = true;
3482  if (!MO.isReg())
3483  continue;
3484  if (MO.getReg() == X86::EFLAGS) {
3485  if (MO.isUse())
3486  return false;
3487  SeenDef = true;
3488  }
3489  }
3490 
3491  if (SeenDef)
3492  // This instruction defines EFLAGS, no need to look any further.
3493  return true;
3494  ++Iter;
3495  // Skip over DBG_VALUE.
3496  while (Iter != E && Iter->isDebugValue())
3497  ++Iter;
3498  }
3499 
3500  // It is safe to clobber EFLAGS at the end of a block if no successor has it
3501  // live in.
3502  if (Iter == E) {
3503  for (MachineBasicBlock *S : MBB.successors())
3504  if (S->isLiveIn(X86::EFLAGS))
3505  return false;
3506  return true;
3507  }
3508 
3509  MachineBasicBlock::iterator B = MBB.begin();
3510  Iter = I;
3511  for (unsigned i = 0; i < 4; ++i) {
3512  // If we make it to the beginning of the block, it's safe to clobber
3513  // EFLAGS iff EFLAGS is not live-in.
3514  if (Iter == B)
3515  return !MBB.isLiveIn(X86::EFLAGS);
3516 
3517  --Iter;
3518  // Skip over DBG_VALUE.
3519  while (Iter != B && Iter->isDebugValue())
3520  --Iter;
3521 
3522  bool SawKill = false;
3523  for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
3524  MachineOperand &MO = Iter->getOperand(j);
3525  // A register mask may clobber EFLAGS, but we should still look for a
3526  // live EFLAGS def.
3527  if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
3528  SawKill = true;
3529  if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
3530  if (MO.isDef()) return MO.isDead();
3531  if (MO.isKill()) SawKill = true;
3532  }
3533  }
3534 
3535  if (SawKill)
3536  // This instruction kills EFLAGS and doesn't redefine it, so
3537  // there's no need to look further.
3538  return true;
3539  }
3540 
3541  // Conservative answer.
3542  return false;
3543 }
3544 
3545 void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
3546  MachineBasicBlock::iterator I,
3547  unsigned DestReg, unsigned SubIdx,
3548  const MachineInstr &Orig,
3549  const TargetRegisterInfo &TRI) const {
3550  bool ClobbersEFLAGS = false;
3551  for (const MachineOperand &MO : Orig.operands()) {
3552  if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
3553  ClobbersEFLAGS = true;
3554  break;
3555  }
3556  }
3557 
3558  if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
3559  // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
3560  // effects.
3561  int Value;
3562  switch (Orig.getOpcode()) {
3563  case X86::MOV32r0: Value = 0; break;
3564  case X86::MOV32r1: Value = 1; break;
3565  case X86::MOV32r_1: Value = -1; break;
3566  default:
3567  llvm_unreachable("Unexpected instruction!");
3568  }
3569 
3570  const DebugLoc &DL = Orig.getDebugLoc();
3571  BuildMI(MBB, I, DL, get(X86::MOV32ri))
3572  .addOperand(Orig.getOperand(0))
3573  .addImm(Value);
3574  } else {
3575  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
3576  MBB.insert(I, MI);
3577  }
3578 
3579  MachineInstr &NewMI = *std::prev(I);
3580  NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
3581 }
3582 
3583 /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
3584 bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
3585  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3586  MachineOperand &MO = MI.getOperand(i);
3587  if (MO.isReg() && MO.isDef() &&
3588  MO.getReg() == X86::EFLAGS && !MO.isDead()) {
3589  return true;
3590  }
3591  }
3592  return false;
3593 }
3594 
3595 /// Return the shift count of a machine operand, truncated to the bits the instruction actually uses.
3596 inline static unsigned getTruncatedShiftCount(MachineInstr &MI,
3597  unsigned ShiftAmtOperandIdx) {
3598  // The shift count is six bits with the REX.W prefix and five bits without.
3599  unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
3600  unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
3601  return Imm & ShiftCountMask;
3602 }
3603 
3604 /// Check whether the given shift count can be
3605 /// represented by a LEA instruction.
3606 inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
3607  // Left shift instructions can be transformed into load-effective-address
3608  // instructions if we can encode them appropriately.
3609  // A LEA instruction utilizes a SIB byte to encode its scale factor.
3610  // The SIB.scale field is two bits wide which means that we can encode any
3611  // shift amount less than 4.
3612  return ShAmt < 4 && ShAmt > 0;
3613 }
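 // For example, a left shift by 3 can be folded into an LEA with scale
 // 1 << 3 == 8 (see the SHL16ri case in convertToThreeAddressWithLEA below);
 // the SIB byte cannot encode larger scales, and a shift count of 0 is
 // excluded here.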
3614 
3615 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
3616  unsigned Opc, bool AllowSP, unsigned &NewSrc,
3617  bool &isKill, bool &isUndef,
3618  MachineOperand &ImplicitOp,
3619  LiveVariables *LV) const {
3620  MachineFunction &MF = *MI.getParent()->getParent();
3621  const TargetRegisterClass *RC;
3622  if (AllowSP) {
3623  RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
3624  } else {
3625  RC = Opc != X86::LEA32r ?
3626  &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
3627  }
3628  unsigned SrcReg = Src.getReg();
3629 
3630  // For both LEA64 and LEA32 the register already has essentially the right
3631  // type (32-bit or 64-bit); we may just need to forbid SP.
3632  if (Opc != X86::LEA64_32r) {
3633  NewSrc = SrcReg;
3634  isKill = Src.isKill();
3635  isUndef = Src.isUndef();
3636 
3637  if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
3638  !MF.getRegInfo().constrainRegClass(NewSrc, RC))
3639  return false;
3640 
3641  return true;
3642  }
3643 
3644  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
3645  // another we need to add 64-bit registers to the final MI.
3646  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3647  ImplicitOp = Src;
3648  ImplicitOp.setImplicit();
3649 
3650  NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
3651  isKill = Src.isKill();
3652  isUndef = Src.isUndef();
3653  } else {
3654  // Virtual register of the wrong class; we have to create a temporary 64-bit
3655  // vreg to feed into the LEA.
3656  NewSrc = MF.getRegInfo().createVirtualRegister(RC);
3657  MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
3658  get(TargetOpcode::COPY))
3659  .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
3660  .addOperand(Src);
3661 
3662  // Which is obviously going to be dead after we're done with it.
3663  isKill = true;
3664  isUndef = false;
3665 
3666  if (LV)
3667  LV->replaceKillInstruction(SrcReg, MI, *Copy);
3668  }
3669 
3670  // We've set all the parameters without issue.
3671  return true;
3672 }
3673 
3674 /// Helper for convertToThreeAddress when 16-bit LEA is disabled; it uses 32-bit
3675 /// LEA to form 3-address code by promoting to a 32-bit superregister and then
3676 /// truncating back down to a 16-bit subregister.
3677 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
3678  unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
3679  LiveVariables *LV) const {
3680  MachineBasicBlock::iterator MBBI = MI.getIterator();
3681  unsigned Dest = MI.getOperand(0).getReg();
3682  unsigned Src = MI.getOperand(1).getReg();
3683  bool isDead = MI.getOperand(0).isDead();
3684  bool isKill = MI.getOperand(1).isKill();
3685 
3686  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
3687  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
3688  unsigned Opc, leaInReg;
3689  if (Subtarget.is64Bit()) {
3690  Opc = X86::LEA64_32r;
3691  leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
3692  } else {
3693  Opc = X86::LEA32r;
3694  leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
3695  }
3696 
3697  // Build and insert into an implicit UNDEF value. This is OK because
3698  // we'll be shifting and then extracting the lower 16 bits.
3699  // This has the potential to cause a partial register stall, e.g.:
3700  // movw (%rbp,%rcx,2), %dx
3701  // leal -65(%rdx), %esi
3702  // But testing has shown this *does* help performance in 64-bit mode (at
3703  // least on modern x86 machines).
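  // Roughly, for "%dx = ADD16ri %dx, 5" the code built below is (register
  // names illustrative):
  //   %vreg1            = IMPLICIT_DEF
  //   %vreg1:sub_16bit  = COPY %dx
  //   %vreg2            = LEA64_32r %vreg1 + 5   (LEA32r in 32-bit mode)
  //   %dx               = COPY %vreg2:sub_16bit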
3704  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
3705  MachineInstr *InsMI =
3706  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
3707  .addReg(leaInReg, RegState::Define, X86::sub_16bit)
3708  .addReg(Src, getKillRegState(isKill));
3709 
3710  MachineInstrBuilder MIB =
3711  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opc), leaOutReg);
3712  switch (MIOpc) {
3713  default: llvm_unreachable("Unreachable!");
3714  case X86::SHL16ri: {
3715  unsigned ShAmt = MI.getOperand(2).getImm();
3716  MIB.addReg(0).addImm(1ULL << ShAmt)
3717  .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
3718  break;
3719  }
3720  case X86::INC16r:
3721  addRegOffset(MIB, leaInReg, true, 1);
3722  break;
3723  case X86::DEC16r:
3724  addRegOffset(MIB, leaInReg, true, -1);
3725  break;
3726  case X86::ADD16ri:
3727  case X86::ADD16ri8:
3728  case X86::ADD16ri_DB:
3729  case X86::ADD16ri8_DB:
3730  addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
3731  break;
3732  case X86::ADD16rr:
3733  case X86::ADD16rr_DB: {
3734  unsigned Src2 = MI.getOperand(2).getReg();
3735  bool isKill2 = MI.getOperand(2).isKill();
3736  unsigned leaInReg2 = 0;
3737  MachineInstr *InsMI2 = nullptr;
3738  if (Src == Src2) {
3739  // ADD16rr %reg1028<kill>, %reg1028: both sources are the same register,
3740  // so just a single insert_subreg is needed.
3741  addRegReg(MIB, leaInReg, true, leaInReg, false);
3742  } else {
3743  if (Subtarget.is64Bit())
3744  leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
3745  else
3746  leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
3747  // Build and insert into an implicit UNDEF value. This is OK because
3748  // we'll be shifting and then extracting the lower 16 bits.
3749  BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
3750  InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
3751  .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
3752  .addReg(Src2, getKillRegState(isKill2));
3753  addRegReg(MIB, leaInReg, true, leaInReg2, true);
3754  }
3755  if (LV && isKill2 && InsMI2)
3756  LV->replaceKillInstruction(Src2, MI, *InsMI2);
3757  break;
3758  }
3759  }
3760 
3761  MachineInstr *NewMI = MIB;
3762  MachineInstr *ExtMI =
3763  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
3764  .addReg(Dest, RegState::Define | getDeadRegState(isDead))
3765  .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
3766 
3767  if (LV) {
3768  // Update live variables
3769  LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
3770  LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
3771  if (isKill)
3772  LV->replaceKillInstruction(Src, MI, *InsMI);
3773  if (isDead)
3774  LV->replaceKillInstruction(Dest, MI, *ExtMI);
3775  }
3776 
3777  return ExtMI;
3778 }
3779 
3780 /// This method must be implemented by targets that
3781 /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
3782 /// may be able to convert a two-address instruction into a true
3783 /// three-address instruction on demand. This allows the X86 target (for
3784 /// example) to convert ADD and SHL instructions into LEA instructions if they
3785 /// would require register copies due to two-addressness.
3786 ///
3787 /// This method returns a null pointer if the transformation cannot be
3788 /// performed, otherwise it returns the new instruction.
3789 ///
3790 MachineInstr *
3791 X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
3792  MachineInstr &MI, LiveVariables *LV) const {
3793  // The following opcodes also set the condition code register(s). Only
3794  // convert them to an equivalent LEA if the condition code register defs
3795  // are dead!
3796  if (hasLiveCondCodeDef(MI))
3797  return nullptr;
3798 
3799  MachineFunction &MF = *MI.getParent()->getParent();
3800  // All instructions input are two-addr instructions. Get the known operands.
3801  const MachineOperand &Dest = MI.getOperand(0);
3802  const MachineOperand &Src = MI.getOperand(1);
3803 
3804  MachineInstr *NewMI = nullptr;
3805  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
3806  // we have better subtarget support, enable the 16-bit LEA generation here.
3807  // 16-bit LEA is also slow on Core2.
3808  bool DisableLEA16 = true;
3809  bool is64Bit = Subtarget.is64Bit();
3810 
3811  unsigned MIOpc = MI.getOpcode();
3812  switch (MIOpc) {
3813  default: return nullptr;
3814  case X86::SHL64ri: {
3815  assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
3816  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
3817  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
3818 
3819  // LEA can't handle RSP.
3820  if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
3821  !MF.getRegInfo().constrainRegClass(Src.getReg(),
3822  &X86::GR64_NOSPRegClass))
3823  return nullptr;
3824 
3825  NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
3826  .addOperand(Dest)
3827  .addReg(0)
3828  .addImm(1ULL << ShAmt)
3829  .addOperand(Src)
3830  .addImm(0)
3831  .addReg(0);
3832  break;
3833  }
3834  case X86::SHL32ri: {
3835  assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
3836  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
3837  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
3838 
3839  unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
3840 
3841  // LEA can't handle ESP.
3842  bool isKill, isUndef;
3843  unsigned SrcReg;
3844  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
3845  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
3846  SrcReg, isKill, isUndef, ImplicitOp, LV))
3847  return nullptr;
3848 
3849  MachineInstrBuilder MIB =
3850  BuildMI(MF, MI.getDebugLoc(), get(Opc))
3851  .addOperand(Dest)
3852  .addReg(0)
3853  .addImm(1ULL << ShAmt)
3854  .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
3855  .addImm(0)
3856  .addReg(0);
3857  if (ImplicitOp.getReg() != 0)
3858  MIB.addOperand(ImplicitOp);
3859  NewMI = MIB;
3860 
3861  break;
3862  }
3863  case X86::SHL16ri: {
3864  assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
3865  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
3866  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
3867 
3868  if (DisableLEA16)
3869  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
3870  : nullptr;
3871  NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
3872  .addOperand(Dest)
3873  .addReg(0)
3874  .addImm(1ULL << ShAmt)
3875  .addOperand(Src)
3876  .addImm(0)
3877  .addReg(0);
3878  break;
3879  }
3880  case X86::INC64r:
3881  case X86::INC32r: {
3882  assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
3883  unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
3884  : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
3885  bool isKill, isUndef;
3886  unsigned SrcReg;
3887  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
3888  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
3889  SrcReg, isKill, isUndef, ImplicitOp, LV))
3890  return nullptr;
3891 
3892  MachineInstrBuilder MIB =
3893  BuildMI(MF, MI.getDebugLoc(), get(Opc))
3894  .addOperand(Dest)
3895  .addReg(SrcReg,
3896  getKillRegState(isKill) | getUndefRegState(isUndef));
3897  if (ImplicitOp.getReg() != 0)
3898  MIB.addOperand(ImplicitOp);
3899 
3900  NewMI = addOffset(MIB, 1);
3901  break;
3902  }
3903  case X86::INC16r:
3904  if (DisableLEA16)
3905  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
3906  : nullptr;
3907  assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
3908  NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
3909  .addOperand(Dest)
3910  .addOperand(Src),
3911  1);
3912  break;
3913  case X86::DEC64r:
3914  case X86::DEC32r: {
3915  assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
3916  unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
3917  : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
3918 
3919  bool isKill, isUndef;
3920  unsigned SrcReg;
3921  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
3922  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
3923  SrcReg, isKill, isUndef, ImplicitOp, LV))
3924  return nullptr;
3925 
3926  MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
3927  .addOperand(Dest)
3928  .addReg(SrcReg, getUndefRegState(isUndef) |
3929  getKillRegState(isKill));
3930  if (ImplicitOp.getReg() != 0)
3931  MIB.addOperand(ImplicitOp);
3932 
3933  NewMI = addOffset(MIB, -1);
3934 
3935  break;
3936  }
3937  case X86::DEC16r:
3938  if (DisableLEA16)
3939  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
3940  : nullptr;
3941  assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
3942  NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
3943  .addOperand(Dest)
3944  .addOperand(Src),
3945  -1);
3946  break;
3947  case X86::ADD64rr:
3948  case X86::ADD64rr_DB:
3949  case X86::ADD32rr:
3950  case X86::ADD32rr_DB: {
3951  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
3952  unsigned Opc;
3953  if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
3954  Opc = X86::LEA64r;
3955  else
3956  Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
3957 
3958  bool isKill, isUndef;
3959  unsigned SrcReg;
3960  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
3961  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
3962  SrcReg, isKill, isUndef, ImplicitOp, LV))
3963  return nullptr;
3964 
3965  const MachineOperand &Src2 = MI.getOperand(2);
3966  bool isKill2, isUndef2;
3967  unsigned SrcReg2;
3968  MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
3969  if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
3970  SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
3971  return nullptr;
3972 
3973  MachineInstrBuilder MIB =
3974  BuildMI(MF, MI.getDebugLoc(), get(Opc)).addOperand(Dest);
3975  if (ImplicitOp.getReg() != 0)
3976  MIB.addOperand(ImplicitOp);
3977  if (ImplicitOp2.getReg() != 0)
3978  MIB.addOperand(ImplicitOp2);
3979 
3980  NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
3981 
3982  // Preserve undefness of the operands.
3983  NewMI->getOperand(1).setIsUndef(isUndef);
3984  NewMI->getOperand(3).setIsUndef(isUndef2);
3985 
3986  if (LV && Src2.isKill())
3987  LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
3988  break;
3989  }
3990  case X86::ADD16rr:
3991  case X86::ADD16rr_DB: {
3992  if (DisableLEA16)
3993  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
3994  : nullptr;
3995  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
3996  unsigned Src2 = MI.getOperand(2).getReg();
3997  bool isKill2 = MI.getOperand(2).isKill();
3998  NewMI = addRegReg(
3999  BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).addOperand(Dest),
4000  Src.getReg(), Src.isKill(), Src2, isKill2);
4001 
4002  // Preserve undefness of the operands.
4003  bool isUndef = MI.getOperand(1).isUndef();
4004  bool isUndef2 = MI.getOperand(2).isUndef();
4005  NewMI->getOperand(1).setIsUndef(isUndef);
4006  NewMI->getOperand(3).setIsUndef(isUndef2);
4007 
4008  if (LV && isKill2)
4009  LV->replaceKillInstruction(Src2, MI, *NewMI);
4010  break;
4011  }
4012  case X86::ADD64ri32:
4013  case X86::ADD64ri8:
4014  case X86::ADD64ri32_DB:
4015  case X86::ADD64ri8_DB:
4016  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
4017  NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
4018  .addOperand(Dest)
4019  .addOperand(Src),
4020  MI.getOperand(2));
4021  break;
4022  case X86::ADD32ri:
4023  case X86::ADD32ri8:
4024  case X86::ADD32ri_DB:
4025  case X86::ADD32ri8_DB: {
4026  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
4027  unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
4028 
4029  bool isKill, isUndef;
4030  unsigned SrcReg;
4031  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
4032  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
4033  SrcReg, isKill, isUndef, ImplicitOp, LV))
4034  return nullptr;
4035 
4036  MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
4037  .addOperand(Dest)
4038  .addReg(SrcReg, getUndefRegState(isUndef) |
4039  getKillRegState(isKill));
4040  if (ImplicitOp.getReg() != 0)
4041  MIB.addOperand(ImplicitOp);
4042 
4043  NewMI = addOffset(MIB, MI.getOperand(2));
4044  break;
4045  }
4046  case X86::ADD16ri:
4047  case X86::ADD16ri8:
4048  case X86::ADD16ri_DB:
4049  case X86::ADD16ri8_DB:
4050  if (DisableLEA16)
4051  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
4052  : nullptr;
4053  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
4054  NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
4055  .addOperand(Dest)
4056  .addOperand(Src),
4057  MI.getOperand(2));
4058  break;
4059  }
4060 
4061  if (!NewMI) return nullptr;
4062 
4063  if (LV) { // Update live variables
4064  if (Src.isKill())
4065  LV->replaceKillInstruction(Src.getReg(), MI, *NewMI);
4066  if (Dest.isDead())
4067  LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
4068  }
4069 
4070  MFI->insert(MI.getIterator(), NewMI); // Insert the new inst
4071  return NewMI;
4072 }
4073 
4074 /// This determines which of the three possible cases of a three-source commute
4075 /// the source indexes correspond to, taking into account any mask operands.
4076 /// All cases prevent commuting a passthru operand. Returns -1 if the commute
4077 /// isn't possible.
4078 /// Case 0 - Possible to commute the first and second operands.
4079 /// Case 1 - Possible to commute the first and third operands.
4080 /// Case 2 - Possible to commute the second and third operands.
4081 static int getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
4082  unsigned SrcOpIdx2) {
4083  // Put the lowest index to SrcOpIdx1 to simplify the checks below.
4084  if (SrcOpIdx1 > SrcOpIdx2)
4085  std::swap(SrcOpIdx1, SrcOpIdx2);
4086 
4087  unsigned Op1 = 1, Op2 = 2, Op3 = 3;
4088  if (X86II::isKMasked(TSFlags)) {
4089  // The k-mask operand cannot be commuted.
4090  if (SrcOpIdx1 == 2)
4091  return -1;
4092 
4093  // For k-zero-masked operations it is Ok to commute the first vector
4094  // operand.
4095  // For regular k-masked operations a conservative choice is made, because the
4096  // elements of the first vector operand, for which the corresponding bit
4097  // in the k-mask operand is set to 0, are copied to the result of the
4098  // instruction.
4099  // TODO/FIXME: The commute still may be legal if it is known that the
4100  // k-mask operand is set to either all ones or all zeroes.
4101  // It is also Ok to commute the 1st operand if all users of MI use only
4102  // the elements enabled by the k-mask operand. For example,
4103  // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
4104  // : v1[i];
4105  // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
4106  // // Ok, to commute v1 in FMADD213PSZrk.
4107  if (X86II::isKMergeMasked(TSFlags) && SrcOpIdx1 == Op1)
4108  return -1;
4109  Op2++;
4110  Op3++;
4111  }
4112 
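  // E.g. (illustrative) a zero-masked FMA such as VFMADD213PSZrkz has operands
  // (dst, src1, k-mask, src2, src3): commuting indices 3 and 4 is Case 2, and
  // commuting indices 1 and 3 is Case 0 (allowed only because zero-masking,
  // not merge-masking, is in effect).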
4113  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
4114  return 0;
4115  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
4116  return 1;
4117  if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
4118  return 2;
4119  return -1;
4120 }
4121 
4122 unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
4123  const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
4124  const X86InstrFMA3Group &FMA3Group) const {
4125 
4126  unsigned Opc = MI.getOpcode();
4127 
4128  // Put the lowest index to SrcOpIdx1 to simplify the checks below.
4129  if (SrcOpIdx1 > SrcOpIdx2)
4130  std::swap(SrcOpIdx1, SrcOpIdx2);
4131 
4132  // TODO: Commuting the 1st operand of FMA*_Int requires some additional
4133  // analysis. The commute optimization is legal only if all users of FMA*_Int
4134  // use only the lowest element of the FMA*_Int instruction. Such analysis is
4135  // not implemented yet, so just return 0 in that case.
4136  // When such analysis becomes available, this will be the right place to
4137  // call it.
4138  if (FMA3Group.isIntrinsic() && SrcOpIdx1 == 1)
4139  return 0;
4140 
4141  // Determine which case this commute is or if it can't be done.
4142  int Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
4143  if (Case < 0)
4144  return 0;
4145 
4146  // Define the FMA forms mapping array that helps to map input FMA form
4147  // to output FMA form to preserve the operation semantics after
4148  // commuting the operands.
4149  const unsigned Form132Index = 0;
4150  const unsigned Form213Index = 1;
4151  const unsigned Form231Index = 2;
4152  static const unsigned FormMapping[][3] = {
4153  // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
4154  // FMA132 A, C, b; ==> FMA231 C, A, b;
4155  // FMA213 B, A, c; ==> FMA213 A, B, c;
4156  // FMA231 C, A, b; ==> FMA132 A, C, b;
4157  { Form231Index, Form213Index, Form132Index },
4158  // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
4159  // FMA132 A, c, B; ==> FMA132 B, c, A;
4160  // FMA213 B, a, C; ==> FMA231 C, a, B;
4161  // FMA231 C, a, B; ==> FMA213 B, a, C;
4162  { Form132Index, Form231Index, Form213Index },
4163  // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
4164  // FMA132 a, C, B; ==> FMA213 a, B, C;
4165  // FMA213 b, A, C; ==> FMA132 b, C, A;
4166  // FMA231 c, A, B; ==> FMA231 c, B, A;
4167  { Form213Index, Form132Index, Form231Index }
4168  };
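  // E.g. (illustrative) commuting source operands 2 and 3 of VFMADD213PSr
  // (dst = src2*dst + src3) selects the 132 form: with the sources swapped,
  // VFMADD132PSr (dst = dst*src3 + src2) computes the same value.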
4169 
4170  unsigned FMAForms[3];
4171  if (FMA3Group.isRegOpcodeFromGroup(Opc)) {
4172  FMAForms[0] = FMA3Group.getReg132Opcode();
4173  FMAForms[1] = FMA3Group.getReg213Opcode();
4174  FMAForms[2] = FMA3Group.getReg231Opcode();
4175  } else {
4176  FMAForms[0] = FMA3Group.getMem132Opcode();
4177  FMAForms[1] = FMA3Group.getMem213Opcode();
4178  FMAForms[2] = FMA3Group.getMem231Opcode();
4179  }
4180  unsigned FormIndex;
4181  for (FormIndex = 0; FormIndex < 3; FormIndex++)
4182  if (Opc == FMAForms[FormIndex])
4183  break;
4184 
4185  // Everything is ready, just adjust the FMA opcode and return it.
4186  FormIndex = FormMapping[Case][FormIndex];
4187  return FMAForms[FormIndex];
4188 }
4189 
4190 static bool commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
4191  unsigned SrcOpIdx2) {
4192  uint64_t TSFlags = MI.getDesc().TSFlags;
4193 
4194  // Determine which case this commute is or if it can't be done.
4195  int Case = getThreeSrcCommuteCase(TSFlags, SrcOpIdx1, SrcOpIdx2);
4196  if (Case < 0)
4197  return false;
4198 
4199  // For each case we need to swap two pairs of bits in the final immediate.
4200  static const uint8_t SwapMasks[3][4] = {
4201  { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
4202  { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
4203  { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
4204  };
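  // Worked example: an immediate of 0xCA computes (A & B) | (~A & C) for
  // sources A, B, C (A being the tied first source). Commuting sources 2 and 3
  // (Case 2) rewrites it to 0xAC, which computes the same result with the
  // operands in their swapped positions.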
4205 
4206  uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
4207  // Clear out the bits we are swapping.
4208  uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
4209  SwapMasks[Case][2] | SwapMasks[Case][3]);
4210  // If the immediate had a bit of the pair set, then set the opposite bit.
4211  if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
4212  if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
4213  if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
4214  if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
4215  MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
4216 
4217  return true;
4218 }
4219 
4220 // Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
4221 // commuted.
4222 static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
4223 #define VPERM_CASES(Suffix) \
4224  case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
4225  case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
4226  case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
4227  case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
4228  case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
4229  case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
4230  case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
4231  case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
4232  case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
4233  case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
4234  case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
4235  case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
4236 
4237 #define VPERM_CASES_BROADCAST(Suffix) \
4238  VPERM_CASES(Suffix) \
4239  case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
4240  case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
4241  case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
4242  case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
4243  case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
4244  case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
4245 
4246  switch (Opcode) {
4247  default: return false;
4248  VPERM_CASES(B)
4249  VPERM_CASES_BROADCAST(D)
4250  VPERM_CASES_BROADCAST(PD)
4251  VPERM_CASES_BROADCAST(PS)
4252  VPERM_CASES_BROADCAST(Q)
4253  VPERM_CASES(W)
4254  return true;
4255  }
4256 #undef VPERM_CASES_BROADCAST
4257 #undef VPERM_CASES
4258 }
4259 
4260 // Returns the commuted opcode for VPERMI2 and VPERMT2 instructions by
4261 // switching from the I opcode to the T opcode and vice versa.
4262 static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
4263 #define VPERM_CASES(Orig, New) \
4264  case X86::Orig##128rr: return X86::New##128rr; \
4265  case X86::Orig##128rrkz: return X86::New##128rrkz; \
4266  case X86::Orig##128rm: return X86::New##128rm; \
4267  case X86::Orig##128rmkz: return X86::New##128rmkz; \
4268  case X86::Orig##256rr: return X86::New##256rr; \
4269  case X86::Orig##256rrkz: return X86::New##256rrkz; \
4270  case X86::Orig##256rm: return X86::New##256rm; \
4271  case X86::Orig##256rmkz: return X86::New##256rmkz; \
4272  case X86::Orig##rr: return X86::New##rr; \
4273  case X86::Orig##rrkz: return X86::New##rrkz; \
4274  case X86::Orig##rm: return X86::New##rm; \
4275  case X86::Orig##rmkz: return X86::New##rmkz;
4276 
4277 #define VPERM_CASES_BROADCAST(Orig, New) \
4278  VPERM_CASES(Orig, New) \
4279  case X86::Orig##128rmb: return X86::New##128rmb; \
4280  case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
4281  case X86::Orig##256rmb: return X86::New##256rmb; \
4282  case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
4283  case X86::Orig##rmb: return X86::New##rmb; \
4284  case X86::Orig##rmbkz: return X86::New##rmbkz;
4285 
4286  switch (Opcode) {
4287  VPERM_CASES(VPERMI2B, VPERMT2B)
4288  VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
4289  VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
4290  VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
4291  VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
4292  VPERM_CASES(VPERMI2W, VPERMT2W)
4293  VPERM_CASES(VPERMT2B, VPERMI2B)
4294  VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
4295  VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
4296  VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
4297  VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
4298  VPERM_CASES(VPERMT2W, VPERMI2W)
4299  }
4300 
4301  llvm_unreachable("Unreachable!");
4302 #undef VPERM_CASES_BROADCAST
4303 #undef VPERM_CASES
4304 }
4305 
4306 MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
4307  unsigned OpIdx1,
4308  unsigned OpIdx2) const {
4309  auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
4310  if (NewMI)
4311  return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
4312  return MI;
4313  };
4314 
4315  switch (MI.getOpcode()) {
4316  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
4317  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
4318  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
4319  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
4320  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
4321  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
4322  unsigned Opc;
4323  unsigned Size;
4324  switch (MI.getOpcode()) {
4325  default: llvm_unreachable("Unreachable!");
4326  case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
4327  case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
4328  case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
4329  case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
4330  case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
4331  case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
4332  }
4333  unsigned Amt = MI.getOperand(3).getImm();
4334  auto &WorkingMI = cloneIfNew(MI);
4335  WorkingMI.setDesc(get(Opc));
4336  WorkingMI.getOperand(3).setImm(Size - Amt);
4337  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4338  OpIdx1, OpIdx2);
4339  }
4340  case X86::BLENDPDrri:
4341  case X86::BLENDPSrri:
4342  case X86::PBLENDWrri:
4343  case X86::VBLENDPDrri:
4344  case X86::VBLENDPSrri:
4345  case X86::VBLENDPDYrri:
4346  case X86::VBLENDPSYrri:
4347  case X86::VPBLENDDrri:
4348  case X86::VPBLENDWrri:
4349  case X86::VPBLENDDYrri:
4350  case X86::VPBLENDWYrri:{
4351  unsigned Mask;
4352  switch (MI.getOpcode()) {
4353  default: llvm_unreachable("Unreachable!");
4354  case X86::BLENDPDrri: Mask = 0x03; break;
4355  case X86::BLENDPSrri: Mask = 0x0F; break;
4356  case X86::PBLENDWrri: Mask = 0xFF; break;
4357  case X86::VBLENDPDrri: Mask = 0x03; break;
4358  case X86::VBLENDPSrri: Mask = 0x0F; break;
4359  case X86::VBLENDPDYrri: Mask = 0x0F; break;
4360  case X86::VBLENDPSYrri: Mask = 0xFF; break;
4361  case X86::VPBLENDDrri: Mask = 0x0F; break;
4362  case X86::VPBLENDWrri: Mask = 0xFF; break;
4363  case X86::VPBLENDDYrri: Mask = 0xFF; break;
4364  case X86::VPBLENDWYrri: Mask = 0xFF; break;
4365  }
4366  // Only the least significant bits of Imm are used.
4367  unsigned Imm = MI.getOperand(3).getImm() & Mask;
4368  auto &WorkingMI = cloneIfNew(MI);
4369  WorkingMI.getOperand(3).setImm(Mask ^ Imm);
4370  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4371  OpIdx1, OpIdx2);
4372  }
4373  case X86::MOVSDrr:
4374  case X86::MOVSSrr:
4375  case X86::VMOVSDrr:
4376  case X86::VMOVSSrr:{
4377  // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
4378  if (!Subtarget.hasSSE41())
4379  return nullptr;
4380 
4381  unsigned Mask, Opc;
4382  switch (MI.getOpcode()) {
4383  default: llvm_unreachable("Unreachable!");
4384  case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
4385  case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
4386  case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
4387  case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
4388  }
4389 
4390  // MOVSD/MOVSS's 2nd operand is a FR64/FR32 reg class - we need to copy
4391  // this over to a VR128 class like the 1st operand to use a BLENDPD/BLENDPS.
4392  auto &MRI = MI.getParent()->getParent()->getRegInfo();
4393  auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg());
4394  unsigned VR128 = MRI.createVirtualRegister(VR128RC);
4395  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
4396  VR128)
4397  .addReg(MI.getOperand(2).getReg());
4398 
4399  auto &WorkingMI = cloneIfNew(MI);
4400  WorkingMI.setDesc(get(Opc));
4401  WorkingMI.getOperand(2).setReg(VR128);
4402  WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
4403  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4404  OpIdx1, OpIdx2);
4405  }
4406  case X86::PCLMULQDQrr:
4407  case X86::VPCLMULQDQrr:{
4408  // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
4409  // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
4410  unsigned Imm = MI.getOperand(3).getImm();
4411  unsigned Src1Hi = Imm & 0x01;
4412  unsigned Src2Hi = Imm & 0x10;
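    // E.g. (illustrative) an immediate of 0x01 -- high half of SRC1, low half
    // of SRC2 -- becomes 0x10 once the register operands are exchanged.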
4413  auto &WorkingMI = cloneIfNew(MI);
4414  WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
4415  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4416  OpIdx1, OpIdx2);
4417  }
4418  case X86::CMPSDrr:
4419  case X86::CMPSSrr:
4420  case X86::CMPPDrri:
4421  case X86::CMPPSrri:
4422  case X86::VCMPSDrr:
4423  case X86::VCMPSSrr:
4424  case X86::VCMPPDrri:
4425  case X86::VCMPPSrri:
4426  case X86::VCMPPDYrri:
4427  case X86::VCMPPSYrri:
4428  case X86::VCMPSDZrr:
4429  case X86::VCMPSSZrr:
4430  case X86::VCMPPDZrri:
4431  case X86::VCMPPSZrri:
4432  case X86::VCMPPDZ128rri:
4433  case X86::VCMPPSZ128rri:
4434  case X86::VCMPPDZ256rri:
4435  case X86::VCMPPSZ256rri: {
4436  // Float comparison can be safely commuted for
4437  // Ordered/Unordered/Equal/NotEqual tests
4438  unsigned Imm = MI.getOperand(3).getImm() & 0x7;
4439  switch (Imm) {
4440  case 0x00: // EQUAL
4441  case 0x03: // UNORDERED
4442  case 0x04: // NOT EQUAL
4443  case 0x07: // ORDERED
4444  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
4445  default:
4446  return nullptr;
4447  }
4448  }
4449  case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
4450  case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
4451  case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
4452  case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
4453  case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
4454  case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
4455  case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
4456  case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
4457  case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
4458  case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
4459  case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
4460  case X86::VPCMPWZrri: case X86::VPCMPUWZrri: {
4461  // Flip comparison mode immediate (if necessary).
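    // E.g. "x LT y" must be tested as "y NLE x" once the operands are
    // commuted, so immediate 0x01 becomes 0x06 (and vice versa), while
    // EQ/NE/FALSE/TRUE are symmetric and keep their encodings.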
4462  unsigned Imm = MI.getOperand(3).getImm() & 0x7;
4463  switch (Imm) {
4464  default: llvm_unreachable("Unreachable!");
4465  case 0x01: Imm = 0x06; break; // LT -> NLE
4466  case 0x02: Imm = 0x05; break; // LE -> NLT
4467  case 0x05: Imm = 0x02; break; // NLT -> LE
4468  case 0x06: Imm = 0x01; break; // NLE -> LT
4469  case 0x00: // EQ
4470  case 0x03: // FALSE
4471  case 0x04: // NE
4472  case 0x07: // TRUE
4473  break;
4474  }
4475  auto &WorkingMI = cloneIfNew(MI);
4476  WorkingMI.getOperand(3).setImm(Imm);
4477  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4478  OpIdx1, OpIdx2);
4479  }
4480  case X86::VPCOMBri: case X86::VPCOMUBri:
4481  case X86::VPCOMDri: case X86::VPCOMUDri:
4482  case X86::VPCOMQri: case X86::VPCOMUQri:
4483  case X86::VPCOMWri: case X86::VPCOMUWri: {
4484  // Flip comparison mode immediate (if necessary).
4485  unsigned Imm = MI.getOperand(3).getImm() & 0x7;
4486  switch (Imm) {
4487  default: llvm_unreachable("Unreachable!");
4488  case 0x00: Imm = 0x02; break; // LT -> GT
4489  case 0x01: Imm = 0x03; break; // LE -> GE
4490  case 0x02: Imm = 0x00; break; // GT -> LT
4491  case 0x03: Imm = 0x01; break; // GE -> LE
4492  case 0x04: // EQ
4493  case 0x05: // NE
4494  case 0x06: // FALSE
4495  case 0x07: // TRUE
4496  break;
4497  }
4498  auto &WorkingMI = cloneIfNew(MI);
4499  WorkingMI.getOperand(3).setImm(Imm);
4500  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4501  OpIdx1, OpIdx2);
4502  }
4503  case X86::VPERM2F128rr:
4504  case X86::VPERM2I128rr: {
4505  // Flip permute source immediate.
4506  // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
4507  // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
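    // E.g. (illustrative) imm 0x20 (lo from Op0.lo, hi from Op1.lo) becomes
    // 0x02 after the two source operands are swapped.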
4508  unsigned Imm = MI.getOperand(3).getImm() & 0xFF;
4509  auto &WorkingMI = cloneIfNew(MI);
4510  WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
4511  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4512  OpIdx1, OpIdx2);
4513  }
4514  case X86::MOVHLPSrr:
4515  case X86::UNPCKHPDrr: {
4516  if (!Subtarget.hasSSE2())
4517  return nullptr;
4518 
4519  unsigned Opc = MI.getOpcode();
4520  switch (Opc) {
4521  default: llvm_unreachable("Unreachable!");
4522  case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
4523  case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
4524  }
4525  auto &WorkingMI = cloneIfNew(MI);
4526  WorkingMI.setDesc(get(Opc));
4527  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4528  OpIdx1, OpIdx2);
4529  }
4530  case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
4531  case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
4532  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
4533  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
4534  case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
4535  case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
4536  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
4537  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
4538  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
4539  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
4540  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
4541  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
4542  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
4543  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
4544  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
4545  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
4546  unsigned Opc;
4547  switch (MI.getOpcode()) {
4548  default: llvm_unreachable("Unreachable!");
4549  case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
4550  case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
4551  case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
4552  case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
4553  case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
4554  case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
4555  case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
4556  case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
4557  case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
4558  case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
4559  case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
4560  case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
4561  case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
4562  case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
4563  case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
4564  case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
4565  case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
4566  case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
4567  case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
4568  case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
4569  case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
4570  case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
4571  case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
4572  case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
4573  case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
4574  case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
4575  case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
4576  case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
4577  case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
4578  case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
4579  case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
4580  case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
4581  case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
4582  case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
4583  case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
4584  case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
4585  case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
4586  case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
4587  case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
4588  case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
4589  case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
4590  case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
4591  case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
4592  case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
4593  case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
4594  case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
4595  case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
4596  case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
4597  }
4598  auto &WorkingMI = cloneIfNew(MI);
4599  WorkingMI.setDesc(get(Opc));
4600  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4601  OpIdx1, OpIdx2);
4602  }
4603  case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
4604  case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
4605  case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
4606  case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
4607  case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
4608  case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
4609  case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
4610  case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
4611  case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
4612  case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
4613  case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
4614  case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
4615  case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
4616  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
4617  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
4618  case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
4619  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
4620  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: {
4621  auto &WorkingMI = cloneIfNew(MI);
4622  if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2))
4623  return nullptr;
4624  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4625  OpIdx1, OpIdx2);
4626  }
4627  default: {
4628  if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
4629  unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
4630  auto &WorkingMI = cloneIfNew(MI);
4631  WorkingMI.setDesc(get(Opc));
4632  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4633  OpIdx1, OpIdx2);
4634  }
4635 
4636  const X86InstrFMA3Group *FMA3Group =
4637  X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
4638  if (FMA3Group) {
4639  unsigned Opc =
4640  getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
4641  if (Opc == 0)
4642  return nullptr;
4643  auto &WorkingMI = cloneIfNew(MI);
4644  WorkingMI.setDesc(get(Opc));
4645  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
4646  OpIdx1, OpIdx2);
4647  }
4648 
4649  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
4650  }
4651  }
4652 }
4653 
4654 bool X86InstrInfo::findFMA3CommutedOpIndices(
4655  const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2,
4656  const X86InstrFMA3Group &FMA3Group) const {
4657 
4658  if (!findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2))
4659  return false;
4660 
4661  // Check if we can adjust the opcode to preserve the semantics when
4662  // commuting the register operands.
4663  return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2, FMA3Group) != 0;
4664 }
4665 
4666 bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
4667  unsigned &SrcOpIdx1,
4668  unsigned &SrcOpIdx2) const {
4669  uint64_t TSFlags = MI.getDesc().TSFlags;
4670 
4671  unsigned FirstCommutableVecOp = 1;
4672  unsigned LastCommutableVecOp = 3;
4673  unsigned KMaskOp = 0;
4674  if (X86II::isKMasked(TSFlags)) {
4675  // The k-mask operand has index = 2 for masked and zero-masked operations.
4676  KMaskOp = 2;
4677 
4678  // The operand with index = 1 is used as a source for those elements for
4679  // which the corresponding bit in the k-mask is set to 0.
4680  if (X86II::isKMergeMasked(TSFlags))
4681  FirstCommutableVecOp = 3;
4682 
4683  LastCommutableVecOp++;
4684  }
4685 
4686  if (isMem(MI, LastCommutableVecOp))
4687  LastCommutableVecOp--;
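  // E.g. (illustrative) an unmasked register-form FMA3 can commute any of
  // operands 1..3, while its memory form can only commute operands 1 and 2
  // because the last source is the folded load.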
4688 
4689  // Only the first RegOpsNum operands are commutable.
4690  // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
4691  // that the operand is not specified/fixed.
4692  if (SrcOpIdx1 != CommuteAnyOperandIndex &&
4693  (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
4694  SrcOpIdx1 == KMaskOp))
4695  return false;
4696  if (SrcOpIdx2 != CommuteAnyOperandIndex &&
4697  (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
4698  SrcOpIdx2 == KMaskOp))
4699  return false;
4700 
4701  // Look for two different register operands assumed to be commutable
4702  // regardless of the FMA opcode. The FMA opcode is adjusted later.
4703  if (SrcOpIdx1 == CommuteAnyOperandIndex ||
4704  SrcOpIdx2 == CommuteAnyOperandIndex) {
4705  unsigned CommutableOpIdx1 = SrcOpIdx1;
4706  unsigned CommutableOpIdx2 = SrcOpIdx2;
4707 
4708  // At least one of the operands to be commuted is not specified and
4709  // this method is free to choose appropriate commutable operands.
4710  if (SrcOpIdx1 == SrcOpIdx2)
4711  // Neither operand is fixed. By default set one of the commutable
4712  // operands to the last register operand of the instruction.
4713  CommutableOpIdx2 = LastCommutableVecOp;
4714  else if (SrcOpIdx2 == CommuteAnyOperandIndex)
4715  // Only one of the operands is not fixed.
4716  CommutableOpIdx2 = SrcOpIdx1;
4717 
4718  // CommutableOpIdx2 is well defined now. Let's choose another commutable
4719  // operand and assign its index to CommutableOpIdx1.
4720  unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
4721  for (CommutableOpIdx1 = LastCommutableVecOp;
4722  CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
4723  // Just ignore and skip the k-mask operand.
4724  if (CommutableOpIdx1 == KMaskOp)
4725  continue;
4726 
4727  // The commuted operands must have different registers.
4728  // Otherwise, the commute transformation does not change anything and
4729  // is therefore useless.
4730  if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
4731  break;
4732  }
4733 
4734  // No appropriate commutable operands were found.
4735  if (CommutableOpIdx1 < FirstCommutableVecOp)
4736  return false;
4737 
4738  // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2
4739  // to return those values.
4740  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
4741  CommutableOpIdx1, CommutableOpIdx2))
4742  return false;
4743  }
4744 
4745  return true;
4746 }
4747 
4748 bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
4749  unsigned &SrcOpIdx2) const {
4750  const MCInstrDesc &Desc = MI.getDesc();
4751  if (!Desc.isCommutable())
4752  return false;
4753 
4754  switch (MI.getOpcode()) {
4755  case X86::CMPSDrr:
4756  case X86::CMPSSrr:
4757  case X86::CMPPDrri:
4758  case X86::CMPPSrri:
4759  case X86::VCMPSDrr:
4760  case X86::VCMPSSrr:
4761  case X86::VCMPPDrri:
4762  case X86::VCMPPSrri:
4763  case X86::VCMPPDYrri:
4764  case X86::VCMPPSYrri:
4765  case X86::VCMPSDZrr:
4766  case X86::VCMPSSZrr:
4767  case X86::VCMPPDZrri:
4768  case X86::VCMPPSZrri:
4769  case X86::VCMPPDZ128rri:
4770  case X86::VCMPPSZ128rri:
4771  case X86::VCMPPDZ256rri:
4772  case X86::VCMPPSZ256rri: {
4773  // Float comparison can be safely commuted for
4774  // Ordered/Unordered/Equal/NotEqual tests
4775  unsigned Imm = MI.getOperand(3).getImm() & 0x7;
4776  switch (Imm) {
4777  case 0x00: // EQUAL
4778  case 0x03: // UNORDERED
4779  case 0x04: // NOT EQUAL
4780  case 0x07: // ORDERED
4781  // The indices of the commutable operands are 1 and 2.
4782  // Assign them to the returned operand indices here.
4783  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
4784  }
4785  return false;
4786  }
4787  case X86::MOVSDrr:
4788  case X86::MOVSSrr:
4789  case X86::VMOVSDrr:
4790  case X86::VMOVSSrr: {
4791  if (Subtarget.hasSSE41())
4792  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
4793  return false;
4794  }
4795  case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
4796  case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
4797  case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
4798  case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
4799  case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
4800  case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
4801  case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
4802  case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
4803  case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
4804  case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
4805  case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
4806  case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
4807  case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
4808  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
4809  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
4810  case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
4811  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
4812  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
4813  return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
4814  default:
4815  const X86InstrFMA3Group *FMA3Group =
4816  X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
4817  if (FMA3Group)
4818  return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, *FMA3Group);
4819 
4820  // Handle masked instructions since we need to skip over the mask input
4821  // and the preserved input.
4822  if (Desc.TSFlags & X86II::EVEX_K) {
4823  // First assume that the first input is the mask operand and skip past it.
4824  unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
4825  unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
4826  // Check if the first input is tied. If there isn't a tied operand, then we
4827  // only need to skip the mask operand, which we did above.
4828  if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
4829  MCOI::TIED_TO) != -1)) {
4830  // If this is zero masking instruction with a tied operand, we need to
4831  // move the first index back to the first input since this must
4832  // be a 3 input instruction and we want the first two non-mask inputs.
4833  // Otherwise this is a 2 input instruction with a preserved input and
4834  // mask, so we need to move the indices to skip one more input.
4835  if (Desc.TSFlags & X86II::EVEX_Z)
4836  --CommutableOpIdx1;
4837  else {
4838  ++CommutableOpIdx1;
4839  ++CommutableOpIdx2;
4840  }
4841  }
4842 
4843  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
4844  CommutableOpIdx1, CommutableOpIdx2))
4845  return false;
4846 
4847  if (!MI.getOperand(SrcOpIdx1).isReg() ||
4848  !MI.getOperand(SrcOpIdx2).isReg())
4849  // No idea.
4850  return false;
4851  return true;
4852  }
4853 
4854  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
4855  }
4856  return false;
4857 }
4858 
4859 static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
4860  switch (BrOpc) {
4861  default: return X86::COND_INVALID;
4862  case X86::JE_1: return X86::COND_E;
4863  case X86::JNE_1: return X86::COND_NE;
4864  case X86::JL_1: return X86::COND_L;
4865  case X86::JLE_1: return X86::COND_LE;
4866  case X86::JG_1: return X86::COND_G;
4867  case X86::JGE_1: return X86::COND_GE;
4868  case X86::JB_1: return X86::COND_B;
4869  case X86::JBE_1: return X86::COND_BE;
4870  case X86::JA_1: return X86::COND_A;
4871  case X86::JAE_1: return X86::COND_AE;
4872  case X86::JS_1: return X86::COND_S;
4873  case X86::JNS_1: return X86::COND_NS;
4874  case X86::JP_1: return X86::COND_P;
4875  case X86::JNP_1: return X86::COND_NP;
4876  case X86::JO_1: return X86::COND_O;
4877  case X86::JNO_1: return X86::COND_NO;
4878  }
4879 }
4880 
4881 /// Return condition code of a SET opcode.
4882 static X86::CondCode getCondFromSETOpc(unsigned Opc) {
4883  switch (Opc) {
4884  default: return X86::COND_INVALID;
4885  case X86::SETAr: case X86::SETAm: return X86::COND_A;
4886  case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
4887  case X86::SETBr: case X86::SETBm: return X86::COND_B;
4888  case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
4889  case X86::SETEr: case X86::SETEm: return X86::COND_E;
4890  case X86::SETGr: case X86::SETGm: return X86::COND_G;
4891  case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
4892  case X86::SETLr: case X86::SETLm: return X86::COND_L;
4893  case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
4894  case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
4895  case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
4896  case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
4897  case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
4898  case X86::SETOr: case X86::SETOm: return X86::COND_O;
4899  case X86::SETPr: case X86::SETPm: return X86::COND_P;
4900  case X86::SETSr: case X86::SETSm: return X86::COND_S;
4901  }
4902 }
4903 
4904 /// Return condition code of a CMov opcode.
4905 X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
4906  switch (Opc) {
4907  default: return X86::COND_INVALID;
4908  case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
4909  case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
4910  return X86::COND_A;
4911  case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
4912  case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
4913  return X86::COND_AE;
4914  case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
4915  case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
4916  return X86::COND_B;
4917  case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
4918  case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
4919  return X86::COND_BE;
4920  case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
4921  case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
4922  return X86::COND_E;
4923  case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
4924  case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
4925  return X86::COND_G;
4926  case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
4927  case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
4928  return X86::COND_GE;
4929  case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
4930  case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
4931  return X86::COND_L;
4932  case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
4933  case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
4934  return X86::COND_LE;
4935  case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
4936  case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
4937  return X86::COND_NE;
4938  case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
4939  case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
4940  return X86::COND_NO;
4941  case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
4942  case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
4943  return X86::COND_NP;
4944  case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
4945  case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
4946  return X86::COND_NS;
4947  case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
4948  case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
4949  return X86::COND_O;
4950  case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
4951  case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
4952  return X86::COND_P;
4953  case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
4954  case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
4955  return X86::COND_S;
4956  }
4957 }
4958 
4959 unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
4960  switch (CC) {
4961  default: llvm_unreachable("Illegal condition code!");
4962  case X86::COND_E: return X86::JE_1;
4963  case X86::COND_NE: return X86::JNE_1;
4964  case X86::COND_L: return X86::JL_1;
4965  case X86::COND_LE: return X86::JLE_1;
4966  case X86::COND_G: return X86::JG_1;
4967  case X86::COND_GE: return X86::JGE_1;
4968  case X86::COND_B: return X86::JB_1;
4969  case X86::COND_BE: return X86::JBE_1;
4970  case X86::COND_A: return X86::JA_1;
4971  case X86::COND_AE: return X86::JAE_1;
4972  case X86::COND_S: return X86::JS_1;
4973  case X86::COND_NS: return X86::JNS_1;
4974  case X86::COND_P: return X86::JP_1;
4975  case X86::COND_NP: return X86::JNP_1;
4976  case X86::COND_O: return X86::JO_1;
4977  case X86::COND_NO: return X86::JNO_1;
4978  }
4979 }
4980 
4981 /// Return the inverse of the specified condition,
4982 /// e.g. turning COND_E to COND_NE.
4983 X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
4984  switch (CC) {
4985  default: llvm_unreachable("Illegal condition code!");
4986  case X86::COND_E: return X86::COND_NE;
4987  case X86::COND_NE: return X86::COND_E;
4988  case X86::COND_L: return X86::COND_GE;
4989  case X86::COND_LE: return X86::COND_G;
4990  case X86::COND_G: return X86::COND_LE;
4991  case X86::COND_GE: return X86::COND_L;
4992  case X86::COND_B: return X86::COND_AE;
4993  case X86::COND_BE: return X86::COND_A;
4994  case X86::COND_A: return X86::COND_BE;
4995  case X86::COND_AE: return X86::COND_B;
4996  case X86::COND_S: return X86::COND_NS;
4997  case X86::COND_NS: return X86::COND_S;
4998  case X86::COND_P: return X86::COND_NP;
4999  case X86::COND_NP: return X86::COND_P;
5000  case X86::COND_O: return X86::COND_NO;
5001  case X86::COND_NO: return X86::COND_O;
5002  case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
5003  case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
5004  }
5005 }
5006 
5007 /// Assuming the flags are set by MI(a,b), return the condition code if we
5008 /// modify the instructions such that flags are set by MI(b,a).
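/// E.g. if CMP(a, b) fed a jump-if-less (a < b), then after rewriting to
/// CMP(b, a) the same test must use jump-if-greater, so COND_L maps to COND_G.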
5009 static X86::CondCode getSwappedCondition(X86::CondCode CC) {
5010  switch (CC) {
5011  default: return X86::COND_INVALID;
5012  case X86::COND_E: return X86::COND_E;
5013  case X86::COND_NE: return X86::COND_NE;
5014  case X86::COND_L: return X86::COND_G;
5015  case X86::COND_LE: return X86::COND_GE;
5016  case X86::COND_G: return X86::COND_L;
5017  case X86::COND_GE: return X86::COND_LE;
5018  case X86::COND_B: return X86::COND_A;
5019  case X86::COND_BE: return X86::COND_AE;
5020  case X86::COND_A: return X86::COND_B;
5021  case X86::COND_AE: return X86::COND_BE;
5022  }
5023 }
5024 
5025 /// Return a set opcode for the given condition and
5026 /// whether it has memory operand.
5027 unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
5028  static const uint16_t Opc[16][2] = {
5029  { X86::SETAr, X86::SETAm },
5030  { X86::SETAEr, X86::SETAEm },
5031  { X86::SETBr, X86::SETBm },
5032  { X86::SETBEr, X86::SETBEm },
5033  { X86::SETEr, X86::SETEm },
5034  { X86::SETGr, X86::SETGm },
5035  { X86::SETGEr, X86::SETGEm },
5036  { X86::SETLr, X86::SETLm },
5037  { X86::SETLEr, X86::SETLEm },
5038  { X86::SETNEr, X86::SETNEm },
5039  { X86::SETNOr, X86::SETNOm },
5040  { X86::SETNPr, X86::SETNPm },
5041  { X86::SETNSr, X86::SETNSm },
5042  { X86::SETOr, X86::SETOm },
5043  { X86::SETPr, X86::SETPm },
5044  { X86::SETSr, X86::SETSm }
5045  };
5046 
5047  assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
5048  return Opc[CC][HasMemoryOperand ? 1 : 0];
5049 }
5050 
5051 /// Return a cmov opcode for the given condition,
5052 /// register size in bytes, and operand type.
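/// E.g. (illustrative) getCMovFromCond(X86::COND_E, 4, /*HasMemoryOperand=*/false)
/// returns X86::CMOVE32rr.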
5053 unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
5054  bool HasMemoryOperand) {
5055  static const uint16_t Opc[32][3] = {
5056  { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
5057  { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
5058  { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
5059  { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
5060  { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
5061  { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
5062  { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
5063  { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
5064  { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
5065  { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
5066  { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
5067  { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
5068  { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
5069  { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
5070  { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
5071  { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
5072  { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
5073  { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
5074  { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
5075  { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
5076  { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
5077  { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
5078  { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
5079  { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
5080  { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
5081  { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
5082  { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
5083  { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
5084  { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
5085  { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
5086  { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
5087  { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
5088  };
5089 
5090  assert(CC < 16 && "Can only handle standard cond codes");
5091  unsigned Idx = HasMemoryOperand ? 16+CC : CC;
5092  switch(RegBytes) {
5093  default: llvm_unreachable("Illegal register size!");
5094  case 2: return Opc[Idx][0];
5095  case 4: return Opc[Idx][1];
5096  case 8: return Opc[Idx][2];
5097  }
5098 }
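// Usage sketch (same row-ordering assumption; rows 16-31 are the rm forms and
// the column is selected by the register size):
//   X86::getCMovFromCond(X86::COND_B, /*RegBytes=*/4, /*HasMemoryOperand=*/false)
//     -> X86::CMOVB32rr
//   X86::getCMovFromCond(X86::COND_B, /*RegBytes=*/8, /*HasMemoryOperand=*/true)
//     -> X86::CMOVB64rm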
5099 
5100 bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
5101  if (!MI.isTerminator()) return false;
5102 
5103  // Conditional branch is a special case.
5104  if (MI.isBranch() && !MI.isBarrier())
5105  return true;
5106  if (!MI.isPredicable())
5107  return true;
5108  return !isPredicated(MI);
5109 }
5110 
5111 // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
5112 // not be a fallthrough MBB now due to layout changes). Return nullptr if the
5113 // fallthrough MBB cannot be identified.
5114 static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
5115  MachineBasicBlock *TBB) {
5116  // Look for non-EHPad successors other than TBB. If we find exactly one, it
5117  // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
5118  // and fallthrough MBB. If we find more than one, we cannot identify the
5119  // fallthrough MBB and should return nullptr.
5120  MachineBasicBlock *FallthroughBB = nullptr;
5121  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
5122  if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB))
5123  continue;
5124  // Return a nullptr if we found more than one fallthrough successor.
5125  if (FallthroughBB && FallthroughBB != TBB)
5126  return nullptr;
5127  FallthroughBB = *SI;
5128  }
5129  return FallthroughBB;
5130 }
5131 
5132 bool X86InstrInfo::AnalyzeBranchImpl(
5133  MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
5134  SmallVectorImpl<MachineOperand> &Cond,
5135  SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
5136 
5137  // Start from the bottom of the block and work up, examining the
5138  // terminator instructions.
5139  MachineBasicBlock::iterator I = MBB.end();
5140  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
5141  while (I != MBB.begin()) {
5142  --I;
5143  if (I->isDebugValue())
5144  continue;
5145 
5146  // Working from the bottom, when we see a non-terminator instruction, we're
5147  // done.
5148  if (!isUnpredicatedTerminator(*I))
5149  break;
5150 
5151  // A terminator that isn't a branch can't easily be handled by this
5152  // analysis.
5153  if (!I->isBranch())
5154  return true;
5155 
5156  // Handle unconditional branches.
5157  if (I->getOpcode() == X86::JMP_1) {
5158  UnCondBrIter = I;
5159 
5160  if (!AllowModify) {
5161  TBB = I->getOperand(0).getMBB();
5162  continue;
5163  }
5164 
5165  // If the block has any instructions after a JMP, delete them.
5166  while (std::next(I) != MBB.end())
5167  std::next(I)->eraseFromParent();
5168 
5169  Cond.clear();
5170  FBB = nullptr;
5171 
5172  // Delete the JMP if it's equivalent to a fall-through.
5173  if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
5174  TBB = nullptr;
5175  I->eraseFromParent();
5176  I = MBB.end();
5177  UnCondBrIter = MBB.end();
5178  continue;
5179  }
5180 
5181  // TBB is used to indicate the unconditional destination.
5182  TBB = I->getOperand(0).getMBB();
5183  continue;
5184  }
5185 
5186  // Handle conditional branches.
5187  X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
5188  if (BranchCode == X86::COND_INVALID)
5189  return true; // Can't handle indirect branch.
5190 
5191  // Working from the bottom, handle the first conditional branch.
5192  if (Cond.empty()) {
5193  MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
5194  if (AllowModify && UnCondBrIter != MBB.end() &&
5195  MBB.isLayoutSuccessor(TargetBB)) {
5196  // If we can modify the code and it ends in something like:
5197  //
5198  // jCC L1
5199  // jmp L2
5200  // L1:
5201  // ...
5202  // L2:
5203  //
5204  // Then we can change this to:
5205  //
5206  // jnCC L2
5207  // L1:
5208  // ...
5209  // L2:
5210  //
5211  // Which is a bit more efficient.
5212  // We conditionally jump to the fall-through block.
5213  BranchCode = GetOppositeBranchCondition(BranchCode);
5214  unsigned JNCC = GetCondBranchFromCond(BranchCode);
5215  MachineBasicBlock::iterator OldInst = I;
5216 
5217  BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
5218  .addMBB(UnCondBrIter->getOperand(0).getMBB());
5219  BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
5220  .addMBB(TargetBB);
5221 
5222  OldInst->eraseFromParent();
5223  UnCondBrIter->eraseFromParent();
5224 
5225  // Restart the analysis.
5226  UnCondBrIter = MBB.end();
5227  I = MBB.end();
5228  continue;
5229  }
5230 
5231  FBB = TBB;
5232  TBB = I->getOperand(0).getMBB();
5233  Cond.push_back(MachineOperand::CreateImm(BranchCode));
5234  CondBranches.push_back(&*I);
5235  continue;
5236  }
5237 
5238  // Handle subsequent conditional branches. Only handle the case where all
5239  // conditional branches branch to the same destination and their condition
5240  // opcodes fit one of the special multi-branch idioms.
5241  assert(Cond.size() == 1);
5242  assert(TBB);
5243 
5244  // If the conditions are the same, we can leave them alone.
5245  X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
5246  auto NewTBB = I->getOperand(0).getMBB();
5247  if (OldBranchCode == BranchCode && TBB == NewTBB)
5248  continue;
5249 
5250  // If they differ, see if they fit one of the known patterns. Theoretically,
5251  // we could handle more patterns here, but we shouldn't expect to see them
5252  // if instruction selection has done a reasonable job.
5253  if (TBB == NewTBB &&
5254  ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
5255  (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
5256  BranchCode = X86::COND_NE_OR_P;
5257  } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
5258  (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
5259  if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
5260  return true;
5261 
5262  // X86::COND_E_AND_NP usually has two different branch destinations.
5263  //
5264  // JP B1
5265  // JE B2
5266  // JMP B1
5267  // B1:
5268  // B2:
5269  //
5270  // Here this condition branches to B2 only if NP && E. It has another
5271  // equivalent form:
5272  //
5273  // JNE B1
5274  // JNP B2
5275  // JMP B1
5276  // B1:
5277  // B2:
5278  //
5279  // Similarly it branches to B2 only if E && NP. That is why this condition
5280  // is named with COND_E_AND_NP.
5281  BranchCode = X86::COND_E_AND_NP;
5282  } else
5283  return true;
5284 
5285  // Update the MachineOperand.
5286  Cond[0].setImm(BranchCode);
5287  CondBranches.push_back(&*I);
5288  }
5289 
5290  return false;
5291 }
5292 
5293 bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
5294  MachineBasicBlock *&TBB,
5295  MachineBasicBlock *&FBB,
5296  SmallVectorImpl<MachineOperand> &Cond,
5297  bool AllowModify) const {
5298  SmallVector<MachineInstr *, 4> CondBranches;
5299  return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
5300 }
5301 
5302 bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
5303  MachineBranchPredicate &MBP,
5304  bool AllowModify) const {
5305  using namespace std::placeholders;
5306 
5307  SmallVector<MachineOperand, 4> Cond;
5308  SmallVector<MachineInstr *, 4> CondBranches;
5309  if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
5310  AllowModify))
5311  return true;
5312 
5313  if (Cond.size() != 1)
5314  return true;
5315 
5316  assert(MBP.TrueDest && "expected!");
5317 
5318  if (!MBP.FalseDest)
5319  MBP.FalseDest = MBB.getNextNode();
5320 
5321  const TargetRegisterInfo *TRI = &getRegisterInfo();
5322 
5323  MachineInstr *ConditionDef = nullptr;
5324  bool SingleUseCondition = true;
5325 
5326  for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
5327  if (I->modifiesRegister(X86::EFLAGS, TRI)) {
5328  ConditionDef = &*I;
5329  break;
5330  }
5331 
5332  if (I->readsRegister(X86::EFLAGS, TRI))
5333  SingleUseCondition = false;
5334  }
5335 
5336  if (!ConditionDef)
5337  return true;
5338 
5339  if (SingleUseCondition) {
5340  for (auto *Succ : MBB.successors())
5341  if (Succ->isLiveIn(X86::EFLAGS))
5342  SingleUseCondition = false;
5343  }
5344 
5345  MBP.ConditionDef = ConditionDef;
5346  MBP.SingleUseCondition = SingleUseCondition;
5347 
5348  // Currently we only recognize the simple pattern:
5349  //
5350  // test %reg, %reg
5351  // je %label
5352  //
5353  const unsigned TestOpcode =
5354  Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
5355 
5356  if (ConditionDef->getOpcode() == TestOpcode &&
5357  ConditionDef->getNumOperands() == 3 &&
5358  ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
5359  (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
5360  MBP.LHS = ConditionDef->getOperand(0);
5361  MBP.RHS = MachineOperand::CreateImm(0);
5362  MBP.Predicate = Cond[0].getImm() == X86::COND_NE
5363  ? MachineBranchPredicate::PRED_NE
5364  : MachineBranchPredicate::PRED_EQ;
5365  return false;
5366  }
5367 
5368  return true;
5369 }
5370 
5371 unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
5372  int *BytesRemoved) const {
5373  assert(!BytesRemoved && "code size not handled");
5374 
5375  MachineBasicBlock::iterator I = MBB.end();
5376  unsigned Count = 0;
5377 
5378  while (I != MBB.begin()) {
5379  --I;
5380  if (I->isDebugValue())
5381  continue;
5382  if (I->getOpcode() != X86::JMP_1 &&
5383  getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
5384  break;
5385  // Remove the branch.
5386  I->eraseFromParent();
5387  I = MBB.end();
5388  ++Count;
5389  }
5390 
5391  return Count;
5392 }
5393 
5394 unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
5395  MachineBasicBlock *TBB,
5396  MachineBasicBlock *FBB,
5397  ArrayRef<MachineOperand> Cond,
5398  const DebugLoc &DL,
5399  int *BytesAdded) const {
5400  // Shouldn't be a fall through.
5401  assert(TBB && "insertBranch must not be told to insert a fallthrough");
5402  assert((Cond.size() == 1 || Cond.size() == 0) &&
5403  "X86 branch conditions have one component!");
5404  assert(!BytesAdded && "code size not handled");
5405 
5406  if (Cond.empty()) {
5407  // Unconditional branch?
5408  assert(!FBB && "Unconditional branch with multiple successors!");
5409  BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
5410  return 1;
5411  }
5412 
5413  // If FBB is null, it is implied to be a fall-through block.
5414  bool FallThru = FBB == nullptr;
5415 
5416  // Conditional branch.
5417  unsigned Count = 0;
5418  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
5419  switch (CC) {
5420  case X86::COND_NE_OR_P:
5421  // Synthesize NE_OR_P with two branches.
5422  BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
5423  ++Count;
5424  BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
5425  ++Count;
5426  break;
5427  case X86::COND_E_AND_NP:
5428  // Use the next block of MBB as FBB if it is null.
5429  if (FBB == nullptr) {
5430  FBB = getFallThroughMBB(&MBB, TBB);
5431  assert(FBB && "MBB cannot be the last block in function when the false "
5432  "body is a fall-through.");
5433  }
5434  // Synthesize COND_E_AND_NP with two branches.
5435  BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
5436  ++Count;
5437  BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
5438  ++Count;
5439  break;
5440  default: {
5441  unsigned Opc = GetCondBranchFromCond(CC);
5442  BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
5443  ++Count;
5444  }
5445  }
5446  if (!FallThru) {
5447  // Two-way Conditional branch. Insert the second branch.
5448  BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
5449  ++Count;
5450  }
5451  return Count;
5452 }
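// Rough shape of what insertBranch emits (TBB = taken target, FBB = false
// target when it is not a fall-through):
//   single condition, e.g. COND_E:   JE_1 TBB             [; JMP_1 FBB]
//   COND_NE_OR_P:                    JNE_1 TBB; JP_1 TBB  [; JMP_1 FBB]
//   COND_E_AND_NP:                   JNE_1 FBB; JNP_1 TBB [; JMP_1 FBB]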
5453 
5454 bool X86InstrInfo::
5455 canInsertSelect(const MachineBasicBlock &MBB,
5456  ArrayRef<MachineOperand> Cond,
5457  unsigned TrueReg, unsigned FalseReg,
5458  int &CondCycles, int &TrueCycles, int &FalseCycles) const {
5459  // Not all subtargets have cmov instructions.
5460  if (!Subtarget.hasCMov())
5461  return false;
5462  if (Cond.size() != 1)
5463  return false;
5464  // We cannot do the composite conditions, at least not in SSA form.
5465  if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
5466  return false;
5467 
5468  // Check register classes.
5469  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5470  const TargetRegisterClass *RC =
5471  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
5472  if (!RC)
5473  return false;
5474 
5475  // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
5476  if (X86::GR16RegClass.hasSubClassEq(RC) ||
5477  X86::GR32RegClass.hasSubClassEq(RC) ||
5478  X86::GR64RegClass.hasSubClassEq(RC)) {
5479  // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
5480  // Bridge. Probably Ivy Bridge as well.
5481  CondCycles = 2;
5482  TrueCycles = 2;
5483  FalseCycles = 2;
5484  return true;
5485  }
5486 
5487  // Can't do vectors.
5488  return false;
5489 }
5490 
5491 void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
5492  MachineBasicBlock::iterator I,
5493  const DebugLoc &DL, unsigned DstReg,
5494  ArrayRef<MachineOperand> Cond, unsigned TrueReg,
5495  unsigned FalseReg) const {
5496  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5497  assert(Cond.size() == 1 && "Invalid Cond array");
5498  unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
5499  MRI.getRegClass(DstReg)->getSize(),
5500  false /*HasMemoryOperand*/);
5501  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
5502 }
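// Sketch: for Cond = { X86::COND_A } and a GR32 destination this emits
//   DstReg = CMOVA32rr FalseReg, TrueReg
// i.e. the false value is the tied source and the true value is moved in only
// when the condition holds.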
5503 
5504 /// Test if the given register is a physical h register.
5505 static bool isHReg(unsigned Reg) {
5506  return X86::GR8_ABCD_HRegClass.contains(Reg);
5507 }
5508 
5509 // Try and copy between VR128/VR64 and GR64 registers.
5510 static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
5511  const X86Subtarget &Subtarget) {
5512  bool HasAVX = Subtarget.hasAVX();
5513  bool HasAVX512 = Subtarget.hasAVX512();
5514 
5515  // SrcReg(MaskReg) -> DestReg(GR64)
5516  // SrcReg(MaskReg) -> DestReg(GR32)
5517  // SrcReg(MaskReg) -> DestReg(GR16)
5518  // SrcReg(MaskReg) -> DestReg(GR8)
5519 
5520  // All KMASK RegClasses hold the same k registers; any of them can be used for the check.
5521  if (X86::VK16RegClass.contains(SrcReg)) {
5522  if (X86::GR64RegClass.contains(DestReg)) {
5523  assert(Subtarget.hasBWI());
5524  return X86::KMOVQrk;
5525  }
5526  if (X86::GR32RegClass.contains(DestReg))
5527  return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
5528  if (X86::GR16RegClass.contains(DestReg)) {
5529  DestReg = getX86SubSuperRegister(DestReg, 32);
5530  return X86::KMOVWrk;
5531  }
5532  if (X86::GR8RegClass.contains(DestReg)) {
5533  DestReg = getX86SubSuperRegister(DestReg, 32);
5534  return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
5535  }
5536  }
5537 
5538  // SrcReg(GR64) -> DestReg(MaskReg)
5539  // SrcReg(GR32) -> DestReg(MaskReg)
5540  // SrcReg(GR16) -> DestReg(MaskReg)
5541  // SrcReg(GR8) -> DestReg(MaskReg)
5542 
5543  // All KMASK RegClasses hold the same k registers; any of them can be used for the check.
5544  if (X86::VK16RegClass.contains(DestReg)) {
5545  if (X86::GR64RegClass.contains(SrcReg)) {
5546  assert(Subtarget.hasBWI());
5547  return X86::KMOVQkr;
5548  }
5549  if (X86::GR32RegClass.contains(SrcReg))
5550  return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
5551  if (X86::GR16RegClass.contains(SrcReg)) {
5552  SrcReg = getX86SubSuperRegister(SrcReg, 32);
5553  return X86::KMOVWkr;
5554  }
5555  if (X86::GR8RegClass.contains(SrcReg)) {
5556  SrcReg = getX86SubSuperRegister(SrcReg, 32);
5557  return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
5558  }
5559  }
5560 
5561 
5562  // SrcReg(VR128) -> DestReg(GR64)
5563  // SrcReg(VR64) -> DestReg(GR64)
5564  // SrcReg(GR64) -> DestReg(VR128)
5565  // SrcReg(GR64) -> DestReg(VR64)
5566 
5567  if (X86::GR64RegClass.contains(DestReg)) {
5568  if (X86::VR128XRegClass.contains(SrcReg))
5569  // Copy from a VR128 register to a GR64 register.
5570  return HasAVX512 ? X86::VMOVPQIto64Zrr :
5571  HasAVX ? X86::VMOVPQIto64rr :
5572  X86::MOVPQIto64rr;
5573  if (X86::VR64RegClass.contains(SrcReg))
5574  // Copy from a VR64 register to a GR64 register.
5575  return X86::MMX_MOVD64from64rr;
5576  } else if (X86::GR64RegClass.contains(SrcReg)) {
5577  // Copy from a GR64 register to a VR128 register.
5578  if (X86::VR128XRegClass.contains(DestReg))
5579  return HasAVX512 ? X86::VMOV64toPQIZrr :
5580  HasAVX ? X86::VMOV64toPQIrr :
5581  X86::MOV64toPQIrr;
5582  // Copy from a GR64 register to a VR64 register.
5583  if (X86::VR64RegClass.contains(DestReg))
5584  return X86::MMX_MOVD64to64rr;
5585  }
5586 
5587  // SrcReg(FR32) -> DestReg(GR32)
5588  // SrcReg(GR32) -> DestReg(FR32)
5589 
5590  if (X86::GR32RegClass.contains(DestReg) &&
5591  X86::FR32XRegClass.contains(SrcReg))
5592  // Copy from a FR32 register to a GR32 register.
5593  return HasAVX512 ? X86::VMOVSS2DIZrr :
5594  HasAVX ? X86::VMOVSS2DIrr :
5595  X86::MOVSS2DIrr;
5596 
5597  if (X86::FR32XRegClass.contains(DestReg) &&
5598  X86::GR32RegClass.contains(SrcReg))
5599  // Copy from a GR32 register to a FR32 register.
5600  return HasAVX512 ? X86::VMOVDI2SSZrr :
5601  HasAVX ? X86::VMOVDI2SSrr :
5602  X86::MOVDI2SSrr;
5603  return 0;
5604 }
5605 
5606 void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5607  MachineBasicBlock::iterator MI,
5608  const DebugLoc &DL, unsigned DestReg,
5609  unsigned SrcReg, bool KillSrc) const {
5610  // First deal with the normal symmetric copies.
5611  bool HasAVX = Subtarget.hasAVX();
5612  bool HasVLX = Subtarget.hasVLX();
5613  unsigned Opc = 0;
5614  if (X86::GR64RegClass.contains(DestReg, SrcReg))
5615  Opc = X86::MOV64rr;
5616  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
5617  Opc = X86::MOV32rr;
5618  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
5619  Opc = X86::MOV16rr;
5620  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
5621  // Copying to or from a physical H register on x86-64 requires a NOREX
5622  // move. Otherwise use a normal move.
5623  if ((isHReg(DestReg) || isHReg(SrcReg)) &&
5624  Subtarget.is64Bit()) {
5625  Opc = X86::MOV8rr_NOREX;
5626  // Both operands must be encodable without a REX prefix.
5627  assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
5628  "8-bit H register can not be copied outside GR8_NOREX");
5629  } else
5630  Opc = X86::MOV8rr;
5631  }
5632  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
5633  Opc = X86::MMX_MOVQ64rr;
5634  else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
5635  if (HasVLX)
5636  Opc = X86::VMOVAPSZ128rr;
5637  else if (X86::VR128RegClass.contains(DestReg, SrcReg))
5638  Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
5639  else {
5640  // If this is an extended register and we don't have VLX, we need to use a
5641  // 512-bit move.
5642  Opc = X86::VMOVAPSZrr;
5643  const TargetRegisterInfo *TRI = &getRegisterInfo();
5644  DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm,
5645  &X86::VR512RegClass);
5646  SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm,
5647  &X86::VR512RegClass);
5648  }
5649  } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
5650  if (HasVLX)
5651  Opc = X86::VMOVAPSZ256rr;
5652  else if (X86::VR256RegClass.contains(DestReg, SrcReg))
5653  Opc = X86::VMOVAPSYrr;
5654  else {
5655  // If this is an extended register and we don't have VLX, we need to use a
5656  // 512-bit move.
5657  Opc = X86::VMOVAPSZrr;
5658  const TargetRegisterInfo *TRI = &getRegisterInfo();
5659  DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm,
5660  &X86::VR512RegClass);
5661  SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm,
5662  &X86::VR512RegClass);
5663  }
5664  } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
5665  Opc = X86::VMOVAPSZrr;
5666  // All KMASK RegClasses hold the same k registers; any of them can be used for the check.
5667  else if (X86::VK16RegClass.contains(DestReg, SrcReg))
5668  Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
5669  if (!Opc)
5670  Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
5671 
5672  if (Opc) {
5673  BuildMI(MBB, MI, DL, get(Opc), DestReg)
5674  .addReg(SrcReg, getKillRegState(KillSrc));
5675  return;
5676  }
5677 
5678  bool FromEFLAGS = SrcReg == X86::EFLAGS;
5679  bool ToEFLAGS = DestReg == X86::EFLAGS;
5680  int Reg = FromEFLAGS ? DestReg : SrcReg;
5681  bool is32 = X86::GR32RegClass.contains(Reg);
5682  bool is64 = X86::GR64RegClass.contains(Reg);
5683 
5684  if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
5685  int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
5686  int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
5687  int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
5688  int Pop = is64 ? X86::POP64r : X86::POP32r;
5689  int PopF = is64 ? X86::POPF64 : X86::POPF32;
5690  int AX = is64 ? X86::RAX : X86::EAX;
5691 
5692  if (!Subtarget.hasLAHFSAHF()) {
5693  assert(Subtarget.is64Bit() &&
5694  "Not having LAHF/SAHF only happens on 64-bit.");
5695  // Moving EFLAGS to / from another register requires a push and a pop.
5696  // Notice that we have to adjust the stack if we don't want to clobber the
5697  // first frame index. See X86FrameLowering.cpp - usesTheStack.
5698  if (FromEFLAGS) {
5699  BuildMI(MBB, MI, DL, get(PushF));
5700  BuildMI(MBB, MI, DL, get(Pop), DestReg);
5701  }
5702  if (ToEFLAGS) {
5703  BuildMI(MBB, MI, DL, get(Push))
5704  .addReg(SrcReg, getKillRegState(KillSrc));
5705  BuildMI(MBB, MI, DL, get(PopF));
5706  }
5707  return;
5708  }
5709 
5710  // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
5711  // inefficient. Instead:
5712  // - Save the overflow flag OF into AL using SETO, and restore it using a
5713  // signed 8-bit addition of AL and INT8_MAX.
5714  // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
5715  // using LAHF/SAHF.
5716  // - When RAX/EAX is live and isn't the destination register, make sure it
5717  // isn't clobbered by PUSH/POP'ing it before and after saving/restoring
5718  // the flags.
5719  // This approach is ~2.25x faster than using PUSHF/POPF.
5720  //
5721  // This is still somewhat inefficient because we don't know which flags are
5722  // actually live inside EFLAGS. Were we able to do a single SETcc instead of
5723  // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster.
5724  //
5725  // PUSHF/POPF is also potentially incorrect because it affects other flags
5726  // such as TF/IF/DF, which LLVM doesn't model.
5727  //
5728  // Notice that we have to adjust the stack if we don't want to clobber the
5729  // first frame index.
5730  // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment.
5731 
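  // Rough shape of the sequence built below for "DestReg = copy EFLAGS" when
  // LAHF/SAHF are available (the PUSH/POP of rAX appears only if rAX is live
  // here and is not the register being copied):
  //   PUSH rAX
  //   SETO AL
  //   LAHF
  //   MOV  DestReg, rAX
  //   POP  rAX
  // and symmetrically MOV rAX, SrcReg; ADD AL, 127; SAHF to rebuild EFLAGS.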
5732  const TargetRegisterInfo *TRI = &getRegisterInfo();
5733  MachineBasicBlock::LivenessQueryResult LQR =
5734  MBB.computeRegisterLiveness(TRI, AX, MI);
5735  // We do not want to save and restore AX if we do not have to.
5736  // Moreover, if we did so while AX is dead, we would need to set an
5737  // undef flag on the use of AX; otherwise the verifier would complain
5738  // that we read an undef value.
5739  // We do not want to change the behavior of the machine verifier, as
5740  // reading an undef value is usually wrong.
5741  if (MachineBasicBlock::LQR_Unknown == LQR) {
5742  LivePhysRegs LPR(TRI);
5743  LPR.addLiveOuts(MBB);
5744  MachineBasicBlock::iterator I = MBB.end();
5745  while (I != MI) {
5746  --I;
5747  LPR.stepBackward(*I);
5748  }
5749  // AX contains the top most register in the aliasing hierarchy.
5750  // It may not be live, but one of its aliases may be.
5751  for (MCRegAliasIterator AI(AX, TRI, true);
5752  AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI)
5753  LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live
5754  : MachineBasicBlock::LQR_Dead;
5755  }
5756  bool AXDead = (Reg == AX) || (MachineBasicBlock::LQR_Dead == LQR);
5757  if (!AXDead)
5758  BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
5759  if (FromEFLAGS) {
5760  BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
5761  BuildMI(MBB, MI, DL, get(X86::LAHF));
5762  BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
5763  }
5764  if (ToEFLAGS) {
5765  BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
5766  BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
5767  .addReg(X86::AL)
5768  .addImm(INT8_MAX);
5769  BuildMI(MBB, MI, DL, get(X86::SAHF));
5770  }
5771  if (!AXDead)
5772  BuildMI(MBB, MI, DL, get(Pop), AX);
5773  return;
5774  }
5775 
5776  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
5777  << " to " << RI.getName(DestReg) << '\n');
5778  llvm_unreachable("Cannot emit physreg copy instruction");
5779 }
5780 
5781 static unsigned getLoadStoreRegOpcode(unsigned Reg,
5782  const TargetRegisterClass *RC,
5783  bool isStackAligned,
5784  const X86Subtarget &STI,
5785  bool load) {
5786  bool HasAVX = STI.hasAVX();
5787  bool HasAVX512 = STI.hasAVX512();
5788  bool HasVLX = STI.hasVLX();
5789 
5790  switch (RC->getSize()) {
5791  default:
5792  llvm_unreachable("Unknown spill size");
5793  case 1:
5794  assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
5795  if (STI.is64Bit())
5796  // Copying to or from a physical H register on x86-64 requires a NOREX
5797  // move. Otherwise use a normal move.
5798  if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
5799  return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
5800  return load ? X86::MOV8rm : X86::MOV8mr;
5801  case 2:
5802  if (X86::VK16RegClass.hasSubClassEq(RC))
5803  return load ? X86::KMOVWkm : X86::KMOVWmk;
5804  assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
5805  return load ? X86::MOV16rm : X86::MOV16mr;
5806  case 4:
5807  if (X86::GR32RegClass.hasSubClassEq(RC))
5808  return load ? X86::MOV32rm : X86::MOV32mr;
5809  if (X86::FR32XRegClass.hasSubClassEq(RC))
5810  return load ?
5811  (HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
5812  (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
5813  if (X86::RFP32RegClass.hasSubClassEq(RC))
5814  return load ? X86::LD_Fp32m : X86::ST_Fp32m;
5815  if (X86::VK32RegClass.hasSubClassEq(RC))
5816  return load ? X86::KMOVDkm : X86::KMOVDmk;
5817  llvm_unreachable("Unknown 4-byte regclass");
5818  case 8:
5819  if (X86::GR64RegClass.hasSubClassEq(RC))
5820  return load ? X86::MOV64rm : X86::MOV64mr;
5821  if (X86::FR64XRegClass.hasSubClassEq(RC))
5822  return load ?
5823  (HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
5824  (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
5825  if (X86::VR64RegClass.hasSubClassEq(RC))
5826  return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
5827  if (X86::RFP64RegClass.hasSubClassEq(RC))
5828  return load ? X86::LD_Fp64m : X86::ST_Fp64m;
5829  if (X86::VK64RegClass.hasSubClassEq(RC))
5830  return load ? X86::KMOVQkm : X86::KMOVQmk;
5831  llvm_unreachable("Unknown 8-byte regclass");
5832  case 10:
5833  assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
5834  return load ? X86::LD_Fp80m : X86::ST_FpP80m;
5835  case 16: {
5836  assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
5837  // If stack is realigned we can use aligned stores.
5838  if (isStackAligned)
5839  return load ?
5840  (HasVLX ? X86::VMOVAPSZ128rm :
5841  HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
5842  HasAVX ? X86::VMOVAPSrm :
5843  X86::MOVAPSrm):
5844  (HasVLX ? X86::VMOVAPSZ128mr :
5845  HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
5846  HasAVX ? X86::VMOVAPSmr :
5847  X86::MOVAPSmr);
5848  else
5849  return load ?
5850  (HasVLX ? X86::VMOVUPSZ128rm :
5851  HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
5852  HasAVX ? X86::VMOVUPSrm :
5853  X86::MOVUPSrm):
5854  (HasVLX ? X86::VMOVUPSZ128mr :
5855  HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
5856  HasAVX ? X86::VMOVUPSmr :
5857  X86::MOVUPSmr);
5858  }
5859  case 32:
5860  assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
5861  // If the stack is realigned, we can use aligned stores.
5862  if (isStackAligned)
5863  return load ?
5864  (HasVLX ? X86::VMOVAPSZ256rm :
5865  HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
5866  X86::VMOVAPSYrm) :
5867  (HasVLX ? X86::VMOVAPSZ256mr :
5868  HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX :
5869  X86::VMOVAPSYmr);
5870  else
5871  return load ?
5872  (HasVLX ? X86::VMOVUPSZ256rm :
5873  HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX :
5874  X86::VMOVUPSYrm) :
5875  (HasVLX ? X86::VMOVUPSZ256mr :
5876  HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX :
5877  X86::VMOVUPSYmr);
5878  case 64:
5879  assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
5880  assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
5881  if (isStackAligned)
5882  return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
5883  else
5884  return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
5885  }
5886 }
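// Examples of the selection above:
//   GR32 spill                          -> X86::MOV32mr / reload X86::MOV32rm
//   VR128 spill, aligned slot, AVX only -> X86::VMOVAPSmr / X86::VMOVAPSrm
//   VR128 spill, unaligned slot, VLX    -> X86::VMOVUPSZ128mr / X86::VMOVUPSZ128rm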
5887 
5888 bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
5889  int64_t &Offset,
5890  const TargetRegisterInfo *TRI) const {
5891  const MCInstrDesc &Desc = MemOp.getDesc();
5892  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
5893  if (MemRefBegin < 0)
5894  return false;
5895 
5896  MemRefBegin += X86II::getOperandBias(Desc);
5897 
5898  MachineOperand &BaseMO = MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
5899  if (!BaseMO.isReg()) // Can be an MO_FrameIndex
5900  return false;
5901 
5902  BaseReg = BaseMO.getReg();
5903  if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
5904  return false;
5905 
5906  if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
5907  X86::NoRegister)
5908  return false;
5909 
5910  const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
5911 
5912  // Displacement can be symbolic
5913  if (!DispMO.isImm())
5914  return false;
5915 
5916  Offset = DispMO.getImm();
5917 
5918  return true;
5919 }
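// Sketch: for a simple address such as "MOV32rm %eax, [%rbx + 16]" (scale 1,
// no index register, immediate displacement) this reports BaseReg = RBX and
// Offset = 16; scaled, indexed, frame-index or symbolic addresses make it
// return false.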
5920 
5921 static unsigned getStoreRegOpcode(unsigned SrcReg,
5922  const TargetRegisterClass *RC,
5923  bool isStackAligned,
5924  const X86Subtarget &STI) {
5925  return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false);
5926 }
5927 
5928 
5929 static unsigned getLoadRegOpcode(unsigned DestReg,
5930  const TargetRegisterClass *RC,
5931  bool isStackAligned,
5932  const X86Subtarget &STI) {
5933  return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true);
5934 }
5935 
5936 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
5937  MachineBasicBlock::iterator MI,
5938  unsigned SrcReg, bool isKill, int FrameIdx,
5939  const TargetRegisterClass *RC,
5940  const TargetRegisterInfo *TRI) const {
5941  const MachineFunction &MF = *MBB.getParent();
5942  assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= RC->getSize() &&
5943  "Stack slot too small for store");
5944  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
5945  bool isAligned =
5946  (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
5947  RI.canRealignStack(MF);
5948  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
5949  DebugLoc DL = MBB.findDebugLoc(MI);
5950  addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
5951  .addReg(SrcReg, getKillRegState(isKill));
5952 }
5953 
5954 void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
5955  bool isKill,
5956  SmallVectorImpl<MachineOperand> &Addr,
5957  const TargetRegisterClass *RC,
5958  MachineInstr::mmo_iterator MMOBegin,
5959  MachineInstr::mmo_iterator MMOEnd,
5960  SmallVectorImpl<MachineInstr*> &NewMIs) const {
5961  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
5962  bool isAligned = MMOBegin != MMOEnd &&
5963  (*MMOBegin)->getAlignment() >= Alignment;
5964  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
5965  DebugLoc DL;
5966  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
5967  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
5968  MIB.addOperand(Addr[i]);
5969  MIB.addReg(SrcReg, getKillRegState(isKill));
5970  (*MIB).setMemRefs(MMOBegin, MMOEnd);
5971  NewMIs.push_back(MIB);
5972 }
5973 
5974 
5975 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
5976  MachineBasicBlock::iterator MI,
5977  unsigned DestReg, int FrameIdx,
5978  const TargetRegisterClass *RC,
5979  const TargetRegisterInfo *TRI) const {
5980  const MachineFunction &MF = *MBB.getParent();
5981  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
5982  bool isAligned =
5983  (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
5984  RI.canRealignStack(MF);
5985  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
5986  DebugLoc DL = MBB.findDebugLoc(MI);
5987  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
5988 }
5989 
5990 void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
5991  SmallVectorImpl<MachineOperand> &Addr,
5992  const TargetRegisterClass *RC,
5993  MachineInstr::mmo_iterator MMOBegin,
5994  MachineInstr::mmo_iterator MMOEnd,
5995  SmallVectorImpl<MachineInstr*> &NewMIs) const {
5996  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
5997  bool isAligned = MMOBegin != MMOEnd &&
5998  (*MMOBegin)->getAlignment() >= Alignment;
5999  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
6000  DebugLoc DL;
6001  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
6002  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
6003  MIB.addOperand(Addr[i]);
6004  (*MIB).setMemRefs(MMOBegin, MMOEnd);
6005  NewMIs.push_back(MIB);
6006 }
6007 
6008 bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
6009  unsigned &SrcReg2, int &CmpMask,
6010  int &CmpValue) const {
6011  switch (MI.getOpcode()) {
6012  default: break;
6013  case X86::CMP64ri32:
6014  case X86::CMP64ri8:
6015  case X86::CMP32ri:
6016  case X86::CMP32ri8:
6017  case X86::CMP16ri:
6018  case X86::CMP16ri8:
6019  case X86::CMP8ri:
6020  if (!MI.getOperand(1).isImm())
6021  return false;
6022  SrcReg = MI.getOperand(0).getReg();
6023  SrcReg2 = 0;
6024  CmpMask = ~0;
6025  CmpValue = MI.getOperand(1).getImm();
6026  return true;
6027  // A SUB can be used to perform comparison.
6028  case X86::SUB64rm:
6029  case X86::SUB32rm:
6030  case X86::SUB16rm:
6031  case X86::SUB8rm:
6032  SrcReg = MI.getOperand(1).getReg();
6033  SrcReg2 = 0;
6034  CmpMask = ~0;
6035  CmpValue = 0;
6036  return true;
6037  case X86::SUB64rr:
6038  case X86::SUB32rr:
6039  case X86::SUB16rr:
6040  case X86::SUB8rr:
6041  SrcReg = MI.getOperand(1).getReg();
6042  SrcReg2 = MI.getOperand(2).getReg();
6043  CmpMask = ~0;
6044  CmpValue = 0;
6045  return true;
6046  case X86::SUB64ri32:
6047  case X86::SUB64ri8:
6048  case X86::SUB32ri:
6049  case X86::SUB32ri8:
6050  case X86::SUB16ri:
6051  case X86::SUB16ri8:
6052  case X86::SUB8ri:
6053  if (!MI.getOperand(2).isImm())
6054  return false;
6055  SrcReg = MI.getOperand(1).getReg();
6056  SrcReg2 = 0;
6057  CmpMask = ~0;
6058  CmpValue = MI.getOperand(2).getImm();
6059  return true;
6060  case X86::CMP64rr:
6061  case X86::CMP32rr:
6062  case X86::CMP16rr:
6063  case X86::CMP8rr:
6064  SrcReg = MI.getOperand(0).getReg();
6065  SrcReg2 = MI.getOperand(1).getReg();
6066  CmpMask = ~0;
6067  CmpValue = 0;
6068  return true;
6069  case X86::TEST8rr:
6070  case X86::TEST16rr:
6071  case X86::TEST32rr:
6072  case X86::TEST64rr:
6073  SrcReg = MI.getOperand(0).getReg();
6074  if (MI.getOperand(1).getReg() != SrcReg)
6075  return false;
6076  // Compare against zero.
6077  SrcReg2 = 0;
6078  CmpMask = ~0;
6079  CmpValue = 0;
6080  return true;
6081  }
6082  return false;
6083 }
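// Sketch of the decompositions above:
//   CMP32ri %eax, 42    -> SrcReg = EAX, SrcReg2 = 0,  CmpValue = 42
//   SUB32rr %d, %a, %b  -> SrcReg = %a,  SrcReg2 = %b, CmpValue = 0
//   TEST32rr %a, %a     -> SrcReg = %a,  SrcReg2 = 0,  CmpValue = 0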
6084 
6085 /// Check whether the first instruction, whose only
6086 /// purpose is to update flags, can be made redundant.
6087 /// CMPrr can be made redundant by SUBrr if the operands are the same.
6088 /// This function can be extended later on.
6089 /// SrcReg, SrcReg2: register operands for FlagI.
6090 /// ImmValue: immediate for FlagI if it takes an immediate.
6091 inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
6092  unsigned SrcReg2, int ImmValue,
6093  MachineInstr &OI) {
6094  if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
6095  (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
6096  (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
6097  (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
6098  ((OI.getOperand(1).getReg() == SrcReg &&
6099  OI.getOperand(2).getReg() == SrcReg2) ||
6100  (OI.getOperand(1).getReg() == SrcReg2 &&
6101  OI.getOperand(2).getReg() == SrcReg)))
6102  return true;
6103 
6104  if (((FlagI.getOpcode() == X86::CMP64ri32 &&
6105  OI.getOpcode() == X86::SUB64ri32) ||
6106  (FlagI.getOpcode() == X86::CMP64ri8 &&
6107  OI.getOpcode() == X86::SUB64ri8) ||
6108  (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
6109  (FlagI.getOpcode() == X86::CMP32ri8 &&
6110  OI.getOpcode() == X86::SUB32ri8) ||
6111  (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
6112  (FlagI.getOpcode() == X86::CMP16ri8 &&
6113  OI.getOpcode() == X86::SUB16ri8) ||
6114  (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
6115  OI.getOperand(1).getReg() == SrcReg &&
6116  OI.getOperand(2).getImm() == ImmValue)
6117  return true;
6118  return false;
6119 }
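// Sketch: "CMP32rr %a, %b" can be made redundant by an earlier
// "SUB32rr %d, %a, %b" (or by the operand-swapped form, in which case the
// caller must also swap the consumers' condition codes), and "CMP32ri %a, 5"
// by "SUB32ri %d, %a, 5", since the SUB already set EFLAGS from the same values.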
6120 
6121 /// Check whether the definition can be converted
6122 /// to remove a comparison against zero.
6123 inline static bool isDefConvertible(MachineInstr &MI) {
6124  switch (MI.getOpcode()) {
6125  default: return false;
6126 
6127  // The shift instructions only modify ZF if their shift count is non-zero.
6128  // N.B.: The processor truncates the shift count depending on the encoding.
6129  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri:
6130  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri:
6131  return getTruncatedShiftCount(MI, 2) != 0;
6132 
6133  // Some left shift instructions can be turned into LEA instructions but only
6134  // if their flags aren't used. Avoid transforming such instructions.
6135  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{
6136  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
6137  if (isTruncatedShiftCountForLEA(ShAmt)) return false;
6138  return ShAmt != 0;
6139  }
6140 
6141  case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
6142  case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
6143  return getTruncatedShiftCount(MI, 3) != 0;
6144 
6145  case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
6146  case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
6147  case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
6148  case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
6149  case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
6150  case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
6151  case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
6152  case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
6153  case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
6154  case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
6155  case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
6156  case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
6157  case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
6158  case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
6159  case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
6160  case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
6161  case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
6162  case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
6163  case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
6164  case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
6165  case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
6166  case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
6167  case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
6168  case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
6169  case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
6170  case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
6171  case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
6172  case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
6173  case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
6174  case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
6175  case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
6176  case X86::ADC32ri: case X86::ADC32ri8:
6177  case X86::ADC32rr: case X86::ADC64ri32:
6178  case X86::ADC64ri8: case X86::ADC64rr:
6179  case X86::SBB32ri: case X86::SBB32ri8:
6180  case X86::SBB32rr: case X86::SBB64ri32:
6181  case X86::SBB64ri8: case X86::SBB64rr:
6182  case X86::ANDN32rr: case X86::ANDN32rm:
6183  case X86::ANDN64rr: case X86::ANDN64rm:
6184  case X86::BEXTR32rr: case X86::BEXTR64rr:
6185  case X86::BEXTR32rm: case X86::BEXTR64rm:
6186  case X86::BLSI32rr: case X86::BLSI32rm:
6187  case X86::BLSI64rr: case X86::BLSI64rm:
6188  case X86::BLSMSK32rr:case X86::BLSMSK32rm:
6189  case X86::BLSMSK64rr:case X86::BLSMSK64rm:
6190  case X86::BLSR32rr: case X86::BLSR32rm:
6191  case X86::BLSR64rr: case X86::BLSR64rm:
6192  case X86::BZHI32rr: case X86::BZHI32rm:
6193  case X86::BZHI64rr: case X86::BZHI64rm:
6194  case X86::LZCNT16rr: case X86::LZCNT16rm:
6195  case X86::LZCNT32rr: case X86::LZCNT32rm:
6196  case X86::LZCNT64rr: case X86::LZCNT64rm:
6197  case X86::POPCNT16rr:case X86::POPCNT16rm:
6198  case X86::POPCNT32rr:case X86::POPCNT32rm:
6199  case X86::POPCNT64rr:case X86::POPCNT64rm:
6200  case X86::TZCNT16rr: case X86::TZCNT16rm:
6201  case X86::TZCNT32rr: case X86::TZCNT32rm:
6202  case X86::TZCNT64rr: case X86::TZCNT64rm:
6203  return true;
6204  }
6205 }
6206 
6207 /// Check whether the use can be converted to remove a comparison against zero.
6208 static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
6209  switch (MI.getOpcode()) {
6210  default: return X86::COND_INVALID;
6211  case X86::LZCNT16rr: case X86::LZCNT16rm:
6212  case X86::LZCNT32rr: case X86::LZCNT32rm:
6213  case X86::LZCNT64rr: case X86::LZCNT64rm:
6214  return X86::COND_B;
6215  case X86::POPCNT16rr:case X86::POPCNT16rm:
6216  case X86::POPCNT32rr:case X86::POPCNT32rm:
6217  case X86::POPCNT64rr:case X86::POPCNT64rm:
6218  return X86::COND_E;
6219  case X86::TZCNT16rr: case X86::TZCNT16rm:
6220  case X86::TZCNT32rr: case X86::TZCNT32rm:
6221  case X86::TZCNT64rr: case X86::TZCNT64rm:
6222  return X86::COND_B;
6223  }
6224 }
6225 
6226 /// Check if there exists an earlier instruction that
6227 /// operates on the same source operands and sets flags in the same way as
6228 /// Compare; remove Compare if possible.
6229 bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
6230  unsigned SrcReg2, int CmpMask,
6231  int CmpValue,
6232  const MachineRegisterInfo *MRI) const {
6233  // Check whether we can replace SUB with CMP.
6234  unsigned NewOpcode = 0;
6235  switch (CmpInstr.getOpcode()) {
6236  default: break;
6237  case X86::SUB64ri32:
6238  case X86::SUB64ri8:
6239  case X86::SUB32ri:
6240  case X86::SUB32ri8:
6241  case X86::SUB16ri:
6242  case X86::SUB16ri8:
6243  case X86::SUB8ri:
6244  case X86::SUB64rm:
6245  case X86::SUB32rm:
6246  case X86::SUB16rm:
6247  case X86::SUB8rm:
6248  case X86::SUB64rr:
6249  case X86::SUB32rr:
6250  case X86::SUB16rr:
6251  case X86::SUB8rr: {
6252  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
6253  return false;
6254  // There is no use of the destination register, so we can replace SUB with CMP.
6255  switch (CmpInstr.getOpcode()) {
6256  default: llvm_unreachable("Unreachable!");
6257  case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
6258  case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
6259  case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
6260  case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
6261  case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
6262  case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
6263  case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
6264  case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
6265  case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
6266  case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
6267  case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
6268  case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
6269  case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
6270  case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
6271  case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
6272  }
6273  CmpInstr.setDesc(get(NewOpcode));
6274  CmpInstr.RemoveOperand(0);
6275  // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
6276  if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
6277  NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
6278  return false;
6279  }
6280  }
6281 
6282  // Get the unique definition of SrcReg.
6283  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
6284  if (!MI) return false;
6285 
6286  // CmpInstr is the first instruction of the BB.
6287  MachineBasicBlock::iterator I = CmpInstr, Def = MI;
6288 
6289  // If we are comparing against zero, check whether we can use MI to update
6290  // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
6291  bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
6292  if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
6293  return false;
6294 
6295  // If we have a use of the source register between the def and our compare
6296  // instruction we can eliminate the compare iff the use sets EFLAGS in the
6297  // right way.
6298  bool ShouldUpdateCC = false;
6299  X86::CondCode NewCC = X86::COND_INVALID;
6300  if (IsCmpZero && !isDefConvertible(*MI)) {
6301  // Scan forward from the use until we hit the use we're looking for or the
6302  // compare instruction.
6303  for (MachineBasicBlock::iterator J = MI;; ++J) {
6304  // Do we have a convertible instruction?
6305  NewCC = isUseDefConvertible(*J);
6306  if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
6307  J->getOperand(1).getReg() == SrcReg) {
6308  assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
6309  ShouldUpdateCC = true; // Update CC later on.
6310  // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
6311  // with the new def.
6312  Def = J;
6313  MI = &*Def;
6314  break;
6315  }
6316 
6317  if (J == I)
6318  return false;
6319  }
6320  }
6321 
6322  // We are searching for an earlier instruction that can make CmpInstr
6323  // redundant and that instruction will be saved in Sub.
6324  MachineInstr *Sub = nullptr;
6325  const TargetRegisterInfo *TRI = &getRegisterInfo();
6326 
6327  // We iterate backward, starting from the instruction before CmpInstr and
6328  // stop when reaching the definition of a source register or done with the BB.
6329  // RI points to the instruction before CmpInstr.
6330  // If the definition is in this basic block, RE points to the definition;
6331  // otherwise, RE is the rend of the basic block.
6332  MachineBasicBlock::reverse_iterator
6333  RI = ++I.getReverse(),
6334  RE = CmpInstr.getParent() == MI->getParent()
6335  ? Def.getReverse() /* points to MI */
6336  : CmpInstr.getParent()->rend();
6337  MachineInstr *Movr0Inst = nullptr;
6338  for (; RI != RE; ++RI) {
6339  MachineInstr &Instr = *RI;
6340  // Check whether CmpInstr can be made redundant by the current instruction.
6341  if (!IsCmpZero &&
6342  isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
6343  Sub = &Instr;
6344  break;
6345  }
6346 
6347  if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
6348  Instr.readsRegister(X86::EFLAGS, TRI)) {
6349  // This instruction modifies or uses EFLAGS.
6350 
6351  // MOV32r0 etc. are implemented with xor which clobbers condition code.
6352  // They are safe to move up, if the definition to EFLAGS is dead and
6353  // earlier instructions do not read or write EFLAGS.
6354  if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
6355  Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
6356  Movr0Inst = &Instr;
6357  continue;
6358  }
6359 
6360  // We can't remove CmpInstr.
6361  return false;
6362  }
6363  }
6364 
6365  // Return false if no candidates exist.
6366  if (!IsCmpZero && !Sub)
6367  return false;
6368 
6369  bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
6370  Sub->getOperand(2).getReg() == SrcReg);
6371 
6372  // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
6373  // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
6374  // If we are done with the basic block, we need to check whether EFLAGS is
6375  // live-out.
6376  bool IsSafe = false;
6377  SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
6378  MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
6379  for (++I; I != E; ++I) {
6380  const MachineInstr &Instr = *I;
6381  bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
6382  bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
6383  // We should check the usage if this instruction uses and updates EFLAGS.
6384  if (!UseEFLAGS && ModifyEFLAGS) {
6385  // It is safe to remove CmpInstr if EFLAGS is updated again.
6386  IsSafe = true;
6387  break;
6388  }
6389  if (!UseEFLAGS && !ModifyEFLAGS)
6390  continue;
6391 
6392  // EFLAGS is used by this instruction.
6393  X86::CondCode OldCC = X86::COND_INVALID;
6394  bool OpcIsSET = false;
6395  if (IsCmpZero || IsSwapped) {
6396  // We decode the condition code from opcode.
6397  if (Instr.isBranch())
6398  OldCC = getCondFromBranchOpc(Instr.getOpcode());
6399  else {
6400  OldCC = getCondFromSETOpc(Instr.getOpcode());
6401  if (OldCC != X86::COND_INVALID)
6402  OpcIsSET = true;
6403  else
6404  OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
6405  }
6406  if (OldCC == X86::COND_INVALID) return false;
6407  }
6408  if (IsCmpZero) {
6409  switch (OldCC) {
6410  default: break;
6411  case X86::COND_A: case X86::COND_AE:
6412  case X86::COND_B: case X86::COND_BE:
6413  case X86::COND_G: case X86::COND_GE:
6414  case X86::COND_L: case X86::COND_LE:
6415  case X86::COND_O: case X86::COND_NO:
6416  // CF and OF are used, we can't perform this optimization.
6417  return false;
6418  }
6419 
6420  // If we're updating the condition code, check whether we have to reverse
6421  // the condition.
6422  if (ShouldUpdateCC)
6423  switch (OldCC) {
6424  default:
6425  return false;
6426  case X86::COND_E:
6427  break;
6428  case X86::COND_NE:
6429  NewCC = GetOppositeBranchCondition(NewCC);
6430  break;
6431  }
6432  } else if (IsSwapped) {
6433  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
6434  // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
6435  // We swap the condition code and synthesize the new opcode.
6436  NewCC = getSwappedCondition(OldCC);
6437  if (NewCC == X86::COND_INVALID) return false;
6438  }
6439 
6440  if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) {
6441  // Synthesize the new opcode.
6442  bool HasMemoryOperand = Instr.hasOneMemOperand();
6443  unsigned NewOpc;
6444  if (Instr.isBranch())
6445  NewOpc = GetCondBranchFromCond(NewCC);
6446  else if(OpcIsSET)
6447  NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
6448  else {
6449  unsigned DstReg = Instr.getOperand(0).getReg();
6450  NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
6451  HasMemoryOperand);
6452  }
6453 
6454  // Push the MachineInstr to OpsToUpdate.
6455  // If it is safe to remove CmpInstr, the condition code of these
6456  // instructions will be modified.
6457  OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
6458  }
6459  if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
6460  // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
6461  IsSafe = true;
6462  break;
6463  }
6464  }
6465 
6466  // If EFLAGS is neither killed nor re-defined, we should check whether it is
6467  // live-out. If it is live-out, do not optimize.
6468  if ((IsCmpZero || IsSwapped) && !IsSafe) {
6469  MachineBasicBlock *MBB = CmpInstr.getParent();
6470  for (MachineBasicBlock *Successor : MBB->successors())
6471  if (Successor->isLiveIn(X86::EFLAGS))
6472  return false;
6473  }
6474 
6475  // The instruction to be updated is either Sub or MI.
6476  Sub = IsCmpZero ? MI : Sub;
6477  // Move Movr0Inst to the appropriate place before Sub.
6478  if (Movr0Inst) {
6479  // Look backwards until we find a def that doesn't use the current EFLAGS.
6480  Def = Sub;
6481  MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
6482  InsertE = Sub->getParent()->rend();
6483  for (; InsertI != InsertE; ++InsertI) {
6484  MachineInstr *Instr = &*InsertI;
6485  if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
6486  Instr->modifiesRegister(X86::EFLAGS, TRI)) {
6487  Sub->getParent()->remove(Movr0Inst);
6488  Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
6489  Movr0Inst);
6490  break;
6491  }
6492  }
6493  if (InsertI == InsertE)
6494  return false;
6495  }
6496 
6497  // Make sure Sub instruction defines EFLAGS and mark the def live.
6498  unsigned i = 0, e = Sub->getNumOperands();
6499  for (; i != e; ++i) {
6500  MachineOperand &MO = Sub->getOperand(i);
6501  if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
6502  MO.setIsDead(false);
6503  break;
6504  }
6505  }
6506  assert(i != e && "Unable to locate a def EFLAGS operand");
6507 
6508  CmpInstr.eraseFromParent();
6509 
6510  // Modify the condition code of instructions in OpsToUpdate.
6511  for (auto &Op : OpsToUpdate)
6512  Op.first->setDesc(get(Op.second));
6513  return true;
6514 }
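// Net effect, sketched:
//   %d = SUB32rr %a, %b           %d = SUB32rr %a, %b   (EFLAGS def marked live)
//   CMP32rr %a, %b          =>    ; compare removed
//   JE %bb                        JE %bb
// For compares against zero, the EFLAGS produced by a convertible definition
// of the source register (ADD, AND, SHR, ... per isDefConvertible) are reused
// instead of keeping a separate compare.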
6515 
6516 /// Try to remove the load by folding it to a register
6517 /// operand at the use. We fold the load instruction if the load defines a virtual
6518 /// register, the virtual register is used once in the same BB, and the
6519 /// instructions in-between do not load or store, and have no side effects.
6520 MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
6521  const MachineRegisterInfo *MRI,
6522  unsigned &FoldAsLoadDefReg,
6523  MachineInstr *&DefMI) const {
6524  // Check whether we can move DefMI here.
6525  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
6526  assert(DefMI);
6527  bool SawStore = false;
6528  if (!DefMI->isSafeToMove(nullptr, SawStore))
6529  return nullptr;
6530 
6531  // Collect information about virtual register operands of MI.
6532  SmallVector<unsigned, 1> SrcOperandIds;
6533  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6534  MachineOperand &MO = MI.getOperand(i);
6535  if (!MO.isReg())
6536  continue;
6537  unsigned Reg = MO.getReg();
6538  if (Reg != FoldAsLoadDefReg)
6539  continue;
6540  // Do not fold if we have a subreg use or a def.
6541  if (MO.getSubReg() || MO.isDef())
6542  return nullptr;
6543  SrcOperandIds.push_back(i);
6544  }
6545  if (SrcOperandIds.empty())
6546  return nullptr;
6547 
6548  // Check whether we can fold the def into SrcOperandId.
6549  if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
6550  FoldAsLoadDefReg = 0;
6551  return FoldMI;
6552  }
6553 
6554  return nullptr;
6555 }
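// Sketch: with FoldAsLoadDefReg = %1 and
//   %1 = MOV32rm <mem>
//   %2 = ADD32rr %0, %1     ; only use of %1, no memory ops in between
// foldMemoryOperand can rewrite the use into "%2 = ADD32rm %0, <mem>"; the now
// unused load is left for the caller and later passes to delete.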
6556 
6557 /// Expand a single-def pseudo instruction to a two-addr
6558 /// instruction with two undef reads of the register being defined.
6559 /// This is used for mapping:
6560 /// %xmm4 = V_SET0
6561 /// to:
6562 /// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
6563 ///
6564 static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
6565  const MCInstrDesc &Desc) {
6566  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
6567  unsigned Reg = MIB->getOperand(0).getReg();
6568  MIB->setDesc(Desc);
6569 
6570  // MachineInstr::addOperand() will insert explicit operands before any
6571  // implicit operands.
6572  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
6573  // But we don't trust that.
6574  assert(MIB->getOperand(1).getReg() == Reg &&
6575  MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
6576  return true;
6577 }
6578 
6579 /// Expand a single-def pseudo instruction to a two-addr
6580 /// instruction with two %k0 reads.
6581 /// This is used for mapping:
6582 /// %k4 = K_SET1
6583 /// to:
6584 /// %k4 = KXNORrr %k0, %k0
6585 static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
6586  const MCInstrDesc &Desc, unsigned Reg) {
6587  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
6588  MIB->setDesc(Desc);
6589  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
6590  return true;
6591 }
6592 
6593 static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
6594  bool MinusOne) {
6595  MachineBasicBlock &MBB = *MIB->getParent();
6596  DebugLoc DL = MIB->getDebugLoc();
6597  unsigned Reg = MIB->getOperand(0).getReg();
6598 
6599  // Insert the XOR.
6600  BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
6601  .addReg(Reg, RegState::Undef)
6602  .addReg(Reg, RegState::Undef);
6603 
6604  // Turn the pseudo into an INC or DEC.
6605  MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
6606  MIB.addReg(Reg);
6607 
6608  return true;
6609 }
6610 
6611 static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
6612  const TargetInstrInfo &TII,
6613  const X86Subtarget &Subtarget) {
6614  MachineBasicBlock &MBB = *MIB->getParent();
6615  DebugLoc DL = MIB->getDebugLoc();
6616  int64_t Imm = MIB->getOperand(1).getImm();
6617  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
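   // Illustrative sketch of the lowering in 64-bit mode (register and value are
   // hypothetical): "%rax = MOV64ImmSExti8 42" becomes "PUSH64i8 42" followed by
   // "%rax = POP64r", trading the long mov-immediate encoding for two short
   // instructions when optimizing aggressively for size.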
6618  MachineBasicBlock::iterator I = MIB.getInstr();
6619 
6620  int StackAdjustment;
6621 
6622  if (Subtarget.is64Bit()) {
6623  assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
6624  MIB->getOpcode() == X86::MOV32ImmSExti8);
6625 
6626  // Can't use push/pop lowering if the function might write to the red zone.
6627  X86MachineFunctionInfo *X86FI =
6628  MBB.getParent()->getInfo<X86MachineFunctionInfo>();
6629  if (X86FI->getUsesRedZone()) {
6630  MIB->setDesc(TII.get(MIB->getOpcode() ==
6631  X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
6632  return true;
6633  }
6634 
6635  // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
6636  // widen the register if necessary.
6637  StackAdjustment = 8;
6638  BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
6639  MIB->setDesc(TII.get(X86::POP64r));
6640  MIB->getOperand(0)
6641  .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
6642  } else {
6643  assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
6644  StackAdjustment = 4;
6645  BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
6646  MIB->setDesc(TII.get(X86::POP32r));
6647  }
6648 
6649  // Build CFI if necessary.
6650  MachineFunction &MF = *MBB.getParent();
6651  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
6652  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
6653  bool NeedsDwarfCFI =
6654  !IsWin64Prologue &&
6655  (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
6656  bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
6657  if (EmitCFI) {
6658  TFL->BuildCFI(MBB, I, DL,
6659  MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
6660  TFL->BuildCFI(MBB, std::next(I), DL,
6661  MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
6662  }
6663 
6664  return true;
6665 }
6666 
6667 // LoadStackGuard has so far only been implemented for 64-bit MachO. A
6668 // different code sequence is needed for other targets.
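// As an illustrative sketch, "%reg = LOAD_STACK_GUARD" becomes two loads (GV is
// the stack-guard global referenced by the pseudo's memory operand):
//   %reg = MOV64rm %rip, 1, %noreg, GV@GOTPCREL, %noreg
//   %reg = MOV64rm %reg<kill>, 1, %noreg, 0, %noreg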
6669 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
6670  const TargetInstrInfo &TII) {
6671  MachineBasicBlock &MBB = *MIB->getParent();
6672  DebugLoc DL = MIB->getDebugLoc();
6673  unsigned Reg = MIB->getOperand(0).getReg();
6674  const GlobalValue *GV =
6675  cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
6676  auto Flags = MachineMemOperand::MOLoad |
6677  MachineMemOperand::MODereferenceable |
6678  MachineMemOperand::MOInvariant;
6679  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
6680  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8);
6681  MachineBasicBlock::iterator I = MIB.getInstr();
6682 
6683  BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
6684  .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
6685  .addMemOperand(MMO);
6686  MIB->setDebugLoc(DL);
6687  MIB->setDesc(TII.get(X86::MOV64rm));
6688  MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
6689 }
6690 
6691 // This is used to handle spills for 128/256-bit registers when we have AVX512,
6692 // but not VLX. If it uses an extended register, we need to use an instruction
6693 // that loads the lower 128/256 bits but is available with only AVX512F.
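// For example (illustrative), "%xmm16 = VMOVAPSZ128rm <mem>" has no VEX
// encoding, so it is rewritten as "%zmm16 = VBROADCASTF32X4rm <mem>"; the low
// 128 bits of %zmm16 then hold the reloaded value, which is all the spill
// reload needs.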
6694 static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
6695  const TargetRegisterInfo *TRI,
6696  const MCInstrDesc &LoadDesc,
6697  const MCInstrDesc &BroadcastDesc,
6698  unsigned SubIdx) {
6699  unsigned DestReg = MIB->getOperand(0).getReg();
6700  // Check if DestReg is XMM16-31 or YMM16-31.
6701  if (TRI->getEncodingValue(DestReg) < 16) {
6702  // We can use a normal VEX encoded load.
6703  MIB->setDesc(LoadDesc);
6704  } else {
6705  // Use a 128/256-bit VBROADCAST instruction.
6706  MIB->setDesc(BroadcastDesc);
6707  // Change the destination to a 512-bit register.
6708  DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
6709  MIB->getOperand(0).setReg(DestReg);
6710  }
6711  return true;
6712 }
6713 
6714 // This is used to handle spills for 128/256-bit registers when we have AVX512,
6715 // but not VLX. If it uses an extended register, we need to use an instruction
6716 // that stores the lower 128/256 bits but is available with only AVX512F.
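// For example (illustrative), "VMOVAPSZ128mr <mem>, %xmm16" is rewritten as
// "VEXTRACTF32x4Zmr <mem>, %zmm16, 0", which stores the low 128 bits of the
// extended register using an encoding that only requires AVX512F.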
6717 static bool expandNOVLXStore(MachineInstrBuilder &MIB,
6718  const TargetRegisterInfo *TRI,
6719  const MCInstrDesc &StoreDesc,
6720  const MCInstrDesc &ExtractDesc,
6721  unsigned SubIdx) {
6722  unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
6723  // Check if SrcReg is XMM16-31 or YMM16-31.
6724  if (TRI->getEncodingValue(SrcReg) < 16) {
6725  // We can use a normal VEX encoded store.
6726  MIB->setDesc(StoreDesc);
6727  } else {
6728  // Use a VEXTRACTF instruction.
6729  MIB->setDesc(ExtractDesc);
6730  // Change the source to a 512-bit register.
6731  SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
6732  MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
6733  MIB.addImm(0x0); // Append immediate to extract from the lower bits.
6734  }
6735 
6736  return true;
6737 }
6738 bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
6739  bool HasAVX = Subtarget.hasAVX();
6740  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
6741  switch (MI.getOpcode()) {
6742  case X86::MOV32r0:
6743  return Expand2AddrUndef(MIB, get(X86::XOR32rr));
6744  case X86::MOV32r1:
6745  return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
6746  case X86::MOV32r_1:
6747  return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
6748  case X86::MOV32ImmSExti8:
6749  case X86::MOV64ImmSExti8:
6750  return ExpandMOVImmSExti8(MIB, *this, Subtarget);
6751  case X86::SETB_C8r:
6752  return Expand2AddrUndef(MIB, get(X86::SBB8rr));
6753  case X86::SETB_C16r:
6754  return Expand2AddrUndef(MIB, get(X86::SBB16rr));
6755  case X86::SETB_C32r:
6756  return Expand2AddrUndef(MIB, get(X86::SBB32rr));
6757  case X86::SETB_C64r:
6758  return Expand2AddrUndef(MIB, get(X86::SBB64rr));
6759  case X86::V_SET0:
6760  case X86::FsFLD0SS:
6761  case X86::FsFLD0SD:
6762  return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
6763  case X86::AVX_SET0:
6764  assert(HasAVX && "AVX not supported");
6765  return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
6766  case X86::AVX512_128_SET0:
6767  return Expand2AddrUndef(MIB, get(X86::VPXORDZ128rr));
6768  case X86::AVX512_256_SET0:
6769  return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
6770  case X86::AVX512_512_SET0:
6771  return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
6772  case X86::AVX512_FsFLD0SS:
6773  case X86::AVX512_FsFLD0SD:
6774  return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr));
6775  case X86::V_SETALLONES:
6776  return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
6777  case X86::AVX2_SETALLONES:
6778  return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
6779  case X86::AVX512_512_SETALLONES: {
6780  unsigned Reg = MIB->getOperand(0).getReg();
6781  MIB->setDesc(get(X86::VPTERNLOGDZrri));
6782  // VPTERNLOGD needs 3 register inputs and an immediate.
6783  // 0xff will return 1s for any input.
6784  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
6785  .addReg(Reg, RegState::Undef).addImm(0xff);
6786  return true;
6787  }
6788  case X86::AVX512_512_SEXT_MASK_32:
6789  case X86::AVX512_512_SEXT_MASK_64: {
6790  unsigned Reg = MIB->getOperand(0).getReg();
6791  unsigned MaskReg = MIB->getOperand(1).getReg();
6792  unsigned MaskState = getRegState(MIB->getOperand(1));
6793  unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
6794  X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
6795  MI.RemoveOperand(1);
6796  MIB->setDesc(get(Opc));
6797  // VPTERNLOG needs 3 register inputs and an immediate.
6798  // 0xff will return 1s for any input.
6799  MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
6800  .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
6801  return true;
6802  }
6803  case X86::VMOVAPSZ128rm_NOVLX:
6804  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
6805  get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
6806  case X86::VMOVUPSZ128rm_NOVLX:
6807  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
6808  get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
6809  case X86::VMOVAPSZ256rm_NOVLX:
6810  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
6811  get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
6812  case X86::VMOVUPSZ256rm_NOVLX:
6813  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
6814  get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
6815  case X86::VMOVAPSZ128mr_NOVLX:
6816  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
6817  get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
6818  case X86::VMOVUPSZ128mr_NOVLX:
6819  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
6820  get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
6821  case X86::VMOVAPSZ256mr_NOVLX:
6822  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
6823  get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
6824  case X86::VMOVUPSZ256mr_NOVLX:
6825  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
6826  get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
6827  case X86::TEST8ri_NOREX:
6828  MI.setDesc(get(X86::TEST8ri));
6829  return true;
6830  case X86::MOV32ri64:
6831  MI.setDesc(get(X86::MOV32ri));
6832  return true;
6833 
6834  // KNL does not recognize dependency-breaking idioms for mask registers,
6835  // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
6836  // Using %k0 as the undef input register is a performance heuristic based
6837  // on the assumption that %k0 is used less frequently than the other mask
6838  // registers, since it is not usable as a write mask.
6839  // FIXME: A more advanced approach would be to choose the best input mask
6840  // register based on context.
6841  case X86::KSET0B:
6842  case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
6843  case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
6844  case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
6845  case X86::KSET1B:
6846  case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
6847  case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
6848  case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
6849  case TargetOpcode::LOAD_STACK_GUARD:
6850  expandLoadStackGuard(MIB, *this);
6851  return true;
6852  }
6853  return false;
6854 }
6855 
6856 static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
6857  int PtrOffset = 0) {
6858  unsigned NumAddrOps = MOs.size();
6859 
6860  if (NumAddrOps < 4) {
6861  // FrameIndex only - add an immediate offset (whether it's zero or not).
6862  for (unsigned i = 0; i != NumAddrOps; ++i)
6863  MIB.addOperand(MOs[i]);
6864  addOffset(MIB, PtrOffset);
6865  } else {
6866  // General Memory Addressing - we need to add any offset to an existing
6867  // offset.
6868  assert(MOs.size() == 5 && "Unexpected memory operand list length");
6869  for (unsigned i = 0; i != NumAddrOps; ++i) {
6870  const MachineOperand &MO = MOs[i];
6871  if (i == 3 && PtrOffset != 0) {
6872  MIB.addDisp(MO, PtrOffset);
6873  } else {
6874  MIB.addOperand(MO);
6875  }
6876  }
6877  }
6878 }
6879 
6880 static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
6881  ArrayRef<MachineOperand> MOs,
6882  MachineBasicBlock::iterator InsertPt,
6883  MachineInstr &MI,
6884  const TargetInstrInfo &TII) {
6885  // Create the base instruction with the memory operand as the first part.
6886  // Omit the implicit operands, something BuildMI can't do.
6887  MachineInstr *NewMI =
6888  MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
6889  MachineInstrBuilder MIB(MF, NewMI);
6890  addOperands(MIB, MOs);
6891 
6892  // Loop over the rest of the ri operands, converting them over.
6893  unsigned NumOps = MI.getDesc().getNumOperands() - 2;
6894  for (unsigned i = 0; i != NumOps; ++i) {
6895  MachineOperand &MO = MI.getOperand(i + 2);
6896  MIB.addOperand(MO);
6897  }
6898  for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
6899  MachineOperand &MO = MI.getOperand(i);
6900  MIB.addOperand(MO);
6901  }
6902 
6903  MachineBasicBlock *MBB = InsertPt->getParent();
6904  MBB->insert(InsertPt, NewMI);
6905 
6906  return MIB;
6907 }
6908 
6909 static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
6910  unsigned OpNo, ArrayRef<MachineOperand> MOs,
6911  MachineBasicBlock::iterator InsertPt,
6912  MachineInstr &MI, const TargetInstrInfo &TII,
6913  int PtrOffset = 0) {
6914  // Omit the implicit operands, something BuildMI can't do.
6915  MachineInstr *NewMI =
6916  MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
6917  MachineInstrBuilder MIB(MF, NewMI);
6918 
6919  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6920  MachineOperand &MO = MI.getOperand(i);
6921  if (i == OpNo) {
6922  assert(MO.isReg() && "Expected to fold into reg operand!");
6923  addOperands(MIB, MOs, PtrOffset);
6924  } else {
6925  MIB.addOperand(MO);
6926  }
6927  }
6928 
6929  MachineBasicBlock *MBB = InsertPt->getParent();
6930  MBB->insert(InsertPt, NewMI);
6931 
6932  return MIB;
6933 }
6934 
6935 static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
6936  ArrayRef<MachineOperand> MOs,
6937  MachineBasicBlock::iterator InsertPt,
6938  MachineInstr &MI) {
6939  MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
6940  MI.getDebugLoc(), TII.get(Opcode));
6941  addOperands(MIB, MOs);
6942  return MIB.addImm(0);
6943 }
6944 
6945 MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
6946  MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
6947  ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
6948  unsigned Size, unsigned Align) const {
6949  switch (MI.getOpcode()) {
6950  case X86::INSERTPSrr:
6951  case X86::VINSERTPSrr:
6952  case X86::VINSERTPSZrr:
6953  // Attempt to convert the load of the inserted vector into a folded load
6954  // of a single float.
6955  if (OpNum == 2) {
6956  unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
6957  unsigned ZMask = Imm & 15;
6958  unsigned DstIdx = (Imm >> 4) & 3;
6959  unsigned SrcIdx = (Imm >> 6) & 3;
6960 
6961  unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
6962  if (Size <= RCSize && 4 <= Align) {
6963  int PtrOffset = SrcIdx * 4;
6964  unsigned NewImm = (DstIdx << 4) | ZMask;
6965  unsigned NewOpCode =
6966  (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
6967  (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm :
6968  X86::INSERTPSrm;
6969  MachineInstr *NewMI =
6970  FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
6971  NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
6972  return NewMI;
6973  }
6974  }
6975  break;
6976  case X86::MOVHLPSrr:
6977  case X86::VMOVHLPSrr:
6978  case X86::VMOVHLPSZrr:
6979  // Move the upper 64 bits of the second operand to the lower 64 bits.
6980  // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
6981  // TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
6982  if (OpNum == 2) {
6983  unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
6984  if (Size <= RCSize && 8 <= Align) {
6985  unsigned NewOpCode =
6986  (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
6987  (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
6988  X86::MOVLPSrm;
6989  MachineInstr *NewMI =
6990  FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
6991  return NewMI;
6992  }
6993  }
6994  break;
6995  };
6996 
6997  return nullptr;
6998 }
6999 
7000 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
7001  MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
7002  ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
7003  unsigned Size, unsigned Align, bool AllowCommute) const {
7004  const DenseMap<unsigned,
7005  std::pair<uint16_t, uint16_t> > *OpcodeTablePtr = nullptr;
7006  bool isCallRegIndirect = Subtarget.callRegIndirect();
7007  bool isTwoAddrFold = false;
7008 
7009  // For CPUs that favor the register form of a call or push,
7010  // do not fold loads into calls or pushes, unless optimizing for size
7011  // aggressively.
7012  if (isCallRegIndirect && !MF.getFunction()->optForMinSize() &&
7013  (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r ||
7014  MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r ||
7015  MI.getOpcode() == X86::PUSH64r))
7016  return nullptr;
7017 
7018  unsigned NumOps = MI.getDesc().getNumOperands();
7019  bool isTwoAddr =
7020  NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
7021 
7022  // FIXME: AsmPrinter doesn't know how to handle
7023  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
7024  if (MI.getOpcode() == X86::ADD32ri &&
7025  MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
7026  return nullptr;
7027 
7028  MachineInstr *NewMI = nullptr;
7029 
7030  // Attempt to fold any custom cases we have.
7031  if (MachineInstr *CustomMI =
7032  foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align))
7033  return CustomMI;
7034 
7035  // Folding a memory location into the two-address part of a two-address
7036  // instruction is different from folding it elsewhere. It requires
7037  // replacing the *two* registers with the memory location.
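  // For example (illustrative), folding operands 0 and 1 of
  // "%reg = ADD32rr %reg, %reg2" against a stack slot yields the
  // read-modify-write form "ADD32mr <fi#N>, %reg2" rather than an instruction
  // with a separate register destination.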
7038  if (isTwoAddr && NumOps >= 2 && OpNum < 2 && MI.getOperand(0).isReg() &&
7039  MI.getOperand(1).isReg() &&
7040  MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
7041  OpcodeTablePtr = &RegOp2MemOpTable2Addr;
7042  isTwoAddrFold = true;
7043  } else if (OpNum == 0) {
7044  if (MI.getOpcode() == X86::MOV32r0) {
7045  NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
7046  if (NewMI)
7047  return NewMI;
7048  }
7049 
7050  OpcodeTablePtr = &RegOp2MemOpTable0;
7051  } else if (OpNum == 1) {
7052  OpcodeTablePtr = &RegOp2MemOpTable1;
7053  } else if (OpNum == 2) {
7054  OpcodeTablePtr = &RegOp2MemOpTable2;
7055  } else if (OpNum == 3) {
7056  OpcodeTablePtr = &RegOp2MemOpTable3;
7057  } else if (OpNum == 4) {
7058  OpcodeTablePtr = &RegOp2MemOpTable4;
7059  }
7060 
7061  // If table selected...
7062  if (OpcodeTablePtr) {
7063  // Find the Opcode to fuse
7064  auto I = OpcodeTablePtr->find(MI.getOpcode());
7065  if (I != OpcodeTablePtr->end()) {
7066  unsigned Opcode = I->second.first;
7067  unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
7068  if (Align < MinAlign)
7069  return nullptr;
7070  bool NarrowToMOV32rm = false;
7071  if (Size) {
7072  unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
7073  if (Size < RCSize) {
7074  // Check if it's safe to fold the load. If the size of the object is
7075  // narrower than the load width, then it's not.
7076  if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
7077  return nullptr;
7078  // If this is a 64-bit load, but the spill slot is 32, then we can do
7079  // a 32-bit load which is implicitly zero-extended. This likely is
7080  // due to live interval analysis remat'ing a load from stack slot.
7081  if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
7082  return nullptr;
7083  Opcode = X86::MOV32rm;
7084  NarrowToMOV32rm = true;
7085  }
7086  }
7087 
7088  if (isTwoAddrFold)
7089  NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
7090  else
7091  NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
7092 
7093  if (NarrowToMOV32rm) {
7094  // This is the special case where we use a MOV32rm to load a 32-bit
7095  // value and zero-extend the top bits. Change the destination register
7096  // to a 32-bit one.
7097  unsigned DstReg = NewMI->getOperand(0).getReg();
7098  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
7099  NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
7100  else
7101  NewMI->getOperand(0).setSubReg(X86::sub_32bit);
7102  }
7103  return NewMI;
7104  }
7105  }
7106 
7107  // If the instruction and target operand are commutable, commute the
7108  // instruction and try again.
7109  if (AllowCommute) {
7110  unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
7111  if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
7112  bool HasDef = MI.getDesc().getNumDefs();
7113  unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
7114  unsigned Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
7115  unsigned Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
7116  bool Tied1 =
7117  0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
7118  bool Tied2 =
7119  0 == MI.getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
7120 
7121  // If either of the commutable operands are tied to the destination
7122  // then we can not commute + fold.
7123  if ((HasDef && Reg0 == Reg1 && Tied1) ||
7124  (HasDef && Reg0 == Reg2 && Tied2))
7125  return nullptr;
7126 
7127  MachineInstr *CommutedMI =
7128  commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
7129  if (!CommutedMI) {
7130  // Unable to commute.
7131  return nullptr;
7132  }
7133  if (CommutedMI != &MI) {
7134  // New instruction. We can't fold from this.
7135  CommutedMI->eraseFromParent();
7136  return nullptr;
7137  }
7138 
7139  // Attempt to fold with the commuted version of the instruction.
7140  NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt,
7141  Size, Align, /*AllowCommute=*/false);
7142  if (NewMI)
7143  return NewMI;
7144 
7145  // Folding failed again - undo the commute before returning.
7146  MachineInstr *UncommutedMI =
7147  commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
7148  if (!UncommutedMI) {
7149  // Unable to commute.
7150  return nullptr;
7151  }
7152  if (UncommutedMI != &MI) {
7153  // New instruction. It doesn't need to be kept.
7154  UncommutedMI->eraseFromParent();
7155  return nullptr;
7156  }
7157 
7158  // Return here to prevent duplicate fuse failure report.
7159  return nullptr;
7160  }
7161  }
7162 
7163  // No fusion
7164  if (PrintFailedFusing && !MI.isCopy())
7165  dbgs() << "We failed to fuse operand " << OpNum << " in " << MI;
7166  return nullptr;
7167 }
7168 
7169 /// Return true for all instructions that only update
7170 /// the first 32 or 64 bits of the destination register and leave the rest
7171 /// unmodified. This can be used to avoid folding loads if the instructions
7172 /// only update part of the destination register, and the non-updated part is
7173 /// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
7174 /// instructions breaks the partial register dependency and it can improve
7175 /// performance. e.g.:
7176 ///
7177 /// movss (%rdi), %xmm0
7178 /// cvtss2sd %xmm0, %xmm0
7179 ///
7180 /// Instead of
7181 /// cvtss2sd (%rdi), %xmm0
7182 ///
7183 /// FIXME: This should be turned into a TSFlags.
7184 ///
7185 static bool hasPartialRegUpdate(unsigned Opcode) {
7186  switch (Opcode) {
7187  case X86::CVTSI2SSrr:
7188  case X86::CVTSI2SSrm:
7189  case X86::CVTSI2SS64rr:
7190  case X86::CVTSI2SS64rm:
7191  case X86::CVTSI2SDrr:
7192  case X86::CVTSI2SDrm:
7193  case X86::CVTSI2SD64rr:
7194  case X86::CVTSI2SD64rm:
7195  case X86::CVTSD2SSrr:
7196  case X86::CVTSD2SSrm:
7197  case X86::CVTSS2SDrr:
7198  case X86::CVTSS2SDrm:
7199  case X86::MOVHPDrm:
7200  case X86::MOVHPSrm:
7201  case X86::MOVLPDrm:
7202  case X86::MOVLPSrm:
7203  case X86::RCPSSr:
7204  case X86::RCPSSm:
7205  case X86::RCPSSr_Int:
7206  case X86::RCPSSm_Int:
7207  case X86::ROUNDSDr:
7208  case X86::ROUNDSDm:
7209  case X86::ROUNDSSr:
7210  case X86::ROUNDSSm:
7211  case X86::RSQRTSSr:
7212  case X86::RSQRTSSm:
7213  case X86::RSQRTSSr_Int:
7214  case X86::RSQRTSSm_Int:
7215  case X86::SQRTSSr:
7216  case X86::SQRTSSm:
7217  case X86::SQRTSSr_Int:
7218  case X86::SQRTSSm_Int:
7219  case X86::SQRTSDr:
7220  case X86::SQRTSDm:
7221  case X86::SQRTSDr_Int:
7222  case X86::SQRTSDm_Int:
7223  return true;
7224  }
7225 
7226  return false;
7227 }
7228 
7229 /// Inform the ExeDepsFix pass how many idle
7230 /// instructions we would like before a partial register update.
7231 unsigned X86InstrInfo::getPartialRegUpdateClearance(
7232  const MachineInstr &MI, unsigned OpNum,
7233  const TargetRegisterInfo *TRI) const {
7234  if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode()))
7235  return 0;
7236 
7237  // If MI is marked as reading Reg, the partial register update is wanted.
7238  const MachineOperand &MO = MI.getOperand(0);
7239  unsigned Reg = MO.getReg();
7240  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
7241  if (MO.readsReg() || MI.readsVirtualRegister(Reg))
7242  return 0;
7243  } else {
7244  if (MI.readsRegister(Reg, TRI))
7245  return 0;
7246  }
7247 
7248  // If any instructions in the clearance range are reading Reg, insert a
7249  // dependency breaking instruction, which is inexpensive and is likely to
7250  // be hidden in other instructions' cycles.
7251  return PartialRegUpdateClearance;
7252 }
7253 
7254 // Return true for any instruction that copies the high bits of the first source
7255 // operand into the unused high bits of the destination operand.
7256 static bool hasUndefRegUpdate(unsigned Opcode) {
7257  switch (Opcode) {
7258  case X86::VCVTSI2SSrr:
7259  case X86::VCVTSI2SSrm:
7260  case X86::Int_VCVTSI2SSrr:
7261  case X86::Int_VCVTSI2SSrm:
7262  case X86::VCVTSI2SS64rr:
7263  case X86::VCVTSI2SS64rm:
7264  case X86::Int_VCVTSI2SS64rr:
7265  case X86::Int_VCVTSI2SS64rm:
7266  case X86::VCVTSI2SDrr:
7267  case X86::VCVTSI2SDrm:
7268  case X86::Int_VCVTSI2SDrr:
7269  case X86::Int_VCVTSI2SDrm:
7270  case X86::VCVTSI2SD64rr:
7271  case X86::VCVTSI2SD64rm:
7272  case X86::Int_VCVTSI2SD64rr:
7273  case X86::Int_VCVTSI2SD64rm:
7274  case X86::VCVTSD2SSrr:
7275  case X86::VCVTSD2SSrm:
7276  case X86::Int_VCVTSD2SSrr:
7277  case X86::Int_VCVTSD2SSrm:
7278  case X86::VCVTSS2SDrr:
7279  case X86::VCVTSS2SDrm:
7280  case X86::Int_VCVTSS2SDrr:
7281  case X86::Int_VCVTSS2SDrm:
7282  case X86::VRCPSSr:
7283  case X86::VRCPSSr_Int:
7284  case X86::VRCPSSm:
7285  case X86::VRCPSSm_Int:
7286  case X86::VROUNDSDr:
7287  case X86::VROUNDSDm:
7288  case X86::VROUNDSDr_Int:
7289  case X86::VROUNDSDm_Int:
7290  case X86::VROUNDSSr:
7291  case X86::VROUNDSSm:
7292  case X86::VROUNDSSr_Int:
7293  case X86::VROUNDSSm_Int:
7294  case X86::VRSQRTSSr:
7295  case X86::VRSQRTSSr_Int:
7296  case X86::VRSQRTSSm:
7297  case X86::VRSQRTSSm_Int:
7298  case X86::VSQRTSSr:
7299  case X86::VSQRTSSr_Int:
7300  case X86::VSQRTSSm:
7301  case X86::VSQRTSSm_Int:
7302  case X86::VSQRTSDr:
7303  case X86::VSQRTSDr_Int:
7304  case X86::VSQRTSDm:
7305  case X86::VSQRTSDm_Int:
7306  // AVX-512
7307  case X86::VCVTSI2SSZrr:
7308  case X86::VCVTSI2SSZrm:
7309  case X86::VCVTSI2SSZrr_Int:
7310  case X86::VCVTSI2SSZrrb_Int:
7311  case X86::VCVTSI2SSZrm_Int:
7312  case X86::VCVTSI642SSZrr:
7313  case X86::VCVTSI642SSZrm:
7314  case X86::VCVTSI642SSZrr_Int:
7315  case X86::VCVTSI642SSZrrb_Int:
7316  case X86::VCVTSI642SSZrm_Int:
7317  case X86::VCVTSI2SDZrr:
7318  case X86::VCVTSI2SDZrm:
7319  case X86::VCVTSI2SDZrr_Int:
7320  case X86::VCVTSI2SDZrrb_Int:
7321  case X86::VCVTSI2SDZrm_Int:
7322  case X86::VCVTSI642SDZrr:
7323  case X86::VCVTSI642SDZrm:
7324  case X86::VCVTSI642SDZrr_Int:
7325  case X86::VCVTSI642SDZrrb_Int:
7326  case X86::VCVTSI642SDZrm_Int:
7327  case X86::VCVTUSI2SSZrr:
7328  case X86::VCVTUSI2SSZrm:
7329  case X86::VCVTUSI2SSZrr_Int:
7330  case X86::VCVTUSI2SSZrrb_Int:
7331  case X86::VCVTUSI2SSZrm_Int:
7332  case X86::VCVTUSI642SSZrr:
7333  case X86::VCVTUSI642SSZrm:
7334  case X86::VCVTUSI642SSZrr_Int:
7335  case X86::VCVTUSI642SSZrrb_Int:
7336  case X86::VCVTUSI642SSZrm_Int:
7337  case X86::VCVTUSI2SDZrr:
7338  case X86::VCVTUSI2SDZrm:
7339  case X86::VCVTUSI2SDZrr_Int:
7340  case X86::VCVTUSI2SDZrm_Int:
7341  case X86::VCVTUSI642SDZrr:
7342  case X86::VCVTUSI642SDZrm:
7343  case X86::VCVTUSI642SDZrr_Int:
7344  case X86::VCVTUSI642SDZrrb_Int:
7345  case X86::VCVTUSI642SDZrm_Int:
7346  case X86::VCVTSD2SSZrr:
7347  case X86::VCVTSD2SSZrrb:
7348  case X86::VCVTSD2SSZrm:
7349  case X86::VCVTSS2SDZrr:
7350  case X86::VCVTSS2SDZrrb:
7351  case X86::VCVTSS2SDZrm:
7352  case X86::VRNDSCALESDr:
7353  case X86::VRNDSCALESDrb:
7354  case X86::VRNDSCALESDm:
7355  case X86::VRNDSCALESSr:
7356  case X86::VRNDSCALESSrb:
7357  case X86::VRNDSCALESSm:
7358  case X86::VRCP14SSrr:
7359  case X86::VRCP14SSrm:
7360  case X86::VRSQRT14SSrr:
7361  case X86::VRSQRT14SSrm:
7362  case X86::VSQRTSSZr:
7363  case X86::VSQRTSSZr_Int:
7364  case X86::VSQRTSSZrb_Int:
7365  case X86::VSQRTSSZm:
7366  case X86::VSQRTSSZm_Int:
7367  case X86::VSQRTSDZr:
7368  case X86::VSQRTSDZr_Int:
7369  case X86::VSQRTSDZrb_Int:
7370  case X86::VSQRTSDZm:
7371  case X86::VSQRTSDZm_Int:
7372  return true;
7373  }
7374 
7375  return false;
7376 }
7377 
7378 /// Inform the ExeDepsFix pass how many idle instructions we would like before
7379 /// certain undef register reads.
7380 ///
7381 /// This catches the VCVTSI2SD family of instructions:
7382 ///
7383 /// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14
7384 ///
7385 /// We should be careful *not* to catch VXOR idioms, which are presumably
7386 /// handled specially in the pipeline:
7387 ///
7388 /// vxorps %xmm1<undef>, %xmm1<undef>, %xmm1
7389 ///
7390 /// Like getPartialRegUpdateClearance, this makes a strong assumption that the
7391 /// high bits that are passed-through are not live.
7392 unsigned
7393 X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
7394  const TargetRegisterInfo *TRI) const {
7395  if (!hasUndefRegUpdate(MI.getOpcode()))
7396  return 0;
7397 
7398  // Set the OpNum parameter to the first source operand.
7399  OpNum = 1;
7400 
7401  const MachineOperand &MO = MI.getOperand(OpNum);
7402  if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
7403  return UndefRegClearance;
7404  }
7405  return 0;
7406 }
7407 
7408 void X86InstrInfo::breakPartialRegDependency(
7409  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
7410  unsigned Reg = MI.getOperand(OpNum).getReg();
7411  // If MI kills this register, the false dependence is already broken.
7412  if (MI.killsRegister(Reg, TRI))
7413  return;
7414 
7415  if (X86::VR128RegClass.contains(Reg)) {
7416  // These instructions are all floating point domain, so xorps is the best
7417  // choice.
7418  unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
7419  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
7420  .addReg(Reg, RegState::Undef)
7421  .addReg(Reg, RegState::Undef);
7422  MI.addRegisterKilled(Reg, TRI, true);
7423  } else if (X86::VR256RegClass.contains(Reg)) {
7424  // Use vxorps to clear the full ymm register.
7425  // It wants to read and write the xmm sub-register.
7426  unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
7427  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
7428  .addReg(XReg, RegState::Undef)
7429  .addReg(XReg, RegState::Undef)
7430  .addReg(Reg, RegState::ImplicitDefine);
7431  MI.addRegisterKilled(Reg, TRI, true);
7432  }
7433 }
7434 
7435 MachineInstr *
7436 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
7437  ArrayRef<unsigned> Ops,
7438  MachineBasicBlock::iterator InsertPt,
7439  int FrameIndex, LiveIntervals *LIS) const {
7440  // Check switch flag
7441  if (NoFusing)
7442  return nullptr;
7443 
7444  // Unless optimizing for size, don't fold to avoid partial
7445  // register update stalls
7446  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
7447  return nullptr;
7448 
7449  // Don't fold subreg spills, or reloads that use a high subreg.
7450  for (auto Op : Ops) {
7451  MachineOperand &MO = MI.getOperand(Op);
7452  auto SubReg = MO.getSubReg();
7453  if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
7454  return nullptr;
7455  }
7456 
7457  const MachineFrameInfo &MFI = MF.getFrameInfo();
7458  unsigned Size = MFI.getObjectSize(FrameIndex);
7459  unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
7460  // If the function stack isn't realigned we don't want to fold instructions
7461  // that need increased alignment.
7462  if (!RI.needsStackRealignment(MF))
7463  Alignment =
7464  std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
7465  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
7466  unsigned NewOpc = 0;
7467  unsigned RCSize = 0;
7468  switch (MI.getOpcode()) {
7469  default: return nullptr;
7470  case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
7471  case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
7472  case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
7473  case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
7474  }
7475  // Check if it's safe to fold the load. If the size of the object is
7476  // narrower than the load width, then it's not.
7477  if (Size < RCSize)
7478  return nullptr;
7479  // Change to CMPXXri r, 0 first.
7480  MI.setDesc(get(NewOpc));
7481  MI.getOperand(1).ChangeToImmediate(0);
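  // Net effect (illustrative): a "TEST32rr %reg, %reg" whose register is
  // reloaded from stack slot <fi#N> is first rewritten to "CMP32ri8 %reg, 0"
  // here and then folded below into "CMP32mi8 <fi#N>, 0".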
7482  } else if (Ops.size() != 1)
7483  return nullptr;
7484 
7485  return foldMemoryOperandImpl(MF, MI, Ops[0],
7486  MachineOperand::CreateFI(FrameIndex), InsertPt,
7487  Size, Alignment, /*AllowCommute=*/true);
7488 }
7489 
7490 /// Check if \p LoadMI is a partial register load that we can't fold into \p MI
7491 /// because the latter uses contents that wouldn't be defined in the folded
7492 /// version. For instance, this transformation isn't legal:
7493 /// movss (%rdi), %xmm0
7494 /// addps %xmm0, %xmm0
7495 /// ->
7496 /// addps (%rdi), %xmm0
7497 ///
7498 /// But this one is:
7499 /// movss (%rdi), %xmm0
7500 /// addss %xmm0, %xmm0
7501 /// ->
7502 /// addss (%rdi), %xmm0
7503 ///
7504 static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
7505  const MachineInstr &UserMI,
7506  const MachineFunction &MF) {
7507  unsigned Opc = LoadMI.getOpcode();
7508  unsigned UserOpc = UserMI.getOpcode();
7509  unsigned RegSize =
7510  MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
7511 
7512  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) &&
7513  RegSize > 4) {
7514  // These instructions only load 32 bits, so we can't fold them if the
7515  // destination register is wider than 32 bits (4 bytes) and the user
7516  // instruction isn't scalar (SS).
7517  switch (UserOpc) {
7518  case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
7519  case X86::Int_CMPSSrr: case X86::Int_VCMPSSrr: case X86::VCMPSSZrr_Int:
7520  case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
7521  case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
7522  case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
7523  case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
7524  case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
7525  case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
7526  case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
7527  case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
7528  case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int:
7529  case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int:
7530  case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int:
7531  case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int:
7532  case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int:
7533  case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int:
7534  case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int:
7535  case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int:
7536  case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
7537  case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
7538  case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
7539  return false;
7540  default:
7541  return true;
7542  }
7543  }
7544 
7545  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) &&
7546  RegSize > 8) {
7547  // These instructions only load 64 bits, so we can't fold them if the
7548  // destination register is wider than 64 bits (8 bytes) and the user
7549  // instruction isn't scalar (SD).
7550  switch (UserOpc) {
7551  case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
7552  case X86::Int_CMPSDrr: case X86::Int_VCMPSDrr: case X86::VCMPSDZrr_Int:
7553  case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
7554  case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
7555  case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
7556  case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
7557  case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
7558  case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
7559  case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
7560  case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
7561  case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int:
7562  case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int:
7563  case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int:
7564  case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int:
7565  case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int:
7566  case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int:
7567  case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int:
7568  case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int:
7569  case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
7570  case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
7571  case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
7572  return false;
7573  default:
7574  return true;
7575  }
7576  }
7577 
7578  return false;
7579 }
7580 
7581 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
7582  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
7583  MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
7584  LiveIntervals *LIS) const {
7585 
7586  // TODO: Support the case where LoadMI loads a wide register, but MI
7587  // only uses a subreg.
7588  for (auto Op : Ops) {
7589  if (MI.getOperand(Op).getSubReg())
7590  return nullptr;
7591  }
7592 
7593  // If loading from a FrameIndex, fold directly from the FrameIndex.
7594  unsigned NumOps = LoadMI.getDesc().getNumOperands();
7595  int FrameIndex;
7596  if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
7597  if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
7598  return nullptr;
7599  return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS);
7600  }
7601 
7602  // Check switch flag
7603  if (NoFusing) return nullptr;
7604 
7605  // Avoid partial register update stalls unless optimizing for size.
7606  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
7607  return nullptr;
7608 
7609  // Determine the alignment of the load.
7610  unsigned Alignment = 0;
7611  if (LoadMI.hasOneMemOperand())
7612  Alignment = (*LoadMI.memoperands_begin())->getAlignment();
7613  else
7614  switch (LoadMI.getOpcode()) {
7615  case X86::AVX512_512_SET0:
7616  case X86::AVX512_512_SETALLONES:
7617  Alignment = 64;
7618  break;
7619  case X86::AVX2_SETALLONES:
7620  case X86::AVX_SET0:
7621  case X86::AVX512_256_SET0:
7622  Alignment = 32;
7623  break;
7624  case X86::V_SET0:
7625  case X86::V_SETALLONES:
7626  case X86::AVX512_128_SET0:
7627  Alignment = 16;
7628  break;
7629  case X86::FsFLD0SD:
7630  case X86::AVX512_FsFLD0SD:
7631  Alignment = 8;
7632  break;
7633  case X86::FsFLD0SS:
7634  case X86::AVX512_FsFLD0SS:
7635  Alignment = 4;
7636  break;
7637  default:
7638  return nullptr;
7639  }
7640  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
7641  unsigned NewOpc = 0;
7642  switch (MI.getOpcode()) {
7643  default: return nullptr;
7644  case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
7645  case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
7646  case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
7647  case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
7648  }
7649  // Change to CMPXXri r, 0 first.
7650  MI.setDesc(get(NewOpc));
7651  MI.getOperand(1).ChangeToImmediate(0);
7652  } else if (Ops.size() != 1)
7653  return nullptr;
7654 
7655  // Make sure the subregisters match.
7656  // Otherwise we risk changing the size of the load.
7657  if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
7658  return nullptr;
7659 
7660  SmallVector<MachineOperand, X86::AddrNumOperands> MOs;
7661  switch (LoadMI.getOpcode()) {
7662  case X86::V_SET0:
7663  case X86::V_SETALLONES:
7664  case X86::AVX2_SETALLONES:
7665  case X86::AVX_SET0:
7666  case X86::AVX512_128_SET0:
7667  case X86::AVX512_256_SET0:
7668  case X86::AVX512_512_SET0:
7669  case X86::AVX512_512_SETALLONES:
7670  case X86::FsFLD0SD:
7671  case X86::AVX512_FsFLD0SD:
7672  case X86::FsFLD0SS:
7673  case X86::AVX512_FsFLD0SS: {
7674  // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
7675  // Create a constant-pool entry and operands to load from it.
7676 
7677  // Medium and large mode can't fold loads this way.
7678  if (MF.getTarget().getCodeModel() != CodeModel::Small &&
7679  MF.getTarget().getCodeModel() != CodeModel::Kernel)
7680  return nullptr;
7681 
7682  // x86-32 PIC requires a PIC base register for constant pools.
7683  unsigned PICBase = 0;
7684  if (MF.getTarget().isPositionIndependent()) {
7685  if (Subtarget.is64Bit())
7686  PICBase = X86::RIP;
7687  else
7688  // FIXME: PICBase = getGlobalBaseReg(&MF);
7689  // This doesn't work for several reasons.
7690  // 1. GlobalBaseReg may have been spilled.
7691  // 2. It may not be live at MI.
7692  return nullptr;
7693  }
7694 
7695  // Create a constant-pool entry.
7696  MachineConstantPool &MCP = *MF.getConstantPool();
7697  Type *Ty;
7698  unsigned Opc = LoadMI.getOpcode();
7699  if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS)
7700  Ty = Type::getFloatTy(MF.getFunction()->getContext());
7701  else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
7702  Ty = Type::getDoubleTy(MF.getFunction()->getContext());
7703  else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
7704  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 16);
7705  else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
7706  Opc == X86::AVX512_256_SET0)
7707  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
7708  else
7709  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
7710 
7711  bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
7712  Opc == X86::AVX512_512_SETALLONES);
7713  const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
7714  Constant::getNullValue(Ty);
7715  unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
7716 
7717  // Create operands to load from the constant pool entry.
7718  MOs.push_back(MachineOperand::CreateReg(PICBase, false));
7719  MOs.push_back(MachineOperand::CreateImm(1));
7720  MOs.push_back(MachineOperand::CreateReg(0, false));
7721  MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
7722  MOs.push_back(MachineOperand::CreateReg(0, false));
7723  break;
7724  }
7725  default: {
7726  if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
7727  return nullptr;
7728 
7729  // Folding a normal load. Just copy the load's address operands.
7730  MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,
7731  LoadMI.operands_begin() + NumOps);
7732  break;
7733  }
7734  }
7735  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
7736  /*Size=*/0, Alignment, /*AllowCommute=*/true);
7737 }
7738 
7739 bool X86InstrInfo::unfoldMemoryOperand(
7740  MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
7741  bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
7742  auto I = MemOp2RegOpTable.find(MI.getOpcode());
7743  if (I == MemOp2RegOpTable.end())
7744  return false;
7745  unsigned Opc = I->second.first;
7746  unsigned Index = I->second.second & TB_INDEX_MASK;
7747  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
7748  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
7749  if (UnfoldLoad && !FoldedLoad)
7750  return false;
7751  UnfoldLoad &= FoldedLoad;
7752  if (UnfoldStore && !FoldedStore)
7753  return false;
7754  UnfoldStore &= FoldedStore;
7755 
7756  const MCInstrDesc &MCID = get(Opc);
7757  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
7758  // TODO: Check if 32-byte or greater accesses are slow too?
7759  if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
7760  Subtarget.isUnalignedMem16Slow())
7761  // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
7762  // conservatively assume the address is unaligned. That's bad for
7763  // performance.
7764  return false;
7765  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
7766  SmallVector<MachineOperand,2> BeforeOps;
7767  SmallVector<MachineOperand,2> AfterOps;
7768  SmallVector<MachineOperand,4> ImpOps;
7769  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
7770  MachineOperand &Op = MI.getOperand(i);
7771  if (i >= Index && i < Index + X86::AddrNumOperands)
7772  AddrOps.push_back(Op);
7773  else if (Op.isReg() && Op.isImplicit())
7774  ImpOps.push_back(Op);
7775  else if (i < Index)
7776  BeforeOps.push_back(Op);
7777  else if (i > Index)
7778  AfterOps.push_back(Op);
7779  }
7780 
7781  // Emit the load instruction.
7782  if (UnfoldLoad) {
7783  std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
7784  MF.extractLoadMemRefs(MI.memoperands_begin(), MI.memoperands_end());
7785  loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
7786  if (UnfoldStore) {
7787  // Address operands cannot be marked isKill.
7788  for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
7789  MachineOperand &MO = NewMIs[0]->getOperand(i);
7790  if (MO.isReg())
7791  MO.setIsKill(false);
7792  }
7793  }
7794  }
7795 
7796  // Emit the data processing instruction.
7797  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true);
7798  MachineInstrBuilder MIB(MF, DataMI);
7799 
7800  if (FoldedStore)
7801  MIB.addReg(Reg, RegState::Define);
7802  for (MachineOperand &BeforeOp : BeforeOps)
7803  MIB.addOperand(BeforeOp);
7804  if (FoldedLoad)
7805  MIB.addReg(Reg);
7806  for (MachineOperand &AfterOp : AfterOps)
7807  MIB.addOperand(AfterOp);
7808  for (MachineOperand &ImpOp : ImpOps) {
7809  MIB.addReg(ImpOp.getReg(),
7810  getDefRegState(ImpOp.isDef()) |
7811  RegState::Implicit |
7812  getKillRegState(ImpOp.isKill()) |
7813  getDeadRegState(ImpOp.isDead()) |
7814  getUndefRegState(ImpOp.isUndef()));
7815  }
7816  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
7817  switch (DataMI->getOpcode()) {
7818  default: break;
7819  case X86::CMP64ri32:
7820  case X86::CMP64ri8:
7821  case X86::CMP32ri:
7822  case X86::CMP32ri8:
7823  case X86::CMP16ri:
7824  case X86::CMP16ri8:
7825  case X86::CMP8ri: {
7826  MachineOperand &MO0 = DataMI->getOperand(0);
7827  MachineOperand &MO1 = DataMI->getOperand(1);
7828  if (MO1.getImm() == 0) {
7829  unsigned NewOpc;
7830  switch (DataMI->getOpcode()) {
7831  default: llvm_unreachable("Unreachable!");
7832  case X86::CMP64ri8:
7833  case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
7834  case X86::CMP32ri8:
7835  case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
7836  case X86::CMP16ri8:
7837  case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
7838  case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
7839  }
7840  DataMI->setDesc(get(NewOpc));
7841  MO1.ChangeToRegister(MO0.getReg(), false);
7842  }
7843  }
7844  }
7845  NewMIs.push_back(DataMI);
7846 
7847  // Emit the store instruction.
7848  if (UnfoldStore) {
7849  const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
7850  std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
7851  MF.extractStoreMemRefs(MI.memoperands_begin(), MI.memoperands_end());
7852  storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
7853  }
7854 
7855  return true;
7856 }
7857 
7858 bool
7858 X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
7860  SmallVectorImpl<SDNode*> &NewNodes) const {
7861  if (!N->isMachineOpcode())
7862  return false;
7863 
7864  auto I = MemOp2RegOpTable.find(N->getMachineOpcode());
7865  if (I == MemOp2RegOpTable.end())
7866  return false;
7867  unsigned Opc = I->second.first;
7868  unsigned Index = I->second.second & TB_INDEX_MASK;
7869  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
7870  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
7871  const MCInstrDesc &MCID = get(Opc);
7872  MachineFunction &MF = DAG.getMachineFunction();
7873  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
7874  unsigned NumDefs = MCID.NumDefs;
7875  std::vector<SDValue> AddrOps;
7876  std::vector<SDValue> BeforeOps;
7877  std::vector<SDValue> AfterOps;
7878  SDLoc dl(N);
7879  unsigned NumOps = N->getNumOperands();
7880  for (unsigned i = 0; i != NumOps-1; ++i) {
7881  SDValue Op = N->getOperand(i);
7882  if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
7883  AddrOps.push_back(Op);
7884  else if (i < Index-NumDefs)
7885  BeforeOps.push_back(Op);
7886  else if (i > Index-NumDefs)
7887  AfterOps.push_back(Op);
7888  }
7889  SDValue Chain = N->getOperand(NumOps-1);
7890  AddrOps.push_back(Chain);
7891 
7892  // Emit the load instruction.
7893  SDNode *Load = nullptr;
7894  if (FoldedLoad) {
7895  EVT VT = *RC->vt_begin();
7896  std::pair<MachineInstr::mmo_iterator,
7897  MachineInstr::mmo_iterator> MMOs =
7898  MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
7899  cast<MachineSDNode>(N)->memoperands_end());
7900  if (!(*MMOs.first) &&
7901  RC == &X86::VR128RegClass &&
7902  Subtarget.isUnalignedMem16Slow())
7903  // Do not introduce a slow unaligned load.
7904  return false;
7905  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
7906  // memory access is slow above.
7907  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
7908  bool isAligned = (*MMOs.first) &&
7909  (*MMOs.first)->getAlignment() >= Alignment;
7910  Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
7911  VT, MVT::Other, AddrOps);
7912  NewNodes.push_back(Load);
7913 
7914  // Preserve memory reference information.
7915  cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
7916  }
7917 
7918  // Emit the data processing instruction.
7919  std::vector<EVT> VTs;
7920  const TargetRegisterClass *DstRC = nullptr;
7921  if (MCID.getNumDefs() > 0) {
7922  DstRC = getRegClass(MCID, 0, &RI, MF);
7923  VTs.push_back(*DstRC->vt_begin());
7924  }
7925  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
7926  EVT VT = N->getValueType(i);
7927  if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
7928  VTs.push_back(VT);
7929  }
7930  if (Load)
7931  BeforeOps.push_back(SDValue(Load, 0));
7932  BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
7933  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
7934  NewNodes.push_back(NewNode);
7935 
7936  // Emit the store instruction.
7937  if (FoldedStore) {
7938  AddrOps.pop_back();
7939  AddrOps.push_back(SDValue(NewNode, 0));
7940  AddrOps.push_back(Chain);
7941  std::pair<MachineInstr::mmo_iterator,
7942  MachineInstr::mmo_iterator> MMOs =
7943  MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
7944  cast<MachineSDNode>(N)->memoperands_end());
7945  if (!(*MMOs.first) &&
7946  RC == &X86::VR128RegClass &&
7947  Subtarget.isUnalignedMem16Slow())
7948  // Do not introduce a slow unaligned store.
7949  return false;
7950  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
7951  // memory access is slow above.
7952  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
7953  bool isAligned = (*MMOs.first) &&
7954  (*MMOs.first)->getAlignment() >= Alignment;
7955  SDNode *Store =
7956  DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
7957  dl, MVT::Other, AddrOps);
7958  NewNodes.push_back(Store);
7959 
7960  // Preserve memory reference information.
7961  cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
7962  }
7963 
7964  return true;
7965 }
7966 
7967 unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
7968  bool UnfoldLoad, bool UnfoldStore,
7969  unsigned *LoadRegIndex) const {
7970  auto I = MemOp2RegOpTable.find(Opc);
7971  if (I == MemOp2RegOpTable.end())
7972  return 0;
7973  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
7974  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
7975  if (UnfoldLoad && !FoldedLoad)
7976  return 0;
7977  if (UnfoldStore && !FoldedStore)
7978  return 0;
7979  if (LoadRegIndex)
7980  *LoadRegIndex = I->second.second & TB_INDEX_MASK;
7981  return I->second.first;
7982 }
7983 
7984 bool
7985 X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
7986  int64_t &Offset1, int64_t &Offset2) const {
7987  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
7988  return false;
7989  unsigned Opc1 = Load1->getMachineOpcode();
7990  unsigned Opc2 = Load2->getMachineOpcode();
7991  switch (Opc1) {
7992  default: return false;
7993  case X86::MOV8rm:
7994  case X86::MOV16rm:
7995  case X86::MOV32rm:
7996  case X86::MOV64rm:
7997  case X86::LD_Fp32m:
7998  case X86::LD_Fp64m:
7999  case X86::LD_Fp80m:
8000  case X86::MOVSSrm:
8001  case X86::MOVSDrm:
8002  case X86::MMX_MOVD64rm:
8003  case X86::MMX_MOVQ64rm:
8004  case X86::MOVAPSrm:
8005  case X86::MOVUPSrm:
8006  case X86::MOVAPDrm:
8007  case X86::MOVUPDrm:
8008  case X86::MOVDQArm:
8009  case X86::MOVDQUrm:
8010  // AVX load instructions
8011  case X86::VMOVSSrm:
8012  case X86::VMOVSDrm:
8013  case X86::VMOVAPSrm:
8014  case X86::VMOVUPSrm:
8015  case X86::VMOVAPDrm:
8016  case X86::VMOVUPDrm:
8017  case X86::VMOVDQArm:
8018  case X86::VMOVDQUrm:
8019  case X86::VMOVAPSYrm:
8020  case X86::VMOVUPSYrm:
8021  case X86::VMOVAPDYrm:
8022  case X86::VMOVUPDYrm:
8023  case X86::VMOVDQAYrm:
8024  case X86::VMOVDQUYrm:
8025  // AVX512 load instructions
8026  case X86::VMOVSSZrm:
8027  case X86::VMOVSDZrm:
8028  case X86::VMOVAPSZ128rm:
8029  case X86::VMOVUPSZ128rm:
8030  case X86::VMOVAPSZ128rm_NOVLX:
8031  case X86::VMOVUPSZ128rm_NOVLX:
8032  case X86::VMOVAPDZ128rm:
8033  case X86::VMOVUPDZ128rm:
8034  case X86::VMOVDQU8Z128rm:
8035  case X86::VMOVDQU16Z128rm:
8036  case X86::VMOVDQA32Z128rm:
8037  case X86::VMOVDQU32Z128rm:
8038  case X86::VMOVDQA64Z128rm:
8039  case X86::VMOVDQU64Z128rm:
8040  case X86::VMOVAPSZ256rm:
8041  case X86::VMOVUPSZ256rm:
8042  case X86::VMOVAPSZ256rm_NOVLX:
8043  case X86::VMOVUPSZ256rm_NOVLX:
8044  case X86::VMOVAPDZ256rm:
8045  case X86::VMOVUPDZ256rm:
8046  case X86::VMOVDQU8Z256rm:
8047  case X86::VMOVDQU16Z256rm:
8048  case X86::VMOVDQA32Z256rm:
8049  case X86::VMOVDQU32Z256rm:
8050  case X86::VMOVDQA64Z256rm:
8051  case X86::VMOVDQU64Z256rm:
8052  case X86::VMOVAPSZrm:
8053  case X86::VMOVUPSZrm:
8054  case X86::VMOVAPDZrm:
8055  case X86::VMOVUPDZrm:
8056  case X86::VMOVDQU8Zrm:
8057  case X86::VMOVDQU16Zrm:
8058  case X86::VMOVDQA32Zrm:
8059  case X86::VMOVDQU32Zrm:
8060  case X86::VMOVDQA64Zrm:
8061  case X86::VMOVDQU64Zrm:
8062  case X86::KMOVBkm:
8063  case X86::KMOVWkm:
8064  case X86::KMOVDkm:
8065  case X86::KMOVQkm:
8066  break;
8067  }
8068  switch (Opc2) {
8069  default: return false;
8070  case X86::MOV8rm:
8071  case X86::MOV16rm:
8072  case X86::MOV32rm:
8073  case X86::MOV64rm:
8074  case X86::LD_Fp32m:
8075  case X86::LD_Fp64m:
8076  case X86::LD_Fp80m:
8077  case X86::MOVSSrm:
8078  case X86::MOVSDrm:
8079  case X86::MMX_MOVD64rm:
8080  case X86::MMX_MOVQ64rm:
8081  case X86::MOVAPSrm:
8082  case X86::MOVUPSrm:
8083  case X86::MOVAPDrm:
8084  case X86::MOVUPDrm:
8085  case X86::MOVDQArm:
8086  case X86::MOVDQUrm:
8087  // AVX load instructions
8088  case X86::VMOVSSrm:
8089  case X86::VMOVSDrm:
8090  case X86::VMOVAPSrm:
8091  case X86::VMOVUPSrm:
8092  case X86::VMOVAPDrm:
8093  case X86::VMOVUPDrm:
8094  case X86::VMOVDQArm:
8095  case X86::VMOVDQUrm:
8096  case X86::VMOVAPSYrm:
8097  case X86::VMOVUPSYrm:
8098  case X86::VMOVAPDYrm:
8099  case X86::VMOVUPDYrm:
8100  case X86::VMOVDQAYrm:
8101  case X86::VMOVDQUYrm:
8102  // AVX512 load instructions
8103  case X86::VMOVSSZrm:
8104  case X86::VMOVSDZrm:
8105  case X86::VMOVAPSZ128rm:
8106  case X86::VMOVUPSZ128rm:
8107  case X86::VMOVAPSZ128rm_NOVLX:
8108  case X86::VMOVUPSZ128rm_NOVLX:
8109  case X86::VMOVAPDZ128rm:
8110  case X86::VMOVUPDZ128rm:
8111  case X86::VMOVDQU8Z128rm:
8112  case X86::VMOVDQU16Z128rm:
8113  case X86::VMOVDQA32Z128rm:
8114  case X86::VMOVDQU32Z128rm:
8115  case X86::VMOVDQA64Z128rm:
8116  case X86::VMOVDQU64Z128rm:
8117  case X86::VMOVAPSZ256rm:
8118  case X86::VMOVUPSZ256rm:
8119  case X86::VMOVAPSZ256rm_NOVLX:
8120  case X86::VMOVUPSZ256rm_NOVLX:
8121  case X86::VMOVAPDZ256rm:
8122  case X86::VMOVUPDZ256rm:
8123  case X86::VMOVDQU8Z256rm:
8124  case X86::VMOVDQU16Z256rm:
8125  case X86::VMOVDQA32Z256rm:
8126  case X86::VMOVDQU32Z256rm:
8127  case X86::VMOVDQA64Z256rm:
8128  case X86::VMOVDQU64Z256rm:
8129  case X86::VMOVAPSZrm:
8130  case X86::VMOVUPSZrm:
8131  case X86::VMOVAPDZrm:
8132  case X86::VMOVUPDZrm:
8133  case X86::VMOVDQU8Zrm:
8134  case X86::VMOVDQU16Zrm:
8135  case X86::VMOVDQA32Zrm:
8136  case X86::VMOVDQU32Zrm:
8137  case X86::VMOVDQA64Zrm:
8138  case X86::VMOVDQU64Zrm:
8139  case X86::KMOVBkm:
8140  case X86::KMOVWkm:
8141  case X86::KMOVDkm:
8142  case X86::KMOVQkm:
8143  break;
8144  }
8145 
8146  // Check if chain operands and base addresses match.
8147  if (Load1->getOperand(0) != Load2->getOperand(0) ||
8148  Load1->getOperand(5) != Load2->getOperand(5))
8149  return false;
8150  // Segment operands should match as well.
8151  if (Load1->getOperand(4) != Load2->getOperand(4))
8152  return false;
8153  // Scale should be 1, Index should be Reg0.
8154  if (Load1->getOperand(1) == Load2->getOperand(1) &&
8155  Load1->getOperand(2) == Load2->getOperand(2)) {
8156  if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
8157  return false;
8158 
8159  // Now let's examine the displacements.
8160  if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
8161  isa<ConstantSDNode>(Load2->getOperand(3))) {
8162  Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
8163  Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
8164  return true;
8165  }
8166  }
8167  return false;
8168 }
8169 
8170 bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
8171  int64_t Offset1, int64_t Offset2,
8172  unsigned NumLoads) const {
8173  assert(Offset2 > Offset1);
8174  if ((Offset2 - Offset1) / 8 > 64)
8175  return false;
8176 
8177  unsigned Opc1 = Load1->getMachineOpcode();
8178  unsigned Opc2 = Load2->getMachineOpcode();
8179  if (Opc1 != Opc2)
8180  return false; // FIXME: overly conservative?
8181 
8182  switch (Opc1) {
8183  default: break;
8184  case X86::LD_Fp32m:
8185  case X86::LD_Fp64m:
8186  case X86::LD_Fp80m:
8187  case X86::MMX_MOVD64rm:
8188  case X86::MMX_MOVQ64rm:
8189  return false;
8190  }
8191 
8192  EVT VT = Load1->getValueType(0);
8193  switch (VT.getSimpleVT().SimpleTy) {
8194  default:
8195  // XMM registers. In 64-bit mode we can be a bit more aggressive since we
8196  // have 16 of them to play with.
8197  if (Subtarget.is64Bit()) {
8198  if (NumLoads >= 3)
8199  return false;
8200  } else if (NumLoads) {
8201  return false;
8202  }
8203  break;
8204  case MVT::i8:
8205  case MVT::i16:
8206  case MVT::i32:
8207  case MVT::i64:
8208  case MVT::f32:
8209  case MVT::f64:
8210  if (NumLoads)
8211  return false;
8212  break;
8213  }
8214 
8215  return true;
8216 }
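
A standalone sketch (illustrative names only, not part of this file) that restates the clustering heuristic above with plain integers; the IsVectorLoad flag stands in for the MVT switch, and Offset2 >= Offset1 is assumed as in the assert above.

// Sketch only: mirrors the checks in shouldScheduleLoadsNear.
#include <cstdint>

bool shouldClusterAnotherLoad(int64_t Offset1, int64_t Offset2,
                              unsigned NumLoadsAlreadyClustered,
                              bool Is64Bit, bool IsVectorLoad) {
  // Displacements more than 64 eight-byte chunks apart are never clustered.
  if ((Offset2 - Offset1) / 8 > 64)
    return false;
  if (IsVectorLoad)
    // XMM loads: 64-bit mode tolerates a larger cluster (NumLoads < 3);
    // 32-bit mode only pairs the first two.
    return Is64Bit ? NumLoadsAlreadyClustered < 3
                   : NumLoadsAlreadyClustered == 0;
  // Scalar integer/FP loads: only pair the first two.
  return NumLoadsAlreadyClustered == 0;
}
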
8217 
8218 bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First,
8219  const MachineInstr &Second) const {
8220  // Check if this processor supports macro-fusion. Since this is a minor
8221  // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
8222  // proxy for SandyBridge+.
8223  if (!Subtarget.hasAVX())
8224  return false;
8225 
8226  enum {
8227  FuseTest,
8228  FuseCmp,
8229  FuseInc
8230  } FuseKind;
8231 
8232  switch (Second.getOpcode()) {
8233  default:
8234  return false;
8235  case X86::JE_1:
8236  case X86::JNE_1:
8237  case X86::JL_1:
8238  case X86::JLE_1:
8239  case X86::JG_1:
8240  case X86::JGE_1:
8241  FuseKind = FuseInc;
8242  break;
8243  case X86::JB_1:
8244  case X86::JBE_1:
8245  case X86::JA_1:
8246  case X86::JAE_1:
8247  FuseKind = FuseCmp;
8248  break;
8249  case X86::JS_1:
8250  case X86::JNS_1:
8251  case X86::JP_1:
8252  case X86::JNP_1:
8253  case X86::JO_1:
8254  case X86::JNO_1:
8255  FuseKind = FuseTest;
8256  break;
8257  }
8258  switch (First.getOpcode()) {
8259  default:
8260  return false;
8261  case X86::TEST8rr:
8262  case X86::TEST16rr:
8263  case X86::TEST32rr:
8264  case X86::TEST64rr:
8265  case X86::TEST8ri:
8266  case X86::TEST16ri:
8267  case X86::TEST32ri:
8268  case X86::TEST32i32:
8269  case X86::TEST64i32:
8270  case X86::TEST64ri32:
8271  case X86::TEST8rm:
8272  case X86::TEST16rm:
8273  case X86::TEST32rm:
8274  case X86::TEST64rm:
8275  case X86::TEST8ri_NOREX:
8276  case X86::AND16i16:
8277  case X86::AND16ri:
8278  case X86::AND16ri8:
8279  case X86::AND16rm:
8280  case X86::AND16rr:
8281  case X86::AND32i32:
8282  case X86::AND32ri:
8283  case X86::AND32ri8:
8284  case X86::AND32rm:
8285  case X86::AND32rr:
8286  case X86::AND64i32:
8287  case X86::AND64ri32:
8288  case X86::AND64ri8:
8289  case X86::AND64rm:
8290  case X86::AND64rr:
8291  case X86::AND8i8:
8292  case X86::AND8ri:
8293  case X86::AND8rm:
8294  case X86::AND8rr:
8295  return true;
8296  case X86::CMP16i16:
8297  case X86::CMP16ri:
8298  case X86::CMP16ri8:
8299  case X86::CMP16rm:
8300  case X86::CMP16rr:
8301  case X86::CMP32i32:
8302  case X86::CMP32ri:
8303  case X86::CMP32ri8:
8304  case X86::CMP32rm:
8305  case X86::CMP32rr:
8306  case X86::CMP64i32:
8307  case X86::CMP64ri32:
8308  case X86::CMP64ri8:
8309  case X86::CMP64rm:
8310  case X86::CMP64rr:
8311  case X86::CMP8i8:
8312  case X86::CMP8ri:
8313  case X86::CMP8rm:
8314  case X86::CMP8rr:
8315  case X86::ADD16i16:
8316  case X86::ADD16ri:
8317  case X86::ADD16ri8:
8318  case X86::ADD16ri8_DB:
8319  case X86::ADD16ri_DB:
8320  case X86::ADD16rm:
8321  case X86::ADD16rr:
8322  case X86::ADD16rr_DB:
8323  case X86::ADD32i32:
8324  case X86::ADD32ri:
8325  case X86::ADD32ri8:
8326  case X86::ADD32ri8_DB:
8327  case X86::ADD32ri_DB:
8328  case X86::ADD32rm:
8329  case X86::ADD32rr:
8330  case X86::ADD32rr_DB:
8331  case X86::ADD64i32:
8332  case X86::ADD64ri32:
8333  case X86::ADD64ri32_DB:
8334  case X86::ADD64ri8:
8335  case X86::ADD64ri8_DB:
8336  case X86::ADD64rm:
8337  case X86::ADD64rr:
8338  case X86::ADD64rr_DB:
8339  case X86::ADD8i8:
8340  case X86::ADD8mi:
8341  case X86::ADD8mr:
8342  case X86::ADD8ri:
8343  case X86::ADD8rm:
8344  case X86::ADD8rr:
8345  case X86::SUB16i16:
8346  case X86::SUB16ri:
8347  case X86::SUB16ri8:
8348  case X86::SUB16rm:
8349  case X86::SUB16rr:
8350  case X86::SUB32i32:
8351  case X86::SUB32ri:
8352  case X86::SUB32ri8:
8353  case X86::SUB32rm:
8354  case X86::SUB32rr:
8355  case X86::SUB64i32:
8356  case X86::SUB64ri32:
8357  case X86::SUB64ri8:
8358  case X86::SUB64rm:
8359  case X86::SUB64rr:
8360  case X86::SUB8i8:
8361  case X86::SUB8ri:
8362  case X86::SUB8rm:
8363  case X86::SUB8rr:
8364  return FuseKind == FuseCmp || FuseKind == FuseInc;
8365  case X86::INC16r:
8366  case X86::INC32r:
8367  case X86::INC64r:
8368  case X86::INC8r:
8369  case X86::DEC16r:
8370  case X86::DEC32r:
8371  case X86::DEC64r:
8372  case X86::DEC8r:
8373  return FuseKind == FuseInc;
8374  }
8375 }
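
A minimal sketch of the pairing rule encoded by the two switches above, assuming the opcodes are bucketed the same way (TEST/AND, CMP/ADD/SUB, INC/DEC); the enum and function names are hypothetical, not LLVM API.

// Sketch only: reduces shouldScheduleAdjacent to a small decision table.
enum class BranchKind { FuseTest, FuseCmp, FuseInc };
enum class FlagProducer { TestOrAnd, CmpAddSub, IncDec };

bool canMacroFuse(FlagProducer First, BranchKind Second) {
  switch (First) {
  case FlagProducer::TestOrAnd: // TEST/AND fuse with every branch group.
    return true;
  case FlagProducer::CmpAddSub: // CMP/ADD/SUB fuse with the FuseCmp and
    return Second != BranchKind::FuseTest; // FuseInc groups only.
  case FlagProducer::IncDec:    // INC/DEC only fuse with the JE/JNE/JL/JLE/
    return Second == BranchKind::FuseInc;  // JG/JGE group.
  }
  return false;
}

With these buckets, a test eax,eax followed by jne fuses, while an inc ecx followed by jo does not, matching the returns above.
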
8376 
8377 bool X86InstrInfo::
8378 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
8379  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
8380  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
8381  Cond[0].setImm(GetOppositeBranchCondition(CC));
8382  return false;
8383 }
8384 
8385 bool X86InstrInfo::
8386 isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
8387  // FIXME: Return false for x87 stack register classes for now. We can't
8388  // allow any loads of these registers before FpGet_ST0_80.
8389  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
8390  RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
8391 }
8392 
8393 /// Return a virtual register initialized with the
8394 /// global base register value. Output instructions required to
8395 /// initialize the register in the function entry block, if necessary.
8396 ///
8397 /// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
8398 ///
8399 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
8400  assert(!Subtarget.is64Bit() &&
8401  "X86-64 PIC uses RIP relative addressing");
8402 
8403  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
8404  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
8405  if (GlobalBaseReg != 0)
8406  return GlobalBaseReg;
8407 
8408  // Create the register. The code to initialize it is inserted
8409  // later, by the CGBR pass (below).
8410  MachineRegisterInfo &RegInfo = MF->getRegInfo();
8411  GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
8412  X86FI->setGlobalBaseReg(GlobalBaseReg);
8413  return GlobalBaseReg;
8414 }
8415 
8416 // These are the replaceable SSE instructions. Some of these have Int variants
8417 // that we don't include here. We don't want to replace instructions selected
8418 // by intrinsics.
8419 static const uint16_t ReplaceableInstrs[][3] = {
8420  //PackedSingle PackedDouble PackedInt
8421  { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
8422  { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
8423  { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
8424  { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
8425  { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
8426  { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
8427  { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
8428  { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
8429  { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
8430  { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
8431  { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
8432  { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
8433  { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
8434  { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
8435  { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
8436  { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
8437  { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
8438  { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
8439  // AVX 128-bit support
8440  { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
8441  { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
8442  { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
8443  { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
8444  { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
8445  { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
8446  { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
8447  { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
8448  { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
8449  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
8450  { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
8451  { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
8452  { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
8453  { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
8454  { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
8455  { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
8456  { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
8457  { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
8458  // AVX 256-bit support
8459  { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
8460  { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
8461  { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
8462  { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
8463  { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
8464  { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
8465  // AVX512 support
8466  { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
8467  { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
8468  { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
8469  { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
8470  { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
8471  { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
8472  { X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
8473  { X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
8474  { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
8475  { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
8476  { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
8477  { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
8478  { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
8479  { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
8480  { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
8481  { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
8482  { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
8483  { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
8484 };
8485 
8486 static const uint16_t ReplaceableInstrsAVX2[][3] = {
8487  //PackedSingle PackedDouble PackedInt
8488  { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
8489  { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
8490  { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
8491  { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
8492  { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
8493  { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
8494  { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
8495  { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
8496  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
8497  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
8498  { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
8499  { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
8500  { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
8501  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
8502  { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
8503  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
8504  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
8505  { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
8506  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
8507  { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
8508  { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
8509 };
8510 
8511 static const uint16_t ReplaceableInstrsAVX512[][4] = {
8512  // Two integer columns for 64-bit and 32-bit elements.
8513  //PackedSingle PackedDouble PackedInt PackedInt
8514  { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
8515  { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
8516  { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
8517  { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
8518  { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
8519  { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
8520  { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
8521  { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
8522  { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
8523  { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
8524  { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
8525  { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
8526  { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
8527  { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
8528  { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
8529 };
8530 
8531 static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
8532  // Two integer columns for 64-bit and 32-bit elements.
8533  //PackedSingle PackedDouble PackedInt PackedInt
8534  { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
8535  { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
8536  { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
8537  { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
8538  { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
8539  { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
8540  { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
8541  { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
8542  { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
8543  { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
8544  { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
8545  { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
8546  { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
8547  { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
8548  { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
8549  { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
8550  { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
8551  { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
8552  { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
8553  { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
8554  { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
8555  { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
8556  { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
8557  { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
8558 };
8559 
8560 static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
8561  // Two integer columns for 64-bit and 32-bit elements.
8562  //PackedSingle PackedDouble
8563  //PackedInt PackedInt
8564  { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk,
8565  X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk },
8566  { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz,
8567  X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz },
8568  { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk,
8569  X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk },
8570  { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz,
8571  X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz },
8572  { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk,
8573  X86::VPANDQZ128rmk, X86::VPANDDZ128rmk },
8574  { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz,
8575  X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz },
8576  { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk,
8577  X86::VPANDQZ128rrk, X86::VPANDDZ128rrk },
8578  { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz,
8579  X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz },
8580  { X86::VORPSZ128rmk, X86::VORPDZ128rmk,
8581  X86::VPORQZ128rmk, X86::VPORDZ128rmk },
8582  { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz,
8583  X86::VPORQZ128rmkz, X86::VPORDZ128rmkz },
8584  { X86::VORPSZ128rrk, X86::VORPDZ128rrk,
8585  X86::VPORQZ128rrk, X86::VPORDZ128rrk },
8586  { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz,
8587  X86::VPORQZ128rrkz, X86::VPORDZ128rrkz },
8588  { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk,
8589  X86::VPXORQZ128rmk, X86::VPXORDZ128rmk },
8590  { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz,
8591  X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz },
8592  { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk,
8593  X86::VPXORQZ128rrk, X86::VPXORDZ128rrk },
8594  { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz,
8595  X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz },
8596  { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk,
8597  X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk },
8598  { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz,
8599  X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz },
8600  { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk,
8601  X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk },
8602  { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz,
8603  X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz },
8604  { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk,
8605  X86::VPANDQZ256rmk, X86::VPANDDZ256rmk },
8606  { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz,
8607  X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz },
8608  { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk,
8609  X86::VPANDQZ256rrk, X86::VPANDDZ256rrk },
8610  { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz,
8611  X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz },
8612  { X86::VORPSZ256rmk, X86::VORPDZ256rmk,
8613  X86::VPORQZ256rmk, X86::VPORDZ256rmk },
8614  { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz,
8615  X86::VPORQZ256rmkz, X86::VPORDZ256rmkz },
8616  { X86::VORPSZ256rrk, X86::VORPDZ256rrk,
8617  X86::VPORQZ256rrk, X86::VPORDZ256rrk },
8618  { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz,
8619  X86::VPORQZ256rrkz, X86::VPORDZ256rrkz },
8620  { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk,
8621  X86::VPXORQZ256rmk, X86::VPXORDZ256rmk },
8622  { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz,
8623  X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz },
8624  { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk,
8625  X86::VPXORQZ256rrk, X86::VPXORDZ256rrk },
8626  { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz,
8627  X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz },
8628  { X86::VANDNPSZrmk, X86::VANDNPDZrmk,
8629  X86::VPANDNQZrmk, X86::VPANDNDZrmk },
8630  { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz,
8631  X86::VPANDNQZrmkz, X86::VPANDNDZrmkz },
8632  { X86::VANDNPSZrrk, X86::VANDNPDZrrk,
8633  X86::VPANDNQZrrk, X86::VPANDNDZrrk },
8634  { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz,
8635  X86::VPANDNQZrrkz, X86::VPANDNDZrrkz },
8636  { X86::VANDPSZrmk, X86::VANDPDZrmk,
8637  X86::VPANDQZrmk, X86::VPANDDZrmk },
8638  { X86::VANDPSZrmkz, X86::VANDPDZrmkz,
8639  X86::VPANDQZrmkz, X86::VPANDDZrmkz },
8640  { X86::VANDPSZrrk, X86::VANDPDZrrk,
8641  X86::VPANDQZrrk, X86::VPANDDZrrk },
8642  { X86::VANDPSZrrkz, X86::VANDPDZrrkz,
8643  X86::VPANDQZrrkz, X86::VPANDDZrrkz },
8644  { X86::VORPSZrmk, X86::VORPDZrmk,
8645  X86::VPORQZrmk, X86::VPORDZrmk },
8646  { X86::VORPSZrmkz, X86::VORPDZrmkz,
8647  X86::VPORQZrmkz, X86::VPORDZrmkz },
8648  { X86::VORPSZrrk, X86::VORPDZrrk,
8649  X86::VPORQZrrk, X86::VPORDZrrk },
8650  { X86::VORPSZrrkz, X86::VORPDZrrkz,
8651  X86::VPORQZrrkz, X86::VPORDZrrkz },
8652  { X86::VXORPSZrmk, X86::VXORPDZrmk,
8653  X86::VPXORQZrmk, X86::VPXORDZrmk },
8654  { X86::VXORPSZrmkz, X86::VXORPDZrmkz,
8655  X86::VPXORQZrmkz, X86::VPXORDZrmkz },
8656  { X86::VXORPSZrrk, X86::VXORPDZrrk,
8657  X86::VPXORQZrrk, X86::VPXORDZrrk },
8658  { X86::VXORPSZrrkz, X86::VXORPDZrrkz,
8659  X86::VPXORQZrrkz, X86::VPXORDZrrkz },
8660  // Broadcast loads can be handled the same as masked operations to avoid
8661  // changing element size.
8662  { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb,
8663  X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb },
8664  { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb,
8665  X86::VPANDQZ128rmb, X86::VPANDDZ128rmb },
8666  { X86::VORPSZ128rmb, X86::VORPDZ128rmb,
8667  X86::VPORQZ128rmb, X86::VPORDZ128rmb },
8668  { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb,
8669  X86::VPXORQZ128rmb, X86::VPXORDZ128rmb },
8670  { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb,
8671  X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb },
8672  { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb,
8673  X86::VPANDQZ256rmb, X86::VPANDDZ256rmb },
8674  { X86::VORPSZ256rmb, X86::VORPDZ256rmb,
8675  X86::VPORQZ256rmb, X86::VPORDZ256rmb },
8676  { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb,
8677  X86::VPXORQZ256rmb, X86::VPXORDZ256rmb },
8678  { X86::VANDNPSZrmb, X86::VANDNPDZrmb,
8679  X86::VPANDNQZrmb, X86::VPANDNDZrmb },
8680  { X86::VANDPSZrmb, X86::VANDPDZrmb,
8681  X86::VPANDQZrmb, X86::VPANDDZrmb },
8682  { X86::VANDPSZrmb, X86::VANDPDZrmb,
8683  X86::VPANDQZrmb, X86::VPANDDZrmb },
8684  { X86::VORPSZrmb, X86::VORPDZrmb,
8685  X86::VPORQZrmb, X86::VPORDZrmb },
8686  { X86::VXORPSZrmb, X86::VXORPDZrmb,
8687  X86::VPXORQZrmb, X86::VPXORDZrmb },
8688  { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
8689  X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
8690  { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk,
8691  X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk },
8692  { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk,
8693  X86::VPORQZ128rmbk, X86::VPORDZ128rmbk },
8694  { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk,
8695  X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk },
8696  { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
8697  X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
8698  { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk,
8699  X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk },
8700  { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk,
8701  X86::VPORQZ256rmbk, X86::VPORDZ256rmbk },
8702  { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk,
8703  X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk },
8704  { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk,
8705  X86::VPANDNQZrmbk, X86::VPANDNDZrmbk },
8706  { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
8707  X86::VPANDQZrmbk, X86::VPANDDZrmbk },
8708  { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
8709  X86::VPANDQZrmbk, X86::VPANDDZrmbk },
8710  { X86::VORPSZrmbk, X86::VORPDZrmbk,
8711  X86::VPORQZrmbk, X86::VPORDZrmbk },
8712  { X86::VXORPSZrmbk, X86::VXORPDZrmbk,
8713  X86::VPXORQZrmbk, X86::VPXORDZrmbk },
8714  { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
8715  X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
8716  { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
8717  X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
8718  { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz,
8719  X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz },
8720  { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
8721  X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
8722  { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
8723  X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
8724  { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
8725  X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
8726  { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz,
8727  X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz },
8728  { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
8729  X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
8730  { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz,
8731  X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz },
8732  { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
8733  X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
8734  { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
8735  X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
8736  { X86::VORPSZrmbkz, X86::VORPDZrmbkz,
8737  X86::VPORQZrmbkz, X86::VPORDZrmbkz },
8738  { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz,
8739  X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
8740 };
8741 
8742 // FIXME: Some shuffle and unpack instructions have equivalents in different
8743 // domains, but they require a bit more work than just switching opcodes.
8744 
8745 static const uint16_t *lookup(unsigned opcode, unsigned domain,
8746  ArrayRef<uint16_t[3]> Table) {
8747  for (const uint16_t (&Row)[3] : Table)
8748  if (Row[domain-1] == opcode)
8749  return Row;
8750  return nullptr;
8751 }
8752 
8753 static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
8754  ArrayRef<uint16_t[4]> Table) {
8755  // If this is the integer domain make sure to check both integer columns.
8756  for (const uint16_t (&Row)[4] : Table)
8757  if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
8758  return Row;
8759  return nullptr;
8760 }
8761 
8762 std::pair<uint16_t, uint16_t>
8763 X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
8764  uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
8765  unsigned opcode = MI.getOpcode();
8766  uint16_t validDomains = 0;
8767  if (domain) {
8768  if (lookup(MI.getOpcode(), domain, ReplaceableInstrs)) {
8769  validDomains = 0xe;
8770  } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
8771  validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
8772  } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
8773  validDomains = 0xe;
8774  } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
8775  validDomains = Subtarget.hasDQI() ? 0xe : 0x8;
8776  } else if (const uint16_t *table = lookupAVX512(opcode, domain,
8777  ReplaceableInstrsAVX512DQMasked)) {
8778  if (domain == 1 || (domain == 3 && table[3] == opcode))
8779  validDomains = Subtarget.hasDQI() ? 0xa : 0x8;
8780  else
8781  validDomains = Subtarget.hasDQI() ? 0xc : 0x8;
8782  }
8783  }
8784  return std::make_pair(domain, validDomains);
8785 }
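
The pair returned above is (current domain, bitmask of legal domains). Going by the column order of the replacement tables, domain 1 is PackedSingle, 2 is PackedDouble and 3 is PackedInt, and bit N of the mask is set when domain N is available (so 0xe allows all three, 0x6 only single/double, 0x8 forces packed-int). A small sketch with illustrative names that decodes the mask:

// Sketch only: decode the validDomains bitmask from getExecutionDomain.
#include <cstdint>
#include <cstdio>

void printValidDomains(uint16_t ValidDomains) {
  static const char *Names[] = {"", "PackedSingle", "PackedDouble",
                                "PackedInt"};
  for (unsigned D = 1; D <= 3; ++D)
    if (ValidDomains & (1u << D))
      std::printf("domain %u (%s) is legal\n", D, Names[D]);
}
// e.g. 0xe -> all three domains, 0x6 -> single/double only,
//      0x8 -> the instruction must stay in the packed-integer domain.
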
8786 
8787 void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
8788  assert(Domain>0 && Domain<4 && "Invalid execution domain");
8789  uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
8790  assert(dom && "Not an SSE instruction");
8791  const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
8792  if (!table) { // try the other table
8793  assert((Subtarget.hasAVX2() || Domain < 3) &&
8794  "256-bit vector operations only available in AVX2");
8795  table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
8796  }
8797  if (!table) { // try the AVX512 table
8798  assert(Subtarget.hasAVX512() && "Requires AVX-512");
8799  table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
8800  // Don't change integer Q instructions to D instructions.
8801  if (table && Domain == 3 && table[3] == MI.getOpcode())
8802  Domain = 4;
8803  }
8804  if (!table) { // try the AVX512DQ table
8805  assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
8806  table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
8807  // Don't change integer Q instructions to D instructions and
8808  // use D instructions if we started with a PS instruction.
8809  if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
8810  Domain = 4;
8811  }
8812  if (!table) { // try the AVX512DQMasked table
8813  assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
8814  table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
8815  if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
8816  Domain = 4;
8817  }
8818  assert(table && "Cannot change domain");
8819  MI.setDesc(get(table[Domain - 1]));
8820 }
8821 
8822 /// Return the noop instruction to use for a noop.
8823 void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
8824  NopInst.setOpcode(X86::NOOP);
8825 }
8826 
8827 bool X86InstrInfo::isHighLatencyDef(int opc) const {
8828  switch (opc) {
8829  default: return false;
8830  case X86::DIVPDrm:
8831  case X86::DIVPDrr:
8832  case X86::DIVPSrm:
8833  case X86::DIVPSrr:
8834  case X86::DIVSDrm:
8835  case X86::DIVSDrm_Int:
8836  case X86::DIVSDrr:
8837  case X86::DIVSDrr_Int:
8838  case X86::DIVSSrm:
8839  case X86::DIVSSrm_Int:
8840  case X86::DIVSSrr:
8841  case X86::DIVSSrr_Int:
8842  case X86::SQRTPDm:
8843  case X86::SQRTPDr:
8844  case X86::SQRTPSm:
8845  case X86::SQRTPSr:
8846  case X86::SQRTSDm:
8847  case X86::SQRTSDm_Int:
8848  case X86::SQRTSDr:
8849  case X86::SQRTSDr_Int:
8850  case X86::SQRTSSm:
8851  case X86::SQRTSSm_Int:
8852  case X86::SQRTSSr:
8853  case X86::SQRTSSr_Int:
8854  // AVX instructions with high latency
8855  case X86::VDIVPDrm:
8856  case X86::VDIVPDrr:
8857  case X86::VDIVPDYrm:
8858  case X86::VDIVPDYrr:
8859  case X86::VDIVPSrm:
8860  case X86::VDIVPSrr:
8861  case X86::VDIVPSYrm:
8862  case X86::VDIVPSYrr:
8863  case X86::VDIVSDrm:
8864  case X86::VDIVSDrm_Int:
8865  case X86::VDIVSDrr:
8866  case X86::VDIVSDrr_Int:
8867  case X86::VDIVSSrm:
8868  case X86::VDIVSSrm_Int:
8869  case X86::VDIVSSrr:
8870  case X86::VDIVSSrr_Int:
8871  case X86::VSQRTPDm:
8872  case X86::VSQRTPDr:
8873  case X86::VSQRTPDYm:
8874  case X86::VSQRTPDYr:
8875  case X86::VSQRTPSm:
8876  case X86::VSQRTPSr:
8877  case X86::VSQRTPSYm:
8878  case X86::VSQRTPSYr:
8879  case X86::VSQRTSDm:
8880  case X86::VSQRTSDm_Int:
8881  case X86::VSQRTSDr:
8882  case X86::VSQRTSDr_Int:
8883  case X86::VSQRTSSm:
8884  case X86::VSQRTSSm_Int:
8885  case X86::VSQRTSSr:
8886  case X86::VSQRTSSr_Int:
8887  // AVX512 instructions with high latency
8888  case X86::VDIVPDZ128rm:
8889  case X86::VDIVPDZ128rmb:
8890  case X86::VDIVPDZ128rmbk:
8891  case X86::VDIVPDZ128rmbkz:
8892  case X86::VDIVPDZ128rmk:
8893  case X86::VDIVPDZ128rmkz:
8894  case X86::VDIVPDZ128rr:
8895  case X86::VDIVPDZ128rrk:
8896  case X86::VDIVPDZ128rrkz:
8897  case X86::VDIVPDZ256rm:
8898  case X86::VDIVPDZ256rmb:
8899  case X86::VDIVPDZ256rmbk:
8900  case X86::VDIVPDZ256rmbkz:
8901  case X86::VDIVPDZ256rmk:
8902  case X86::VDIVPDZ256rmkz:
8903  case X86::VDIVPDZ256rr:
8904  case X86::VDIVPDZ256rrk:
8905  case X86::VDIVPDZ256rrkz:
8906  case X86::VDIVPDZrb:
8907  case X86::VDIVPDZrbk:
8908  case X86::VDIVPDZrbkz:
8909  case X86::VDIVPDZrm:
8910  case X86::VDIVPDZrmb:
8911  case X86::VDIVPDZrmbk:
8912  case X86::VDIVPDZrmbkz:
8913  case X86::VDIVPDZrmk:
8914  case X86::VDIVPDZrmkz:
8915  case X86::VDIVPDZrr:
8916  case X86::VDIVPDZrrk:
8917  case X86::VDIVPDZrrkz:
8918  case X86::VDIVPSZ128rm:
8919  case X86::VDIVPSZ128rmb:
8920  case X86::VDIVPSZ128rmbk:
8921  case X86::VDIVPSZ128rmbkz:
8922  case X86::VDIVPSZ128rmk:
8923  case X86::VDIVPSZ128rmkz:
8924  case X86::VDIVPSZ128rr:
8925  case X86::VDIVPSZ128rrk:
8926  case X86::VDIVPSZ128rrkz:
8927  case X86::VDIVPSZ256rm:
8928  case X86::VDIVPSZ256rmb:
8929  case X86::VDIVPSZ256rmbk:
8930  case X86::VDIVPSZ256rmbkz:
8931  case X86::VDIVPSZ256rmk:
8932  case X86::VDIVPSZ256rmkz:
8933  case X86::VDIVPSZ256rr:
8934  case X86::VDIVPSZ256rrk:
8935  case X86::VDIVPSZ256rrkz:
8936  case X86::VDIVPSZrb:
8937  case X86::VDIVPSZrbk:
8938  case X86::VDIVPSZrbkz:
8939  case X86::VDIVPSZrm:
8940  case X86::VDIVPSZrmb:
8941  case X86::VDIVPSZrmbk:
8942  case X86::VDIVPSZrmbkz:
8943  case X86::VDIVPSZrmk:
8944  case X86::VDIVPSZrmkz:
8945  case X86::VDIVPSZrr:
8946  case X86::VDIVPSZrrk:
8947  case X86::VDIVPSZrrkz:
8948  case X86::VDIVSDZrm:
8949  case X86::VDIVSDZrr:
8950  case X86::VDIVSDZrm_Int:
8951  case X86::VDIVSDZrm_Intk:
8952  case X86::VDIVSDZrm_Intkz:
8953  case X86::VDIVSDZrr_Int:
8954  case X86::VDIVSDZrr_Intk:
8955  case X86::VDIVSDZrr_Intkz:
8956  case X86::VDIVSDZrrb:
8957  case X86::VDIVSDZrrbk:
8958  case X86::VDIVSDZrrbkz:
8959  case X86::VDIVSSZrm:
8960  case X86::VDIVSSZrr:
8961  case X86::VDIVSSZrm_Int:
8962  case X86::VDIVSSZrm_Intk:
8963  case X86::VDIVSSZrm_Intkz:
8964  case X86::VDIVSSZrr_Int:
8965  case X86::VDIVSSZrr_Intk:
8966  case X86::VDIVSSZrr_Intkz:
8967  case X86::VDIVSSZrrb:
8968  case X86::VDIVSSZrrbk:
8969  case X86::VDIVSSZrrbkz:
8970  case X86::VSQRTPDZ128m:
8971  case X86::VSQRTPDZ128mb:
8972  case X86::VSQRTPDZ128mbk:
8973  case X86::VSQRTPDZ128mbkz:
8974  case X86::VSQRTPDZ128mk:
8975  case X86::VSQRTPDZ128mkz:
8976  case X86::VSQRTPDZ128r:
8977  case X86::VSQRTPDZ128rk:
8978  case X86::VSQRTPDZ128rkz:
8979  case X86::VSQRTPDZ256m:
8980  case X86::VSQRTPDZ256mb:
8981  case X86::VSQRTPDZ256mbk:
8982  case X86::VSQRTPDZ256mbkz:
8983  case X86::VSQRTPDZ256mk:
8984  case X86::VSQRTPDZ256mkz:
8985  case X86::VSQRTPDZ256r:
8986  case X86::VSQRTPDZ256rk:
8987  case X86::VSQRTPDZ256rkz:
8988  case X86::VSQRTPDZm:
8989  case X86::VSQRTPDZmb:
8990  case X86::VSQRTPDZmbk:
8991  case X86::VSQRTPDZmbkz:
8992  case X86::VSQRTPDZmk:
8993  case X86::VSQRTPDZmkz:
8994  case X86::VSQRTPDZr:
8995  case X86::VSQRTPDZrb:
8996  case X86::VSQRTPDZrbk:
8997  case X86::VSQRTPDZrbkz:
8998  case X86::VSQRTPDZrk:
8999  case X86::VSQRTPDZrkz:
9000  case X86::VSQRTPSZ128m:
9001  case X86::VSQRTPSZ128mb:
9002  case X86::VSQRTPSZ128mbk:
9003  case X86::VSQRTPSZ128mbkz:
9004  case X86::VSQRTPSZ128mk:
9005  case X86::VSQRTPSZ128mkz:
9006  case X86::VSQRTPSZ128r:
9007  case X86::VSQRTPSZ128rk:
9008  case X86::VSQRTPSZ128rkz:
9009  case X86::VSQRTPSZ256m:
9010  case X86::VSQRTPSZ256mb:
9011  case X86::VSQRTPSZ256mbk:
9012  case X86::VSQRTPSZ256mbkz:
9013  case X86::VSQRTPSZ256mk:
9014  case X86::VSQRTPSZ256mkz:
9015  case X86::VSQRTPSZ256r:
9016  case X86::VSQRTPSZ256rk:
9017  case X86::VSQRTPSZ256rkz:
9018  case X86::VSQRTPSZm:
9019  case X86::VSQRTPSZmb:
9020  case X86::VSQRTPSZmbk:
9021  case X86::VSQRTPSZmbkz:
9022  case X86::VSQRTPSZmk:
9023  case X86::VSQRTPSZmkz:
9024  case X86::VSQRTPSZr:
9025  case X86::VSQRTPSZrb:
9026  case X86::VSQRTPSZrbk:
9027  case X86::VSQRTPSZrbkz:
9028  case X86::VSQRTPSZrk:
9029  case X86::VSQRTPSZrkz:
9030  case X86::VSQRTSDZm:
9031  case X86::VSQRTSDZm_Int:
9032  case X86::VSQRTSDZm_Intk:
9033  case X86::VSQRTSDZm_Intkz:
9034  case X86::VSQRTSDZr:
9035  case X86::VSQRTSDZr_Int:
9036  case X86::VSQRTSDZr_Intk:
9037  case X86::VSQRTSDZr_Intkz:
9038  case X86::VSQRTSDZrb_Int:
9039  case X86::VSQRTSDZrb_Intk:
9040  case X86::VSQRTSDZrb_Intkz:
9041  case X86::VSQRTSSZm:
9042  case X86::VSQRTSSZm_Int:
9043  case X86::VSQRTSSZm_Intk:
9044  case X86::VSQRTSSZm_Intkz:
9045  case X86::VSQRTSSZr:
9046  case X86::VSQRTSSZr_Int:
9047  case X86::VSQRTSSZr_Intk:
9048  case X86::VSQRTSSZr_Intkz:
9049  case X86::VSQRTSSZrb_Int:
9050  case X86::VSQRTSSZrb_Intk:
9051  case X86::VSQRTSSZrb_Intkz:
9052 
9053  case X86::VGATHERDPDYrm:
9054  case X86::VGATHERDPDZ128rm:
9055  case X86::VGATHERDPDZ256rm:
9056  case X86::VGATHERDPDZrm:
9057  case X86::VGATHERDPDrm:
9058  case X86::VGATHERDPSYrm:
9059  case X86::VGATHERDPSZ128rm:
9060  case X86::VGATHERDPSZ256rm:
9061  case X86::VGATHERDPSZrm:
9062  case X86::VGATHERDPSrm:
9063  case X86::VGATHERPF0DPDm:
9064  case X86::VGATHERPF0DPSm:
9065  case X86::VGATHERPF0QPDm:
9066  case X86::VGATHERPF0QPSm:
9067  case X86::VGATHERPF1DPDm:
9068  case X86::VGATHERPF1DPSm:
9069  case X86::VGATHERPF1QPDm:
9070  case X86::VGATHERPF1QPSm:
9071  case X86::VGATHERQPDYrm:
9072  case X86::VGATHERQPDZ128rm:
9073  case X86::VGATHERQPDZ256rm:
9074  case X86::VGATHERQPDZrm:
9075  case X86::VGATHERQPDrm:
9076  case X86::VGATHERQPSYrm:
9077  case X86::VGATHERQPSZ128rm:
9078  case X86::VGATHERQPSZ256rm:
9079  case X86::VGATHERQPSZrm:
9080  case X86::VGATHERQPSrm:
9081  case X86::VPGATHERDDYrm:
9082  case X86::VPGATHERDDZ128rm:
9083  case X86::VPGATHERDDZ256rm:
9084  case X86::VPGATHERDDZrm:
9085  case X86::VPGATHERDDrm:
9086  case X86::VPGATHERDQYrm:
9087  case X86::VPGATHERDQZ128rm:
9088  case X86::VPGATHERDQZ256rm:
9089  case X86::VPGATHERDQZrm:
9090  case X86::VPGATHERDQrm:
9091  case X86::VPGATHERQDYrm:
9092  case X86::VPGATHERQDZ128rm:
9093  case X86::VPGATHERQDZ256rm:
9094  case X86::VPGATHERQDZrm:
9095  case X86::VPGATHERQDrm:
9096  case X86::VPGATHERQQYrm:
9097  case X86::VPGATHERQQZ128rm:
9098  case X86::VPGATHERQQZ256rm:
9099  case X86::VPGATHERQQZrm:
9100  case X86::VPGATHERQQrm:
9101  case X86::VSCATTERDPDZ128mr:
9102  case X86::VSCATTERDPDZ256mr:
9103  case X86::VSCATTERDPDZmr:
9104  case X86::VSCATTERDPSZ128mr:
9105  case X86::VSCATTERDPSZ256mr:
9106  case X86::VSCATTERDPSZmr:
9107  case X86::VSCATTERPF0DPDm:
9108  case X86::VSCATTERPF0DPSm:
9109  case X86::VSCATTERPF0QPDm:
9110  case X86::VSCATTERPF0QPSm:
9111  case X86::VSCATTERPF1DPDm:
9112  case X86::VSCATTERPF1DPSm:
9113  case X86::VSCATTERPF1QPDm:
9114  case X86::VSCATTERPF1QPSm:
9115  case X86::VSCATTERQPDZ128mr:
9116  case X86::VSCATTERQPDZ256mr:
9117  case X86::VSCATTERQPDZmr:
9118  case X86::VSCATTERQPSZ128mr:
9119  case X86::VSCATTERQPSZ256mr:
9120  case X86::VSCATTERQPSZmr:
9121  case X86::VPSCATTERDDZ128mr:
9122  case X86::VPSCATTERDDZ256mr:
9123  case X86::VPSCATTERDDZmr:
9124  case X86::VPSCATTERDQZ128mr:
9125  case X86::VPSCATTERDQZ256mr:
9126  case X86::VPSCATTERDQZmr:
9127  case X86::VPSCATTERQDZ128mr:
9128  case X86::VPSCATTERQDZ256mr:
9129  case X86::VPSCATTERQDZmr:
9130  case X86::VPSCATTERQQZ128mr:
9131  case X86::VPSCATTERQQZ256mr:
9132  case X86::VPSCATTERQQZmr:
9133  return true;
9134  }
9135 }
9136 
9137 bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
9138  const MachineRegisterInfo *MRI,
9139  const MachineInstr &DefMI,
9140  unsigned DefIdx,
9141  const MachineInstr &UseMI,
9142  unsigned UseIdx) const {
9143  return isHighLatencyDef(DefMI.getOpcode());
9144 }
9145 
9146 bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
9147  const MachineBasicBlock *MBB) const {
9148  assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
9149  "Reassociation needs binary operators");
9150 
9151  // Integer binary math/logic instructions have a third source operand:
9152  // the EFLAGS register. That operand must be both defined here and never
9153  // used; i.e., it must be dead. If the EFLAGS operand is live, then we
9154  // cannot change anything because rearranging the operands could affect other
9155  // instructions that depend on the exact status flags (zero, sign, etc.)
9156  // that are set by using these particular operands with this operation.
9157  if (Inst.getNumOperands() == 4) {
9158  assert(Inst.getOperand(3).isReg() &&
9159  Inst.getOperand(3).getReg() == X86::EFLAGS &&
9160  "Unexpected operand in reassociable instruction");
9161  if (!Inst.getOperand(3).isDead())
9162  return false;
9163  }
9164 
9165  return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
9166 }
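
A hedged restatement of the EFLAGS constraint above; every type and name here is a hypothetical stand-in, not an LLVM class.

// Sketch only: an integer ALU op is reassociable only when its implicit
// flags definition is provably unused (dead).
struct OpSketch {
  bool IsBinary;      // exactly two value inputs
  bool DefinesFlags;  // integer ALU ops implicitly define EFLAGS
  bool FlagsAreDead;  // no later reader of those flags
};

bool hasReassociableOperandsSketch(const OpSketch &Op) {
  if (!Op.IsBinary)
    return false;
  // Rearranging operands changes the intermediate flag values, so a live
  // flags reader could observe a difference; require the def to be dead.
  if (Op.DefinesFlags && !Op.FlagsAreDead)
    return false;
  return true;
}
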
9167 
9168 // TODO: There are many more machine instruction opcodes to match:
9169 // 1. Other data types (integer, vectors)
9170 // 2. Other math / logic operations (xor, or)
9171 // 3. Other forms of the same operation (intrinsics and other variants)
9172 bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
9173  switch (Inst.getOpcode()) {
9174  case X86::AND8rr:
9175  case X86::AND16rr:
9176  case X86::AND32rr:
9177  case X86::AND64rr:
9178  case X86::OR8rr:
9179  case X86::OR16rr:
9180  case X86::OR32rr:
9181  case X86::OR64rr:
9182  case X86::XOR8rr:
9183  case X86::XOR16rr:
9184  case X86::XOR32rr:
9185  case X86::XOR64rr:
9186  case X86::IMUL16rr:
9187  case X86::IMUL32rr:
9188  case X86::IMUL64rr:
9189  case X86::PANDrr:
9190  case X86::PORrr:
9191  case X86::PXORrr:
9192  case X86::ANDPDrr:
9193  case X86::ANDPSrr:
9194  case X86::ORPDrr:
9195  case X86::ORPSrr:
9196  case X86::XORPDrr:
9197  case X86::XORPSrr:
9198  case X86::PADDBrr:
9199  case X86::PADDWrr:
9200  case X86::PADDDrr:
9201  case X86::PADDQrr:
9202  case X86::VPANDrr:
9203  case X86::VPANDYrr:
9204  case X86::VPANDDZ128rr:
9205  case X86::VPANDDZ256rr:
9206  case X86::VPANDDZrr:
9207  case X86::VPANDQZ128rr:
9208  case X86::VPANDQZ256rr:
9209  case X86::VPANDQZrr:
9210  case X86::VPORrr:
9211  case X86::VPORYrr:
9212  case X86::VPORDZ128rr:
9213  case X86::VPORDZ256rr:
9214  case X86::VPORDZrr:
9215  case X86::VPORQZ128rr:
9216  case X86::VPORQZ256rr:
9217  case X86::VPORQZrr:
9218  case X86::VPXORrr:
9219  case X86::VPXORYrr:
9220  case X86::VPXORDZ128rr:
9221  case X86::VPXORDZ256rr:
9222  case X86::VPXORDZrr:
9223  case X86::VPXORQZ128rr:
9224  case X86::VPXORQZ256rr:
9225  case X86::VPXORQZrr:
9226  case X86::VANDPDrr:
9227  case X86::VANDPSrr:
9228  case X86::VANDPDYrr:
9229  case X86::VANDPSYrr:
9230  case X86::VANDPDZ128rr:
9231  case X86::VANDPSZ128rr:
9232  case X86::VANDPDZ256rr:
9233  case X86::VANDPSZ256rr:
9234  case X86::VANDPDZrr:
9235  case X86::VANDPSZrr:
9236  case X86::VORPDrr:
9237  case X86::VORPSrr:
9238  case X86::VORPDYrr:
9239  case X86::VORPSYrr:
9240  case X86::VORPDZ128rr:
9241  case X86::VORPSZ128rr:
9242  case X86::VORPDZ256rr:
9243  case X86::VORPSZ256rr:
9244  case X86::VORPDZrr:
9245  case X86::VORPSZrr:
9246  case X86::VXORPDrr:
9247  case X86::VXORPSrr:
9248  case X86::VXORPDYrr:
9249  case X86::VXORPSYrr:
9250  case X86::VXORPDZ128rr:
9251  case X86::VXORPSZ128rr:
9252  case X86::VXORPDZ256rr:
9253  case X86::VXORPSZ256rr:
9254  case X86::VXORPDZrr:
9255  case X86::VXORPSZrr:
9256  case X86::KADDBrr:
9257  case X86::KADDWrr:
9258  case X86::KADDDrr:
9259  case X86::KADDQrr:
9260  case X86::KANDBrr:
9261  case X86::KANDWrr:
9262  case X86::KANDDrr:
9263  case X86::KANDQrr:
9264  case X86::KORBrr:
9265  case X86::KORWrr:
9266  case X86::KORDrr:
9267  case X86::KORQrr:
9268  case X86::KXORBrr:
9269  case X86::KXORWrr:
9270  case X86::KXORDrr:
9271  case X86::KXORQrr:
9272  case X86::VPADDBrr:
9273  case X86::VPADDWrr:
9274  case X86::VPADDDrr:
9275  case X86::VPADDQrr:
9276  case X86::VPADDBYrr:
9277  case X86::VPADDWYrr:
9278  case X86::VPADDDYrr:
9279  case X86::VPADDQYrr:
9280  case X86::VPADDBZ128rr:
9281  case X86::VPADDWZ128rr:
9282  case X86::VPADDDZ128rr:
9283  case X86::VPADDQZ128rr:
9284  case X86::VPADDBZ256rr:
9285  case X86::VPADDWZ256rr:
9286  case X86::VPADDDZ256rr:
9287  case X86::VPADDQZ256rr:
9288  case X86::VPADDBZrr:
9289  case X86::VPADDWZrr:
9290  case X86::VPADDDZrr:
9291  case X86::VPADDQZrr:
9292  case X86::VPMULLWrr:
9293  case X86::VPMULLWYrr:
9294  case X86::VPMULLWZ128rr:
9295  case X86::VPMULLWZ256rr:
9296  case X86::VPMULLWZrr:
9297  case X86::VPMULLDrr:
9298  case X86::VPMULLDYrr:
9299  case X86::VPMULLDZ128rr:
9300  case X86::VPMULLDZ256rr:
9301  case X86::VPMULLDZrr:
9302  case X86::VPMULLQZ128rr:
9303  case X86::VPMULLQZ256rr:
9304  case X86::VPMULLQZrr:
9305  // Normal min/max instructions are not commutative because of NaN and signed
9306  // zero semantics, but these are. Thus, there's no need to check for global
9307  // relaxed math; the instructions themselves have the properties we need.
9308  case X86::MAXCPDrr:
9309  case X86::MAXCPSrr:
9310  case X86::MAXCSDrr:
9311  case X86::MAXCSSrr:
9312  case X86::MINCPDrr:
9313  case X86::MINCPSrr:
9314  case X86::MINCSDrr:
9315  case X86::MINCSSrr:
9316  case X86::VMAXCPDrr:
9317  case X86::VMAXCPSrr:
9318  case X86::VMAXCPDYrr:
9319  case X86::VMAXCPSYrr:
9320  case X86::VMAXCPDZ128rr:
9321  case X86::VMAXCPSZ128rr:
9322  case X86::VMAXCPDZ256rr:
9323  case X86::VMAXCPSZ256rr:
9324  case X86::VMAXCPDZrr:
9325  case X86::VMAXCPSZrr:
9326  case X86::VMAXCSDrr:
9327  case X86::VMAXCSSrr:
9328  case X86::VMAXCSDZrr:
9329  case X86::VMAXCSSZrr:
9330  case X86::VMINCPDrr:
9331  case X86::VMINCPSrr:
9332  case X86::VMINCPDYrr:
9333  case X86::VMINCPSYrr:
9334  case X86::VMINCPDZ128rr:
9335  case X86::VMINCPSZ128rr:
9336  case X86::VMINCPDZ256rr:
9337  case X86::VMINCPSZ256rr:
9338  case X86::VMINCPDZrr:
9339  case X86::VMINCPSZrr:
9340  case X86::VMINCSDrr:
9341  case X86::VMINCSSrr:
9342  case X86::VMINCSDZrr:
9343  case X86::VMINCSSZrr:
9344  return true;
9345  case X86::ADDPDrr:
9346  case X86::ADDPSrr:
9347  case X86::ADDSDrr:
9348  case X86::ADDSSrr:
9349  case X86::MULPDrr:
9350  case X86::MULPSrr:
9351  case X86::MULSDrr:
9352  case X86::MULSSrr:
9353  case X86::VADDPDrr:
9354  case X86::VADDPSrr:
9355  case X86::VADDPDYrr:
9356  case X86::VADDPSYrr:
9357  case X86::VADDPDZ128rr:
9358  case X86::VADDPSZ128rr:
9359  case X86::VADDPDZ256rr:
9360  case X86::VADDPSZ256rr:
9361  case X86::VADDPDZrr:
9362  case X86::VADDPSZrr:
9363  case X86::VADDSDrr:
9364  case X86::VADDSSrr:
9365  case X86::VADDSDZrr:
9366  case X86::VADDSSZrr:
9367  case X86::VMULPDrr:
9368  case X86::VMULPSrr:
9369  case X86::VMULPDYrr:
9370  case X86::VMULPSYrr:
9371  case X86::VMULPDZ128rr:
9372  case X86::VMULPSZ128rr:
9373  case X86::VMULPDZ256rr:
9374  case X86::VMULPSZ256rr:
9375  case X86::VMULPDZrr:
9376  case X86::VMULPSZrr:
9377  case X86::VMULSDrr:
9378  case X86::VMULSSrr:
9379  case X86::VMULSDZrr:
9380  case X86::VMULSSZrr:
9381  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
9382  default:
9383  return false;
9384  }
9385 }
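
The UnsafeFPMath gate on the floating-point add/mul cases above exists because FP addition and multiplication are not associative, so reassociation can change results. A tiny self-contained demo:

// Demo only: reassociating FP addition changes the answer.
#include <cstdio>

int main() {
  float A = 1e20f, B = -1e20f, C = 1.0f;
  // (A + B) + C == 1, but A + (B + C) == 0 because C is absorbed by B.
  std::printf("%g vs %g\n", (A + B) + C, A + (B + C));
  return 0;
}

The commutative min/max ("MAXC"/"MINC") cases are listed before this check because they already ignore NaN and signed-zero ordering, so no relaxed-math flag is needed for them.
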
9386 
9387 /// This is an architecture-specific helper function of reassociateOps.
9388 /// Set special operand attributes for new instructions after reassociation.
9389 void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
9390  MachineInstr &OldMI2,
9391  MachineInstr &NewMI1,
9392  MachineInstr &NewMI2) const {
9393  // Integer instructions define an implicit EFLAGS source register operand as
9394  // the third source (fourth total) operand.
9395  if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
9396  return;
9397 
9398  assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
9399  "Unexpected instruction type for reassociation");
9400 
9401  MachineOperand &OldOp1 = OldMI1.getOperand(3);
9402  MachineOperand &OldOp2 = OldMI2.getOperand(3);
9403  MachineOperand &NewOp1 = NewMI1.getOperand(3);
9404  MachineOperand &NewOp2 = NewMI2.getOperand(3);
9405 
9406  assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
9407  "Must have dead EFLAGS operand in reassociable instruction");
9408  assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
9409  "Must have dead EFLAGS operand in reassociable instruction");
9410 
9411  (void)OldOp1;
9412  (void)OldOp2;
9413 
9414  assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
9415  "Unexpected operand in reassociable instruction");
9416  assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
9417  "Unexpected operand in reassociable instruction");
9418 
9419  // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
9420  // of this pass or other passes. The EFLAGS operands must be dead in these new
9421  // instructions because the EFLAGS operands in the original instructions must
9422  // be dead in order for reassociation to occur.
9423  NewOp1.setIsDead();
9424  NewOp2.setIsDead();
9425 }
9426 
9427 std::pair<unsigned, unsigned>
9428 X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9429  return std::make_pair(TF, 0u);
9430 }
9431 
9432 ArrayRef<std::pair<unsigned, const char *>>
9433 X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9434  using namespace X86II;
9435  static const std::pair<unsigned, const char *> TargetFlags[] = {
9436  {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
9437  {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
9438  {MO_GOT, "x86-got"},
9439  {MO_GOTOFF, "x86-gotoff"},
9440  {MO_GOTPCREL, "x86-gotpcrel"},
9441  {MO_PLT, "x86-plt"},
9442  {MO_TLSGD, "x86-tlsgd"},
9443  {MO_TLSLD, "x86-tlsld"},
9444  {MO_TLSLDM, "x86-tlsldm"},
9445  {MO_GOTTPOFF, "x86-gottpoff"},
9446  {MO_INDNTPOFF, "x86-indntpoff"},
9447  {MO_TPOFF, "x86-tpoff"},
9448  {MO_DTPOFF, "x86-dtpoff"},
9449  {MO_NTPOFF, "x86-ntpoff"},
9450  {MO_GOTNTPOFF, "x86-gotntpoff"},
9451  {MO_DLLIMPORT, "x86-dllimport"},
9452  {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
9453  {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
9454  {MO_TLVP, "x86-tlvp"},
9455  {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
9456  {MO_SECREL, "x86-secrel"}};
9457  return makeArrayRef(TargetFlags);
9458 }
9459 
9460 bool X86InstrInfo::isTailCall(const MachineInstr &Inst) const {
9461  switch (Inst.getOpcode()) {
9462  case X86::TCRETURNdi:
9463  case X86::TCRETURNmi:
9464  case X86::TCRETURNri:
9465  case X86::TCRETURNdi64:
9466  case X86::TCRETURNmi64:
9467  case X86::TCRETURNri64:
9468  case X86::TAILJMPd:
9469  case X86::TAILJMPm:
9470  case X86::TAILJMPr:
9471  case X86::TAILJMPd64:
9472  case X86::TAILJMPm64:
9473  case X86::TAILJMPr64:
9474  case X86::TAILJMPm64_REX:
9475  case X86::TAILJMPr64_REX:
9476  return true;
9477  default:
9478  return false;
9479  }
9480 }
9481 
9482 namespace {
9483  /// Create Global Base Reg pass. This initializes the PIC
9484  /// global base register for x86-32.
9485  struct CGBR : public MachineFunctionPass {
9486  static char ID;
9487  CGBR() : MachineFunctionPass(ID) {}
9488 
9489  bool runOnMachineFunction(MachineFunction &MF) override {
9490  const X86TargetMachine *TM =
9491  static_cast<const X86TargetMachine *>(&MF.getTarget());
9492  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
9493 
9494  // Don't do anything if this is 64-bit as 64-bit PIC
9495  // uses RIP relative addressing.
9496  if (STI.is64Bit())
9497  return false;
9498 
9499  // Only emit a global base reg in PIC mode.
9500  if (!TM->isPositionIndependent())
9501  return false;
9502 
9503  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
9504  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
9505 
9506  // If we didn't need a GlobalBaseReg, don't insert code.
9507  if (GlobalBaseReg == 0)
9508  return false;
9509 
9510  // Insert the set of GlobalBaseReg into the first MBB of the function
9511  MachineBasicBlock &FirstMBB = MF.front();
9512  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
9513  DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
9514  MachineRegisterInfo &RegInfo = MF.getRegInfo();
9515  const X86InstrInfo *TII = STI.getInstrInfo();
9516 
9517  unsigned PC;
9518  if (STI.isPICStyleGOT())
9519  PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
9520  else
9521  PC = GlobalBaseReg;
9522 
9523  // Operand of MovePCtoStack is completely ignored by asm printer. It's
9524  // only used in JIT code emission as displacement to pc.
9525  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
9526 
9527  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
9528  // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
9529  if (STI.isPICStyleGOT()) {
9530  // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
9531  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
9532  .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
9533  X86II::MO_GOT_ABSOLUTE_ADDRESS);
9534  }
9535 
9536  return true;
9537  }
9538 
9539  StringRef getPassName() const override {
9540  return "X86 PIC Global Base Reg Initialization";
9541  }
9542 
9543  void getAnalysisUsage(AnalysisUsage &AU) const override {
9544  AU.setPreservesCFG();
9545  MachineFunctionPass::getAnalysisUsage(AU);
9546  }
9547  };
9548 }
9549 
9550 char CGBR::ID = 0;
9551 FunctionPass*
9552 llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
9553 
9554 namespace {
9555  struct LDTLSCleanup : public MachineFunctionPass {
9556  static char ID;
9557  LDTLSCleanup() : MachineFunctionPass(ID) {}
9558 
9559  bool runOnMachineFunction(MachineFunction &MF) override {
9560  if (skipFunction(*MF.getFunction()))
9561  return false;
9562 
9563  X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
9564  if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
9565  // No point folding accesses if there aren't at least two.
9566  return false;
9567  }
9568 
9569  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
9570  return VisitNode(DT->getRootNode(), 0);
9571  }
9572 
9573  // Visit the dominator subtree rooted at Node in pre-order.
9574  // If TLSBaseAddrReg is non-null, then use that to replace any
9575  // TLS_base_addr instructions. Otherwise, create the register
9576  // when the first such instruction is seen, and then use it
9577  // as we encounter more instructions.
9578  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
9579  MachineBasicBlock *BB = Node->getBlock();
9580  bool Changed = false;
9581 
9582  // Traverse the current block.
9583  for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
9584  ++I) {
9585  switch (I->getOpcode()) {
9586  case X86::TLS_base_addr32:
9587  case X86::TLS_base_addr64:
9588  if (TLSBaseAddrReg)
9589  I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
9590  else
9591  I = SetRegister(*I, &TLSBaseAddrReg);
9592  Changed = true;
9593  break;
9594  default:
9595  break;
9596  }
9597  }
9598 
9599  // Visit the children of this block in the dominator tree.
9600  for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
9601  I != E; ++I) {
9602  Changed |= VisitNode(*I, TLSBaseAddrReg);
9603  }
9604 
9605  return Changed;
9606  }
9607 
9608  // Replace the TLS_base_addr instruction I with a copy from
9609  // TLSBaseAddrReg, returning the new instruction.
9610  MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
9611  unsigned TLSBaseAddrReg) {
9612  MachineFunction *MF = I.getParent()->getParent();
9613  const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
9614  const bool is64Bit = STI.is64Bit();
9615  const X86InstrInfo *TII = STI.getInstrInfo();
9616 
9617  // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
9618  MachineInstr *Copy =
9619  BuildMI(*I.getParent(), I, I.getDebugLoc(),
9620  TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
9621  .addReg(TLSBaseAddrReg);
9622 
9623  // Erase the TLS_base_addr instruction.
9624  I.eraseFromParent();
9625 
9626  return Copy;
9627  }
9628 
9629  // Create a virtual register in *TLSBaseAddrReg, and populate it by
9630  // inserting a copy instruction after I. Returns the new instruction.
9631  MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
9632  MachineFunction *MF = I.getParent()->getParent();
9633  const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
9634  const bool is64Bit = STI.is64Bit();
9635  const X86InstrInfo *TII = STI.getInstrInfo();
9636 
9637  // Create a virtual register for the TLS base address.
9638  MachineRegisterInfo &RegInfo = MF->getRegInfo();
9639  *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
9640  ? &X86::GR64RegClass
9641  : &X86::GR32RegClass);
9642 
9643  // Insert a copy from RAX/EAX to TLSBaseAddrReg.
9644  MachineInstr *Next = I.getNextNode();
9645  MachineInstr *Copy =
9646  BuildMI(*I.getParent(), Next, I.getDebugLoc(),
9647  TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
9648  .addReg(is64Bit ? X86::RAX : X86::EAX);
9649 
9650  return Copy;
9651  }
9652 
9653  StringRef getPassName() const override {
9654  return "Local Dynamic TLS Access Clean-up";
9655  }
9656 
9657  void getAnalysisUsage(AnalysisUsage &AU) const override {
9658  AU.setPreservesCFG();
9659  AU.addRequired<MachineDominatorTree>();
9660  MachineFunctionPass::getAnalysisUsage(AU);
9661  }
9662  };
9663 }
9664 
9665 char LDTLSCleanup::ID = 0;
9666 FunctionPass*
9667 llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
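
A standalone sketch of the VisitNode strategy used by this pass, with hypothetical types in place of MachineDomTreeNode and machine instructions; it only counts how many TLS_base_addr calls would be replaced by copies.

// Sketch only: pre-order walk of the dominator tree; the first
// TLS_base_addr seen defines a shared virtual register, and every
// dominated occurrence becomes a plain COPY from it.
#include <vector>

struct BlockSketch {
  int NumTLSBaseAddrCalls = 0;
  std::vector<BlockSketch *> DomChildren;
};

void visitSketch(BlockSketch *B, unsigned BaseReg, unsigned &NextVReg,
                 unsigned &CallsSaved) {
  // Start NextVReg at 1 so that BaseReg == 0 means "no register yet",
  // mirroring the TLSBaseAddrReg convention above.
  for (int I = 0; I < B->NumTLSBaseAddrCalls; ++I) {
    if (BaseReg)
      ++CallsSaved;         // rewritten into a COPY from BaseReg
    else
      BaseReg = NextVReg++; // first call defines the shared register
  }
  for (BlockSketch *Child : B->DomChildren)
    visitSketch(Child, BaseReg, NextVReg, CallsSaved);
}
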
static MachineInstr * FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII)
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr * > &NewMIs) const override
unfoldMemoryOperand - Separate a single instruction which folded a load or a store or a load and a st...
A debug info location.
Definition: DebugLoc.h:34
const SDValue & getOperand(unsigned Num) const
void setIsDead(bool Val=true)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:97
MO_DTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
Definition: X86BaseInfo.h:166
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:301
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
bool hasBWI() const
Definition: X86Subtarget.h:480
static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align, const DataLayout &DL)
Definition: Loads.cpp:28
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment)
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:390
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:464
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:440
unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well...
bool hasLAHFSAHF() const
Definition: X86Subtarget.h:455
static bool expandNOVLXStore(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx)
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:461
return AArch64::GPR64RegClass contains(Reg)
iterator_range< succ_iterator > successors()
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:195
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_AD...
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
static MachineInstr * MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI)
bool findFMA3CommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns true if the routine could find two commutable operands in the given FMA instruction MI...
static cl::opt< unsigned > UndefRegClearance("undef-reg-clearance", cl::desc("How many idle instructions we would like before ""certain undef register reads"), cl::init(128), cl::Hidden)
unsigned getSize() const
Return the size of the register in bytes, which is also the size of a stack slot allocated to hold a ...
AnalysisUsage & addRequired()
MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates that the reference is actually...
Definition: X86BaseInfo.h:197
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI, bool load)
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
struct fuzzer::@269 Flags
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:157
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
MachineInstr * optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
optimizeLoadInstr - Try to remove the load by folding it to a register operand at the use...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setImplicit(bool Val=true)
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static bool hasPartialRegUpdate(unsigned Opcode)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
unsigned SubReg
unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns an adjusted FMA opcode that must be used in FMA instruction that performs the same computatio...
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
MachineMemOperand ** mmo_iterator
Definition: MachineInstr.h:56
static int getRegClass(RegisterKind Is, unsigned RegWidth)
unsigned getNumLocalDynamicTLSAccesses() const
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override
This is an architecture-specific helper function of reassociateOps.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst) const
Wraps up getting a CFI index and building a MachineInstr for it.
The memory access is dereferenceable (i.e., doesn't trap).
bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const override
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
std::pair< MachineInstr::mmo_iterator, MachineInstr::mmo_iterator > extractLoadMemRefs(MachineInstr::mmo_iterator Begin, MachineInstr::mmo_iterator End)
extractLoadMemRefs - Allocate an array and populate it with just the load information from the given ...
bool isUndef() const
bool hasDebugInfo() const
Returns true if valid debug info is present.
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction...
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
isSafeToMoveRegClassDefs - Return true if it's safe to move a machine instruction that defines the sp...
static const uint16_t * lookup(unsigned opcode, unsigned domain, ArrayRef< uint16_t[3]> Table)
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:135
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static const uint16_t ReplaceableInstrsAVX512DQMasked[][4]
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:197
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
const MachineBasicBlock & front() const
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
static unsigned getAlignment(GlobalVariable *GV)
bool isKill() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
static X86::CondCode getCondFromSETOpc(unsigned Opc)
Return condition code of a SET opcode.
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to determine (in conjunction w...
MachineBasicBlock * MBB
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:381
static def_instr_iterator def_instr_end()
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
Definition: X86BaseInfo.h:141
static const uint16_t ReplaceableInstrsAVX512DQ[][4]
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg, unsigned SrcReg2, int ImmValue, MachineInstr &OI)
Check whether the first instruction, whose only purpose is to update flags, can be made redundant...
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc, bool &isKill, bool &isUndef, MachineOperand &ImplicitOp, LiveVariables *LV) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a p...
bool isUnpredicatedTerminator(const MachineInstr &MI) const override
static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, const X86Subtarget &Subtarget)
int64_t getImm() const
bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const override
unsigned getReg132Opcode() const
Returns the 132 form of FMA register opcode.
unsigned getUndefRegState(bool B)
reverse_iterator rend()
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been <def>ined and not <kill>ed as of just before Before...
MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the reference is actually to the "...
Definition: X86BaseInfo.h:192
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget)
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:141
NodeT * getBlock() const
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
reverse_iterator rbegin()
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:150
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
unsigned getKillRegState(bool B)
bool hasSSE2() const
Definition: X86Subtarget.h:411
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:131
TargetInstrInfo - Interface to description of machine instruction set.
static X86::CondCode isUseDefConvertible(MachineInstr &MI)
Check whether the use can be converted to remove a comparison against zero.
unsigned getDeadRegState(bool B)
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:359
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isFrameLoadOpcode(int Opcode)
MO_SECREL - On a symbol operand this indicates that the immediate is the offset from beginning of sec...
Definition: ARMBaseInfo.h:305
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
unsigned getTargetFlags() const
const MachineInstrBuilder & setMemRefs(MachineInstr::mmo_iterator b, MachineInstr::mmo_iterator e) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:42
unsigned getMem132Opcode() const
Returns the 132 form of FMA memory opcode.
unsigned getReg213Opcode() const
Returns the 213 form of FMA register opcode.
int getSPAdjust(const MachineInstr &MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction. ...
unsigned const MachineRegisterInfo * MRI
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [...
Definition: X86BaseInfo.h:70
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned char TargetFlags=0)
CodeModel::Model getCodeModel() const
Returns the code model.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static const uint16_t ReplaceableInstrs[][3]
FunctionPass * createX86GlobalBaseRegPass()
This pass initializes a global base register for PIC on x86-32.
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
Definition: X86BaseInfo.h:182
MO_TLVP - On a symbol operand this indicates that the immediate is some TLS offset.
Definition: X86BaseInfo.h:203
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder & UseMI
This is an important base class in LLVM.
Definition: Constant.h:42
static bool commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static bool is64Bit(const char *name)
bool hasAVX2() const
Definition: X86Subtarget.h:417
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx)
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to"" fuse, but the X86 backend currently can't"), cl::Hidden)
unsigned getMem231Opcode() const
Returns the 231 form of FMA memory opcode.
static int getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
This determines which of three possible cases of a three source commute the source indexes correspond...
Register is known to be fully dead.
bool isCopy() const
Definition: MachineInstr.h:807
MCRegAliasIterator enumerates all registers aliasing Reg.
Represent the analysis usage information of a pass.
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle): Remove Defs, add uses.
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset]...
X86InstrInfo(X86Subtarget &STI)
uint32_t Offset
virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction...
bool isPositionIndependent() const
bool contains(unsigned Reg) const
Returns true if register Reg is contained in the set.
Definition: LivePhysRegs.h:102
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:373
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs, int PtrOffset=0)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
Definition: MachineInstr.h:910
MO_NTPOFF - On a symbol operand this indicates that the immediate is the negative thread-pointer offs...
Definition: X86BaseInfo.h:174
#define VPERM_CASES(Suffix)
unsigned getOperandBias(const MCInstrDesc &Desc)
getOperandBias - compute any additional adjustment needed to the offset to the start of the memory op...
Definition: X86BaseInfo.h:630
self_iterator getIterator()
Definition: ilist_node.h:81
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
unsigned getReg231Opcode() const
Returns the 231 form of FMA register opcode.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:136
This class is used to group {132, 213, 231} forms of FMA opcodes together.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
static Constant * getAllOnesValue(Type *Ty)
Get the all ones value.
Definition: Constants.cpp:249
Register is known to be (at least partially) live.
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override
Commutes the operands in the given instruction by changing the operands order and/or changing the ins...
bool isPICStyleGOT() const
Definition: X86Subtarget.h:553
static unsigned GetCondBranchFromCond(XCore::CondCode CC)
GetCondBranchFromCond - Return the Branch instruction opcode that matches the cc. ...
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand=false)
Return a set opcode for the given condition and whether it has a memory operand.
unsigned getSubReg() const
EVT - Extended Value Type.
Definition: ValueTypes.h:31
VarInfo & getVarInfo(unsigned RegIdx)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
unsigned char NumDefs
Definition: MCInstrDesc.h:167
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const override
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:187
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
static const uint16_t * lookupAVX512(unsigned opcode, unsigned domain, ArrayRef< uint16_t[4]> Table)
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
void setIsKill(bool Val=true)
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const
isSafeToClobberEFLAGS - Return true if it's safe insert an instruction tha would clobber the EFLAGS c...
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE instructions.
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...
Definition: LiveVariables.h:89
MO_TPOFF - On a symbol operand this indicates that the immediate is the thread-pointer offset for the...
Definition: X86BaseInfo.h:158
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:865
virtual const TargetFrameLowering * getFrameLowering() const
bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
#define VPERM_CASES_BROADCAST(Suffix)
Iterator for intrusive lists based on ilist_node.
void replaceKillInstruction(unsigned Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one...
void setOpcode(unsigned Op)
Definition: MCInst.h:158
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
CloneMachineInstr - Create a new MachineInstr which is a copy of the 'Orig' instruction, identical in all ways except the instruction has no parent, prev, or next.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well...
MachineOperand class - Representation of each machine instruction operand.
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
Information about stack frame layout on the target.
bool isIntrinsic() const
Returns true iff the group of FMA opcodes holds intrinsic opcodes.
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:415
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the the global base register value...
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
Represents one node in the SelectionDAG.
bool isKMergeMasked(uint64_t TSFlags)
isKMergedMasked - Is this a merge masked instruction.
Definition: X86BaseInfo.h:777
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
void getNoopForMachoTarget(MCInst &NopInst) const override
Return the noop instruction to use for a noop.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false)
Returns the sub or super register of a specific X86 register.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SynchronizationScope SynchScope=CrossThread, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can't fold into MI because the latter uses content...
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given condition, register size in bytes, and operand type...
bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI)
static MachineBasicBlock * getFallThroughMBB(MachineBasicBlock *MBB, MachineBasicBlock *TBB)
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
static rm_iterator rm_end()
Returns the last rm_iterator.
bool hasCMov() const
Definition: X86Subtarget.h:409
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:602
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when Inst has reassociable operands in the same MBB.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, unsigned, unsigned, int &, int &, int &) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Representation of each machine instruction.
Definition: MachineInstr.h:52
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:89
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
bool isRegOpcodeFromGroup(unsigned Opcode) const
Returns true iff the given Opcode is a register opcode from the groups of FMA opcodes.
def_instr_iterator def_instr_begin(unsigned RegNo) const
unsigned getMem213Opcode() const
Returns the 213 form of FMA memory opcode.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:169
A set of live physical registers with functions to track liveness when walking backward/forward throu...
Definition: LivePhysRegs.h:45
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
static unsigned getStoreRegOpcode(unsigned SrcReg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI)
Register liveness not decidable from local neighborhood.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:424
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
static MachineInstr * FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII, int PtrOffset=0)
static cl::opt< unsigned > PartialRegUpdateClearance("partial-reg-update-clearance", cl::desc("Clearance between two register writes ""for inserting XOR to avoid partial ""register update"), cl::init(64), cl::Hidden)
void setSubReg(unsigned subReg)
The memory access always returns the same value (or traps).
iterator end()
Definition: DenseMap.h:69
bool isKMasked(uint64_t TSFlags)
isKMasked - Is this a masked instruction.
Definition: X86BaseInfo.h:772
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
bool hasAVX512() const
Definition: X86Subtarget.h:418
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
bool isUnalignedMem16Slow() const
Definition: X86Subtarget.h:460
static rm_iterator rm_begin()
Returns rm_iterator pointing to the first entry of OpcodeToGroup map with a register FMA opcode havin...
void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl< MachineOperand > &Addr, const TargetRegisterClass *RC, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl< MachineInstr * > &NewMIs) const
bool canRealignStack(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool isTailCall(const MachineInstr &Inst) const override
unsigned getReg() const
getReg - Returns the register number.
std::pair< MachineInstr::mmo_iterator, MachineInstr::mmo_iterator > extractStoreMemRefs(MachineInstr::mmo_iterator Begin, MachineInstr::mmo_iterator End)
extractStoreMemRefs - Allocate an array and populate it with just the store information from the give...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
Definition: MachineInstr.h:886
std::vector< DomTreeNodeBase< NodeT > * >::iterator iterator
static const uint16_t ReplaceableInstrsAVX2[][3]
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
uint16_t getEncodingValue(unsigned RegNo) const
Returns the encoding for RegNo.
LLVM Value Representation.
Definition: Value.h:71
mop_iterator operands_begin()
Definition: MachineInstr.h:295
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:631
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
MachineInstrBundleIterator< const MachineInstr > const_iterator
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
static bool isDefConvertible(MachineInstr &MI)
Check whether the definition can be converted to remove a comparison against zero.
#define DEBUG(X)
Definition: Debug.h:100
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:75
bool addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
IRTranslator LLVM IR MI
bool callRegIndirect() const
Definition: X86Subtarget.h:472
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
vt_iterator vt_begin() const
vt_begin / vt_end - Loop over all of the value types that can be represented by values in this regist...
MachineModuleInfo & getMMI() const
static const uint16_t ReplaceableInstrsAVX512[][4]
static bool isFrameStoreOpcode(int Opcode)
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Returns true iff the routine could find two commutable operands in the given machine instruction...
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:66
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
FunctionPass * createCleanupLocalDynamicTLSPass()
This pass combines multiple accesses to local-dynamic TLS variables so that the TLS base address for ...
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new instruction after load / store ar...
static MachineOperand CreateFI(int Idx)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
static bool isCommutableVPERMV3Instruction(unsigned Opcode)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
Definition: MachineInstr.h:903
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:431
bool hasAVX() const
Definition: X86Subtarget.h:416
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"))
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
int getMemoryOperandNo(uint64_t TSFlags)
getMemoryOperandNo - The function returns the MCInst operand # for the first field of the memory oper...
Definition: X86BaseInfo.h:659
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:358
char * PC
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:552
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...