X86InstrInfo.cpp
1 //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the X86 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86InstrInfo.h"
15 #include "X86.h"
16 #include "X86InstrBuilder.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/CodeGen/StackMaps.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/MC/MCAsmInfo.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCInst.h"
35 #include "llvm/Support/Debug.h"
39 #include <limits>
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "x86-instr-info"
44 
45 #define GET_INSTRINFO_CTOR_DTOR
46 #include "X86GenInstrInfo.inc"
47 
48 static cl::opt<bool>
49 NoFusing("disable-spill-fusing",
50  cl::desc("Disable fusing of spill code into instructions"));
51 static cl::opt<bool>
52 PrintFailedFusing("print-failed-fuse-candidates",
53  cl::desc("Print instructions that the allocator wants to"
54  " fuse, but the X86 backend currently can't"),
55  cl::Hidden);
56 static cl::opt<bool>
57 ReMatPICStubLoad("remat-pic-stub-load",
58  cl::desc("Re-materialize load from stub in PIC mode"),
59  cl::init(false), cl::Hidden);
60 
61 enum {
62  // Select which memory operand is being unfolded.
63  // (stored in bits 0 - 3)
64  TB_INDEX_0    = 0,
65  TB_INDEX_1    = 1,
66  TB_INDEX_2    = 2,
67  TB_INDEX_3    = 3,
68  TB_INDEX_4    = 4,
69  TB_INDEX_MASK = 0xf,
70 
71  // Do not insert the reverse map (MemOp -> RegOp) into the table.
72  // This may be needed because there is a many -> one mapping.
73  TB_NO_REVERSE = 1 << 4,
74 
75  // Do not insert the forward map (RegOp -> MemOp) into the table.
76  // This is needed for Native Client, which prohibits branch
77  // instructions from using a memory operand.
78  TB_NO_FORWARD = 1 << 5,
79 
80  TB_FOLDED_LOAD = 1 << 6,
81  TB_FOLDED_STORE = 1 << 7,
82 
83  // Minimum alignment required for load/store.
84  // Used for RegOp->MemOp conversion.
85  // (stored in bits 8 - 15)
86  TB_ALIGN_SHIFT = 8,
87  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
88  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
89  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
90  TB_ALIGN_64    =   64 << TB_ALIGN_SHIFT,
91  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT
92 };
93 
94 struct X86MemoryFoldTableEntry {
95  uint16_t RegOp;
96  uint16_t MemOp;
97  uint16_t Flags;
98 };
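 
  // Editor's note -- an illustrative sketch, not part of the original source:
  // each table entry's Flags value packs the information described by the enum
  // above into a single 16-bit field. For example, an entry registered with
  //   unsigned Flags = TB_INDEX_1 | TB_FOLDED_LOAD | TB_ALIGN_16;
  // can be decoded as
  //   unsigned Index = Flags & 0xf;              // operand index 1 (bits 0 - 3)
  //   bool FoldsLoad = Flags & TB_FOLDED_LOAD;   // true (bit 6)
  //   unsigned Align = (Flags >> 8) & 0xff;      // 16 (bits 8 - 15)
  // assuming the alignment is stored as a byte count, as the TB_ALIGN_* names
  // suggest.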
99 
100 // Pin the vtable to this file.
101 void X86InstrInfo::anchor() {}
102 
103 X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
104  : X86GenInstrInfo(
105  (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
106  (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
107  Subtarget(STI), RI(STI.getTargetTriple()) {
108 
109  static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
110  { X86::ADC32ri, X86::ADC32mi, 0 },
111  { X86::ADC32ri8, X86::ADC32mi8, 0 },
112  { X86::ADC32rr, X86::ADC32mr, 0 },
113  { X86::ADC64ri32, X86::ADC64mi32, 0 },
114  { X86::ADC64ri8, X86::ADC64mi8, 0 },
115  { X86::ADC64rr, X86::ADC64mr, 0 },
116  { X86::ADD16ri, X86::ADD16mi, 0 },
117  { X86::ADD16ri8, X86::ADD16mi8, 0 },
118  { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
119  { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
120  { X86::ADD16rr, X86::ADD16mr, 0 },
121  { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
122  { X86::ADD32ri, X86::ADD32mi, 0 },
123  { X86::ADD32ri8, X86::ADD32mi8, 0 },
124  { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
125  { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
126  { X86::ADD32rr, X86::ADD32mr, 0 },
127  { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
128  { X86::ADD64ri32, X86::ADD64mi32, 0 },
129  { X86::ADD64ri8, X86::ADD64mi8, 0 },
130  { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
131  { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
132  { X86::ADD64rr, X86::ADD64mr, 0 },
133  { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
134  { X86::ADD8ri, X86::ADD8mi, 0 },
135  { X86::ADD8rr, X86::ADD8mr, 0 },
136  { X86::AND16ri, X86::AND16mi, 0 },
137  { X86::AND16ri8, X86::AND16mi8, 0 },
138  { X86::AND16rr, X86::AND16mr, 0 },
139  { X86::AND32ri, X86::AND32mi, 0 },
140  { X86::AND32ri8, X86::AND32mi8, 0 },
141  { X86::AND32rr, X86::AND32mr, 0 },
142  { X86::AND64ri32, X86::AND64mi32, 0 },
143  { X86::AND64ri8, X86::AND64mi8, 0 },
144  { X86::AND64rr, X86::AND64mr, 0 },
145  { X86::AND8ri, X86::AND8mi, 0 },
146  { X86::AND8rr, X86::AND8mr, 0 },
147  { X86::DEC16r, X86::DEC16m, 0 },
148  { X86::DEC32r, X86::DEC32m, 0 },
149  { X86::DEC64r, X86::DEC64m, 0 },
150  { X86::DEC8r, X86::DEC8m, 0 },
151  { X86::INC16r, X86::INC16m, 0 },
152  { X86::INC32r, X86::INC32m, 0 },
153  { X86::INC64r, X86::INC64m, 0 },
154  { X86::INC8r, X86::INC8m, 0 },
155  { X86::NEG16r, X86::NEG16m, 0 },
156  { X86::NEG32r, X86::NEG32m, 0 },
157  { X86::NEG64r, X86::NEG64m, 0 },
158  { X86::NEG8r, X86::NEG8m, 0 },
159  { X86::NOT16r, X86::NOT16m, 0 },
160  { X86::NOT32r, X86::NOT32m, 0 },
161  { X86::NOT64r, X86::NOT64m, 0 },
162  { X86::NOT8r, X86::NOT8m, 0 },
163  { X86::OR16ri, X86::OR16mi, 0 },
164  { X86::OR16ri8, X86::OR16mi8, 0 },
165  { X86::OR16rr, X86::OR16mr, 0 },
166  { X86::OR32ri, X86::OR32mi, 0 },
167  { X86::OR32ri8, X86::OR32mi8, 0 },
168  { X86::OR32rr, X86::OR32mr, 0 },
169  { X86::OR64ri32, X86::OR64mi32, 0 },
170  { X86::OR64ri8, X86::OR64mi8, 0 },
171  { X86::OR64rr, X86::OR64mr, 0 },
172  { X86::OR8ri, X86::OR8mi, 0 },
173  { X86::OR8rr, X86::OR8mr, 0 },
174  { X86::ROL16r1, X86::ROL16m1, 0 },
175  { X86::ROL16rCL, X86::ROL16mCL, 0 },
176  { X86::ROL16ri, X86::ROL16mi, 0 },
177  { X86::ROL32r1, X86::ROL32m1, 0 },
178  { X86::ROL32rCL, X86::ROL32mCL, 0 },
179  { X86::ROL32ri, X86::ROL32mi, 0 },
180  { X86::ROL64r1, X86::ROL64m1, 0 },
181  { X86::ROL64rCL, X86::ROL64mCL, 0 },
182  { X86::ROL64ri, X86::ROL64mi, 0 },
183  { X86::ROL8r1, X86::ROL8m1, 0 },
184  { X86::ROL8rCL, X86::ROL8mCL, 0 },
185  { X86::ROL8ri, X86::ROL8mi, 0 },
186  { X86::ROR16r1, X86::ROR16m1, 0 },
187  { X86::ROR16rCL, X86::ROR16mCL, 0 },
188  { X86::ROR16ri, X86::ROR16mi, 0 },
189  { X86::ROR32r1, X86::ROR32m1, 0 },
190  { X86::ROR32rCL, X86::ROR32mCL, 0 },
191  { X86::ROR32ri, X86::ROR32mi, 0 },
192  { X86::ROR64r1, X86::ROR64m1, 0 },
193  { X86::ROR64rCL, X86::ROR64mCL, 0 },
194  { X86::ROR64ri, X86::ROR64mi, 0 },
195  { X86::ROR8r1, X86::ROR8m1, 0 },
196  { X86::ROR8rCL, X86::ROR8mCL, 0 },
197  { X86::ROR8ri, X86::ROR8mi, 0 },
198  { X86::SAR16r1, X86::SAR16m1, 0 },
199  { X86::SAR16rCL, X86::SAR16mCL, 0 },
200  { X86::SAR16ri, X86::SAR16mi, 0 },
201  { X86::SAR32r1, X86::SAR32m1, 0 },
202  { X86::SAR32rCL, X86::SAR32mCL, 0 },
203  { X86::SAR32ri, X86::SAR32mi, 0 },
204  { X86::SAR64r1, X86::SAR64m1, 0 },
205  { X86::SAR64rCL, X86::SAR64mCL, 0 },
206  { X86::SAR64ri, X86::SAR64mi, 0 },
207  { X86::SAR8r1, X86::SAR8m1, 0 },
208  { X86::SAR8rCL, X86::SAR8mCL, 0 },
209  { X86::SAR8ri, X86::SAR8mi, 0 },
210  { X86::SBB32ri, X86::SBB32mi, 0 },
211  { X86::SBB32ri8, X86::SBB32mi8, 0 },
212  { X86::SBB32rr, X86::SBB32mr, 0 },
213  { X86::SBB64ri32, X86::SBB64mi32, 0 },
214  { X86::SBB64ri8, X86::SBB64mi8, 0 },
215  { X86::SBB64rr, X86::SBB64mr, 0 },
216  { X86::SHL16rCL, X86::SHL16mCL, 0 },
217  { X86::SHL16ri, X86::SHL16mi, 0 },
218  { X86::SHL32rCL, X86::SHL32mCL, 0 },
219  { X86::SHL32ri, X86::SHL32mi, 0 },
220  { X86::SHL64rCL, X86::SHL64mCL, 0 },
221  { X86::SHL64ri, X86::SHL64mi, 0 },
222  { X86::SHL8rCL, X86::SHL8mCL, 0 },
223  { X86::SHL8ri, X86::SHL8mi, 0 },
224  { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
225  { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
226  { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
227  { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
228  { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
229  { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
230  { X86::SHR16r1, X86::SHR16m1, 0 },
231  { X86::SHR16rCL, X86::SHR16mCL, 0 },
232  { X86::SHR16ri, X86::SHR16mi, 0 },
233  { X86::SHR32r1, X86::SHR32m1, 0 },
234  { X86::SHR32rCL, X86::SHR32mCL, 0 },
235  { X86::SHR32ri, X86::SHR32mi, 0 },
236  { X86::SHR64r1, X86::SHR64m1, 0 },
237  { X86::SHR64rCL, X86::SHR64mCL, 0 },
238  { X86::SHR64ri, X86::SHR64mi, 0 },
239  { X86::SHR8r1, X86::SHR8m1, 0 },
240  { X86::SHR8rCL, X86::SHR8mCL, 0 },
241  { X86::SHR8ri, X86::SHR8mi, 0 },
242  { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
243  { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
244  { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
245  { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
246  { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
247  { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
248  { X86::SUB16ri, X86::SUB16mi, 0 },
249  { X86::SUB16ri8, X86::SUB16mi8, 0 },
250  { X86::SUB16rr, X86::SUB16mr, 0 },
251  { X86::SUB32ri, X86::SUB32mi, 0 },
252  { X86::SUB32ri8, X86::SUB32mi8, 0 },
253  { X86::SUB32rr, X86::SUB32mr, 0 },
254  { X86::SUB64ri32, X86::SUB64mi32, 0 },
255  { X86::SUB64ri8, X86::SUB64mi8, 0 },
256  { X86::SUB64rr, X86::SUB64mr, 0 },
257  { X86::SUB8ri, X86::SUB8mi, 0 },
258  { X86::SUB8rr, X86::SUB8mr, 0 },
259  { X86::XOR16ri, X86::XOR16mi, 0 },
260  { X86::XOR16ri8, X86::XOR16mi8, 0 },
261  { X86::XOR16rr, X86::XOR16mr, 0 },
262  { X86::XOR32ri, X86::XOR32mi, 0 },
263  { X86::XOR32ri8, X86::XOR32mi8, 0 },
264  { X86::XOR32rr, X86::XOR32mr, 0 },
265  { X86::XOR64ri32, X86::XOR64mi32, 0 },
266  { X86::XOR64ri8, X86::XOR64mi8, 0 },
267  { X86::XOR64rr, X86::XOR64mr, 0 },
268  { X86::XOR8ri, X86::XOR8mi, 0 },
269  { X86::XOR8rr, X86::XOR8mr, 0 }
270  };
271 
272  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
273  AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
274  Entry.RegOp, Entry.MemOp,
275  // Index 0, folded load and store, no alignment requirement.
276  Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
277  }
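 
  // Editor's note -- an illustrative sketch, not part of the original source:
  // a 2-addr entry such as { X86::ADD32ri, X86::ADD32mi, 0 } allows the
  // read-modify-write of a spilled, tied operand to be fused into the memory
  // form of the instruction, e.g. (the stack slot <fi#N> is hypothetical):
  //   %reg = ADD32ri %reg, 42        // %reg lives in stack slot <fi#N>
  // becomes
  //   ADD32mi <fi#N>, 42             // load and store both folded
  // which is why every entry here is registered with both TB_FOLDED_LOAD and
  // TB_FOLDED_STORE.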
278 
279  static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
280  { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
281  { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
282  { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
283  { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
284  { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
285  { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
286  { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
287  { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
288  { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
289  { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
290  { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
291  { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
292  { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
293  { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
294  { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
295  { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
296  { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
297  { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
298  { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
299  { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
300  { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
301  { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
302  { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
303  { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
304  { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
305  { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
306  { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
307  { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
308  { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
309  { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
310  { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
311  { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
312  { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
313  { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
314  { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
315  { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
316  { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
317  { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
318  { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
319  { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
320  { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
321  { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
322  { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
323  { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
324  { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
325  { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
326  { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
327  { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
328  { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
329  { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
330  { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
331  { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
332  { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
333  { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
334  { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
335  { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
336  { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
337  { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
338  { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
339  { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
340  { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
341  { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
342  { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
343  { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
344  { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
345  { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
346  { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
347  { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
348  { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
349  { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
350  { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
351  { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
352  { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
353  { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
354  { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
355  { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
356  { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
357  { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
358 
359  // AVX 128-bit versions of foldable instructions
360  { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
361  { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
362  { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
363  { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
364  { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
365  { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
366  { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
367  { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
368  { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
369  { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
370  { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
371  { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE },
372  { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE },
373 
374  // AVX 256-bit foldable instructions
375  { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
376  { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
377  { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
378  { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
379  { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
380  { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
381 
382  // AVX-512 foldable instructions
383  { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
384  { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
385  { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
386  { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
387  { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
388  { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
389  { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
390  { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
391  { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
392  { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
393  { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
394 
395  // AVX-512 foldable instructions (256-bit versions)
396  { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
397  { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
398  { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
399  { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
400  { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
401  { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
402  { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
403  { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
404  { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
405  { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
406 
407  // AVX-512 foldable instructions (128-bit versions)
408  { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
409  { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
410  { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
411  { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
412  { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
413  { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
414  { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
415  { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
416  { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
417  { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
418 
419  // F16C foldable instructions
420  { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE },
421  { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
422  };
423 
424  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
425  AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
426  Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
427  }
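 
  // Editor's note -- an illustrative sketch, not part of the original source:
  // index-0 entries substitute a memory reference for operand 0. A
  // TB_FOLDED_STORE entry such as { X86::MOV32rr, X86::MOV32mr, ... } turns a
  // register copy whose destination is spilled into a direct store
  // (MOV32mr <fi#N>, %reg), while a TB_FOLDED_LOAD entry such as
  // { X86::CALL32r, X86::CALL32m, ... } reads the operand straight from the
  // spill slot (CALL32m <fi#N>).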
428 
429  static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
430  { X86::BSF16rr, X86::BSF16rm, 0 },
431  { X86::BSF32rr, X86::BSF32rm, 0 },
432  { X86::BSF64rr, X86::BSF64rm, 0 },
433  { X86::BSR16rr, X86::BSR16rm, 0 },
434  { X86::BSR32rr, X86::BSR32rm, 0 },
435  { X86::BSR64rr, X86::BSR64rm, 0 },
436  { X86::CMP16rr, X86::CMP16rm, 0 },
437  { X86::CMP32rr, X86::CMP32rm, 0 },
438  { X86::CMP64rr, X86::CMP64rm, 0 },
439  { X86::CMP8rr, X86::CMP8rm, 0 },
440  { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
441  { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
442  { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
443  { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
444  { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
445  { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
446  { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
447  { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
448  { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
449  { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
450  { X86::IMUL16rri, X86::IMUL16rmi, 0 },
451  { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
452  { X86::IMUL32rri, X86::IMUL32rmi, 0 },
453  { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
454  { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
455  { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
456  { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
457  { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
458  { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
459  { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
460  { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
461  { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
462  { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_ALIGN_16 },
463  { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
464  { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
465  { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
466  { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
467  { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_ALIGN_16 },
468  { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
469  { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
470  { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
471  { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
472  { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
473  { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
474  { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
475  { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
476  { X86::MOV16rr, X86::MOV16rm, 0 },
477  { X86::MOV32rr, X86::MOV32rm, 0 },
478  { X86::MOV64rr, X86::MOV64rm, 0 },
479  { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
480  { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
481  { X86::MOV8rr, X86::MOV8rm, 0 },
482  { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
483  { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
484  { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
485  { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
486  { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
487  { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
488  { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
489  { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
490  { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
491  { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
492  { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
493  { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
494  { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
495  { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
496  { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
497  { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
498  { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
499  { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
500  { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
501  { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
502  { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
503  { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
504  { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
505  { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
506  { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
507  { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
508  { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
509  { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
510  { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
511  { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 },
512  { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_ALIGN_16 },
513  { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_ALIGN_16 },
514  { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_ALIGN_16 },
515  { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_ALIGN_16 },
516  { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_ALIGN_16 },
517  { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_ALIGN_16 },
518  { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_ALIGN_16 },
519  { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_ALIGN_16 },
520  { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_ALIGN_16 },
521  { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_ALIGN_16 },
522  { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_ALIGN_16 },
523  { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_ALIGN_16 },
524  { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
525  { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
526  { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
527  { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
528  { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
529  { X86::RCPSSr, X86::RCPSSm, 0 },
530  { X86::RCPSSr_Int, X86::RCPSSm_Int, 0 },
531  { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
532  { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
533  { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
534  { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
535  { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
536  { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
537  { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
538  { X86::SQRTSDr, X86::SQRTSDm, 0 },
539  { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
540  { X86::SQRTSSr, X86::SQRTSSm, 0 },
541  { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
542  { X86::TEST16rr, X86::TEST16rm, 0 },
543  { X86::TEST32rr, X86::TEST32rm, 0 },
544  { X86::TEST64rr, X86::TEST64rm, 0 },
545  { X86::TEST8rr, X86::TEST8rm, 0 },
546  // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
547  { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
548  { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
549 
550  // MMX version of foldable instructions
551  { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
552  { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
553  { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
554  { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
555  { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
556  { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
557  { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
558  { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
559  { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
560  { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
561 
562  // 3DNow! version of foldable instructions
563  { X86::PF2IDrr, X86::PF2IDrm, 0 },
564  { X86::PF2IWrr, X86::PF2IWrm, 0 },
565  { X86::PFRCPrr, X86::PFRCPrm, 0 },
566  { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
567  { X86::PI2FDrr, X86::PI2FDrm, 0 },
568  { X86::PI2FWrr, X86::PI2FWrm, 0 },
569  { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
570 
571  // AVX 128-bit versions of foldable instructions
572  { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
573  { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
574  { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
575  { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
576  { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
577  { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
578  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
579  { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
580  { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
581  { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
582  { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
583  { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
584  { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
585  { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
586  { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
587  { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
588  { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, 0 },
589  { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
590  { X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
591  { X86::VCVTPD2PSrr, X86::VCVTPD2PSXrm, 0 },
592  { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
593  { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, 0 },
594  { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
595  { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
596  { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
597  { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
598  { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
599  { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
600  { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
601  { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
602  { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
603  { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
604  { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
605  { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
606  { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
607  { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
608  { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
609  { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
610  { X86::VPABSBrr128, X86::VPABSBrm128, 0 },
611  { X86::VPABSDrr128, X86::VPABSDrm128, 0 },
612  { X86::VPABSWrr128, X86::VPABSWrm128, 0 },
613  { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
614  { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 },
615  { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
616  { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 },
617  { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 },
618  { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
619  { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
620  { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, 0 },
621  { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, 0 },
622  { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, 0 },
623  { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, 0 },
624  { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, 0 },
625  { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, 0 },
626  { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, 0 },
627  { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, 0 },
628  { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, 0 },
629  { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, 0 },
630  { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, 0 },
631  { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, 0 },
632  { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
633  { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
634  { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
635  { X86::VPTESTrr, X86::VPTESTrm, 0 },
636  { X86::VRCPPSr, X86::VRCPPSm, 0 },
637  { X86::VROUNDPDr, X86::VROUNDPDm, 0 },
638  { X86::VROUNDPSr, X86::VROUNDPSm, 0 },
639  { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
640  { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
641  { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
642  { X86::VTESTPDrr, X86::VTESTPDrm, 0 },
643  { X86::VTESTPSrr, X86::VTESTPSrm, 0 },
644  { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
645  { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
646 
647  // AVX 256-bit foldable instructions
648  { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0 },
649  { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
650  { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
651  { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
652  { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
653  { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, 0 },
654  { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
655  { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
656  { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
657  { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
658  { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
659  { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
660  { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
661  { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
662  { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
663  { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
664  { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
665  { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
666  { X86::VPTESTYrr, X86::VPTESTYrm, 0 },
667  { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
668  { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
669  { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
670  { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
671  { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
672  { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
673  { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 },
674  { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 },
675 
676  // AVX2 foldable instructions
677 
678  // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the
679  // VBROADCASTS{SD}rm memory instructions were available from AVX1.
680  // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction
681  // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions
682  // so they don't need an equivalent limitation.
683  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
684  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
685  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
686  { X86::VPABSBrr256, X86::VPABSBrm256, 0 },
687  { X86::VPABSDrr256, X86::VPABSDrm256, 0 },
688  { X86::VPABSWrr256, X86::VPABSWrm256, 0 },
689  { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, 0 },
690  { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, 0 },
691  { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, 0 },
692  { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, 0 },
693  { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, 0 },
694  { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, 0 },
695  { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, 0 },
696  { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, 0 },
697  { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
698  { X86::VPERMQYri, X86::VPERMQYmi, 0 },
699  { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, 0 },
700  { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, 0 },
701  { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
702  { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
703  { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
704  { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, 0 },
705  { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, 0 },
706  { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, 0 },
707  { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
708  { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
709  { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
710  { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, 0 },
711  { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
712  { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
713  { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
714 
715  // XOP foldable instructions
716  { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
717  { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 },
718  { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 },
719  { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 },
720  { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 },
721  { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 },
722  { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 },
723  { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 },
724  { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 },
725  { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 },
726  { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 },
727  { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 },
728  { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 },
729  { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 },
730  { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 },
731  { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 },
732  { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 },
733  { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 },
734  { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 },
735  { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 },
736  { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 },
737  { X86::VPROTBri, X86::VPROTBmi, 0 },
738  { X86::VPROTBrr, X86::VPROTBmr, 0 },
739  { X86::VPROTDri, X86::VPROTDmi, 0 },
740  { X86::VPROTDrr, X86::VPROTDmr, 0 },
741  { X86::VPROTQri, X86::VPROTQmi, 0 },
742  { X86::VPROTQrr, X86::VPROTQmr, 0 },
743  { X86::VPROTWri, X86::VPROTWmi, 0 },
744  { X86::VPROTWrr, X86::VPROTWmr, 0 },
745  { X86::VPSHABrr, X86::VPSHABmr, 0 },
746  { X86::VPSHADrr, X86::VPSHADmr, 0 },
747  { X86::VPSHAQrr, X86::VPSHAQmr, 0 },
748  { X86::VPSHAWrr, X86::VPSHAWmr, 0 },
749  { X86::VPSHLBrr, X86::VPSHLBmr, 0 },
750  { X86::VPSHLDrr, X86::VPSHLDmr, 0 },
751  { X86::VPSHLQrr, X86::VPSHLQmr, 0 },
752  { X86::VPSHLWrr, X86::VPSHLWmr, 0 },
753 
754  // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
755  { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
756  { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
757  { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
758  { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
759  { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
760  { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
761  { X86::BLCI32rr, X86::BLCI32rm, 0 },
762  { X86::BLCI64rr, X86::BLCI64rm, 0 },
763  { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
764  { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
765  { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
766  { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
767  { X86::BLCS32rr, X86::BLCS32rm, 0 },
768  { X86::BLCS64rr, X86::BLCS64rm, 0 },
769  { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
770  { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
771  { X86::BLSI32rr, X86::BLSI32rm, 0 },
772  { X86::BLSI64rr, X86::BLSI64rm, 0 },
773  { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
774  { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
775  { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
776  { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
777  { X86::BLSR32rr, X86::BLSR32rm, 0 },
778  { X86::BLSR64rr, X86::BLSR64rm, 0 },
779  { X86::BZHI32rr, X86::BZHI32rm, 0 },
780  { X86::BZHI64rr, X86::BZHI64rm, 0 },
781  { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
782  { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
783  { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
784  { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
785  { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
786  { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
787  { X86::RORX32ri, X86::RORX32mi, 0 },
788  { X86::RORX64ri, X86::RORX64mi, 0 },
789  { X86::SARX32rr, X86::SARX32rm, 0 },
790  { X86::SARX64rr, X86::SARX64rm, 0 },
791  { X86::SHRX32rr, X86::SHRX32rm, 0 },
792  { X86::SHRX64rr, X86::SHRX64rm, 0 },
793  { X86::SHLX32rr, X86::SHLX32rm, 0 },
794  { X86::SHLX64rr, X86::SHLX64rm, 0 },
795  { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
796  { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
797  { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
798  { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
799  { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
800  { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
801  { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
802 
803  // AVX-512 foldable instructions
804  { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
805  { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
806  { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
807  { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
808  { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
809  { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
810  { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
811  { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
812  { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
813  { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
814  { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
815  { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
816  { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
817  { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
818  { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
819  { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
820 
821  // AVX-512 foldable instructions (256-bit versions)
822  { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
823  { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
824  { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
825  { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
826  { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
827  { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
828  { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
829  { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
830  { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
831  { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
832  { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
833  { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
834 
835  // AVX-512 foldable instructions (128-bit versions)
836  { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
837  { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
838  { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
839  { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
840  { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
841  { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
842  { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
843  { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
844  { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
845  { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
846  { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
847 
848  // F16C foldable instructions
849  { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
850  { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
851 
852  // AES foldable instructions
853  { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
854  { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
855  { X86::VAESIMCrr, X86::VAESIMCrm, 0 },
856  { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
857  };
858 
859  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
860  AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
861  Entry.RegOp, Entry.MemOp,
862  // Index 1, folded load
863  Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
864  }
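 
  // Editor's note -- an illustrative sketch, not part of the original source:
  // index-1 entries fold a load into the single source operand. With
  // { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, for example,
  //   %eax = MOVZX32rr8 %cl          // %cl reloaded from stack slot <fi#N>
  // can be fused into
  //   %eax = MOVZX32rm8 <fi#N>       // load folded into operand 1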
865 
866  static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
867  { X86::ADC32rr, X86::ADC32rm, 0 },
868  { X86::ADC64rr, X86::ADC64rm, 0 },
869  { X86::ADD16rr, X86::ADD16rm, 0 },
870  { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
871  { X86::ADD32rr, X86::ADD32rm, 0 },
872  { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
873  { X86::ADD64rr, X86::ADD64rm, 0 },
874  { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
875  { X86::ADD8rr, X86::ADD8rm, 0 },
876  { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
877  { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
878  { X86::ADDSDrr, X86::ADDSDrm, 0 },
879  { X86::ADDSDrr_Int, X86::ADDSDrm_Int, 0 },
880  { X86::ADDSSrr, X86::ADDSSrm, 0 },
881  { X86::ADDSSrr_Int, X86::ADDSSrm_Int, 0 },
882  { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
883  { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
884  { X86::AND16rr, X86::AND16rm, 0 },
885  { X86::AND32rr, X86::AND32rm, 0 },
886  { X86::AND64rr, X86::AND64rm, 0 },
887  { X86::AND8rr, X86::AND8rm, 0 },
888  { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
889  { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
890  { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
891  { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
892  { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
893  { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
894  { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
895  { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
896  { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
897  { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
898  { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
899  { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
900  { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
901  { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
902  { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
903  { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
904  { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
905  { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
906  { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
907  { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
908  { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
909  { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
910  { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
911  { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
912  { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
913  { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
914  { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
915  { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
916  { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
917  { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
918  { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
919  { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
920  { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
921  { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
922  { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
923  { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
924  { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
925  { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
926  { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
927  { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
928  { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
929  { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
930  { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
931  { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
932  { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
933  { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
934  { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
935  { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
936  { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
937  { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
938  { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
939  { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
940  { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
941  { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
942  { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
943  { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
944  { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
945  { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
946  { X86::CMPSDrr, X86::CMPSDrm, 0 },
947  { X86::CMPSSrr, X86::CMPSSrm, 0 },
948  { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
949  { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
950  { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
951  { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
952  { X86::DIVSDrr, X86::DIVSDrm, 0 },
953  { X86::DIVSDrr_Int, X86::DIVSDrm_Int, 0 },
954  { X86::DIVSSrr, X86::DIVSSrm, 0 },
955  { X86::DIVSSrr_Int, X86::DIVSSrm_Int, 0 },
956  { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
957  { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
958 
959  // Do not fold Fs* scalar logical op loads because there are no scalar
960  // load variants for these instructions. When folded, the load is required
961  // to be 128 bits, so the load size would not match.
962 
963  { X86::FvANDNPDrr, X86::FvANDNPDrm, TB_ALIGN_16 },
964  { X86::FvANDNPSrr, X86::FvANDNPSrm, TB_ALIGN_16 },
965  { X86::FvANDPDrr, X86::FvANDPDrm, TB_ALIGN_16 },
966  { X86::FvANDPSrr, X86::FvANDPSrm, TB_ALIGN_16 },
967  { X86::FvORPDrr, X86::FvORPDrm, TB_ALIGN_16 },
968  { X86::FvORPSrr, X86::FvORPSrm, TB_ALIGN_16 },
969  { X86::FvXORPDrr, X86::FvXORPDrm, TB_ALIGN_16 },
970  { X86::FvXORPSrr, X86::FvXORPSrm, TB_ALIGN_16 },
971  { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
972  { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
973  { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
974  { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
975  { X86::IMUL16rr, X86::IMUL16rm, 0 },
976  { X86::IMUL32rr, X86::IMUL32rm, 0 },
977  { X86::IMUL64rr, X86::IMUL64rm, 0 },
978  { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
979  { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
980  { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
981  { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
982  { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
983  { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
984  { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
985  { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
986  { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
987  { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
988  { X86::MAXSDrr, X86::MAXSDrm, 0 },
989  { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
990  { X86::MAXSSrr, X86::MAXSSrm, 0 },
991  { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
992  { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
993  { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
994  { X86::MINSDrr, X86::MINSDrm, 0 },
995  { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
996  { X86::MINSSrr, X86::MINSSrm, 0 },
997  { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
998  { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
999  { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
1000  { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
1001  { X86::MULSDrr, X86::MULSDrm, 0 },
1002  { X86::MULSDrr_Int, X86::MULSDrm_Int, 0 },
1003  { X86::MULSSrr, X86::MULSSrm, 0 },
1004  { X86::MULSSrr_Int, X86::MULSSrm_Int, 0 },
1005  { X86::OR16rr, X86::OR16rm, 0 },
1006  { X86::OR32rr, X86::OR32rm, 0 },
1007  { X86::OR64rr, X86::OR64rm, 0 },
1008  { X86::OR8rr, X86::OR8rm, 0 },
1009  { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
1010  { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
1011  { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
1012  { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
1013  { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
1014  { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
1015  { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
1016  { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
1017  { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
1018  { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
1019  { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
1020  { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
1021  { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
1022  { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
1023  { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 },
1024  { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
1025  { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
1026  { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
1027  { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
1028  { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 },
1029  { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
1030  { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 },
1031  { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
1032  { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
1033  { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
1034  { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
1035  { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
1036  { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
1037  { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
1038  { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
1039  { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
1040  { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
1041  { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
1042  { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
1043  { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
1044  { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
1045  { X86::PINSRBrr, X86::PINSRBrm, 0 },
1046  { X86::PINSRDrr, X86::PINSRDrm, 0 },
1047  { X86::PINSRQrr, X86::PINSRQrm, 0 },
1048  { X86::PINSRWrri, X86::PINSRWrmi, 0 },
1049  { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
1050  { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
1051  { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
1052  { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
1053  { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
1054  { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
1055  { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
1056  { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
1057  { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
1058  { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
1059  { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
1060  { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
1061  { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
1062  { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
1063  { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
1064  { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
1065  { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
1066  { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
1067  { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
1068  { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
1069  { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
1070  { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
1071  { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
1072  { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
1073  { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
1074  { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
1075  { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
1076  { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
1077  { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
1078  { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
1079  { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
1080  { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
1081  { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
1082  { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
1083  { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
1084  { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
1085  { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
1086  { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 },
1087  { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
1088  { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
1089  { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 },
1090  { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 },
1091  { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
1092  { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
1093  { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
1094  { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
1095  { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
1096  { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
1097  { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
1098  { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
1099  { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
1100  { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
1101  { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
1102  { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
1103  { X86::SBB32rr, X86::SBB32rm, 0 },
1104  { X86::SBB64rr, X86::SBB64rm, 0 },
1105  { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
1106  { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
1107  { X86::SUB16rr, X86::SUB16rm, 0 },
1108  { X86::SUB32rr, X86::SUB32rm, 0 },
1109  { X86::SUB64rr, X86::SUB64rm, 0 },
1110  { X86::SUB8rr, X86::SUB8rm, 0 },
1111  { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
1112  { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
1113  { X86::SUBSDrr, X86::SUBSDrm, 0 },
1114  { X86::SUBSDrr_Int, X86::SUBSDrm_Int, 0 },
1115  { X86::SUBSSrr, X86::SUBSSrm, 0 },
1116  { X86::SUBSSrr_Int, X86::SUBSSrm_Int, 0 },
1117  // FIXME: TEST*rr -> swapped operand of TEST*mr.
1118  { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
1119  { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
1120  { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
1121  { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
1122  { X86::XOR16rr, X86::XOR16rm, 0 },
1123  { X86::XOR32rr, X86::XOR32rm, 0 },
1124  { X86::XOR64rr, X86::XOR64rm, 0 },
1125  { X86::XOR8rr, X86::XOR8rm, 0 },
1126  { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
1127  { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
1128 
1129  // MMX version of foldable instructions
1130  { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
1131  { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
1132  { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
1133  { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
1134  { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
1135  { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
1136  { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
1137  { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
1138  { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
1139  { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
1140  { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
1141  { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
1142  { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
1143  { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
1144  { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
1145  { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
1146  { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
1147  { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
1148  { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
1149  { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
1150  { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
1151  { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
1152  { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
1153  { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
1154  { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
1155  { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
1156  { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
1157  { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
1158  { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
1159  { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
1160  { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
1161  { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
1162  { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
1163  { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
1164  { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
1165  { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
1166  { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
1167  { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
1168  { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
1169  { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
1170  { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
1171  { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
1172  { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
1173  { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
1174  { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
1175  { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
1176  { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
1177  { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
1178  { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
1179  { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
1180  { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
1181  { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
1182  { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
1183  { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
1184  { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
1185  { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
1186  { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
1187  { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
1188  { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
1189  { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
1190  { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
1191  { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
1192  { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
1193  { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
1194  { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
1195  { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
1196  { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
1197  { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
1198  { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
1199  { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
1200 
1201  // 3DNow! version of foldable instructions
1202  { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
1203  { X86::PFACCrr, X86::PFACCrm, 0 },
1204  { X86::PFADDrr, X86::PFADDrm, 0 },
1205  { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
1206  { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
1207  { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
1208  { X86::PFMAXrr, X86::PFMAXrm, 0 },
1209  { X86::PFMINrr, X86::PFMINrm, 0 },
1210  { X86::PFMULrr, X86::PFMULrm, 0 },
1211  { X86::PFNACCrr, X86::PFNACCrm, 0 },
1212  { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
1213  { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
1214  { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
1215  { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
1216  { X86::PFSUBrr, X86::PFSUBrm, 0 },
1217  { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
1218  { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
1219 
1220  // AVX 128-bit versions of foldable instructions
1221  { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
1222  { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
1223  { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
1224  { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
1225  { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
1226  { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 },
1227  { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 },
1228  { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
1229  { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
1230  { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
1231  { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
1232  { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
1233  { X86::VRCPSSr, X86::VRCPSSm, 0 },
1234  { X86::VRCPSSr_Int, X86::VRCPSSm_Int, 0 },
1235  { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
1236  { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, 0 },
1237  { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
1238  { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, 0 },
1239  { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
1240  { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, 0 },
1241  { X86::VADDPDrr, X86::VADDPDrm, 0 },
1242  { X86::VADDPSrr, X86::VADDPSrm, 0 },
1243  { X86::VADDSDrr, X86::VADDSDrm, 0 },
1244  { X86::VADDSDrr_Int, X86::VADDSDrm_Int, 0 },
1245  { X86::VADDSSrr, X86::VADDSSrm, 0 },
1246  { X86::VADDSSrr_Int, X86::VADDSSrm_Int, 0 },
1247  { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
1248  { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
1249  { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
1250  { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
1251  { X86::VANDPDrr, X86::VANDPDrm, 0 },
1252  { X86::VANDPSrr, X86::VANDPSrm, 0 },
1253  { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
1254  { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
1255  { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
1256  { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
1257  { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
1258  { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
1259  { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
1260  { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
1261  { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
1262  { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
1263  { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
1264  { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, 0 },
1265  { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
1266  { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, 0 },
1267  { X86::VDPPDrri, X86::VDPPDrmi, 0 },
1268  { X86::VDPPSrri, X86::VDPPSrmi, 0 },
1269  // Do not fold VFs* loads because there are no scalar load variants for
1270  // these instructions. When folded, the load is required to be 128-bits, so
1271  // the load size would not match.
1272  { X86::VFvANDNPDrr, X86::VFvANDNPDrm, 0 },
1273  { X86::VFvANDNPSrr, X86::VFvANDNPSrm, 0 },
1274  { X86::VFvANDPDrr, X86::VFvANDPDrm, 0 },
1275  { X86::VFvANDPSrr, X86::VFvANDPSrm, 0 },
1276  { X86::VFvORPDrr, X86::VFvORPDrm, 0 },
1277  { X86::VFvORPSrr, X86::VFvORPSrm, 0 },
1278  { X86::VFvXORPDrr, X86::VFvXORPDrm, 0 },
1279  { X86::VFvXORPSrr, X86::VFvXORPSrm, 0 },
1280  { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
1281  { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
1282  { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
1283  { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
1284  { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
1285  { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
1286  { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
1287  { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
1288  { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
1289  { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
1290  { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
1291  { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
1292  { X86::VMINPDrr, X86::VMINPDrm, 0 },
1293  { X86::VMINPSrr, X86::VMINPSrm, 0 },
1294  { X86::VMINSDrr, X86::VMINSDrm, 0 },
1295  { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
1296  { X86::VMINSSrr, X86::VMINSSrm, 0 },
1297  { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
1298  { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
1299  { X86::VMULPDrr, X86::VMULPDrm, 0 },
1300  { X86::VMULPSrr, X86::VMULPSrm, 0 },
1301  { X86::VMULSDrr, X86::VMULSDrm, 0 },
1302  { X86::VMULSDrr_Int, X86::VMULSDrm_Int, 0 },
1303  { X86::VMULSSrr, X86::VMULSSrm, 0 },
1304  { X86::VMULSSrr_Int, X86::VMULSSrm_Int, 0 },
1305  { X86::VORPDrr, X86::VORPDrm, 0 },
1306  { X86::VORPSrr, X86::VORPSrm, 0 },
1307  { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
1308  { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
1309  { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
1310  { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
1311  { X86::VPADDBrr, X86::VPADDBrm, 0 },
1312  { X86::VPADDDrr, X86::VPADDDrm, 0 },
1313  { X86::VPADDQrr, X86::VPADDQrm, 0 },
1314  { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
1315  { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
1316  { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
1317  { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
1318  { X86::VPADDWrr, X86::VPADDWrm, 0 },
1319  { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 },
1320  { X86::VPANDNrr, X86::VPANDNrm, 0 },
1321  { X86::VPANDrr, X86::VPANDrm, 0 },
1322  { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
1323  { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
1324  { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
1325  { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
1326  { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 },
1327  { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
1328  { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
1329  { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
1330  { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
1331  { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
1332  { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
1333  { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
1334  { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
1335  { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
1336  { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
1337  { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
1338  { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
1339  { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
1340  { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
1341  { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
1342  { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
1343  { X86::VPINSRBrr, X86::VPINSRBrm, 0 },
1344  { X86::VPINSRDrr, X86::VPINSRDrm, 0 },
1345  { X86::VPINSRQrr, X86::VPINSRQrm, 0 },
1346  { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
1347  { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
1348  { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
1349  { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
1350  { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
1351  { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
1352  { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
1353  { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
1354  { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
1355  { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
1356  { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
1357  { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
1358  { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
1359  { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
1360  { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
1361  { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
1362  { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 },
1363  { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
1364  { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
1365  { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
1366  { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
1367  { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
1368  { X86::VPORrr, X86::VPORrm, 0 },
1369  { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
1370  { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
1371  { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 },
1372  { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 },
1373  { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 },
1374  { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
1375  { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
1376  { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
1377  { X86::VPSRADrr, X86::VPSRADrm, 0 },
1378  { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
1379  { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
1380  { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
1381  { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
1382  { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
1383  { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
1384  { X86::VPSUBQrr, X86::VPSUBQrm, 0 },
1385  { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
1386  { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
1387  { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 },
1388  { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 },
1389  { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
1390  { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
1391  { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
1392  { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
1393  { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
1394  { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
1395  { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
1396  { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
1397  { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
1398  { X86::VPXORrr, X86::VPXORrm, 0 },
1399  { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
1400  { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
1401  { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
1402  { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
1403  { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
1404  { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
1405  { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
1406  { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, 0 },
1407  { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
1408  { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, 0 },
1409  { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
1410  { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
1411  { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
1412  { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
1413  { X86::VXORPDrr, X86::VXORPDrm, 0 },
1414  { X86::VXORPSrr, X86::VXORPSrm, 0 },
1415 
1416  // AVX 256-bit foldable instructions
1417  { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
1418  { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
1419  { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
1420  { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
1421  { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
1422  { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
1423  { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
1424  { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
1425  { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
1426  { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
1427  { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
1428  { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
1429  { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
1430  { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
1431  { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
1432  { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
1433  { X86::VDPPSYrri, X86::VDPPSYrmi, 0 },
1434  { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
1435  { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
1436  { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
1437  { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
1438  { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
1439  { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
1440  { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
1441  { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
1442  { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
1443  { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
1444  { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
1445  { X86::VORPDYrr, X86::VORPDYrm, 0 },
1446  { X86::VORPSYrr, X86::VORPSYrm, 0 },
1447  { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
1448  { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
1449  { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
1450  { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
1451  { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
1452  { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
1453  { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
1454  { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
1455  { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
1456  { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
1457  { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
1458  { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
1459  { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
1460 
1461  // AVX2 foldable instructions
1462  { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
1463  { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
1464  { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
1465  { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
1466  { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
1467  { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
1468  { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
1469  { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
1470  { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
1471  { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
1472  { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
1473  { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
1474  { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
1475  { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 },
1476  { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
1477  { X86::VPANDYrr, X86::VPANDYrm, 0 },
1478  { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
1479  { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
1480  { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
1481  { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
1482  { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
1483  { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
1484  { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
1485  { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
1486  { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
1487  { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
1488  { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
1489  { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
1490  { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
1491  { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
1492  { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
1493  { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
1494  { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
1495  { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
1496  { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
1497  { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
1498  { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
1499  { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
1500  { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
1501  { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
1502  { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
1503  { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
1504  { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
1505  { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
1506  { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
1507  { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
1508  { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
1509  { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
1510  { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
1511  { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
1512  { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
1513  { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
1514  { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
1515  { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
1516  { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
1517  { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 },
1518  { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
1519  { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
1520  { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
1521  { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
1522  { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
1523  { X86::VPORYrr, X86::VPORYrm, 0 },
1524  { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
1525  { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
1526  { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 },
1527  { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 },
1528  { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 },
1529  { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
1530  { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
1531  { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
1532  { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
1533  { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
1534  { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
1535  { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
1536  { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
1537  { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
1538  { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
1539  { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
1540  { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
1541  { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
1542  { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
1543  { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
1544  { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
1545  { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
1546  { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
1547  { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
1548  { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
1549  { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 },
1550  { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
1551  { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
1552  { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 },
1553  { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 },
1554  { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
1555  { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
1556  { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
1557  { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
1558  { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
1559  { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
1560  { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
1561  { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
1562  { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
1563  { X86::VPXORYrr, X86::VPXORYrm, 0 },
1564 
1565  // FMA4 foldable patterns
1566  { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
1567  { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
1568  { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
1569  { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
1570  { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_NONE },
1571  { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_NONE },
1572  { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
1573  { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
1574  { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
1575  { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
1576  { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_NONE },
1577  { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_NONE },
1578  { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
1579  { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
1580  { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
1581  { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
1582  { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_NONE },
1583  { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_NONE },
1584  { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
1585  { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
1586  { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
1587  { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
1588  { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_NONE },
1589  { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_NONE },
1590  { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
1591  { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
1592  { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_NONE },
1593  { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_NONE },
1594  { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
1595  { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
1596  { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_NONE },
1597  { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_NONE },
1598 
1599  // XOP foldable instructions
1600  { X86::VPCMOVrr, X86::VPCMOVmr, 0 },
1601  { X86::VPCMOVrrY, X86::VPCMOVmrY, 0 },
1602  { X86::VPCOMBri, X86::VPCOMBmi, 0 },
1603  { X86::VPCOMDri, X86::VPCOMDmi, 0 },
1604  { X86::VPCOMQri, X86::VPCOMQmi, 0 },
1605  { X86::VPCOMWri, X86::VPCOMWmi, 0 },
1606  { X86::VPCOMUBri, X86::VPCOMUBmi, 0 },
1607  { X86::VPCOMUDri, X86::VPCOMUDmi, 0 },
1608  { X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
1609  { X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
1610  { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
1611  { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 },
1612  { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
1613  { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 },
1614  { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
1615  { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
1616  { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
1617  { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 },
1618  { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 },
1619  { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 },
1620  { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 },
1621  { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 },
1622  { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 },
1623  { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 },
1624  { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 },
1625  { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 },
1626  { X86::VPPERMrr, X86::VPPERMmr, 0 },
1627  { X86::VPROTBrr, X86::VPROTBrm, 0 },
1628  { X86::VPROTDrr, X86::VPROTDrm, 0 },
1629  { X86::VPROTQrr, X86::VPROTQrm, 0 },
1630  { X86::VPROTWrr, X86::VPROTWrm, 0 },
1631  { X86::VPSHABrr, X86::VPSHABrm, 0 },
1632  { X86::VPSHADrr, X86::VPSHADrm, 0 },
1633  { X86::VPSHAQrr, X86::VPSHAQrm, 0 },
1634  { X86::VPSHAWrr, X86::VPSHAWrm, 0 },
1635  { X86::VPSHLBrr, X86::VPSHLBrm, 0 },
1636  { X86::VPSHLDrr, X86::VPSHLDrm, 0 },
1637  { X86::VPSHLQrr, X86::VPSHLQrm, 0 },
1638  { X86::VPSHLWrr, X86::VPSHLWrm, 0 },
1639 
1640  // BMI/BMI2 foldable instructions
1641  { X86::ANDN32rr, X86::ANDN32rm, 0 },
1642  { X86::ANDN64rr, X86::ANDN64rm, 0 },
1643  { X86::MULX32rr, X86::MULX32rm, 0 },
1644  { X86::MULX64rr, X86::MULX64rm, 0 },
1645  { X86::PDEP32rr, X86::PDEP32rm, 0 },
1646  { X86::PDEP64rr, X86::PDEP64rm, 0 },
1647  { X86::PEXT32rr, X86::PEXT32rm, 0 },
1648  { X86::PEXT64rr, X86::PEXT64rm, 0 },
1649 
1650  // AVX-512 foldable instructions
1651  { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
1652  { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
1653  { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
1654  { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
1655  { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
1656  { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
1657  { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
1658  { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
1659  { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
1660  { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
1661  { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
1662  { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
1663  { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
1664  { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
1665  { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
1666  { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
1667  { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
1668  { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
1669  { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
1670  { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
1671  { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
1672  { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
1673  { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
1674  { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
1675  { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
1676  { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
1677  { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
1678  { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
1679  { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
1680  { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
1681  { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
1682  { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
1683  { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
1684  { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
1685  { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
1686  { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
1687  { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
1688  { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
1689  { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
1690 
1691  // AVX-512{F,VL} foldable instructions
1692  { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
1693  { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
1694  { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
1695 
1696  // AVX-512{F,VL} foldable instructions
1697  { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
1698  { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
1699  { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
1700  { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
1701 
1702  // AES foldable instructions
1703  { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
1704  { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
1705  { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
1706  { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
1707  { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
1708  { X86::VAESDECrr, X86::VAESDECrm, 0 },
1709  { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
1710  { X86::VAESENCrr, X86::VAESENCrm, 0 },
1711 
1712  // SHA foldable instructions
1713  { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
1714  { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
1715  { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
1716  { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
1717  { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
1718  { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
1719  { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
1720  };
1721 
1722  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
1723  AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
1724  Entry.RegOp, Entry.MemOp,
1725  // Index 2, folded load
1726  Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
1727  }
1728 
1729  static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
1730  // FMA foldable instructions
1731  { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
1732  { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
1733  { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE },
1734  { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE },
1735  { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE },
1736  { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE },
1737 
1738  { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE },
1739  { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE },
1740  { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_NONE },
1741  { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_NONE },
1742  { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_NONE },
1743  { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_NONE },
1744  { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_NONE },
1745  { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_NONE },
1746  { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_NONE },
1747  { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_NONE },
1748  { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_NONE },
1749  { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE },
1750 
1751  { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE },
1752  { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE },
1753  { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE },
1754  { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE },
1755  { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE },
1756  { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE },
1757 
1758  { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE },
1759  { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE },
1760  { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_NONE },
1761  { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_NONE },
1762  { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_NONE },
1763  { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_NONE },
1764  { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_NONE },
1765  { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_NONE },
1766  { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_NONE },
1767  { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_NONE },
1768  { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_NONE },
1769  { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE },
1770 
1771  { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE },
1772  { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE },
1773  { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE },
1774  { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE },
1775  { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE },
1776  { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE },
1777 
1778  { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE },
1779  { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE },
1780  { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_NONE },
1781  { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_NONE },
1782  { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_NONE },
1783  { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_NONE },
1784  { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_NONE },
1785  { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_NONE },
1786  { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_NONE },
1787  { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_NONE },
1788  { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_NONE },
1789  { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE },
1790 
1791  { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE },
1792  { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE },
1793  { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE },
1794  { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE },
1795  { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE },
1796  { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE },
1797 
1798  { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE },
1799  { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE },
1800  { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_NONE },
1801  { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_NONE },
1802  { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_NONE },
1803  { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_NONE },
1804  { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_NONE },
1805  { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_NONE },
1806  { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_NONE },
1807  { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_NONE },
1808  { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_NONE },
1809  { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_NONE },
1810 
1811  { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_NONE },
1812  { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_NONE },
1813  { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_NONE },
1814  { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_NONE },
1815  { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_NONE },
1816  { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_NONE },
1817  { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_NONE },
1818  { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_NONE },
1819  { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_NONE },
1820  { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_NONE },
1821  { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_NONE },
1822  { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_NONE },
1823 
1824  { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_NONE },
1825  { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_NONE },
1826  { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_NONE },
1827  { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_NONE },
1828  { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_NONE },
1829  { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_NONE },
1830  { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_NONE },
1831  { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_NONE },
1832  { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_NONE },
1833  { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_NONE },
1834  { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_NONE },
1835  { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE },
1836 
1837  // FMA4 foldable patterns
1838  { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
1839  { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
1840  { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
1841  { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
1842  { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_NONE },
1843  { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_NONE },
1844  { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
1845  { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
1846  { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
1847  { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
1848  { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_NONE },
1849  { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_NONE },
1850  { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
1851  { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
1852  { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
1853  { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
1854  { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_NONE },
1855  { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_NONE },
1856  { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
1857  { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
1858  { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
1859  { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
1860  { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_NONE },
1861  { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_NONE },
1862  { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
1863  { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
1864  { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_NONE },
1865  { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_NONE },
1866  { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
1867  { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
1868  { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_NONE },
1869  { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_NONE },
1870 
1871  // XOP foldable instructions
1872  { X86::VPCMOVrr, X86::VPCMOVrm, 0 },
1873  { X86::VPCMOVrrY, X86::VPCMOVrmY, 0 },
1874  { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
1875  { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
1876  { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
1877  { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
1878  { X86::VPPERMrr, X86::VPPERMrm, 0 },
1879 
1880  // AVX-512 VPERMI instructions with 3 source operands.
1881  { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
1882  { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
1883  { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
1884  { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
1885  { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
1886  { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
1887  { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
1888  { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
1889  { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
1890  { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
1891  { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
1892  { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
1893  { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
1894  // AVX-512 arithmetic instructions
1895  { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
1896  { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
1897  { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
1898  { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
1899  { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
1900  { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
1901  { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
1902  { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
1903  { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
1904  { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
1905  { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
1906  { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
1907  // AVX-512{F,VL} arithmetic instructions 256-bit
1908  { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
1909  { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
1910  { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
1911  { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
1912  { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
1913  { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
1914  { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
1915  { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
1916  { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
1917  { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
1918  { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
1919  { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
1920  // AVX-512{F,VL} arithmetic instructions 128-bit
1921  { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
1922  { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
1923  { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
1924  { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
1925  { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
1926  { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
1927  { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
1928  { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
1929  { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
1930  { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
1931  { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
1932  { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
1933  };
1934 
1935  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
1936  AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
1937  Entry.RegOp, Entry.MemOp,
1938  // Index 3, folded load
1939  Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
1940  }
1941 
1942  static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
1943  // AVX-512 foldable instructions
1944  { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
1945  { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
1946  { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
1947  { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
1948  { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
1949  { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
1950  { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
1951  { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
1952  { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
1953  { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
1954  { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
1955  { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
1956  // AVX-512{F,VL} foldable instructions 256-bit
1957  { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
1958  { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
1959  { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
1960  { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
1961  { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
1962  { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
1963  { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
1964  { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
1965  { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
1966  { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
1967  { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
1968  { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
1969  // AVX-512{F,VL} foldable instructions 128-bit
1970  { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
1971  { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
1972  { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
1973  { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
1974  { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
1975  { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
1976  { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
1977  { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
1978  { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
1979  { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
1980  { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
1981  { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
1982  };
1983 
1984  for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
1985  AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
1986  Entry.RegOp, Entry.MemOp,
1987  // Index 4, folded load
1988  Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
1989  }
1990 }
1991 
1992 void
1993 X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
1994  MemOp2RegOpTableType &M2RTable,
1995  unsigned RegOp, unsigned MemOp, unsigned Flags) {
1996  if ((Flags & TB_NO_FORWARD) == 0) {
1997  assert(!R2MTable.count(RegOp) && "Duplicate entry!");
1998  R2MTable[RegOp] = std::make_pair(MemOp, Flags);
1999  }
2000  if ((Flags & TB_NO_REVERSE) == 0) {
2001  assert(!M2RTable.count(MemOp) &&
2002  "Duplicated entries in unfolding maps?");
2003  M2RTable[MemOp] = std::make_pair(RegOp, Flags);
2004  }
2005 }
2006 
2007 bool
2008 X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
2009  unsigned &SrcReg, unsigned &DstReg,
2010  unsigned &SubIdx) const {
2011  switch (MI.getOpcode()) {
2012  default: break;
2013  case X86::MOVSX16rr8:
2014  case X86::MOVZX16rr8:
2015  case X86::MOVSX32rr8:
2016  case X86::MOVZX32rr8:
2017  case X86::MOVSX64rr8:
2018  if (!Subtarget.is64Bit())
2019  // It's not always legal to reference the low 8-bit of the larger
2020  // register in 32-bit mode.
2021  return false;
2022  case X86::MOVSX32rr16:
2023  case X86::MOVZX32rr16:
2024  case X86::MOVSX64rr16:
2025  case X86::MOVSX64rr32: {
2026  if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
2027  // Be conservative.
2028  return false;
2029  SrcReg = MI.getOperand(1).getReg();
2030  DstReg = MI.getOperand(0).getReg();
2031  switch (MI.getOpcode()) {
2032  default: llvm_unreachable("Unreachable!");
2033  case X86::MOVSX16rr8:
2034  case X86::MOVZX16rr8:
2035  case X86::MOVSX32rr8:
2036  case X86::MOVZX32rr8:
2037  case X86::MOVSX64rr8:
2038  SubIdx = X86::sub_8bit;
2039  break;
2040  case X86::MOVSX32rr16:
2041  case X86::MOVZX32rr16:
2042  case X86::MOVSX64rr16:
2043  SubIdx = X86::sub_16bit;
2044  break;
2045  case X86::MOVSX64rr32:
2046  SubIdx = X86::sub_32bit;
2047  break;
2048  }
2049  return true;
2050  }
2051  }
2052  return false;
2053 }
2054 
2055 int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
2056  const MachineFunction *MF = MI->getParent()->getParent();
2057  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
2058 
2059  if (MI->getOpcode() == getCallFrameSetupOpcode() ||
2060  MI->getOpcode() == getCallFrameDestroyOpcode()) {
2061  unsigned StackAlign = TFI->getStackAlignment();
2062  int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
2063  StackAlign;
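 // For example, with a 16-byte stack alignment an adjustment of 20 bytes
 // rounds up to (20 + 16 - 1) / 16 * 16 = 32 bytes.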
2064 
2065  SPAdj -= MI->getOperand(1).getImm();
2066 
2067  if (MI->getOpcode() == getCallFrameSetupOpcode())
2068  return SPAdj;
2069  else
2070  return -SPAdj;
2071  }
2072 
2073  // To know whether a call adjusts the stack, we need information
2074  // that is bound to the following ADJCALLSTACKUP pseudo.
2075  // Look for the next ADJCALLSTACKUP that follows the call.
2076  if (MI->isCall()) {
2077  const MachineBasicBlock* MBB = MI->getParent();
2078  auto I = ++MachineBasicBlock::const_iterator(MI);
2079  for (auto E = MBB->end(); I != E; ++I) {
2080  if (I->getOpcode() == getCallFrameDestroyOpcode() ||
2081  I->isCall())
2082  break;
2083  }
2084 
2085  // If we could not find a frame destroy opcode, then it has already
2086  // been simplified, so we don't care.
2087  if (I->getOpcode() != getCallFrameDestroyOpcode())
2088  return 0;
2089 
2090  return -(I->getOperand(1).getImm());
2091  }
2092 
2093  // Currently we handle only PUSHes we can reasonably expect to see
2094  // in call sequences.
2095  switch (MI->getOpcode()) {
2096  default:
2097  return 0;
2098  case X86::PUSH32i8:
2099  case X86::PUSH32r:
2100  case X86::PUSH32rmm:
2101  case X86::PUSH32rmr:
2102  case X86::PUSHi32:
2103  return 4;
2104  }
2105 }
2106 
2107 /// Return true and the FrameIndex if the specified
2108 /// operand and following operands form a reference to the stack frame.
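/// A typical match is a plain frame-index reference: a frame-index base,
/// scale 1, no index register, and zero displacement, as checked below.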
2109 bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
2110  int &FrameIndex) const {
2111  if (MI->getOperand(Op+X86::AddrBaseReg).isFI() &&
2112  MI->getOperand(Op+X86::AddrScaleAmt).isImm() &&
2113  MI->getOperand(Op+X86::AddrIndexReg).isReg() &&
2114  MI->getOperand(Op+X86::AddrDisp).isImm() &&
2115  MI->getOperand(Op+X86::AddrScaleAmt).getImm() == 1 &&
2116  MI->getOperand(Op+X86::AddrIndexReg).getReg() == 0 &&
2117  MI->getOperand(Op+X86::AddrDisp).getImm() == 0) {
2118  FrameIndex = MI->getOperand(Op+X86::AddrBaseReg).getIndex();
2119  return true;
2120  }
2121  return false;
2122 }
2123 
2124 static bool isFrameLoadOpcode(int Opcode) {
2125  switch (Opcode) {
2126  default:
2127  return false;
2128  case X86::MOV8rm:
2129  case X86::MOV16rm:
2130  case X86::MOV32rm:
2131  case X86::MOV64rm:
2132  case X86::LD_Fp64m:
2133  case X86::MOVSSrm:
2134  case X86::MOVSDrm:
2135  case X86::MOVAPSrm:
2136  case X86::MOVAPDrm:
2137  case X86::MOVDQArm:
2138  case X86::VMOVSSrm:
2139  case X86::VMOVSDrm:
2140  case X86::VMOVAPSrm:
2141  case X86::VMOVAPDrm:
2142  case X86::VMOVDQArm:
2143  case X86::VMOVUPSYrm:
2144  case X86::VMOVAPSYrm:
2145  case X86::VMOVUPDYrm:
2146  case X86::VMOVAPDYrm:
2147  case X86::VMOVDQUYrm:
2148  case X86::VMOVDQAYrm:
2149  case X86::MMX_MOVD64rm:
2150  case X86::MMX_MOVQ64rm:
2151  case X86::VMOVAPSZrm:
2152  case X86::VMOVUPSZrm:
2153  return true;
2154  }
2155 }
2156 
2157 static bool isFrameStoreOpcode(int Opcode) {
2158  switch (Opcode) {
2159  default: break;
2160  case X86::MOV8mr:
2161  case X86::MOV16mr:
2162  case X86::MOV32mr:
2163  case X86::MOV64mr:
2164  case X86::ST_FpP64m:
2165  case X86::MOVSSmr:
2166  case X86::MOVSDmr:
2167  case X86::MOVAPSmr:
2168  case X86::MOVAPDmr:
2169  case X86::MOVDQAmr:
2170  case X86::VMOVSSmr:
2171  case X86::VMOVSDmr:
2172  case X86::VMOVAPSmr:
2173  case X86::VMOVAPDmr:
2174  case X86::VMOVDQAmr:
2175  case X86::VMOVUPSYmr:
2176  case X86::VMOVAPSYmr:
2177  case X86::VMOVUPDYmr:
2178  case X86::VMOVAPDYmr:
2179  case X86::VMOVDQUYmr:
2180  case X86::VMOVDQAYmr:
2181  case X86::VMOVUPSZmr:
2182  case X86::VMOVAPSZmr:
2183  case X86::MMX_MOVD64mr:
2184  case X86::MMX_MOVQ64mr:
2185  case X86::MMX_MOVNTQmr:
2186  return true;
2187  }
2188  return false;
2189 }
2190 
2191 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
2192  int &FrameIndex) const {
2193  if (isFrameLoadOpcode(MI->getOpcode()))
2194  if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
2195  return MI->getOperand(0).getReg();
2196  return 0;
2197 }
2198 
2199 unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
2200  int &FrameIndex) const {
2201  if (isFrameLoadOpcode(MI->getOpcode())) {
2202  unsigned Reg;
2203  if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
2204  return Reg;
2205  // Check for post-frame index elimination operations
2206  const MachineMemOperand *Dummy;
2207  return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
2208  }
2209  return 0;
2210 }
2211 
2212 unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
2213  int &FrameIndex) const {
2214  if (isFrameStoreOpcode(MI->getOpcode()))
2215  if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
2216  isFrameOperand(MI, 0, FrameIndex))
2217  return MI->getOperand(X86::AddrNumOperands).getReg();
2218  return 0;
2219 }
2220 
2221 unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
2222  int &FrameIndex) const {
2223  if (isFrameStoreOpcode(MI->getOpcode())) {
2224  unsigned Reg;
2225  if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
2226  return Reg;
2227  // Check for post-frame index elimination operations
2228  const MachineMemOperand *Dummy;
2229  return hasStoreToStackSlot(MI, Dummy, FrameIndex);
2230  }
2231  return 0;
2232 }
2233 
2234 /// Return true if register is PIC base; i.e., defined by X86::MOVPC32r.
2235 static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
2236  // Don't waste compile time scanning use-def chains of physregs.
2237  if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
2238  return false;
2239  bool isPICBase = false;
2240  for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
2241  E = MRI.def_instr_end(); I != E; ++I) {
2242  MachineInstr *DefMI = &*I;
2243  if (DefMI->getOpcode() != X86::MOVPC32r)
2244  return false;
2245  assert(!isPICBase && "More than one PIC base?");
2246  isPICBase = true;
2247  }
2248  return isPICBase;
2249 }
2250 
2251 bool
2252 X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
2253  AliasAnalysis *AA) const {
2254  switch (MI->getOpcode()) {
2255  default: break;
2256  case X86::MOV8rm:
2257  case X86::MOV16rm:
2258  case X86::MOV32rm:
2259  case X86::MOV64rm:
2260  case X86::LD_Fp64m:
2261  case X86::MOVSSrm:
2262  case X86::MOVSDrm:
2263  case X86::MOVAPSrm:
2264  case X86::MOVUPSrm:
2265  case X86::MOVAPDrm:
2266  case X86::MOVDQArm:
2267  case X86::MOVDQUrm:
2268  case X86::VMOVSSrm:
2269  case X86::VMOVSDrm:
2270  case X86::VMOVAPSrm:
2271  case X86::VMOVUPSrm:
2272  case X86::VMOVAPDrm:
2273  case X86::VMOVDQArm:
2274  case X86::VMOVDQUrm:
2275  case X86::VMOVAPSYrm:
2276  case X86::VMOVUPSYrm:
2277  case X86::VMOVAPDYrm:
2278  case X86::VMOVDQAYrm:
2279  case X86::VMOVDQUYrm:
2280  case X86::MMX_MOVD64rm:
2281  case X86::MMX_MOVQ64rm:
2282  case X86::FsVMOVAPSrm:
2283  case X86::FsVMOVAPDrm:
2284  case X86::FsMOVAPSrm:
2285  case X86::FsMOVAPDrm: {
2286  // Loads from constant pools are trivially rematerializable.
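 // (Re-materializing such a load means simply re-executing it at the point
 // of use rather than spilling and reloading the value; this is safe here
 // because the memory it reads is invariant.)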
2287  if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
2288  MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
2289  MI->getOperand(1+X86::AddrIndexReg).isReg() &&
2290  MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 &&
2291  MI->isInvariantLoad(AA)) {
2292  unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg();
2293  if (BaseReg == 0 || BaseReg == X86::RIP)
2294  return true;
2295  // Allow re-materialization of PIC load.
2296  if (!ReMatPICStubLoad && MI->getOperand(1+X86::AddrDisp).isGlobal())
2297  return false;
2298  const MachineFunction &MF = *MI->getParent()->getParent();
2299  const MachineRegisterInfo &MRI = MF.getRegInfo();
2300  return regIsPICBase(BaseReg, MRI);
2301  }
2302  return false;
2303  }
2304 
2305  case X86::LEA32r:
2306  case X86::LEA64r: {
2307  if (MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
2308  MI->getOperand(1+X86::AddrIndexReg).isReg() &&
2309  MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 &&
2310  !MI->getOperand(1+X86::AddrDisp).isReg()) {
2311  // lea fi#, lea GV, etc. are all rematerializable.
2312  if (!MI->getOperand(1+X86::AddrBaseReg).isReg())
2313  return true;
2314  unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg();
2315  if (BaseReg == 0)
2316  return true;
2317  // Allow re-materialization of lea PICBase + x.
2318  const MachineFunction &MF = *MI->getParent()->getParent();
2319  const MachineRegisterInfo &MRI = MF.getRegInfo();
2320  return regIsPICBase(BaseReg, MRI);
2321  }
2322  return false;
2323  }
2324  }
2325 
2326  // All other instructions marked M_REMATERIALIZABLE are always trivially
2327  // rematerializable.
2328  return true;
2329 }
2330 
2331 bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
2332  MachineBasicBlock::iterator I) const {
2333  MachineBasicBlock::iterator E = MBB.end();
2334 
2335  // For compile time consideration, if we are not able to determine the
2336  // safety after visiting 4 instructions in each direction, we will assume
2337  // it's not safe.
2338  MachineBasicBlock::iterator Iter = I;
2339  for (unsigned i = 0; Iter != E && i < 4; ++i) {
2340  bool SeenDef = false;
2341  for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
2342  MachineOperand &MO = Iter->getOperand(j);
2343  if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
2344  SeenDef = true;
2345  if (!MO.isReg())
2346  continue;
2347  if (MO.getReg() == X86::EFLAGS) {
2348  if (MO.isUse())
2349  return false;
2350  SeenDef = true;
2351  }
2352  }
2353 
2354  if (SeenDef)
2355  // This instruction defines EFLAGS, no need to look any further.
2356  return true;
2357  ++Iter;
2358  // Skip over DBG_VALUE.
2359  while (Iter != E && Iter->isDebugValue())
2360  ++Iter;
2361  }
2362 
2363  // It is safe to clobber EFLAGS at the end of a block if no successor has it
2364  // live in.
2365  if (Iter == E) {
2366  for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
2367  SE = MBB.succ_end(); SI != SE; ++SI)
2368  if ((*SI)->isLiveIn(X86::EFLAGS))
2369  return false;
2370  return true;
2371  }
2372 
2373  MachineBasicBlock::iterator B = MBB.begin();
2374  Iter = I;
2375  for (unsigned i = 0; i < 4; ++i) {
2376  // If we make it to the beginning of the block, it's safe to clobber
2377  // EFLAGS iff EFLAGS is not live-in.
2378  if (Iter == B)
2379  return !MBB.isLiveIn(X86::EFLAGS);
2380 
2381  --Iter;
2382  // Skip over DBG_VALUE.
2383  while (Iter != B && Iter->isDebugValue())
2384  --Iter;
2385 
2386  bool SawKill = false;
2387  for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
2388  MachineOperand &MO = Iter->getOperand(j);
2389  // A register mask may clobber EFLAGS, but we should still look for a
2390  // live EFLAGS def.
2391  if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
2392  SawKill = true;
2393  if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
2394  if (MO.isDef()) return MO.isDead();
2395  if (MO.isKill()) SawKill = true;
2396  }
2397  }
2398 
2399  if (SawKill)
2400  // This instruction kills EFLAGS and doesn't redefine it, so
2401  // there's no need to look further.
2402  return true;
2403  }
2404 
2405  // Conservative answer.
2406  return false;
2407 }
2408 
2409 void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
2410  MachineBasicBlock::iterator I,
2411  unsigned DestReg, unsigned SubIdx,
2412  const MachineInstr *Orig,
2413  const TargetRegisterInfo &TRI) const {
2414  // MOV32r0 is implemented with an xor which clobbers the condition codes.
2415  // Re-materialize it as movri instructions to avoid side effects.
2416  unsigned Opc = Orig->getOpcode();
2417  if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
2418  DebugLoc DL = Orig->getDebugLoc();
2419  BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
2420  .addImm(0);
2421  } else {
2422  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
2423  MBB.insert(I, MI);
2424  }
2425 
2426  MachineInstr *NewMI = std::prev(I);
2427  NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
2428 }
2429 
2430 /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
2431 static bool hasLiveCondCodeDef(MachineInstr *MI) {
2432  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
2433  MachineOperand &MO = MI->getOperand(i);
2434  if (MO.isReg() && MO.isDef() &&
2435  MO.getReg() == X86::EFLAGS && !MO.isDead()) {
2436  return true;
2437  }
2438  }
2439  return false;
2440 }
2441 
2442 /// Return the truncated (hardware-effective) shift count for a machine operand.
2443 inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
2444  unsigned ShiftAmtOperandIdx) {
2445  // The shift count is six bits with the REX.W prefix and five bits without.
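 // For example, a 64-bit shift encoded with an immediate of 68 actually
 // shifts by 68 & 63 = 4, while a 32-bit shift by 35 shifts by 35 & 31 = 3.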
2446  unsigned ShiftCountMask = (MI->getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
2447  unsigned Imm = MI->getOperand(ShiftAmtOperandIdx).getImm();
2448  return Imm & ShiftCountMask;
2449 }
2450 
2451 /// Check whether the given shift count can be represented by a LEA
2452 /// instruction.
2453 inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
2454  // Left shift instructions can be transformed into load-effective-address
2455  // instructions if we can encode them appropriately.
2456  // A LEA instruction utilizes a SIB byte to encode its scale factor.
2457  // The SIB.scale field is two bits wide which means that we can encode any
2458  // shift amount less than 4.
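 // For example, "shll $3, %reg" can become "leal (,%reg,8), %dst" because
 // the scale 1 << 3 = 8 is encodable; a shift by 4 would need a scale of 16,
 // which the two-bit SIB.scale field cannot express.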
2459  return ShAmt < 4 && ShAmt > 0;
2460 }
2461 
2462 bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src,
2463  unsigned Opc, bool AllowSP,
2464  unsigned &NewSrc, bool &isKill, bool &isUndef,
2465  MachineOperand &ImplicitOp) const {
2466  MachineFunction &MF = *MI->getParent()->getParent();
2467  const TargetRegisterClass *RC;
2468  if (AllowSP) {
2469  RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
2470  } else {
2471  RC = Opc != X86::LEA32r ?
2472  &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
2473  }
2474  unsigned SrcReg = Src.getReg();
2475 
2476  // For both LEA64 and LEA32 the register already has essentially the right
2477  // type (32-bit or 64-bit); we may just need to forbid SP.
2478  if (Opc != X86::LEA64_32r) {
2479  NewSrc = SrcReg;
2480  isKill = Src.isKill();
2481  isUndef = Src.isUndef();
2482 
2483  if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
2484  !MF.getRegInfo().constrainRegClass(NewSrc, RC))
2485  return false;
2486 
2487  return true;
2488  }
2489 
2490  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
2491  // another we need to add 64-bit registers to the final MI.
2492  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2493  ImplicitOp = Src;
2494  ImplicitOp.setImplicit();
2495 
2496  NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64);
2497  MachineBasicBlock::LivenessQueryResult LQR =
2498  MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI);
2499 
2500  switch (LQR) {
2501  case MachineBasicBlock::LQR_Unknown:
2502  // We can't give sane liveness flags to the instruction, abandon LEA
2503  // formation.
2504  return false;
2505  case MachineBasicBlock::LQR_Live:
2506  isKill = MI->killsRegister(SrcReg);
2507  isUndef = false;
2508  break;
2509  default:
2510  // The physreg itself is dead, so we have to use it as an <undef>.
2511  isKill = false;
2512  isUndef = true;
2513  break;
2514  }
2515  } else {
2516  // Virtual register of the wrong class; we have to create a temporary
2517  // 64-bit vreg to feed into the LEA.
2518  NewSrc = MF.getRegInfo().createVirtualRegister(RC);
2519  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
2520  get(TargetOpcode::COPY))
2521  .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
2522  .addOperand(Src);
2523 
2524  // Which is obviously going to be dead after we're done with it.
2525  isKill = true;
2526  isUndef = false;
2527  }
2528 
2529  // We've set all the parameters without issue.
2530  return true;
2531 }
2532 
2533 /// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
2534 /// LEA to form 3-address code by promoting to a 32-bit superregister and then
2535 /// truncating back down to a 16-bit subregister.
2536 MachineInstr *
2537 X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
2538  MachineFunction::iterator &MFI,
2539  MachineBasicBlock::iterator &MBBI,
2540  LiveVariables *LV) const {
2541  MachineInstr *MI = MBBI;
2542  unsigned Dest = MI->getOperand(0).getReg();
2543  unsigned Src = MI->getOperand(1).getReg();
2544  bool isDead = MI->getOperand(0).isDead();
2545  bool isKill = MI->getOperand(1).isKill();
2546 
2547  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
2548  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
2549  unsigned Opc, leaInReg;
2550  if (Subtarget.is64Bit()) {
2551  Opc = X86::LEA64_32r;
2552  leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
2553  } else {
2554  Opc = X86::LEA32r;
2555  leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
2556  }
2557 
2558  // Build and insert into an implicit UNDEF value. This is OK because
2559  // we'll be shifting and then extracting the lower 16 bits.
2560  // This has the potential to cause a partial register stall, e.g.
2561  // movw (%rbp,%rcx,2), %dx
2562  // leal -65(%rdx), %esi
2563  // But testing has shown this *does* help performance in 64-bit mode (at
2564  // least on modern x86 machines).
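 // The emitted sequence is roughly (vreg names below are illustrative):
 //   %vregIn = IMPLICIT_DEF
 //   %vregIn:sub_16bit = COPY %src16
 //   %vregOut = LEA32r or LEA64_32r (operands derived from MI below)
 //   %dst16 = COPY %vregOut:sub_16bit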
2565  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
2566  MachineInstr *InsMI =
2567  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
2568  .addReg(leaInReg, RegState::Define, X86::sub_16bit)
2569  .addReg(Src, getKillRegState(isKill));
2570 
2571  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
2572  get(Opc), leaOutReg);
2573  switch (MIOpc) {
2574  default: llvm_unreachable("Unreachable!");
2575  case X86::SHL16ri: {
2576  unsigned ShAmt = MI->getOperand(2).getImm();
2577  MIB.addReg(0).addImm(1 << ShAmt)
2578  .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
2579  break;
2580  }
2581  case X86::INC16r:
2582  addRegOffset(MIB, leaInReg, true, 1);
2583  break;
2584  case X86::DEC16r:
2585  addRegOffset(MIB, leaInReg, true, -1);
2586  break;
2587  case X86::ADD16ri:
2588  case X86::ADD16ri8:
2589  case X86::ADD16ri_DB:
2590  case X86::ADD16ri8_DB:
2591  addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
2592  break;
2593  case X86::ADD16rr:
2594  case X86::ADD16rr_DB: {
2595  unsigned Src2 = MI->getOperand(2).getReg();
2596  bool isKill2 = MI->getOperand(2).isKill();
2597  unsigned leaInReg2 = 0;
2598  MachineInstr *InsMI2 = nullptr;
2599  if (Src == Src2) {
2600  // ADD16rr %reg1028<kill>, %reg1028
2601  // Both sources are the same register, so we need just a single insert_subreg.
2602  addRegReg(MIB, leaInReg, true, leaInReg, false);
2603  } else {
2604  if (Subtarget.is64Bit())
2605  leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
2606  else
2607  leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
2608  // Build and insert into an implicit UNDEF value. This is OK because
2609  // we'll be shifting and then extracting the lower 16 bits.
2610  BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
2611  InsMI2 =
2612  BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
2613  .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
2614  .addReg(Src2, getKillRegState(isKill2));
2615  addRegReg(MIB, leaInReg, true, leaInReg2, true);
2616  }
2617  if (LV && isKill2 && InsMI2)
2618  LV->replaceKillInstruction(Src2, MI, InsMI2);
2619  break;
2620  }
2621  }
2622 
2623  MachineInstr *NewMI = MIB;
2624  MachineInstr *ExtMI =
2625  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
2626  .addReg(Dest, RegState::Define | getDeadRegState(isDead))
2627  .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
2628 
2629  if (LV) {
2630  // Update live variables
2631  LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
2632  LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
2633  if (isKill)
2634  LV->replaceKillInstruction(Src, MI, InsMI);
2635  if (isDead)
2636  LV->replaceKillInstruction(Dest, MI, ExtMI);
2637  }
2638 
2639  return ExtMI;
2640 }
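// As a sketch of the promotion performed above (virtual register numbers are
// purely illustrative), a 16-bit add such as
//   %dx<def> = ADD16ri %ax<kill>, 5
// becomes, on a 64-bit subtarget, roughly:
//   %vreg1<def> = IMPLICIT_DEF                       ; GR64_NOSP temporary
//   %vreg1:sub_16bit<def> = COPY %ax<kill>
//   %vreg2<def> = LEA64_32r %vreg1<kill>, 1, %noreg, 5, %noreg
//   %dx<def> = COPY %vreg2:sub_16bit<kill>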
2641 
2642 /// This method must be implemented by targets that
2643 /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
2644 /// may be able to convert a two-address instruction into a true
2645 /// three-address instruction on demand. This allows the X86 target (for
2646 /// example) to convert ADD and SHL instructions into LEA instructions if they
2647 /// would require register copies due to two-addressness.
2648 ///
2649 /// This method returns a null pointer if the transformation cannot be
2650 /// performed, otherwise it returns the new instruction.
2651 ///
2652 MachineInstr *
2653 X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
2654  MachineBasicBlock::iterator &MBBI,
2655  LiveVariables *LV) const {
2656  MachineInstr *MI = MBBI;
2657 
2658  // The following opcodes also set the condition code register(s). Only
2659  // convert them to an equivalent LEA if the condition code register defs
2660  // are dead!
2661  if (hasLiveCondCodeDef(MI))
2662  return nullptr;
2663 
2664  MachineFunction &MF = *MI->getParent()->getParent();
2665  // All input instructions are two-addr instructions. Get the known operands.
2666  const MachineOperand &Dest = MI->getOperand(0);
2667  const MachineOperand &Src = MI->getOperand(1);
2668 
2669  MachineInstr *NewMI = nullptr;
2670  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
2671  // we have better subtarget support, enable the 16-bit LEA generation here.
2672  // 16-bit LEA is also slow on Core2.
2673  bool DisableLEA16 = true;
2674  bool is64Bit = Subtarget.is64Bit();
2675 
2676  unsigned MIOpc = MI->getOpcode();
2677  switch (MIOpc) {
2678  default: return nullptr;
2679  case X86::SHL64ri: {
2680  assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
2681  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
2682  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
2683 
2684  // LEA can't handle RSP.
2685  if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
2686  !MF.getRegInfo().constrainRegClass(Src.getReg(),
2687  &X86::GR64_NOSPRegClass))
2688  return nullptr;
2689 
2690  NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
2691  .addOperand(Dest)
2692  .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
2693  break;
2694  }
2695  case X86::SHL32ri: {
2696  assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
2697  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
2698  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
2699 
2700  unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
2701 
2702  // LEA can't handle ESP.
2703  bool isKill, isUndef;
2704  unsigned SrcReg;
2705  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
2706  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
2707  SrcReg, isKill, isUndef, ImplicitOp))
2708  return nullptr;
2709 
2710  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
2711  .addOperand(Dest)
2712  .addReg(0).addImm(1 << ShAmt)
2713  .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
2714  .addImm(0).addReg(0);
2715  if (ImplicitOp.getReg() != 0)
2716  MIB.addOperand(ImplicitOp);
2717  NewMI = MIB;
2718 
2719  break;
2720  }
2721  case X86::SHL16ri: {
2722  assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
2723  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
2724  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
2725 
2726  if (DisableLEA16)
2727  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : nullptr;
2728  NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
2729  .addOperand(Dest)
2730  .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
2731  break;
2732  }
2733  case X86::INC64r:
2734  case X86::INC32r: {
2735  assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
2736  unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
2737  : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
2738  bool isKill, isUndef;
2739  unsigned SrcReg;
2740  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
2741  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
2742  SrcReg, isKill, isUndef, ImplicitOp))
2743  return nullptr;
2744 
2745  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
2746  .addOperand(Dest)
2747  .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
2748  if (ImplicitOp.getReg() != 0)
2749  MIB.addOperand(ImplicitOp);
2750 
2751  NewMI = addOffset(MIB, 1);
2752  break;
2753  }
2754  case X86::INC16r:
2755  if (DisableLEA16)
2756  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
2757  : nullptr;
2758  assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
2759  NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
2760  .addOperand(Dest).addOperand(Src), 1);
2761  break;
2762  case X86::DEC64r:
2763  case X86::DEC32r: {
2764  assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
2765  unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
2766  : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
2767 
2768  bool isKill, isUndef;
2769  unsigned SrcReg;
2770  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
2771  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
2772  SrcReg, isKill, isUndef, ImplicitOp))
2773  return nullptr;
2774 
2775  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
2776  .addOperand(Dest)
2777  .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
2778  if (ImplicitOp.getReg() != 0)
2779  MIB.addOperand(ImplicitOp);
2780 
2781  NewMI = addOffset(MIB, -1);
2782 
2783  break;
2784  }
2785  case X86::DEC16r:
2786  if (DisableLEA16)
2787  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
2788  : nullptr;
2789  assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
2790  NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
2791  .addOperand(Dest).addOperand(Src), -1);
2792  break;
2793  case X86::ADD64rr:
2794  case X86::ADD64rr_DB:
2795  case X86::ADD32rr:
2796  case X86::ADD32rr_DB: {
2797  assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
2798  unsigned Opc;
2799  if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
2800  Opc = X86::LEA64r;
2801  else
2802  Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
2803 
2804  bool isKill, isUndef;
2805  unsigned SrcReg;
2806  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
2807  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
2808  SrcReg, isKill, isUndef, ImplicitOp))
2809  return nullptr;
2810 
2811  const MachineOperand &Src2 = MI->getOperand(2);
2812  bool isKill2, isUndef2;
2813  unsigned SrcReg2;
2814  MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
2815  if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
2816  SrcReg2, isKill2, isUndef2, ImplicitOp2))
2817  return nullptr;
2818 
2819  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
2820  .addOperand(Dest);
2821  if (ImplicitOp.getReg() != 0)
2822  MIB.addOperand(ImplicitOp);
2823  if (ImplicitOp2.getReg() != 0)
2824  MIB.addOperand(ImplicitOp2);
2825 
2826  NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
2827 
2828  // Preserve undefness of the operands.
2829  NewMI->getOperand(1).setIsUndef(isUndef);
2830  NewMI->getOperand(3).setIsUndef(isUndef2);
2831 
2832  if (LV && Src2.isKill())
2833  LV->replaceKillInstruction(SrcReg2, MI, NewMI);
2834  break;
2835  }
2836  case X86::ADD16rr:
2837  case X86::ADD16rr_DB: {
2838  if (DisableLEA16)
2839  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
2840  : nullptr;
2841  assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
2842  unsigned Src2 = MI->getOperand(2).getReg();
2843  bool isKill2 = MI->getOperand(2).isKill();
2844  NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
2845  .addOperand(Dest),
2846  Src.getReg(), Src.isKill(), Src2, isKill2);
2847 
2848  // Preserve undefness of the operands.
2849  bool isUndef = MI->getOperand(1).isUndef();
2850  bool isUndef2 = MI->getOperand(2).isUndef();
2851  NewMI->getOperand(1).setIsUndef(isUndef);
2852  NewMI->getOperand(3).setIsUndef(isUndef2);
2853 
2854  if (LV && isKill2)
2855  LV->replaceKillInstruction(Src2, MI, NewMI);
2856  break;
2857  }
2858  case X86::ADD64ri32:
2859  case X86::ADD64ri8:
2860  case X86::ADD64ri32_DB:
2861  case X86::ADD64ri8_DB:
2862  assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
2863  NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
2864  .addOperand(Dest).addOperand(Src),
2865  MI->getOperand(2).getImm());
2866  break;
2867  case X86::ADD32ri:
2868  case X86::ADD32ri8:
2869  case X86::ADD32ri_DB:
2870  case X86::ADD32ri8_DB: {
2871  assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
2872  unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
2873 
2874  bool isKill, isUndef;
2875  unsigned SrcReg;
2876  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
2877  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
2878  SrcReg, isKill, isUndef, ImplicitOp))
2879  return nullptr;
2880 
2881  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
2882  .addOperand(Dest)
2883  .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
2884  if (ImplicitOp.getReg() != 0)
2885  MIB.addOperand(ImplicitOp);
2886 
2887  NewMI = addOffset(MIB, MI->getOperand(2).getImm());
2888  break;
2889  }
2890  case X86::ADD16ri:
2891  case X86::ADD16ri8:
2892  case X86::ADD16ri_DB:
2893  case X86::ADD16ri8_DB:
2894  if (DisableLEA16)
2895  return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
2896  : nullptr;
2897  assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
2898  NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
2899  .addOperand(Dest).addOperand(Src),
2900  MI->getOperand(2).getImm());
2901  break;
2902  }
2903 
2904  if (!NewMI) return nullptr;
2905 
2906  if (LV) { // Update live variables
2907  if (Src.isKill())
2908  LV->replaceKillInstruction(Src.getReg(), MI, NewMI);
2909  if (Dest.isDead())
2910  LV->replaceKillInstruction(Dest.getReg(), MI, NewMI);
2911  }
2912 
2913  MFI->insert(MBBI, NewMI); // Insert the new inst
2914  return NewMI;
2915 }
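// A small worked example of the shift case above (registers illustrative):
// when EFLAGS are dead, a shift by a small constant such as
//   %ecx<def> = SHL32ri %edx<kill>, 2
// is converted on a 64-bit subtarget into roughly
//   %ecx<def> = LEA64_32r %noreg, 4, %rdx<kill>, 0, %noreg, %edx<imp-use>
// i.e. the shift amount becomes the LEA scale factor and no flags are written.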
2916 
2917 /// We have a few instructions that must be hacked on to commute them.
2918 ///
2919 MachineInstr *
2920 X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
2921  switch (MI->getOpcode()) {
2922  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
2923  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
2924  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
2925  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
2926  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
2927  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
2928  unsigned Opc;
2929  unsigned Size;
2930  switch (MI->getOpcode()) {
2931  default: llvm_unreachable("Unreachable!");
2932  case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
2933  case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
2934  case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
2935  case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
2936  case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
2937  case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
2938  }
2939  unsigned Amt = MI->getOperand(3).getImm();
2940  if (NewMI) {
2941  MachineFunction &MF = *MI->getParent()->getParent();
2942  MI = MF.CloneMachineInstr(MI);
2943  NewMI = false;
2944  }
2945  MI->setDesc(get(Opc));
2946  MI->getOperand(3).setImm(Size-Amt);
2947  return TargetInstrInfo::commuteInstruction(MI, NewMI);
2948  }
2949  case X86::BLENDPDrri:
2950  case X86::BLENDPSrri:
2951  case X86::PBLENDWrri:
2952  case X86::VBLENDPDrri:
2953  case X86::VBLENDPSrri:
2954  case X86::VBLENDPDYrri:
2955  case X86::VBLENDPSYrri:
2956  case X86::VPBLENDDrri:
2957  case X86::VPBLENDWrri:
2958  case X86::VPBLENDDYrri:
2959  case X86::VPBLENDWYrri:{
2960  unsigned Mask;
2961  switch (MI->getOpcode()) {
2962  default: llvm_unreachable("Unreachable!");
2963  case X86::BLENDPDrri: Mask = 0x03; break;
2964  case X86::BLENDPSrri: Mask = 0x0F; break;
2965  case X86::PBLENDWrri: Mask = 0xFF; break;
2966  case X86::VBLENDPDrri: Mask = 0x03; break;
2967  case X86::VBLENDPSrri: Mask = 0x0F; break;
2968  case X86::VBLENDPDYrri: Mask = 0x0F; break;
2969  case X86::VBLENDPSYrri: Mask = 0xFF; break;
2970  case X86::VPBLENDDrri: Mask = 0x0F; break;
2971  case X86::VPBLENDWrri: Mask = 0xFF; break;
2972  case X86::VPBLENDDYrri: Mask = 0xFF; break;
2973  case X86::VPBLENDWYrri: Mask = 0xFF; break;
2974  }
2975  // Only the least significant bits of Imm are used.
2976  unsigned Imm = MI->getOperand(3).getImm() & Mask;
2977  if (NewMI) {
2978  MachineFunction &MF = *MI->getParent()->getParent();
2979  MI = MF.CloneMachineInstr(MI);
2980  NewMI = false;
2981  }
2982  MI->getOperand(3).setImm(Mask ^ Imm);
2983  return TargetInstrInfo::commuteInstruction(MI, NewMI);
2984  }
2985  case X86::PCLMULQDQrr:
2986  case X86::VPCLMULQDQrr:{
2987  // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
2988  // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
2989  unsigned Imm = MI->getOperand(3).getImm();
2990  unsigned Src1Hi = Imm & 0x01;
2991  unsigned Src2Hi = Imm & 0x10;
2992  if (NewMI) {
2993  MachineFunction &MF = *MI->getParent()->getParent();
2994  MI = MF.CloneMachineInstr(MI);
2995  NewMI = false;
2996  }
2997  MI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
2998  return TargetInstrInfo::commuteInstruction(MI, NewMI);
2999  }
3000  case X86::CMPPDrri:
3001  case X86::CMPPSrri:
3002  case X86::VCMPPDrri:
3003  case X86::VCMPPSrri:
3004  case X86::VCMPPDYrri:
3005  case X86::VCMPPSYrri: {
3006  // Float comparison can be safely commuted for
3007  // Ordered/Unordered/Equal/NotEqual tests
3008  unsigned Imm = MI->getOperand(3).getImm() & 0x7;
3009  switch (Imm) {
3010  case 0x00: // EQUAL
3011  case 0x03: // UNORDERED
3012  case 0x04: // NOT EQUAL
3013  case 0x07: // ORDERED
3014  if (NewMI) {
3015  MachineFunction &MF = *MI->getParent()->getParent();
3016  MI = MF.CloneMachineInstr(MI);
3017  NewMI = false;
3018  }
3019  return TargetInstrInfo::commuteInstruction(MI, NewMI);
3020  default:
3021  return nullptr;
3022  }
3023  }
3024  case X86::VPCOMBri: case X86::VPCOMUBri:
3025  case X86::VPCOMDri: case X86::VPCOMUDri:
3026  case X86::VPCOMQri: case X86::VPCOMUQri:
3027  case X86::VPCOMWri: case X86::VPCOMUWri: {
3028  // Flip comparison mode immediate (if necessary).
3029  unsigned Imm = MI->getOperand(3).getImm() & 0x7;
3030  switch (Imm) {
3031  case 0x00: Imm = 0x02; break; // LT -> GT
3032  case 0x01: Imm = 0x03; break; // LE -> GE
3033  case 0x02: Imm = 0x00; break; // GT -> LT
3034  case 0x03: Imm = 0x01; break; // GE -> LE
3035  case 0x04: // EQ
3036  case 0x05: // NE
3037  case 0x06: // FALSE
3038  case 0x07: // TRUE
3039  default:
3040  break;
3041  }
3042  if (NewMI) {
3043  MachineFunction &MF = *MI->getParent()->getParent();
3044  MI = MF.CloneMachineInstr(MI);
3045  NewMI = false;
3046  }
3047  MI->getOperand(3).setImm(Imm);
3048  return TargetInstrInfo::commuteInstruction(MI, NewMI);
3049  }
3050  case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
3051  case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
3052  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
3053  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
3054  case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
3055  case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
3056  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
3057  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
3058  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
3059  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
3060  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
3061  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
3062  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
3063  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
3064  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
3065  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
3066  unsigned Opc;
3067  switch (MI->getOpcode()) {
3068  default: llvm_unreachable("Unreachable!");
3069  case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
3070  case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
3071  case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
3072  case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
3073  case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
3074  case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
3075  case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
3076  case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
3077  case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
3078  case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
3079  case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
3080  case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
3081  case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
3082  case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
3083  case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
3084  case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
3085  case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
3086  case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
3087  case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
3088  case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
3089  case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
3090  case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
3091  case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
3092  case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
3093  case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
3094  case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
3095  case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
3096  case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
3097  case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
3098  case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
3099  case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
3100  case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
3101  case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
3102  case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
3103  case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
3104  case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
3105  case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
3106  case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
3107  case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
3108  case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
3109  case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
3110  case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
3111  case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
3112  case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
3113  case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
3114  case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
3115  case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
3116  case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
3117  }
3118  if (NewMI) {
3119  MachineFunction &MF = *MI->getParent()->getParent();
3120  MI = MF.CloneMachineInstr(MI);
3121  NewMI = false;
3122  }
3123  MI->setDesc(get(Opc));
3124  // Fallthrough intended.
3125  }
3126  default:
3127  return TargetInstrInfo::commuteInstruction(MI, NewMI);
3128  }
3129 }
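// For example, commuting a four-lane blend such as
//   %xmm0<def> = BLENDPSrri %xmm0, %xmm1, 0b0101
// swaps the two sources and inverts the immediate within its significant bits
// (0b0101 ^ 0x0F = 0b1010), so the same lanes still come from the same values.
// The SHLD/SHRD cases instead switch opcodes and replace the shift amount I
// with Size - I, e.g. SHLD32rri8 ..., 5 becomes SHRD32rri8 ..., 27.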
3130 
3131 bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
3132  unsigned &SrcOpIdx2) const {
3133  switch (MI->getOpcode()) {
3134  case X86::CMPPDrri:
3135  case X86::CMPPSrri:
3136  case X86::VCMPPDrri:
3137  case X86::VCMPPSrri:
3138  case X86::VCMPPDYrri:
3139  case X86::VCMPPSYrri: {
3140  // Float comparison can be safely commuted for
3141  // Ordered/Unordered/Equal/NotEqual tests
3142  unsigned Imm = MI->getOperand(3).getImm() & 0x7;
3143  switch (Imm) {
3144  case 0x00: // EQUAL
3145  case 0x03: // UNORDERED
3146  case 0x04: // NOT EQUAL
3147  case 0x07: // ORDERED
3148  SrcOpIdx1 = 1;
3149  SrcOpIdx2 = 2;
3150  return true;
3151  }
3152  return false;
3153  }
3154  case X86::VFMADDPDr231r:
3155  case X86::VFMADDPSr231r:
3156  case X86::VFMADDSDr231r:
3157  case X86::VFMADDSSr231r:
3158  case X86::VFMSUBPDr231r:
3159  case X86::VFMSUBPSr231r:
3160  case X86::VFMSUBSDr231r:
3161  case X86::VFMSUBSSr231r:
3162  case X86::VFNMADDPDr231r:
3163  case X86::VFNMADDPSr231r:
3164  case X86::VFNMADDSDr231r:
3165  case X86::VFNMADDSSr231r:
3166  case X86::VFNMSUBPDr231r:
3167  case X86::VFNMSUBPSr231r:
3168  case X86::VFNMSUBSDr231r:
3169  case X86::VFNMSUBSSr231r:
3170  case X86::VFMADDPDr231rY:
3171  case X86::VFMADDPSr231rY:
3172  case X86::VFMSUBPDr231rY:
3173  case X86::VFMSUBPSr231rY:
3174  case X86::VFNMADDPDr231rY:
3175  case X86::VFNMADDPSr231rY:
3176  case X86::VFNMSUBPDr231rY:
3177  case X86::VFNMSUBPSr231rY:
3178  SrcOpIdx1 = 2;
3179  SrcOpIdx2 = 3;
3180  return true;
3181  default:
3182  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
3183  }
3184 }
3185 
3186 static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
3187  switch (BrOpc) {
3188  default: return X86::COND_INVALID;
3189  case X86::JE_1: return X86::COND_E;
3190  case X86::JNE_1: return X86::COND_NE;
3191  case X86::JL_1: return X86::COND_L;
3192  case X86::JLE_1: return X86::COND_LE;
3193  case X86::JG_1: return X86::COND_G;
3194  case X86::JGE_1: return X86::COND_GE;
3195  case X86::JB_1: return X86::COND_B;
3196  case X86::JBE_1: return X86::COND_BE;
3197  case X86::JA_1: return X86::COND_A;
3198  case X86::JAE_1: return X86::COND_AE;
3199  case X86::JS_1: return X86::COND_S;
3200  case X86::JNS_1: return X86::COND_NS;
3201  case X86::JP_1: return X86::COND_P;
3202  case X86::JNP_1: return X86::COND_NP;
3203  case X86::JO_1: return X86::COND_O;
3204  case X86::JNO_1: return X86::COND_NO;
3205  }
3206 }
3207 
3208 /// Return condition code of a SET opcode.
3209 static X86::CondCode getCondFromSETOpc(unsigned Opc) {
3210  switch (Opc) {
3211  default: return X86::COND_INVALID;
3212  case X86::SETAr: case X86::SETAm: return X86::COND_A;
3213  case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
3214  case X86::SETBr: case X86::SETBm: return X86::COND_B;
3215  case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
3216  case X86::SETEr: case X86::SETEm: return X86::COND_E;
3217  case X86::SETGr: case X86::SETGm: return X86::COND_G;
3218  case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
3219  case X86::SETLr: case X86::SETLm: return X86::COND_L;
3220  case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
3221  case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
3222  case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
3223  case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
3224  case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
3225  case X86::SETOr: case X86::SETOm: return X86::COND_O;
3226  case X86::SETPr: case X86::SETPm: return X86::COND_P;
3227  case X86::SETSr: case X86::SETSm: return X86::COND_S;
3228  }
3229 }
3230 
3231 /// Return condition code of a CMov opcode.
3232 X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
3233  switch (Opc) {
3234  default: return X86::COND_INVALID;
3235  case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
3236  case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
3237  return X86::COND_A;
3238  case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
3239  case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
3240  return X86::COND_AE;
3241  case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
3242  case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
3243  return X86::COND_B;
3244  case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
3245  case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
3246  return X86::COND_BE;
3247  case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
3248  case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
3249  return X86::COND_E;
3250  case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
3251  case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
3252  return X86::COND_G;
3253  case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
3254  case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
3255  return X86::COND_GE;
3256  case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
3257  case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
3258  return X86::COND_L;
3259  case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
3260  case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
3261  return X86::COND_LE;
3262  case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
3263  case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
3264  return X86::COND_NE;
3265  case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
3266  case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
3267  return X86::COND_NO;
3268  case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
3269  case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
3270  return X86::COND_NP;
3271  case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
3272  case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
3273  return X86::COND_NS;
3274  case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
3275  case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
3276  return X86::COND_O;
3277  case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
3278  case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
3279  return X86::COND_P;
3280  case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
3281  case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
3282  return X86::COND_S;
3283  }
3284 }
3285 
3286 unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
3287  switch (CC) {
3288  default: llvm_unreachable("Illegal condition code!");
3289  case X86::COND_E: return X86::JE_1;
3290  case X86::COND_NE: return X86::JNE_1;
3291  case X86::COND_L: return X86::JL_1;
3292  case X86::COND_LE: return X86::JLE_1;
3293  case X86::COND_G: return X86::JG_1;
3294  case X86::COND_GE: return X86::JGE_1;
3295  case X86::COND_B: return X86::JB_1;
3296  case X86::COND_BE: return X86::JBE_1;
3297  case X86::COND_A: return X86::JA_1;
3298  case X86::COND_AE: return X86::JAE_1;
3299  case X86::COND_S: return X86::JS_1;
3300  case X86::COND_NS: return X86::JNS_1;
3301  case X86::COND_P: return X86::JP_1;
3302  case X86::COND_NP: return X86::JNP_1;
3303  case X86::COND_O: return X86::JO_1;
3304  case X86::COND_NO: return X86::JNO_1;
3305  }
3306 }
3307 
3308 /// Return the inverse of the specified condition,
3309 /// e.g. turning COND_E to COND_NE.
3310 X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
3311  switch (CC) {
3312  default: llvm_unreachable("Illegal condition code!");
3313  case X86::COND_E: return X86::COND_NE;
3314  case X86::COND_NE: return X86::COND_E;
3315  case X86::COND_L: return X86::COND_GE;
3316  case X86::COND_LE: return X86::COND_G;
3317  case X86::COND_G: return X86::COND_LE;
3318  case X86::COND_GE: return X86::COND_L;
3319  case X86::COND_B: return X86::COND_AE;
3320  case X86::COND_BE: return X86::COND_A;
3321  case X86::COND_A: return X86::COND_BE;
3322  case X86::COND_AE: return X86::COND_B;
3323  case X86::COND_S: return X86::COND_NS;
3324  case X86::COND_NS: return X86::COND_S;
3325  case X86::COND_P: return X86::COND_NP;
3326  case X86::COND_NP: return X86::COND_P;
3327  case X86::COND_O: return X86::COND_NO;
3328  case X86::COND_NO: return X86::COND_O;
3329  }
3330 }
3331 
3332 /// Assuming the flags are set by MI(a,b), return the condition code if we
3333 /// modify the instructions such that flags are set by MI(b,a).
3334 static X86::CondCode getSwappedCondition(X86::CondCode CC) {
3335  switch (CC) {
3336  default: return X86::COND_INVALID;
3337  case X86::COND_E: return X86::COND_E;
3338  case X86::COND_NE: return X86::COND_NE;
3339  case X86::COND_L: return X86::COND_G;
3340  case X86::COND_LE: return X86::COND_GE;
3341  case X86::COND_G: return X86::COND_L;
3342  case X86::COND_GE: return X86::COND_LE;
3343  case X86::COND_B: return X86::COND_A;
3344  case X86::COND_BE: return X86::COND_AE;
3345  case X86::COND_A: return X86::COND_B;
3346  case X86::COND_AE: return X86::COND_BE;
3347  }
3348 }
3349 
3350 /// Return a set opcode for the given condition and
3351 /// whether it has memory operand.
3352 unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
3353  static const uint16_t Opc[16][2] = {
3354  { X86::SETAr, X86::SETAm },
3355  { X86::SETAEr, X86::SETAEm },
3356  { X86::SETBr, X86::SETBm },
3357  { X86::SETBEr, X86::SETBEm },
3358  { X86::SETEr, X86::SETEm },
3359  { X86::SETGr, X86::SETGm },
3360  { X86::SETGEr, X86::SETGEm },
3361  { X86::SETLr, X86::SETLm },
3362  { X86::SETLEr, X86::SETLEm },
3363  { X86::SETNEr, X86::SETNEm },
3364  { X86::SETNOr, X86::SETNOm },
3365  { X86::SETNPr, X86::SETNPm },
3366  { X86::SETNSr, X86::SETNSm },
3367  { X86::SETOr, X86::SETOm },
3368  { X86::SETPr, X86::SETPm },
3369  { X86::SETSr, X86::SETSm }
3370  };
3371 
3372  assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
3373  return Opc[CC][HasMemoryOperand ? 1 : 0];
3374 }
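// Example: getSETFromCond(X86::COND_NE, /*HasMemoryOperand=*/true) selects the
// memory-form column of the table above and returns X86::SETNEm; with a
// register destination it would return X86::SETNEr.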
3375 
3376 /// Return a cmov opcode for the given condition,
3377 /// register size in bytes, and operand type.
3378 unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
3379  bool HasMemoryOperand) {
3380  static const uint16_t Opc[32][3] = {
3381  { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
3382  { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
3383  { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
3384  { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
3385  { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
3386  { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
3387  { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
3388  { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
3389  { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
3390  { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
3391  { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
3392  { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
3393  { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
3394  { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
3395  { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
3396  { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
3397  { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
3398  { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
3399  { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
3400  { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
3401  { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
3402  { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
3403  { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
3404  { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
3405  { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
3406  { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
3407  { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
3408  { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
3409  { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
3410  { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
3411  { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
3412  { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
3413  };
3414 
3415  assert(CC < 16 && "Can only handle standard cond codes");
3416  unsigned Idx = HasMemoryOperand ? 16+CC : CC;
3417  switch(RegBytes) {
3418  default: llvm_unreachable("Illegal register size!");
3419  case 2: return Opc[Idx][0];
3420  case 4: return Opc[Idx][1];
3421  case 8: return Opc[Idx][2];
3422  }
3423 }
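// Example: getCMovFromCond(X86::COND_E, /*RegBytes=*/4, false) indexes the
// register-form half of the table and yields X86::CMOVE32rr, while
// getCMovFromCond(X86::COND_E, 8, /*HasMemoryOperand=*/true) yields
// X86::CMOVE64rm.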
3424 
3425 bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
3426  if (!MI->isTerminator()) return false;
3427 
3428  // Conditional branch is a special case.
3429  if (MI->isBranch() && !MI->isBarrier())
3430  return true;
3431  if (!MI->isPredicable())
3432  return true;
3433  return !isPredicated(MI);
3434 }
3435 
3436 bool X86InstrInfo::AnalyzeBranchImpl(
3437  MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
3438  SmallVectorImpl<MachineOperand> &Cond,
3439  SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
3440 
3441  // Start from the bottom of the block and work up, examining the
3442  // terminator instructions.
3443  MachineBasicBlock::iterator I = MBB.end();
3444  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
3445  while (I != MBB.begin()) {
3446  --I;
3447  if (I->isDebugValue())
3448  continue;
3449 
3450  // Working from the bottom, when we see a non-terminator instruction, we're
3451  // done.
3452  if (!isUnpredicatedTerminator(I))
3453  break;
3454 
3455  // A terminator that isn't a branch can't easily be handled by this
3456  // analysis.
3457  if (!I->isBranch())
3458  return true;
3459 
3460  // Handle unconditional branches.
3461  if (I->getOpcode() == X86::JMP_1) {
3462  UnCondBrIter = I;
3463 
3464  if (!AllowModify) {
3465  TBB = I->getOperand(0).getMBB();
3466  continue;
3467  }
3468 
3469  // If the block has any instructions after a JMP, delete them.
3470  while (std::next(I) != MBB.end())
3471  std::next(I)->eraseFromParent();
3472 
3473  Cond.clear();
3474  FBB = nullptr;
3475 
3476  // Delete the JMP if it's equivalent to a fall-through.
3477  if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
3478  TBB = nullptr;
3479  I->eraseFromParent();
3480  I = MBB.end();
3481  UnCondBrIter = MBB.end();
3482  continue;
3483  }
3484 
3485  // TBB is used to indicate the unconditional destination.
3486  TBB = I->getOperand(0).getMBB();
3487  continue;
3488  }
3489 
3490  // Handle conditional branches.
3491  X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
3492  if (BranchCode == X86::COND_INVALID)
3493  return true; // Can't handle indirect branch.
3494 
3495  // Working from the bottom, handle the first conditional branch.
3496  if (Cond.empty()) {
3497  MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
3498  if (AllowModify && UnCondBrIter != MBB.end() &&
3499  MBB.isLayoutSuccessor(TargetBB)) {
3500  // If we can modify the code and it ends in something like:
3501  //
3502  // jCC L1
3503  // jmp L2
3504  // L1:
3505  // ...
3506  // L2:
3507  //
3508  // Then we can change this to:
3509  //
3510  // jnCC L2
3511  // L1:
3512  // ...
3513  // L2:
3514  //
3515  // Which is a bit more efficient.
3516  // We conditionally jump to the fall-through block.
3517  BranchCode = GetOppositeBranchCondition(BranchCode);
3518  unsigned JNCC = GetCondBranchFromCond(BranchCode);
3519  MachineBasicBlock::iterator OldInst = I;
3520 
3521  BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
3522  .addMBB(UnCondBrIter->getOperand(0).getMBB());
3523  BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
3524  .addMBB(TargetBB);
3525 
3526  OldInst->eraseFromParent();
3527  UnCondBrIter->eraseFromParent();
3528 
3529  // Restart the analysis.
3530  UnCondBrIter = MBB.end();
3531  I = MBB.end();
3532  continue;
3533  }
3534 
3535  FBB = TBB;
3536  TBB = I->getOperand(0).getMBB();
3537  Cond.push_back(MachineOperand::CreateImm(BranchCode));
3538  CondBranches.push_back(I);
3539  continue;
3540  }
3541 
3542  // Handle subsequent conditional branches. Only handle the case where all
3543  // conditional branches branch to the same destination and their condition
3544  // opcodes fit one of the special multi-branch idioms.
3545  assert(Cond.size() == 1);
3546  assert(TBB);
3547 
3548  // Only handle the case where all conditional branches branch to the same
3549  // destination.
3550  if (TBB != I->getOperand(0).getMBB())
3551  return true;
3552 
3553  // If the conditions are the same, we can leave them alone.
3554  X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
3555  if (OldBranchCode == BranchCode)
3556  continue;
3557 
3558  // If they differ, see if they fit one of the known patterns. Theoretically,
3559  // we could handle more patterns here, but we shouldn't expect to see them
3560  // if instruction selection has done a reasonable job.
3561  if ((OldBranchCode == X86::COND_NP &&
3562  BranchCode == X86::COND_E) ||
3563  (OldBranchCode == X86::COND_E &&
3564  BranchCode == X86::COND_NP))
3565  BranchCode = X86::COND_NP_OR_E;
3566  else if ((OldBranchCode == X86::COND_P &&
3567  BranchCode == X86::COND_NE) ||
3568  (OldBranchCode == X86::COND_NE &&
3569  BranchCode == X86::COND_P))
3570  BranchCode = X86::COND_NE_OR_P;
3571  else
3572  return true;
3573 
3574  // Update the MachineOperand.
3575  Cond[0].setImm(BranchCode);
3576  CondBranches.push_back(I);
3577  }
3578 
3579  return false;
3580 }
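// As a small worked example (block numbers illustrative), a block ending in
//   JNE_1 <BB#2>
//   JMP_1 <BB#3>
// is analyzed bottom-up into TBB = BB#2, FBB = BB#3 and a single Cond entry
// holding X86::COND_NE, while a lone fall-through conditional
//   JP_1 <BB#2>
// yields TBB = BB#2, FBB = null and Cond = { X86::COND_P }.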
3581 
3582 bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
3583  MachineBasicBlock *&TBB,
3584  MachineBasicBlock *&FBB,
3585  SmallVectorImpl<MachineOperand> &Cond,
3586  bool AllowModify) const {
3587  SmallVector<MachineInstr *, 4> CondBranches;
3588  return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
3589 }
3590 
3591 bool X86InstrInfo::AnalyzeBranchPredicate(MachineBasicBlock &MBB,
3592  MachineBranchPredicate &MBP,
3593  bool AllowModify) const {
3594  using namespace std::placeholders;
3595 
3596  SmallVector<MachineOperand, 4> Cond;
3597  SmallVector<MachineInstr *, 4> CondBranches;
3598  if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
3599  AllowModify))
3600  return true;
3601 
3602  if (Cond.size() != 1)
3603  return true;
3604 
3605  assert(MBP.TrueDest && "expected!");
3606 
3607  if (!MBP.FalseDest)
3608  MBP.FalseDest = MBB.getNextNode();
3609 
3610  const TargetRegisterInfo *TRI = &getRegisterInfo();
3611 
3612  MachineInstr *ConditionDef = nullptr;
3613  bool SingleUseCondition = true;
3614 
3615  for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
3616  if (I->modifiesRegister(X86::EFLAGS, TRI)) {
3617  ConditionDef = &*I;
3618  break;
3619  }
3620 
3621  if (I->readsRegister(X86::EFLAGS, TRI))
3622  SingleUseCondition = false;
3623  }
3624 
3625  if (!ConditionDef)
3626  return true;
3627 
3628  if (SingleUseCondition) {
3629  for (auto *Succ : MBB.successors())
3630  if (Succ->isLiveIn(X86::EFLAGS))
3631  SingleUseCondition = false;
3632  }
3633 
3634  MBP.ConditionDef = ConditionDef;
3635  MBP.SingleUseCondition = SingleUseCondition;
3636 
3637  // Currently we only recognize the simple pattern:
3638  //
3639  // test %reg, %reg
3640  // je %label
3641  //
3642  const unsigned TestOpcode =
3643  Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
3644 
3645  if (ConditionDef->getOpcode() == TestOpcode &&
3646  ConditionDef->getNumOperands() == 3 &&
3647  ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
3648  (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
3649  MBP.LHS = ConditionDef->getOperand(0);
3650  MBP.RHS = MachineOperand::CreateImm(0);
3651  MBP.Predicate = Cond[0].getImm() == X86::COND_NE
3652  ? MachineBranchPredicate::PRED_NE
3653  : MachineBranchPredicate::PRED_EQ;
3654  return false;
3655  }
3656 
3657  return true;
3658 }
3659 
3660 unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
3661  MachineBasicBlock::iterator I = MBB.end();
3662  unsigned Count = 0;
3663 
3664  while (I != MBB.begin()) {
3665  --I;
3666  if (I->isDebugValue())
3667  continue;
3668  if (I->getOpcode() != X86::JMP_1 &&
3669  getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
3670  break;
3671  // Remove the branch.
3672  I->eraseFromParent();
3673  I = MBB.end();
3674  ++Count;
3675  }
3676 
3677  return Count;
3678 }
3679 
3680 unsigned
3681 X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
3682  MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
3683  DebugLoc DL) const {
3684  // Shouldn't be a fall through.
3685  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
3686  assert((Cond.size() == 1 || Cond.size() == 0) &&
3687  "X86 branch conditions have one component!");
3688 
3689  if (Cond.empty()) {
3690  // Unconditional branch?
3691  assert(!FBB && "Unconditional branch with multiple successors!");
3692  BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
3693  return 1;
3694  }
3695 
3696  // Conditional branch.
3697  unsigned Count = 0;
3698  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
3699  switch (CC) {
3700  case X86::COND_NP_OR_E:
3701  // Synthesize NP_OR_E with two branches.
3702  BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
3703  ++Count;
3704  BuildMI(&MBB, DL, get(X86::JE_1)).addMBB(TBB);
3705  ++Count;
3706  break;
3707  case X86::COND_NE_OR_P:
3708  // Synthesize NE_OR_P with two branches.
3709  BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
3710  ++Count;
3711  BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
3712  ++Count;
3713  break;
3714  default: {
3715  unsigned Opc = GetCondBranchFromCond(CC);
3716  BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
3717  ++Count;
3718  }
3719  }
3720  if (FBB) {
3721  // Two-way Conditional branch. Insert the second branch.
3722  BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
3723  ++Count;
3724  }
3725  return Count;
3726 }
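// For instance, inserting a branch with Cond = { X86::COND_NE_OR_P },
// TBB = BB#1 and FBB = BB#2 (block numbers illustrative) emits
//   JNE_1 <BB#1>
//   JP_1  <BB#1>
//   JMP_1 <BB#2>
// and returns a count of 3.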
3727 
3728 bool X86InstrInfo::
3729 canInsertSelect(const MachineBasicBlock &MBB,
3730  ArrayRef<MachineOperand> Cond,
3731  unsigned TrueReg, unsigned FalseReg,
3732  int &CondCycles, int &TrueCycles, int &FalseCycles) const {
3733  // Not all subtargets have cmov instructions.
3734  if (!Subtarget.hasCMov())
3735  return false;
3736  if (Cond.size() != 1)
3737  return false;
3738  // We cannot do the composite conditions, at least not in SSA form.
3739  if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
3740  return false;
3741 
3742  // Check register classes.
3743  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3744  const TargetRegisterClass *RC =
3745  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
3746  if (!RC)
3747  return false;
3748 
3749  // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
3750  if (X86::GR16RegClass.hasSubClassEq(RC) ||
3751  X86::GR32RegClass.hasSubClassEq(RC) ||
3752  X86::GR64RegClass.hasSubClassEq(RC)) {
3753  // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
3754  // Bridge. Probably Ivy Bridge as well.
3755  CondCycles = 2;
3756  TrueCycles = 2;
3757  FalseCycles = 2;
3758  return true;
3759  }
3760 
3761  // Can't do vectors.
3762  return false;
3763 }
3764 
3765 void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
3766  MachineBasicBlock::iterator I, DebugLoc DL,
3767  unsigned DstReg, ArrayRef<MachineOperand> Cond,
3768  unsigned TrueReg, unsigned FalseReg) const {
3769  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3770  assert(Cond.size() == 1 && "Invalid Cond array");
3771  unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
3772  MRI.getRegClass(DstReg)->getSize(),
3773  false/*HasMemoryOperand*/);
3774  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
3775 }
3776 
3777 /// Test if the given register is a physical h register.
3778 static bool isHReg(unsigned Reg) {
3779  return X86::GR8_ABCD_HRegClass.contains(Reg);
3780 }
3781 
3782 // Try and copy between VR128/VR64 and GR64 registers.
3783 static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
3784  const X86Subtarget &Subtarget) {
3785 
3786  // SrcReg(VR128) -> DestReg(GR64)
3787  // SrcReg(VR64) -> DestReg(GR64)
3788  // SrcReg(GR64) -> DestReg(VR128)
3789  // SrcReg(GR64) -> DestReg(VR64)
3790 
3791  bool HasAVX = Subtarget.hasAVX();
3792  bool HasAVX512 = Subtarget.hasAVX512();
3793  if (X86::GR64RegClass.contains(DestReg)) {
3794  if (X86::VR128XRegClass.contains(SrcReg))
3795  // Copy from a VR128 register to a GR64 register.
3796  return HasAVX512 ? X86::VMOVPQIto64Zrr: (HasAVX ? X86::VMOVPQIto64rr :
3797  X86::MOVPQIto64rr);
3798  if (X86::VR64RegClass.contains(SrcReg))
3799  // Copy from a VR64 register to a GR64 register.
3800  return X86::MMX_MOVD64from64rr;
3801  } else if (X86::GR64RegClass.contains(SrcReg)) {
3802  // Copy from a GR64 register to a VR128 register.
3803  if (X86::VR128XRegClass.contains(DestReg))
3804  return HasAVX512 ? X86::VMOV64toPQIZrr: (HasAVX ? X86::VMOV64toPQIrr :
3805  X86::MOV64toPQIrr);
3806  // Copy from a GR64 register to a VR64 register.
3807  if (X86::VR64RegClass.contains(DestReg))
3808  return X86::MMX_MOVD64to64rr;
3809  }
3810 
3811  // SrcReg(FR32) -> DestReg(GR32)
3812  // SrcReg(GR32) -> DestReg(FR32)
3813 
3814  if (X86::GR32RegClass.contains(DestReg) && X86::FR32XRegClass.contains(SrcReg))
3815  // Copy from a FR32 register to a GR32 register.
3816  return HasAVX512 ? X86::VMOVSS2DIZrr : (HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr);
3817 
3818  if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
3819  // Copy from a GR32 register to a FR32 register.
3820  return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr);
3821  return 0;
3822 }
3823 
3824 inline static bool MaskRegClassContains(unsigned Reg) {
3825  return X86::VK8RegClass.contains(Reg) ||
3826  X86::VK16RegClass.contains(Reg) ||
3827  X86::VK32RegClass.contains(Reg) ||
3828  X86::VK64RegClass.contains(Reg) ||
3829  X86::VK1RegClass.contains(Reg);
3830 }
3831 static
3832 unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
3833  if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
3834  X86::VR256XRegClass.contains(DestReg, SrcReg) ||
3835  X86::VR512RegClass.contains(DestReg, SrcReg)) {
3836  DestReg = get512BitSuperRegister(DestReg);
3837  SrcReg = get512BitSuperRegister(SrcReg);
3838  return X86::VMOVAPSZrr;
3839  }
3840  if (MaskRegClassContains(DestReg) &&
3841  MaskRegClassContains(SrcReg))
3842  return X86::KMOVWkk;
3843  if (MaskRegClassContains(DestReg) &&
3844  (X86::GR32RegClass.contains(SrcReg) ||
3845  X86::GR16RegClass.contains(SrcReg) ||
3846  X86::GR8RegClass.contains(SrcReg))) {
3847  SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
3848  return X86::KMOVWkr;
3849  }
3850  if ((X86::GR32RegClass.contains(DestReg) ||
3851  X86::GR16RegClass.contains(DestReg) ||
3852  X86::GR8RegClass.contains(DestReg)) &&
3853  MaskRegClassContains(SrcReg)) {
3854  DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
3855  return X86::KMOVWrk;
3856  }
3857  return 0;
3858 }
3859 
3860 void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
3861  MachineBasicBlock::iterator MI, DebugLoc DL,
3862  unsigned DestReg, unsigned SrcReg,
3863  bool KillSrc) const {
3864  // First deal with the normal symmetric copies.
3865  bool HasAVX = Subtarget.hasAVX();
3866  bool HasAVX512 = Subtarget.hasAVX512();
3867  unsigned Opc = 0;
3868  if (X86::GR64RegClass.contains(DestReg, SrcReg))
3869  Opc = X86::MOV64rr;
3870  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
3871  Opc = X86::MOV32rr;
3872  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
3873  Opc = X86::MOV16rr;
3874  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
3875  // Copying to or from a physical H register on x86-64 requires a NOREX
3876  // move. Otherwise use a normal move.
3877  if ((isHReg(DestReg) || isHReg(SrcReg)) &&
3878  Subtarget.is64Bit()) {
3879  Opc = X86::MOV8rr_NOREX;
3880  // Both operands must be encodable without a REX prefix.
3881  assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
3882  "8-bit H register can not be copied outside GR8_NOREX");
3883  } else
3884  Opc = X86::MOV8rr;
3885  }
3886  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
3887  Opc = X86::MMX_MOVQ64rr;
3888  else if (HasAVX512)
3889  Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
3890  else if (X86::VR128RegClass.contains(DestReg, SrcReg))
3891  Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
3892  else if (X86::VR256RegClass.contains(DestReg, SrcReg))
3893  Opc = X86::VMOVAPSYrr;
3894  if (!Opc)
3895  Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
3896 
3897  if (Opc) {
3898  BuildMI(MBB, MI, DL, get(Opc), DestReg)
3899  .addReg(SrcReg, getKillRegState(KillSrc));
3900  return;
3901  }
3902 
3903  // Moving EFLAGS to / from another register requires a push and a pop.
3904  // Notice that we have to adjust the stack if we don't want to clobber the
3905  // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
3906  if (SrcReg == X86::EFLAGS) {
3907  if (X86::GR64RegClass.contains(DestReg)) {
3908  BuildMI(MBB, MI, DL, get(X86::PUSHF64));
3909  BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
3910  return;
3911  }
3912  if (X86::GR32RegClass.contains(DestReg)) {
3913  BuildMI(MBB, MI, DL, get(X86::PUSHF32));
3914  BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
3915  return;
3916  }
3917  }
3918  if (DestReg == X86::EFLAGS) {
3919  if (X86::GR64RegClass.contains(SrcReg)) {
3920  BuildMI(MBB, MI, DL, get(X86::PUSH64r))
3921  .addReg(SrcReg, getKillRegState(KillSrc));
3922  BuildMI(MBB, MI, DL, get(X86::POPF64));
3923  return;
3924  }
3925  if (X86::GR32RegClass.contains(SrcReg)) {
3926  BuildMI(MBB, MI, DL, get(X86::PUSH32r))
3927  .addReg(SrcReg, getKillRegState(KillSrc));
3928  BuildMI(MBB, MI, DL, get(X86::POPF32));
3929  return;
3930  }
3931  }
3932 
3933  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
3934  << " to " << RI.getName(DestReg) << '\n');
3935  llvm_unreachable("Cannot emit physreg copy instruction");
3936 }
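// Copies involving EFLAGS cannot use a plain MOV; for example, copying EFLAGS
// into RAX on x86-64 is expanded here into
//   PUSHF64
//   POP64r %rax
// whereas an ordinary GPR copy such as %ebx -> %ecx simply becomes MOV32rr.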
3937 
3938 static unsigned getLoadStoreRegOpcode(unsigned Reg,
3939  const TargetRegisterClass *RC,
3940  bool isStackAligned,
3941  const X86Subtarget &STI,
3942  bool load) {
3943  if (STI.hasAVX512()) {
3944  if (X86::VK8RegClass.hasSubClassEq(RC) ||
3945  X86::VK16RegClass.hasSubClassEq(RC))
3946  return load ? X86::KMOVWkm : X86::KMOVWmk;
3947  if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
3948  return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
3949  if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
3950  return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
3951  if (X86::VR512RegClass.hasSubClassEq(RC))
3952  return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
3953  }
3954 
3955  bool HasAVX = STI.hasAVX();
3956  switch (RC->getSize()) {
3957  default:
3958  llvm_unreachable("Unknown spill size");
3959  case 1:
3960  assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
3961  if (STI.is64Bit())
3962  // Copying to or from a physical H register on x86-64 requires a NOREX
3963  // move. Otherwise use a normal move.
3964  if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
3965  return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
3966  return load ? X86::MOV8rm : X86::MOV8mr;
3967  case 2:
3968  assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
3969  return load ? X86::MOV16rm : X86::MOV16mr;
3970  case 4:
3971  if (X86::GR32RegClass.hasSubClassEq(RC))
3972  return load ? X86::MOV32rm : X86::MOV32mr;
3973  if (X86::FR32RegClass.hasSubClassEq(RC))
3974  return load ?
3975  (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
3976  (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
3977  if (X86::RFP32RegClass.hasSubClassEq(RC))
3978  return load ? X86::LD_Fp32m : X86::ST_Fp32m;
3979  llvm_unreachable("Unknown 4-byte regclass");
3980  case 8:
3981  if (X86::GR64RegClass.hasSubClassEq(RC))
3982  return load ? X86::MOV64rm : X86::MOV64mr;
3983  if (X86::FR64RegClass.hasSubClassEq(RC))
3984  return load ?
3985  (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
3986  (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
3987  if (X86::VR64RegClass.hasSubClassEq(RC))
3988  return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
3989  if (X86::RFP64RegClass.hasSubClassEq(RC))
3990  return load ? X86::LD_Fp64m : X86::ST_Fp64m;
3991  llvm_unreachable("Unknown 8-byte regclass");
3992  case 10:
3993  assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
3994  return load ? X86::LD_Fp80m : X86::ST_FpP80m;
3995  case 16: {
3996  assert((X86::VR128RegClass.hasSubClassEq(RC) ||
3997  X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
3998  // If stack is realigned we can use aligned stores.
3999  if (isStackAligned)
4000  return load ?
4001  (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
4002  (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
4003  else
4004  return load ?
4005  (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
4006  (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
4007  }
4008  case 32:
4009  assert((X86::VR256RegClass.hasSubClassEq(RC) ||
4010  X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
4011  // If stack is realigned we can use aligned stores.
4012  if (isStackAligned)
4013  return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
4014  else
4015  return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
4016  case 64:
4017  assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
4018  if (isStackAligned)
4019  return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
4020  else
4021  return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
4022  }
4023 }
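// For example, spilling a 32-byte YMM register uses VMOVAPSYmr when the
// (possibly realigned) stack guarantees sufficient alignment and falls back to
// the unaligned VMOVUPSYmr otherwise; the corresponding reloads use the
// VMOVAPSYrm / VMOVUPSYrm forms.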
4024 
4025 bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *MemOp, unsigned &BaseReg,
4026  unsigned &Offset,
4027  const TargetRegisterInfo *TRI) const {
4028  const MCInstrDesc &Desc = MemOp->getDesc();
4029  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags, MemOp->getOpcode());
4030  if (MemRefBegin < 0)
4031  return false;
4032 
4033  MemRefBegin += X86II::getOperandBias(Desc);
4034 
4035  BaseReg = MemOp->getOperand(MemRefBegin + X86::AddrBaseReg).getReg();
4036  if (MemOp->getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
4037  return false;
4038 
4039  if (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
4040  X86::NoRegister)
4041  return false;
4042 
4043  const MachineOperand &DispMO = MemOp->getOperand(MemRefBegin + X86::AddrDisp);
4044 
4045  // Displacement can be symbolic
4046  if (!DispMO.isImm())
4047  return false;
4048 
4049  Offset = DispMO.getImm();
4050 
4051  return (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() ==
4052  X86::NoRegister);
4053 }
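// For example, a simple load such as
//   %eax<def> = MOV32rm %rdi, 1, %noreg, 16, %noreg
// is decomposed into BaseReg = %rdi and Offset = 16, while any scaled or
// indexed address (scale != 1, a real index register, or a symbolic
// displacement) makes this analysis return false.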
4054 
4055 static unsigned getStoreRegOpcode(unsigned SrcReg,
4056  const TargetRegisterClass *RC,
4057  bool isStackAligned,
4058  const X86Subtarget &STI) {
4059  return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false);
4060 }
4061 
4062 
4063 static unsigned getLoadRegOpcode(unsigned DestReg,
4064  const TargetRegisterClass *RC,
4065  bool isStackAligned,
4066  const X86Subtarget &STI) {
4067  return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true);
4068 }
4069 
4070 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4071  MachineBasicBlock::iterator MI,
4072  unsigned SrcReg, bool isKill, int FrameIdx,
4073  const TargetRegisterClass *RC,
4074  const TargetRegisterInfo *TRI) const {
4075  const MachineFunction &MF = *MBB.getParent();
4076  assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
4077  "Stack slot too small for store");
4078  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
4079  bool isAligned =
4080  (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
4081  RI.canRealignStack(MF);
4082  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
4083  DebugLoc DL = MBB.findDebugLoc(MI);
4084  addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
4085  .addReg(SrcReg, getKillRegState(isKill));
4086 }
4087 
4088 void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
4089  bool isKill,
4090  SmallVectorImpl<MachineOperand> &Addr,
4091  const TargetRegisterClass *RC,
4092  MachineInstr::mmo_iterator MMOBegin,
4093  MachineInstr::mmo_iterator MMOEnd,
4094  SmallVectorImpl<MachineInstr*> &NewMIs) const {
4095  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
4096  bool isAligned = MMOBegin != MMOEnd &&
4097  (*MMOBegin)->getAlignment() >= Alignment;
4098  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
4099  DebugLoc DL;
4100  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
4101  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
4102  MIB.addOperand(Addr[i]);
4103  MIB.addReg(SrcReg, getKillRegState(isKill));
4104  (*MIB).setMemRefs(MMOBegin, MMOEnd);
4105  NewMIs.push_back(MIB);
4106 }
4107 
4108 
4109 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4110  MachineBasicBlock::iterator MI,
4111  unsigned DestReg, int FrameIdx,
4112  const TargetRegisterClass *RC,
4113  const TargetRegisterInfo *TRI) const {
4114  const MachineFunction &MF = *MBB.getParent();
4115  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
4116  bool isAligned =
4117  (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
4118  RI.canRealignStack(MF);
4119  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
4120  DebugLoc DL = MBB.findDebugLoc(MI);
4121  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
4122 }
4123 
4124 void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
4125  SmallVectorImpl<MachineOperand> &Addr,
4126  const TargetRegisterClass *RC,
4127  MachineInstr::mmo_iterator MMOBegin,
4128  MachineInstr::mmo_iterator MMOEnd,
4129  SmallVectorImpl<MachineInstr*> &NewMIs) const {
4130  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
4131  bool isAligned = MMOBegin != MMOEnd &&
4132  (*MMOBegin)->getAlignment() >= Alignment;
4133  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
4134  DebugLoc DL;
4135  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
4136  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
4137  MIB.addOperand(Addr[i]);
4138  (*MIB).setMemRefs(MMOBegin, MMOEnd);
4139  NewMIs.push_back(MIB);
4140 }
4141 
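// analyzeCompare (below) decomposes a flag-setting compare into its source
// operands. For example (illustrative only), CMP32ri %eax, 5 yields
// SrcReg = %eax, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 5, while
// CMP32rr %eax, %ecx yields SrcReg = %eax, SrcReg2 = %ecx and CmpValue = 0.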
4142 bool X86InstrInfo::
4143 analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
4144  int &CmpMask, int &CmpValue) const {
4145  switch (MI->getOpcode()) {
4146  default: break;
4147  case X86::CMP64ri32:
4148  case X86::CMP64ri8:
4149  case X86::CMP32ri:
4150  case X86::CMP32ri8:
4151  case X86::CMP16ri:
4152  case X86::CMP16ri8:
4153  case X86::CMP8ri:
4154  SrcReg = MI->getOperand(0).getReg();
4155  SrcReg2 = 0;
4156  CmpMask = ~0;
4157  CmpValue = MI->getOperand(1).getImm();
4158  return true;
4159  // A SUB can be used to perform comparison.
4160  case X86::SUB64rm:
4161  case X86::SUB32rm:
4162  case X86::SUB16rm:
4163  case X86::SUB8rm:
4164  SrcReg = MI->getOperand(1).getReg();
4165  SrcReg2 = 0;
4166  CmpMask = ~0;
4167  CmpValue = 0;
4168  return true;
4169  case X86::SUB64rr:
4170  case X86::SUB32rr:
4171  case X86::SUB16rr:
4172  case X86::SUB8rr:
4173  SrcReg = MI->getOperand(1).getReg();
4174  SrcReg2 = MI->getOperand(2).getReg();
4175  CmpMask = ~0;
4176  CmpValue = 0;
4177  return true;
4178  case X86::SUB64ri32:
4179  case X86::SUB64ri8:
4180  case X86::SUB32ri:
4181  case X86::SUB32ri8:
4182  case X86::SUB16ri:
4183  case X86::SUB16ri8:
4184  case X86::SUB8ri:
4185  SrcReg = MI->getOperand(1).getReg();
4186  SrcReg2 = 0;
4187  CmpMask = ~0;
4188  CmpValue = MI->getOperand(2).getImm();
4189  return true;
4190  case X86::CMP64rr:
4191  case X86::CMP32rr:
4192  case X86::CMP16rr:
4193  case X86::CMP8rr:
4194  SrcReg = MI->getOperand(0).getReg();
4195  SrcReg2 = MI->getOperand(1).getReg();
4196  CmpMask = ~0;
4197  CmpValue = 0;
4198  return true;
4199  case X86::TEST8rr:
4200  case X86::TEST16rr:
4201  case X86::TEST32rr:
4202  case X86::TEST64rr:
4203  SrcReg = MI->getOperand(0).getReg();
4204  if (MI->getOperand(1).getReg() != SrcReg) return false;
4205  // Compare against zero.
4206  SrcReg2 = 0;
4207  CmpMask = ~0;
4208  CmpValue = 0;
4209  return true;
4210  }
4211  return false;
4212 }
4213 
4214 /// Check whether the first instruction, whose only
4215 /// purpose is to update flags, can be made redundant.
4216 /// CMPrr can be made redundant by SUBrr if the operands are the same.
4217 /// This function can be extended later on.
4218 /// SrcReg, SrcReg2: register operands for FlagI.
4219 /// ImmValue: immediate for FlagI if it takes an immediate.
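/// For example (an illustrative sketch, register names made up):
///   %d = SUB32rr %a, %b    ; EFLAGS now reflects %a - %b
///   CMP32rr %a, %b         ; redundant, would set EFLAGS the same way
/// The CMP is also matched against SUB32rr %b, %a; in that case the caller
/// must swap the condition codes of the EFLAGS users.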
4220 inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
4221  unsigned SrcReg2, int ImmValue,
4222  MachineInstr *OI) {
4223  if (((FlagI->getOpcode() == X86::CMP64rr &&
4224  OI->getOpcode() == X86::SUB64rr) ||
4225  (FlagI->getOpcode() == X86::CMP32rr &&
4226  OI->getOpcode() == X86::SUB32rr)||
4227  (FlagI->getOpcode() == X86::CMP16rr &&
4228  OI->getOpcode() == X86::SUB16rr)||
4229  (FlagI->getOpcode() == X86::CMP8rr &&
4230  OI->getOpcode() == X86::SUB8rr)) &&
4231  ((OI->getOperand(1).getReg() == SrcReg &&
4232  OI->getOperand(2).getReg() == SrcReg2) ||
4233  (OI->getOperand(1).getReg() == SrcReg2 &&
4234  OI->getOperand(2).getReg() == SrcReg)))
4235  return true;
4236 
4237  if (((FlagI->getOpcode() == X86::CMP64ri32 &&
4238  OI->getOpcode() == X86::SUB64ri32) ||
4239  (FlagI->getOpcode() == X86::CMP64ri8 &&
4240  OI->getOpcode() == X86::SUB64ri8) ||
4241  (FlagI->getOpcode() == X86::CMP32ri &&
4242  OI->getOpcode() == X86::SUB32ri) ||
4243  (FlagI->getOpcode() == X86::CMP32ri8 &&
4244  OI->getOpcode() == X86::SUB32ri8) ||
4245  (FlagI->getOpcode() == X86::CMP16ri &&
4246  OI->getOpcode() == X86::SUB16ri) ||
4247  (FlagI->getOpcode() == X86::CMP16ri8 &&
4248  OI->getOpcode() == X86::SUB16ri8) ||
4249  (FlagI->getOpcode() == X86::CMP8ri &&
4250  OI->getOpcode() == X86::SUB8ri)) &&
4251  OI->getOperand(1).getReg() == SrcReg &&
4252  OI->getOperand(2).getImm() == ImmValue)
4253  return true;
4254  return false;
4255 }
4256 
4257 /// Check whether the definition can be converted
4258 /// to remove a comparison against zero.
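/// For example (illustrative only), in
///   %a = AND32ri %a, 8
///   CMP32ri %a, 0
///   JE ...
/// the AND already sets ZF/SF exactly as the compare against zero would, so
/// the CMP can be removed.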
4259 inline static bool isDefConvertible(MachineInstr *MI) {
4260  switch (MI->getOpcode()) {
4261  default: return false;
4262 
4263  // The shift instructions only modify ZF if their shift count is non-zero.
4264  // N.B.: The processor truncates the shift count depending on the encoding.
4265  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri:
4266  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri:
4267  return getTruncatedShiftCount(MI, 2) != 0;
4268 
4269  // Some left shift instructions can be turned into LEA instructions but only
4270  // if their flags aren't used. Avoid transforming such instructions.
4271  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{
4272  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
4273  if (isTruncatedShiftCountForLEA(ShAmt)) return false;
4274  return ShAmt != 0;
4275  }
4276 
4277  case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
4278  case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
4279  return getTruncatedShiftCount(MI, 3) != 0;
4280 
4281  case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
4282  case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
4283  case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
4284  case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
4285  case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
4286  case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
4287  case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
4288  case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
4289  case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
4290  case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
4291  case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
4292  case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
4293  case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
4294  case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
4295  case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
4296  case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
4297  case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
4298  case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
4299  case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
4300  case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
4301  case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
4302  case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
4303  case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
4304  case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
4305  case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
4306  case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
4307  case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
4308  case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
4309  case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
4310  case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
4311  case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
4312  case X86::ADC32ri: case X86::ADC32ri8:
4313  case X86::ADC32rr: case X86::ADC64ri32:
4314  case X86::ADC64ri8: case X86::ADC64rr:
4315  case X86::SBB32ri: case X86::SBB32ri8:
4316  case X86::SBB32rr: case X86::SBB64ri32:
4317  case X86::SBB64ri8: case X86::SBB64rr:
4318  case X86::ANDN32rr: case X86::ANDN32rm:
4319  case X86::ANDN64rr: case X86::ANDN64rm:
4320  case X86::BEXTR32rr: case X86::BEXTR64rr:
4321  case X86::BEXTR32rm: case X86::BEXTR64rm:
4322  case X86::BLSI32rr: case X86::BLSI32rm:
4323  case X86::BLSI64rr: case X86::BLSI64rm:
4324  case X86::BLSMSK32rr:case X86::BLSMSK32rm:
4325  case X86::BLSMSK64rr:case X86::BLSMSK64rm:
4326  case X86::BLSR32rr: case X86::BLSR32rm:
4327  case X86::BLSR64rr: case X86::BLSR64rm:
4328  case X86::BZHI32rr: case X86::BZHI32rm:
4329  case X86::BZHI64rr: case X86::BZHI64rm:
4330  case X86::LZCNT16rr: case X86::LZCNT16rm:
4331  case X86::LZCNT32rr: case X86::LZCNT32rm:
4332  case X86::LZCNT64rr: case X86::LZCNT64rm:
4333  case X86::POPCNT16rr:case X86::POPCNT16rm:
4334  case X86::POPCNT32rr:case X86::POPCNT32rm:
4335  case X86::POPCNT64rr:case X86::POPCNT64rm:
4336  case X86::TZCNT16rr: case X86::TZCNT16rm:
4337  case X86::TZCNT32rr: case X86::TZCNT32rm:
4338  case X86::TZCNT64rr: case X86::TZCNT64rm:
4339  return true;
4340  }
4341 }
4342 
4343 /// Check whether the use can be converted to remove a comparison against zero.
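/// For example (illustrative only), in
///   %c = POPCNT32rr %a
///   TEST32rr %a, %a
///   JE ...
/// POPCNT sets ZF exactly when %a is zero, so the TEST can be removed and the
/// JE kept as COND_E; LZCNT and TZCNT instead set CF for a zero input, so the
/// branch would have to be rewritten to use COND_B.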
4344 static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
4345  switch (MI->getOpcode()) {
4346  default: return X86::COND_INVALID;
4347  case X86::LZCNT16rr: case X86::LZCNT16rm:
4348  case X86::LZCNT32rr: case X86::LZCNT32rm:
4349  case X86::LZCNT64rr: case X86::LZCNT64rm:
4350  return X86::COND_B;
4351  case X86::POPCNT16rr:case X86::POPCNT16rm:
4352  case X86::POPCNT32rr:case X86::POPCNT32rm:
4353  case X86::POPCNT64rr:case X86::POPCNT64rm:
4354  return X86::COND_E;
4355  case X86::TZCNT16rr: case X86::TZCNT16rm:
4356  case X86::TZCNT32rr: case X86::TZCNT32rm:
4357  case X86::TZCNT64rr: case X86::TZCNT64rm:
4358  return X86::COND_B;
4359  }
4360 }
4361 
4362 /// Check if there exists an earlier instruction that
4363 /// operates on the same source operands and sets flags in the same way as
4364 /// Compare; remove Compare if possible.
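/// A typical case this handles (an illustrative sketch, registers made up):
///   %d = SUB32rr %a, %b    ; EFLAGS = %a - %b
///   CMP32rr %b, %a         ; EFLAGS = %b - %a, i.e. the operands are swapped
///   JL ...
/// The CMP is deleted and the JL is rewritten to JG, since %b - %a < 0 holds
/// exactly when %a - %b > 0.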
4365 bool X86InstrInfo::
4366 optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
4367  int CmpMask, int CmpValue,
4368  const MachineRegisterInfo *MRI) const {
4369  // Check whether we can replace SUB with CMP.
4370  unsigned NewOpcode = 0;
4371  switch (CmpInstr->getOpcode()) {
4372  default: break;
4373  case X86::SUB64ri32:
4374  case X86::SUB64ri8:
4375  case X86::SUB32ri:
4376  case X86::SUB32ri8:
4377  case X86::SUB16ri:
4378  case X86::SUB16ri8:
4379  case X86::SUB8ri:
4380  case X86::SUB64rm:
4381  case X86::SUB32rm:
4382  case X86::SUB16rm:
4383  case X86::SUB8rm:
4384  case X86::SUB64rr:
4385  case X86::SUB32rr:
4386  case X86::SUB16rr:
4387  case X86::SUB8rr: {
4388  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
4389  return false;
4390  // There is no use of the destination register, so we can replace SUB with CMP.
4391  switch (CmpInstr->getOpcode()) {
4392  default: llvm_unreachable("Unreachable!");
4393  case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
4394  case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
4395  case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
4396  case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
4397  case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
4398  case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
4399  case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
4400  case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
4401  case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
4402  case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
4403  case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
4404  case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
4405  case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
4406  case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
4407  case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
4408  }
4409  CmpInstr->setDesc(get(NewOpcode));
4410  CmpInstr->RemoveOperand(0);
4411  // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
4412  if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
4413  NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
4414  return false;
4415  }
4416  }
4417 
4418  // Get the unique definition of SrcReg.
4419  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
4420  if (!MI) return false;
4421 
4422  // CmpInstr is the first instruction of the BB.
4423  MachineBasicBlock::iterator I = CmpInstr, Def = MI;
4424 
4425  // If we are comparing against zero, check whether we can use MI to update
4426  // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
4427  bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
4428  if (IsCmpZero && MI->getParent() != CmpInstr->getParent())
4429  return false;
4430 
4431  // If we have a use of the source register between the def and our compare
4432  // instruction we can eliminate the compare iff the use sets EFLAGS in the
4433  // right way.
4434  bool ShouldUpdateCC = false;
4435  X86::CondCode NewCC = X86::COND_INVALID;
4436  if (IsCmpZero && !isDefConvertible(MI)) {
4437  // Scan forward from the def until we hit the use we're looking for or the
4438  // compare instruction.
4439  for (MachineBasicBlock::iterator J = MI;; ++J) {
4440  // Do we have a convertible instruction?
4441  NewCC = isUseDefConvertible(J);
4442  if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
4443  J->getOperand(1).getReg() == SrcReg) {
4444  assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
4445  ShouldUpdateCC = true; // Update CC later on.
4446  // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
4447  // with the new def.
4448  MI = Def = J;
4449  break;
4450  }
4451 
4452  if (J == I)
4453  return false;
4454  }
4455  }
4456 
4457  // We are searching for an earlier instruction that can make CmpInstr
4458  // redundant and that instruction will be saved in Sub.
4459  MachineInstr *Sub = nullptr;
4460  const TargetRegisterInfo *TRI = &getRegisterInfo();
4461 
4462  // We iterate backward, starting from the instruction before CmpInstr, and
4463  // stop when we reach the definition of a source register or the start of the BB.
4464  // RI points to the instruction before CmpInstr.
4465  // If the definition is in this basic block, RE points to the definition;
4466  // otherwise, RE is the rend of the basic block.
4467  MachineBasicBlock::reverse_iterator
4468  RI = MachineBasicBlock::reverse_iterator(I),
4469  RE = CmpInstr->getParent() == MI->getParent() ?
4470  MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
4471  CmpInstr->getParent()->rend();
4472  MachineInstr *Movr0Inst = nullptr;
4473  for (; RI != RE; ++RI) {
4474  MachineInstr *Instr = &*RI;
4475  // Check whether CmpInstr can be made redundant by the current instruction.
4476  if (!IsCmpZero &&
4477  isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
4478  Sub = Instr;
4479  break;
4480  }
4481 
4482  if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
4483  Instr->readsRegister(X86::EFLAGS, TRI)) {
4484  // This instruction modifies or uses EFLAGS.
4485 
4486  // MOV32r0 etc. are implemented with xor which clobbers condition code.
4487  // They are safe to move up if the definition of EFLAGS is dead and
4488  // earlier instructions do not read or write EFLAGS.
4489  if (!Movr0Inst && Instr->getOpcode() == X86::MOV32r0 &&
4490  Instr->registerDefIsDead(X86::EFLAGS, TRI)) {
4491  Movr0Inst = Instr;
4492  continue;
4493  }
4494 
4495  // We can't remove CmpInstr.
4496  return false;
4497  }
4498  }
4499 
4500  // Return false if no candidates exist.
4501  if (!IsCmpZero && !Sub)
4502  return false;
4503 
4504  bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
4505  Sub->getOperand(2).getReg() == SrcReg);
4506 
4507  // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
4508  // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
4509  // If we are done with the basic block, we need to check whether EFLAGS is
4510  // live-out.
4511  bool IsSafe = false;
4512  SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
4513  MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
4514  for (++I; I != E; ++I) {
4515  const MachineInstr &Instr = *I;
4516  bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
4517  bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
4518  // We should check the usage if this instruction uses and updates EFLAGS.
4519  if (!UseEFLAGS && ModifyEFLAGS) {
4520  // It is safe to remove CmpInstr if EFLAGS is updated again.
4521  IsSafe = true;
4522  break;
4523  }
4524  if (!UseEFLAGS && !ModifyEFLAGS)
4525  continue;
4526 
4527  // EFLAGS is used by this instruction.
4528  X86::CondCode OldCC = X86::COND_INVALID;
4529  bool OpcIsSET = false;
4530  if (IsCmpZero || IsSwapped) {
4531  // We decode the condition code from opcode.
4532  if (Instr.isBranch())
4533  OldCC = getCondFromBranchOpc(Instr.getOpcode());
4534  else {
4535  OldCC = getCondFromSETOpc(Instr.getOpcode());
4536  if (OldCC != X86::COND_INVALID)
4537  OpcIsSET = true;
4538  else
4539  OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
4540  }
4541  if (OldCC == X86::COND_INVALID) return false;
4542  }
4543  if (IsCmpZero) {
4544  switch (OldCC) {
4545  default: break;
4546  case X86::COND_A: case X86::COND_AE:
4547  case X86::COND_B: case X86::COND_BE:
4548  case X86::COND_G: case X86::COND_GE:
4549  case X86::COND_L: case X86::COND_LE:
4550  case X86::COND_O: case X86::COND_NO:
4551  // CF and OF are used, we can't perform this optimization.
4552  return false;
4553  }
4554 
4555  // If we're updating the condition code check if we have to reverse the
4556  // condition.
4557  if (ShouldUpdateCC)
4558  switch (OldCC) {
4559  default:
4560  return false;
4561  case X86::COND_E:
4562  break;
4563  case X86::COND_NE:
4564  NewCC = GetOppositeBranchCondition(NewCC);
4565  break;
4566  }
4567  } else if (IsSwapped) {
4568  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
4569  // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
4570  // We swap the condition code and synthesize the new opcode.
4571  NewCC = getSwappedCondition(OldCC);
4572  if (NewCC == X86::COND_INVALID) return false;
4573  }
4574 
4575  if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) {
4576  // Synthesize the new opcode.
4577  bool HasMemoryOperand = Instr.hasOneMemOperand();
4578  unsigned NewOpc;
4579  if (Instr.isBranch())
4580  NewOpc = GetCondBranchFromCond(NewCC);
4581  else if(OpcIsSET)
4582  NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
4583  else {
4584  unsigned DstReg = Instr.getOperand(0).getReg();
4585  NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
4586  HasMemoryOperand);
4587  }
4588 
4589  // Push the MachineInstr to OpsToUpdate.
4590  // If it is safe to remove CmpInstr, the condition code of these
4591  // instructions will be modified.
4592  OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
4593  }
4594  if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
4595  // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
4596  IsSafe = true;
4597  break;
4598  }
4599  }
4600 
4601  // If EFLAGS is not killed nor re-defined, we should check whether it is
4602  // live-out. If it is live-out, do not optimize.
4603  if ((IsCmpZero || IsSwapped) && !IsSafe) {
4604  MachineBasicBlock *MBB = CmpInstr->getParent();
4605  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
4606  SE = MBB->succ_end(); SI != SE; ++SI)
4607  if ((*SI)->isLiveIn(X86::EFLAGS))
4608  return false;
4609  }
4610 
4611  // The instruction to be updated is either Sub or MI.
4612  Sub = IsCmpZero ? MI : Sub;
4613  // Move Movr0Inst to the appropriate place before Sub.
4614  if (Movr0Inst) {
4615  // Look backwards until we find a def that doesn't use the current EFLAGS.
4616  Def = Sub;
4617  MachineBasicBlock::reverse_iterator
4618  InsertI = MachineBasicBlock::reverse_iterator(++Def),
4619  InsertE = Sub->getParent()->rend();
4620  for (; InsertI != InsertE; ++InsertI) {
4621  MachineInstr *Instr = &*InsertI;
4622  if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
4623  Instr->modifiesRegister(X86::EFLAGS, TRI)) {
4624  Sub->getParent()->remove(Movr0Inst);
4625  Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
4626  Movr0Inst);
4627  break;
4628  }
4629  }
4630  if (InsertI == InsertE)
4631  return false;
4632  }
4633 
4634  // Make sure Sub instruction defines EFLAGS and mark the def live.
4635  unsigned i = 0, e = Sub->getNumOperands();
4636  for (; i != e; ++i) {
4637  MachineOperand &MO = Sub->getOperand(i);
4638  if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
4639  MO.setIsDead(false);
4640  break;
4641  }
4642  }
4643  assert(i != e && "Unable to locate a def EFLAGS operand");
4644 
4645  CmpInstr->eraseFromParent();
4646 
4647  // Modify the condition code of instructions in OpsToUpdate.
4648  for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
4649  OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
4650  return true;
4651 }
4652 
4653 /// Try to remove the load by folding it to a register
4654 /// operand at the use. We fold the load if it defines a virtual
4655 /// register, the virtual register is used once in the same BB, and the
4656 /// instructions in between do not load or store and have no side effects.
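/// For example (an illustrative sketch with made-up virtual registers):
///   %v = MOV32rm %rdi, 1, %noreg, 0, %noreg   ; single use of %v below
///   %d = ADD32rr %a, %v
/// becomes
///   %d = ADD32rm %a, %rdi, 1, %noreg, 0, %noreg
/// provided nothing between the load and the ADD may read or write memory.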
4657 MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
4658  const MachineRegisterInfo *MRI,
4659  unsigned &FoldAsLoadDefReg,
4660  MachineInstr *&DefMI) const {
4661  if (FoldAsLoadDefReg == 0)
4662  return nullptr;
4663  // To be conservative, if there exists another load, clear the load candidate.
4664  if (MI->mayLoad()) {
4665  FoldAsLoadDefReg = 0;
4666  return nullptr;
4667  }
4668 
4669  // Check whether we can move DefMI here.
4670  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
4671  assert(DefMI);
4672  bool SawStore = false;
4673  if (!DefMI->isSafeToMove(nullptr, SawStore))
4674  return nullptr;
4675 
4676  // Collect information about virtual register operands of MI.
4677  unsigned SrcOperandId = 0;
4678  bool FoundSrcOperand = false;
4679  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
4680  MachineOperand &MO = MI->getOperand(i);
4681  if (!MO.isReg())
4682  continue;
4683  unsigned Reg = MO.getReg();
4684  if (Reg != FoldAsLoadDefReg)
4685  continue;
4686  // Do not fold if we have a subreg use or a def or multiple uses.
4687  if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
4688  return nullptr;
4689 
4690  SrcOperandId = i;
4691  FoundSrcOperand = true;
4692  }
4693  if (!FoundSrcOperand)
4694  return nullptr;
4695 
4696  // Check whether we can fold the def into SrcOperandId.
4697  MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
4698  if (FoldMI) {
4699  FoldAsLoadDefReg = 0;
4700  return FoldMI;
4701  }
4702 
4703  return nullptr;
4704 }
4705 
4706 /// Expand a single-def pseudo instruction to a two-addr
4707 /// instruction with two undef reads of the register being defined.
4708 /// This is used for mapping:
4709 /// %xmm4 = V_SET0
4710 /// to:
4711 /// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
4712 ///
4713 static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
4714  const MCInstrDesc &Desc) {
4715  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
4716  unsigned Reg = MIB->getOperand(0).getReg();
4717  MIB->setDesc(Desc);
4718 
4719  // MachineInstr::addOperand() will insert explicit operands before any
4720  // implicit operands.
4721  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
4722  // But we don't trust that.
4723  assert(MIB->getOperand(1).getReg() == Reg &&
4724  MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
4725  return true;
4726 }
4727 
4728 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
4729 // code sequence is needed for other targets.
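// On that target the pseudo is expanded to (an illustrative sketch; the guard
// symbol is assumed to be __stack_chk_guard):
//   movq __stack_chk_guard@GOTPCREL(%rip), %reg
//   movq (%reg), %reg
// i.e. a RIP-relative load of the guard's address from the GOT followed by a
// load of the guard value itself, as implemented below.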
4730 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
4731  const TargetInstrInfo &TII) {
4732  MachineBasicBlock &MBB = *MIB->getParent();
4733  DebugLoc DL = MIB->getDebugLoc();
4734  unsigned Reg = MIB->getOperand(0).getReg();
4735  const GlobalValue *GV =
4736  cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
4737  unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
4738  MachineMemOperand *MMO = MBB.getParent()->
4739  getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8);
4740  MachineBasicBlock::iterator I = MIB.getInstr();
4741 
4742  BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
4743  .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
4744  .addMemOperand(MMO);
4745  MIB->setDebugLoc(DL);
4746  MIB->setDesc(TII.get(X86::MOV64rm));
4747  MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
4748 }
4749 
4750 bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
4751  bool HasAVX = Subtarget.hasAVX();
4752  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
4753  switch (MI->getOpcode()) {
4754  case X86::MOV32r0:
4755  return Expand2AddrUndef(MIB, get(X86::XOR32rr));
4756  case X86::SETB_C8r:
4757  return Expand2AddrUndef(MIB, get(X86::SBB8rr));
4758  case X86::SETB_C16r:
4759  return Expand2AddrUndef(MIB, get(X86::SBB16rr));
4760  case X86::SETB_C32r:
4761  return Expand2AddrUndef(MIB, get(X86::SBB32rr));
4762  case X86::SETB_C64r:
4763  return Expand2AddrUndef(MIB, get(X86::SBB64rr));
4764  case X86::V_SET0:
4765  case X86::FsFLD0SS:
4766  case X86::FsFLD0SD:
4767  return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
4768  case X86::AVX_SET0:
4769  assert(HasAVX && "AVX not supported");
4770  return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
4771  case X86::AVX512_512_SET0:
4772  return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
4773  case X86::V_SETALLONES:
4774  return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
4775  case X86::AVX2_SETALLONES:
4776  return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
4777  case X86::TEST8ri_NOREX:
4778  MI->setDesc(get(X86::TEST8ri));
4779  return true;
4780  case X86::KSET0B:
4781  case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
4782  case X86::KSET1B:
4783  case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
4784  case TargetOpcode::LOAD_STACK_GUARD:
4785  expandLoadStackGuard(MIB, *this);
4786  return true;
4787  }
4788  return false;
4789 }
4790 
4791 static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) {
4792  unsigned NumAddrOps = MOs.size();
4793  for (unsigned i = 0; i != NumAddrOps; ++i)
4794  MIB.addOperand(MOs[i]);
4795  if (NumAddrOps < 4) // FrameIndex only
4796  addOffset(MIB, 0);
4797 }
4798 
4799 static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
4800  ArrayRef<MachineOperand> MOs,
4801  MachineBasicBlock::iterator InsertPt,
4802  MachineInstr *MI,
4803  const TargetInstrInfo &TII) {
4804  // Create the base instruction with the memory operand as the first part.
4805  // Omit the implicit operands, something BuildMI can't do.
4806  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
4807  MI->getDebugLoc(), true);
4808  MachineInstrBuilder MIB(MF, NewMI);
4809  addOperands(MIB, MOs);
4810 
4811  // Loop over the rest of the ri operands, converting them over.
4812  unsigned NumOps = MI->getDesc().getNumOperands()-2;
4813  for (unsigned i = 0; i != NumOps; ++i) {
4814  MachineOperand &MO = MI->getOperand(i+2);
4815  MIB.addOperand(MO);
4816  }
4817  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
4818  MachineOperand &MO = MI->getOperand(i);
4819  MIB.addOperand(MO);
4820  }
4821 
4822  MachineBasicBlock *MBB = InsertPt->getParent();
4823  MBB->insert(InsertPt, NewMI);
4824 
4825  return MIB;
4826 }
4827 
4828 static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
4829  unsigned OpNo, ArrayRef<MachineOperand> MOs,
4830  MachineBasicBlock::iterator InsertPt,
4831  MachineInstr *MI, const TargetInstrInfo &TII) {
4832  // Omit the implicit operands, something BuildMI can't do.
4833  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
4834  MI->getDebugLoc(), true);
4835  MachineInstrBuilder MIB(MF, NewMI);
4836 
4837  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
4838  MachineOperand &MO = MI->getOperand(i);
4839  if (i == OpNo) {
4840  assert(MO.isReg() && "Expected to fold into reg operand!");
4841  addOperands(MIB, MOs);
4842  } else {
4843  MIB.addOperand(MO);
4844  }
4845  }
4846 
4847  MachineBasicBlock *MBB = InsertPt->getParent();
4848  MBB->insert(InsertPt, NewMI);
4849 
4850  return MIB;
4851 }
4852 
4853 static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
4854  ArrayRef<MachineOperand> MOs,
4855  MachineBasicBlock::iterator InsertPt,
4856  MachineInstr *MI) {
4857  MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
4858  MI->getDebugLoc(), TII.get(Opcode));
4859  addOperands(MIB, MOs);
4860  return MIB.addImm(0);
4861 }
4862 
4863 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
4864  MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
4865  ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
4866  unsigned Size, unsigned Align, bool AllowCommute) const {
4867  const DenseMap<unsigned,
4868  std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
4869  bool isCallRegIndirect = Subtarget.callRegIndirect();
4870  bool isTwoAddrFold = false;
4871 
4872  // For CPUs that favor the register form of a call,
4873  // do not fold loads into calls.
4874  if (isCallRegIndirect &&
4875  (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r))
4876  return nullptr;
4877 
4878  unsigned NumOps = MI->getDesc().getNumOperands();
4879  bool isTwoAddr = NumOps > 1 &&
4880  MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
4881 
4882  // FIXME: AsmPrinter doesn't know how to handle
4883  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
4884  if (MI->getOpcode() == X86::ADD32ri &&
4885  MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
4886  return nullptr;
4887 
4888  MachineInstr *NewMI = nullptr;
4889  // Folding a memory location into the two-address part of a two-address
4890  // instruction is different from folding it in other places. It requires
4891  // replacing the *two* registers with the memory location.
4892  if (isTwoAddr && NumOps >= 2 && OpNum < 2 &&
4893  MI->getOperand(0).isReg() &&
4894  MI->getOperand(1).isReg() &&
4895  MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
4896  OpcodeTablePtr = &RegOp2MemOpTable2Addr;
4897  isTwoAddrFold = true;
4898  } else if (OpNum == 0) {
4899  if (MI->getOpcode() == X86::MOV32r0) {
4900  NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
4901  if (NewMI)
4902  return NewMI;
4903  }
4904 
4905  OpcodeTablePtr = &RegOp2MemOpTable0;
4906  } else if (OpNum == 1) {
4907  OpcodeTablePtr = &RegOp2MemOpTable1;
4908  } else if (OpNum == 2) {
4909  OpcodeTablePtr = &RegOp2MemOpTable2;
4910  } else if (OpNum == 3) {
4911  OpcodeTablePtr = &RegOp2MemOpTable3;
4912  } else if (OpNum == 4) {
4913  OpcodeTablePtr = &RegOp2MemOpTable4;
4914  }
4915 
4916  // If table selected...
4917  if (OpcodeTablePtr) {
4918  // Find the Opcode to fuse
4919  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
4920  OpcodeTablePtr->find(MI->getOpcode());
4921  if (I != OpcodeTablePtr->end()) {
4922  unsigned Opcode = I->second.first;
4923  unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
4924  if (Align < MinAlign)
4925  return nullptr;
4926  bool NarrowToMOV32rm = false;
4927  if (Size) {
4928  unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize();
4929  if (Size < RCSize) {
4930  // Check if it's safe to fold the load. If the size of the object is
4931  // narrower than the load width, then it's not.
4932  if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
4933  return nullptr;
4934  // If this is a 64-bit load, but the spill slot is 32, then we can do
4935  // a 32-bit load which is implicitly zero-extended. This likely is
4936  // due to live interval analysis remat'ing a load from stack slot.
4937  if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
4938  return nullptr;
4939  Opcode = X86::MOV32rm;
4940  NarrowToMOV32rm = true;
4941  }
4942  }
4943 
4944  if (isTwoAddrFold)
4945  NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
4946  else
4947  NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
4948 
4949  if (NarrowToMOV32rm) {
4950  // If this is the special case where we use a MOV32rm to load a 32-bit
4951  // value and zero-extend the top bits. Change the destination register
4952  // to a 32-bit one.
4953  unsigned DstReg = NewMI->getOperand(0).getReg();
4954  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
4955  NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
4956  else
4957  NewMI->getOperand(0).setSubReg(X86::sub_32bit);
4958  }
4959  return NewMI;
4960  }
4961  }
4962 
4963  // If the instruction and target operand are commutable, commute the
4964  // instruction and try again.
4965  if (AllowCommute) {
4966  unsigned OriginalOpIdx = OpNum, CommuteOpIdx1, CommuteOpIdx2;
4967  if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
4968  bool HasDef = MI->getDesc().getNumDefs();
4969  unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
4970  unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
4971  unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
4972  bool Tied0 =
4973  0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
4974  bool Tied1 =
4975  0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
4976 
4977  // If either of the commutable operands are tied to the destination
4978  // then we can not commute + fold.
4979  if ((HasDef && Reg0 == Reg1 && Tied0) ||
4980  (HasDef && Reg0 == Reg2 && Tied1))
4981  return nullptr;
4982 
4983  if ((CommuteOpIdx1 == OriginalOpIdx) ||
4984  (CommuteOpIdx2 == OriginalOpIdx)) {
4985  MachineInstr *CommutedMI = commuteInstruction(MI, false);
4986  if (!CommutedMI) {
4987  // Unable to commute.
4988  return nullptr;
4989  }
4990  if (CommutedMI != MI) {
4991  // New instruction. We can't fold from this.
4992  CommutedMI->eraseFromParent();
4993  return nullptr;
4994  }
4995 
4996  // Attempt to fold with the commuted version of the instruction.
4997  unsigned CommuteOp =
4998  (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
4999  NewMI =
5000  foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align,
5001  /*AllowCommute=*/false);
5002  if (NewMI)
5003  return NewMI;
5004 
5005  // Folding failed again - undo the commute before returning.
5006  MachineInstr *UncommutedMI = commuteInstruction(MI, false);
5007  if (!UncommutedMI) {
5008  // Unable to commute.
5009  return nullptr;
5010  }
5011  if (UncommutedMI != MI) {
5012  // New instruction. It doesn't need to be kept.
5013  UncommutedMI->eraseFromParent();
5014  return nullptr;
5015  }
5016 
5017  // Return here to prevent duplicate fuse failure report.
5018  return nullptr;
5019  }
5020  }
5021  }
5022 
5023  // No fusion
5024  if (PrintFailedFusing && !MI->isCopy())
5025  dbgs() << "We failed to fuse operand " << OpNum << " in " << *MI;
5026  return nullptr;
5027 }
5028 
5029 /// Return true for all instructions that only update
5030 /// the first 32 or 64 bits of the destination register and leave the rest
5031 /// unmodified. This can be used to avoid folding loads if the instructions
5032 /// only update part of the destination register, and the non-updated part is
5033 /// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
5034 /// instructions breaks the partial register dependency and it can improve
5035 /// performance. e.g.:
5036 ///
5037 /// movss (%rdi), %xmm0
5038 /// cvtss2sd %xmm0, %xmm0
5039 ///
5040 /// Instead of
5041 /// cvtss2sd (%rdi), %xmm0
5042 ///
5043 /// FIXME: This should be turned into a TSFlags.
5044 ///
5045 static bool hasPartialRegUpdate(unsigned Opcode) {
5046  switch (Opcode) {
5047  case X86::CVTSI2SSrr:
5048  case X86::CVTSI2SSrm:
5049  case X86::CVTSI2SS64rr:
5050  case X86::CVTSI2SS64rm:
5051  case X86::CVTSI2SDrr:
5052  case X86::CVTSI2SDrm:
5053  case X86::CVTSI2SD64rr:
5054  case X86::CVTSI2SD64rm:
5055  case X86::CVTSD2SSrr:
5056  case X86::CVTSD2SSrm:
5057  case X86::Int_CVTSD2SSrr:
5058  case X86::Int_CVTSD2SSrm:
5059  case X86::CVTSS2SDrr:
5060  case X86::CVTSS2SDrm:
5061  case X86::Int_CVTSS2SDrr:
5062  case X86::Int_CVTSS2SDrm:
5063  case X86::RCPSSr:
5064  case X86::RCPSSm:
5065  case X86::RCPSSr_Int:
5066  case X86::RCPSSm_Int:
5067  case X86::ROUNDSDr:
5068  case X86::ROUNDSDm:
5069  case X86::ROUNDSDr_Int:
5070  case X86::ROUNDSSr:
5071  case X86::ROUNDSSm:
5072  case X86::ROUNDSSr_Int:
5073  case X86::RSQRTSSr:
5074  case X86::RSQRTSSm:
5075  case X86::RSQRTSSr_Int:
5076  case X86::RSQRTSSm_Int:
5077  case X86::SQRTSSr:
5078  case X86::SQRTSSm:
5079  case X86::SQRTSSr_Int:
5080  case X86::SQRTSSm_Int:
5081  case X86::SQRTSDr:
5082  case X86::SQRTSDm:
5083  case X86::SQRTSDr_Int:
5084  case X86::SQRTSDm_Int:
5085  return true;
5086  }
5087 
5088  return false;
5089 }
5090 
5091 /// Inform the ExeDepsFix pass how many idle
5092 /// instructions we would like before a partial register update.
5093 unsigned X86InstrInfo::
5094 getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
5095  const TargetRegisterInfo *TRI) const {
5096  if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
5097  return 0;
5098 
5099  // If MI is marked as reading Reg, the partial register update is wanted.
5100  const MachineOperand &MO = MI->getOperand(0);
5101  unsigned Reg = MO.getReg();
5102  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
5103  if (MO.readsReg() || MI->readsVirtualRegister(Reg))
5104  return 0;
5105  } else {
5106  if (MI->readsRegister(Reg, TRI))
5107  return 0;
5108  }
5109 
5110  // If any of the preceding 16 instructions are reading Reg, insert a
5111  // dependency breaking instruction. The magic number is based on a few
5112  // Nehalem experiments.
5113  return 16;
5114 }
5115 
5116 // Return true for any instruction that copies the high bits of the first source
5117 // operand into the unused high bits of the destination operand.
5118 static bool hasUndefRegUpdate(unsigned Opcode) {
5119  switch (Opcode) {
5120  case X86::VCVTSI2SSrr:
5121  case X86::VCVTSI2SSrm:
5122  case X86::Int_VCVTSI2SSrr:
5123  case X86::Int_VCVTSI2SSrm:
5124  case X86::VCVTSI2SS64rr:
5125  case X86::VCVTSI2SS64rm:
5126  case X86::Int_VCVTSI2SS64rr:
5127  case X86::Int_VCVTSI2SS64rm:
5128  case X86::VCVTSI2SDrr:
5129  case X86::VCVTSI2SDrm:
5130  case X86::Int_VCVTSI2SDrr:
5131  case X86::Int_VCVTSI2SDrm:
5132  case X86::VCVTSI2SD64rr:
5133  case X86::VCVTSI2SD64rm:
5134  case X86::Int_VCVTSI2SD64rr:
5135  case X86::Int_VCVTSI2SD64rm:
5136  case X86::VCVTSD2SSrr:
5137  case X86::VCVTSD2SSrm:
5138  case X86::Int_VCVTSD2SSrr:
5139  case X86::Int_VCVTSD2SSrm:
5140  case X86::VCVTSS2SDrr:
5141  case X86::VCVTSS2SDrm:
5142  case X86::Int_VCVTSS2SDrr:
5143  case X86::Int_VCVTSS2SDrm:
5144  case X86::VRCPSSr:
5145  case X86::VRCPSSm:
5146  case X86::VRCPSSm_Int:
5147  case X86::VROUNDSDr:
5148  case X86::VROUNDSDm:
5149  case X86::VROUNDSDr_Int:
5150  case X86::VROUNDSSr:
5151  case X86::VROUNDSSm:
5152  case X86::VROUNDSSr_Int:
5153  case X86::VRSQRTSSr:
5154  case X86::VRSQRTSSm:
5155  case X86::VRSQRTSSm_Int:
5156  case X86::VSQRTSSr:
5157  case X86::VSQRTSSm:
5158  case X86::VSQRTSSm_Int:
5159  case X86::VSQRTSDr:
5160  case X86::VSQRTSDm:
5161  case X86::VSQRTSDm_Int:
5162  // AVX-512
5163  case X86::VCVTSD2SSZrr:
5164  case X86::VCVTSD2SSZrm:
5165  case X86::VCVTSS2SDZrr:
5166  case X86::VCVTSS2SDZrm:
5167  return true;
5168  }
5169 
5170  return false;
5171 }
5172 
5173 /// Inform the ExeDepsFix pass how many idle instructions we would like before
5174 /// certain undef register reads.
5175 ///
5176 /// This catches the VCVTSI2SD family of instructions:
5177 ///
5178 /// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14
5179 ///
5180 /// We should be careful *not* to catch VXOR idioms, which are presumably
5181 /// handled specially in the pipeline:
5182 ///
5183 /// vxorps %xmm1<undef>, %xmm1<undef>, %xmm1
5184 ///
5185 /// Like getPartialRegUpdateClearance, this makes a strong assumption that the
5186 /// high bits that are passed-through are not live.
5187 unsigned X86InstrInfo::
5188 getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
5189  const TargetRegisterInfo *TRI) const {
5190  if (!hasUndefRegUpdate(MI->getOpcode()))
5191  return 0;
5192 
5193  // Set the OpNum parameter to the first source operand.
5194  OpNum = 1;
5195 
5196  const MachineOperand &MO = MI->getOperand(OpNum);
5197  if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
5198  // Use the same magic number as getPartialRegUpdateClearance.
5199  return 16;
5200  }
5201  return 0;
5202 }
5203 
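// breakPartialRegDependency (below) inserts a dependency-breaking idiom in
// front of MI. For example (an illustrative sketch, register choice made up),
// if %xmm0 carries a false dependence into
//   cvtsi2ss %edi, %xmm0
// it emits
//   xorps %xmm0, %xmm0        ; vxorps when AVX is available
// immediately before it, so the convert no longer waits on the previous
// writer of %xmm0.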
5204 void X86InstrInfo::
5205 breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
5206  const TargetRegisterInfo *TRI) const {
5207  unsigned Reg = MI->getOperand(OpNum).getReg();
5208  // If MI kills this register, the false dependence is already broken.
5209  if (MI->killsRegister(Reg, TRI))
5210  return;
5211  if (X86::VR128RegClass.contains(Reg)) {
5212  // These instructions are all floating point domain, so xorps is the best
5213  // choice.
5214  bool HasAVX = Subtarget.hasAVX();
5215  unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
5216  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
5217  .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
5218  } else if (X86::VR256RegClass.contains(Reg)) {
5219  // Use vxorps to clear the full ymm register.
5220  // It wants to read and write the xmm sub-register.
5221  unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
5222  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
5223  .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
5224  .addReg(Reg, RegState::ImplicitDefine);
5225  } else
5226  return;
5227  MI->addRegisterKilled(Reg, TRI, true);
5228 }
5229 
5230 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
5231  MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
5232  MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
5233  // Check switch flag
5234  if (NoFusing) return nullptr;
5235 
5236  // Unless optimizing for size, don't fold to avoid partial
5237  // register update stalls
5238  if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
5239  hasPartialRegUpdate(MI->getOpcode()))
5240  return nullptr;
5241 
5242  const MachineFrameInfo *MFI = MF.getFrameInfo();
5243  unsigned Size = MFI->getObjectSize(FrameIndex);
5244  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
5245  // If the function stack isn't realigned we don't want to fold instructions
5246  // that need increased alignment.
5247  if (!RI.needsStackRealignment(MF))
5248  Alignment =
5249  std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
5250  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
5251  unsigned NewOpc = 0;
5252  unsigned RCSize = 0;
5253  switch (MI->getOpcode()) {
5254  default: return nullptr;
5255  case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
5256  case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
5257  case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
5258  case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
5259  }
5260  // Check if it's safe to fold the load. If the size of the object is
5261  // narrower than the load width, then it's not.
5262  if (Size < RCSize)
5263  return nullptr;
5264  // Change to CMPXXri r, 0 first.
5265  MI->setDesc(get(NewOpc));
5266  MI->getOperand(1).ChangeToImmediate(0);
5267  } else if (Ops.size() != 1)
5268  return nullptr;
5269 
5270  return foldMemoryOperandImpl(MF, MI, Ops[0],
5271  MachineOperand::CreateFI(FrameIndex), InsertPt,
5272  Size, Alignment, /*AllowCommute=*/true);
5273 }
5274 
5275 /// Check if \p LoadMI is a partial register load that we can't fold into \p MI
5276 /// because the latter uses contents that wouldn't be defined in the folded
5277 /// version. For instance, this transformation isn't legal:
5278 /// movss (%rdi), %xmm0
5279 /// addps %xmm0, %xmm0
5280 /// ->
5281 /// addps (%rdi), %xmm0
5282 ///
5283 /// But this one is:
5284 /// movss (%rdi), %xmm0
5285 /// addss %xmm0, %xmm0
5286 /// ->
5287 /// addss (%rdi), %xmm0
5288 ///
5289 static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
5290  const MachineInstr &UserMI,
5291  const MachineFunction &MF) {
5292  unsigned Opc = LoadMI.getOpcode();
5293  unsigned UserOpc = UserMI.getOpcode();
5294  unsigned RegSize =
5295  MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
5296 
5297  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) {
5298  // These instructions only load 32 bits, so we can't fold them if the
5299  // destination register is wider than 32 bits (4 bytes) and the user
5300  // instruction isn't scalar (SS).
5301  switch (UserOpc) {
5302  case X86::ADDSSrr_Int: case X86::VADDSSrr_Int:
5303  case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
5304  case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
5305  case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
5306  return false;
5307  default:
5308  return true;
5309  }
5310  }
5311 
5312  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) {
5313  // These instructions only load 64 bits, so we can't fold them if the
5314  // destination register is wider than 64 bits (8 bytes) and the user
5315  // instruction isn't scalar (SD).
5316  switch (UserOpc) {
5317  case X86::ADDSDrr_Int: case X86::VADDSDrr_Int:
5318  case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
5319  case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
5320  case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
5321  return false;
5322  default:
5323  return true;
5324  }
5325  }
5326 
5327  return false;
5328 }
5329 
5330 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
5331  MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
5332  MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
5333  // If loading from a FrameIndex, fold directly from the FrameIndex.
5334  unsigned NumOps = LoadMI->getDesc().getNumOperands();
5335  int FrameIndex;
5336  if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
5337  if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
5338  return nullptr;
5339  return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
5340  }
5341 
5342  // Check switch flag
5343  if (NoFusing) return nullptr;
5344 
5345  // Unless optimizing for size, don't fold to avoid partial
5346  // register update stalls
5347  if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
5348  hasPartialRegUpdate(MI->getOpcode()))
5349  return nullptr;
5350 
5351  // Determine the alignment of the load.
5352  unsigned Alignment = 0;
5353  if (LoadMI->hasOneMemOperand())
5354  Alignment = (*LoadMI->memoperands_begin())->getAlignment();
5355  else
5356  switch (LoadMI->getOpcode()) {
5357  case X86::AVX2_SETALLONES:
5358  case X86::AVX_SET0:
5359  Alignment = 32;
5360  break;
5361  case X86::V_SET0:
5362  case X86::V_SETALLONES:
5363  Alignment = 16;
5364  break;
5365  case X86::FsFLD0SD:
5366  Alignment = 8;
5367  break;
5368  case X86::FsFLD0SS:
5369  Alignment = 4;
5370  break;
5371  default:
5372  return nullptr;
5373  }
5374  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
5375  unsigned NewOpc = 0;
5376  switch (MI->getOpcode()) {
5377  default: return nullptr;
5378  case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
5379  case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
5380  case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
5381  case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
5382  }
5383  // Change to CMPXXri r, 0 first.
5384  MI->setDesc(get(NewOpc));
5385  MI->getOperand(1).ChangeToImmediate(0);
5386  } else if (Ops.size() != 1)
5387  return nullptr;
5388 
5389  // Make sure the subregisters match.
5390  // Otherwise we risk changing the size of the load.
5391  if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
5392  return nullptr;
5393 
5394  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
5395  switch (LoadMI->getOpcode()) {
5396  case X86::V_SET0:
5397  case X86::V_SETALLONES:
5398  case X86::AVX2_SETALLONES:
5399  case X86::AVX_SET0:
5400  case X86::FsFLD0SD:
5401  case X86::FsFLD0SS: {
5402  // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
5403  // Create a constant-pool entry and operands to load from it.
5404 
5405  // Medium and large mode can't fold loads this way.
5406  if (MF.getTarget().getCodeModel() != CodeModel::Small &&
5407  MF.getTarget().getCodeModel() != CodeModel::Kernel)
5408  return nullptr;
5409 
5410  // x86-32 PIC requires a PIC base register for constant pools.
5411  unsigned PICBase = 0;
5412  if (MF.getTarget().getRelocationModel() == Reloc::PIC_) {
5413  if (Subtarget.is64Bit())
5414  PICBase = X86::RIP;
5415  else
5416  // FIXME: PICBase = getGlobalBaseReg(&MF);
5417  // This doesn't work for several reasons.
5418  // 1. GlobalBaseReg may have been spilled.
5419  // 2. It may not be live at MI.
5420  return nullptr;
5421  }
5422 
5423  // Create a constant-pool entry.
5424  MachineConstantPool &MCP = *MF.getConstantPool();
5425  Type *Ty;
5426  unsigned Opc = LoadMI->getOpcode();
5427  if (Opc == X86::FsFLD0SS)
5428  Ty = Type::getFloatTy(MF.getFunction()->getContext());
5429  else if (Opc == X86::FsFLD0SD)
5430  Ty = Type::getDoubleTy(MF.getFunction()->getContext());
5431  else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
5432  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
5433  else
5434  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
5435 
5436  bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
5437  const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
5438  Constant::getNullValue(Ty);
5439  unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
5440 
5441  // Create operands to load from the constant pool entry.
5442  MOs.push_back(MachineOperand::CreateReg(PICBase, false));
5443  MOs.push_back(MachineOperand::CreateImm(1));
5444  MOs.push_back(MachineOperand::CreateReg(0, false));
5445  MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
5446  MOs.push_back(MachineOperand::CreateReg(0, false));
5447  break;
5448  }
5449  default: {
5450  if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
5451  return nullptr;
5452 
5453  // Folding a normal load. Just copy the load's address operands.
5454  MOs.append(LoadMI->operands_begin() + NumOps - X86::AddrNumOperands,
5455  LoadMI->operands_begin() + NumOps);
5456  break;
5457  }
5458  }
5459  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
5460  /*Size=*/0, Alignment, /*AllowCommute=*/true);
5461 }
5462 
5463 bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
5464  ArrayRef<unsigned> Ops) const {
5465  // Check switch flag
5466  if (NoFusing) return 0;
5467 
5468  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
5469  switch (MI->getOpcode()) {
5470  default: return false;
5471  case X86::TEST8rr:
5472  case X86::TEST16rr:
5473  case X86::TEST32rr:
5474  case X86::TEST64rr:
5475  return true;
5476  case X86::ADD32ri:
5477  // FIXME: AsmPrinter doesn't know how to handle
5478  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
5479  if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
5480  return false;
5481  break;
5482  }
5483  }
5484 
5485  if (Ops.size() != 1)
5486  return false;
5487 
5488  unsigned OpNum = Ops[0];
5489  unsigned Opc = MI->getOpcode();
5490  unsigned NumOps = MI->getDesc().getNumOperands();
5491  bool isTwoAddr = NumOps > 1 &&
5492  MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
5493 
5494  // Folding a memory location into the two-address part of a two-address
5495  // instruction is different from folding it in other places. It requires
5496  // replacing the *two* registers with the memory location.
5497  const DenseMap<unsigned,
5498  std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
5499  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
5500  OpcodeTablePtr = &RegOp2MemOpTable2Addr;
5501  } else if (OpNum == 0) {
5502  if (Opc == X86::MOV32r0)
5503  return true;
5504 
5505  OpcodeTablePtr = &RegOp2MemOpTable0;
5506  } else if (OpNum == 1) {
5507  OpcodeTablePtr = &RegOp2MemOpTable1;
5508  } else if (OpNum == 2) {
5509  OpcodeTablePtr = &RegOp2MemOpTable2;
5510  } else if (OpNum == 3) {
5511  OpcodeTablePtr = &RegOp2MemOpTable3;
5512  }
5513 
5514  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
5515  return true;
5516  return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
5517 }
5518 
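// unfoldMemoryOperand splits a folded load-execute instruction back into a
// separate load plus the register form (and a separate store when a store was
// folded). For example (an illustrative sketch with made-up registers):
//   %d = ADD32rm %a, %rdi, 1, %noreg, 0, %noreg
// is unfolded into
//   %t = MOV32rm %rdi, 1, %noreg, 0, %noreg
//   %d = ADD32rr %a, %t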
5519 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
5520  unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
5521  SmallVectorImpl<MachineInstr*> &NewMIs) const {
5522  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
5523  MemOp2RegOpTable.find(MI->getOpcode());
5524  if (I == MemOp2RegOpTable.end())
5525  return false;
5526  unsigned Opc = I->second.first;
5527  unsigned Index = I->second.second & TB_INDEX_MASK;
5528  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
5529  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
5530  if (UnfoldLoad && !FoldedLoad)
5531  return false;
5532  UnfoldLoad &= FoldedLoad;
5533  if (UnfoldStore && !FoldedStore)
5534  return false;
5535  UnfoldStore &= FoldedStore;
5536 
5537  const MCInstrDesc &MCID = get(Opc);
5538  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
5539  if (!MI->hasOneMemOperand() &&
5540  RC == &X86::VR128RegClass &&
5541  !Subtarget.isUnalignedMemAccessFast())
5542  // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
5543  // conservatively assume the address is unaligned. That's bad for
5544  // performance.
5545  return false;
5546  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
5547  SmallVector<MachineOperand,2> BeforeOps;
5548  SmallVector<MachineOperand,2> AfterOps;
5549  SmallVector<MachineOperand,4> ImpOps;
5550  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
5551  MachineOperand &Op = MI->getOperand(i);
5552  if (i >= Index && i < Index + X86::AddrNumOperands)
5553  AddrOps.push_back(Op);
5554  else if (Op.isReg() && Op.isImplicit())
5555  ImpOps.push_back(Op);
5556  else if (i < Index)
5557  BeforeOps.push_back(Op);
5558  else if (i > Index)
5559  AfterOps.push_back(Op);
5560  }
5561 
5562  // Emit the load instruction.
5563  if (UnfoldLoad) {
5564  std::pair<MachineInstr::mmo_iterator,
5565  MachineInstr::mmo_iterator> MMOs =
5566  MF.extractLoadMemRefs(MI->memoperands_begin(),
5567  MI->memoperands_end());
5568  loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
5569  if (UnfoldStore) {
5570  // Address operands cannot be marked isKill.
5571  for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
5572  MachineOperand &MO = NewMIs[0]->getOperand(i);
5573  if (MO.isReg())
5574  MO.setIsKill(false);
5575  }
5576  }
5577  }
5578 
5579  // Emit the data processing instruction.
5580  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
5581  MachineInstrBuilder MIB(MF, DataMI);
5582 
5583  if (FoldedStore)
5584  MIB.addReg(Reg, RegState::Define);
5585  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
5586  MIB.addOperand(BeforeOps[i]);
5587  if (FoldedLoad)
5588  MIB.addReg(Reg);
5589  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
5590  MIB.addOperand(AfterOps[i]);
5591  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
5592  MachineOperand &MO = ImpOps[i];
5593  MIB.addReg(MO.getReg(),
5594  getDefRegState(MO.isDef()) |
5595  RegState::Implicit |
5596  getKillRegState(MO.isKill()) |
5597  getDeadRegState(MO.isDead()) |
5598  getUndefRegState(MO.isUndef()));
5599  }
5600  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
5601  switch (DataMI->getOpcode()) {
5602  default: break;
5603  case X86::CMP64ri32:
5604  case X86::CMP64ri8:
5605  case X86::CMP32ri:
5606  case X86::CMP32ri8:
5607  case X86::CMP16ri:
5608  case X86::CMP16ri8:
5609  case X86::CMP8ri: {
5610  MachineOperand &MO0 = DataMI->getOperand(0);
5611  MachineOperand &MO1 = DataMI->getOperand(1);
5612  if (MO1.getImm() == 0) {
5613  unsigned NewOpc;
5614  switch (DataMI->getOpcode()) {
5615  default: llvm_unreachable("Unreachable!");
5616  case X86::CMP64ri8:
5617  case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
5618  case X86::CMP32ri8:
5619  case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
5620  case X86::CMP16ri8:
5621  case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
5622  case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
5623  }
5624  DataMI->setDesc(get(NewOpc));
5625  MO1.ChangeToRegister(MO0.getReg(), false);
5626  }
5627  }
5628  }
5629  NewMIs.push_back(DataMI);
5630 
5631  // Emit the store instruction.
5632  if (UnfoldStore) {
5633  const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
5634  std::pair<MachineInstr::mmo_iterator,
5635  MachineInstr::mmo_iterator> MMOs =
5636  MF.extractStoreMemRefs(MI->memoperands_begin(),
5637  MI->memoperands_end());
5638  storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
5639  }
5640 
5641  return true;
5642 }
5643 
5644 bool
5645 X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
5646  SmallVectorImpl<SDNode*> &NewNodes) const {
5647  if (!N->isMachineOpcode())
5648  return false;
5649 
5650  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
5651  MemOp2RegOpTable.find(N->getMachineOpcode());
5652  if (I == MemOp2RegOpTable.end())
5653  return false;
5654  unsigned Opc = I->second.first;
5655  unsigned Index = I->second.second & TB_INDEX_MASK;
5656  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
5657  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
5658  const MCInstrDesc &MCID = get(Opc);
5659  MachineFunction &MF = DAG.getMachineFunction();
5660  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
5661  unsigned NumDefs = MCID.NumDefs;
5662  std::vector<SDValue> AddrOps;
5663  std::vector<SDValue> BeforeOps;
5664  std::vector<SDValue> AfterOps;
5665  SDLoc dl(N);
5666  unsigned NumOps = N->getNumOperands();
5667  for (unsigned i = 0; i != NumOps-1; ++i) {
5668  SDValue Op = N->getOperand(i);
5669  if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
5670  AddrOps.push_back(Op);
5671  else if (i < Index-NumDefs)
5672  BeforeOps.push_back(Op);
5673  else if (i > Index-NumDefs)
5674  AfterOps.push_back(Op);
5675  }
5676  SDValue Chain = N->getOperand(NumOps-1);
5677  AddrOps.push_back(Chain);
5678 
5679  // Emit the load instruction.
5680  SDNode *Load = nullptr;
5681  if (FoldedLoad) {
5682  EVT VT = *RC->vt_begin();
5683  std::pair<MachineInstr::mmo_iterator,
5684  MachineInstr::mmo_iterator> MMOs =
5685  MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
5686  cast<MachineSDNode>(N)->memoperands_end());
5687  if (!(*MMOs.first) &&
5688  RC == &X86::VR128RegClass &&
5689  !Subtarget.isUnalignedMemAccessFast())
5690  // Do not introduce a slow unaligned load.
5691  return false;
5692  unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
5693  bool isAligned = (*MMOs.first) &&
5694  (*MMOs.first)->getAlignment() >= Alignment;
5695  Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
5696  VT, MVT::Other, AddrOps);
5697  NewNodes.push_back(Load);
5698 
5699  // Preserve memory reference information.
5700  cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
5701  }
5702 
5703  // Emit the data processing instruction.
5704  std::vector<EVT> VTs;
5705  const TargetRegisterClass *DstRC = nullptr;
5706  if (MCID.getNumDefs() > 0) {
5707  DstRC = getRegClass(MCID, 0, &RI, MF);
5708  VTs.push_back(*DstRC->vt_begin());
5709  }
5710  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
5711  EVT VT = N->getValueType(i);
5712  if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
5713  VTs.push_back(VT);
5714  }
5715  if (Load)
5716  BeforeOps.push_back(SDValue(Load, 0));
5717  BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
5718  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
5719  NewNodes.push_back(NewNode);
5720 
5721  // Emit the store instruction.
5722  if (FoldedStore) {
5723  AddrOps.pop_back();
5724  AddrOps.push_back(SDValue(NewNode, 0));
5725  AddrOps.push_back(Chain);
5726  std::pair<MachineInstr::mmo_iterator,
5727  MachineInstr::mmo_iterator> MMOs =
5728  MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
5729  cast<MachineSDNode>(N)->memoperands_end());
5730  if (!(*MMOs.first) &&
5731  RC == &X86::VR128RegClass &&
5732  !Subtarget.isUnalignedMemAccessFast())
5733  // Do not introduce a slow unaligned store.
5734  return false;
5735  unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
5736  bool isAligned = (*MMOs.first) &&
5737  (*MMOs.first)->getAlignment() >= Alignment;
5738  SDNode *Store =
5739  DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
5740  dl, MVT::Other, AddrOps);
5741  NewNodes.push_back(Store);
5742 
5743  // Preserve memory reference information.
5744  cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
5745  }
5746 
5747  return true;
5748 }
5749 
5750 unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
5751  bool UnfoldLoad, bool UnfoldStore,
5752  unsigned *LoadRegIndex) const {
5753  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
5754  MemOp2RegOpTable.find(Opc);
5755  if (I == MemOp2RegOpTable.end())
5756  return 0;
5757  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
5758  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
5759  if (UnfoldLoad && !FoldedLoad)
5760  return 0;
5761  if (UnfoldStore && !FoldedStore)
5762  return 0;
5763  if (LoadRegIndex)
5764  *LoadRegIndex = I->second.second & TB_INDEX_MASK;
5765  return I->second.first;
5766 }
5767 
5768 bool
5769 X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
5770  int64_t &Offset1, int64_t &Offset2) const {
5771  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
5772  return false;
5773  unsigned Opc1 = Load1->getMachineOpcode();
5774  unsigned Opc2 = Load2->getMachineOpcode();
5775  switch (Opc1) {
5776  default: return false;
5777  case X86::MOV8rm:
5778  case X86::MOV16rm:
5779  case X86::MOV32rm:
5780  case X86::MOV64rm:
5781  case X86::LD_Fp32m:
5782  case X86::LD_Fp64m:
5783  case X86::LD_Fp80m:
5784  case X86::MOVSSrm:
5785  case X86::MOVSDrm:
5786  case X86::MMX_MOVD64rm:
5787  case X86::MMX_MOVQ64rm:
5788  case X86::FsMOVAPSrm:
5789  case X86::FsMOVAPDrm:
5790  case X86::MOVAPSrm:
5791  case X86::MOVUPSrm:
5792  case X86::MOVAPDrm:
5793  case X86::MOVDQArm:
5794  case X86::MOVDQUrm:
5795  // AVX load instructions
5796  case X86::VMOVSSrm:
5797  case X86::VMOVSDrm:
5798  case X86::FsVMOVAPSrm:
5799  case X86::FsVMOVAPDrm:
5800  case X86::VMOVAPSrm:
5801  case X86::VMOVUPSrm:
5802  case X86::VMOVAPDrm:
5803  case X86::VMOVDQArm:
5804  case X86::VMOVDQUrm:
5805  case X86::VMOVAPSYrm:
5806  case X86::VMOVUPSYrm:
5807  case X86::VMOVAPDYrm:
5808  case X86::VMOVDQAYrm:
5809  case X86::VMOVDQUYrm:
5810  break;
5811  }
5812  switch (Opc2) {
5813  default: return false;
5814  case X86::MOV8rm:
5815  case X86::MOV16rm:
5816  case X86::MOV32rm:
5817  case X86::MOV64rm:
5818  case X86::LD_Fp32m:
5819  case X86::LD_Fp64m:
5820  case X86::LD_Fp80m:
5821  case X86::MOVSSrm:
5822  case X86::MOVSDrm:
5823  case X86::MMX_MOVD64rm:
5824  case X86::MMX_MOVQ64rm:
5825  case X86::FsMOVAPSrm:
5826  case X86::FsMOVAPDrm:
5827  case X86::MOVAPSrm:
5828  case X86::MOVUPSrm:
5829  case X86::MOVAPDrm:
5830  case X86::MOVDQArm:
5831  case X86::MOVDQUrm:
5832  // AVX load instructions
5833  case X86::VMOVSSrm:
5834  case X86::VMOVSDrm:
5835  case X86::FsVMOVAPSrm:
5836  case X86::FsVMOVAPDrm:
5837  case X86::VMOVAPSrm:
5838  case X86::VMOVUPSrm:
5839  case X86::VMOVAPDrm:
5840  case X86::VMOVDQArm:
5841  case X86::VMOVDQUrm:
5842  case X86::VMOVAPSYrm:
5843  case X86::VMOVUPSYrm:
5844  case X86::VMOVAPDYrm:
5845  case X86::VMOVDQAYrm:
5846  case X86::VMOVDQUYrm:
5847  break;
5848  }
5849 
5850  // Check if chain operands and base addresses match.
5851  if (Load1->getOperand(0) != Load2->getOperand(0) ||
5852  Load1->getOperand(5) != Load2->getOperand(5))
5853  return false;
5854  // Segment operands should match as well.
5855  if (Load1->getOperand(4) != Load2->getOperand(4))
5856  return false;
5857  // Scale should be 1, Index should be Reg0.
5858  if (Load1->getOperand(1) == Load2->getOperand(1) &&
5859  Load1->getOperand(2) == Load2->getOperand(2)) {
5860  if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
5861  return false;
5862 
5863  // Now let's examine the displacements.
5864  if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
5865  isa<ConstantSDNode>(Load2->getOperand(3))) {
5866  Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
5867  Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
5868  return true;
5869  }
5870  }
5871  return false;
5872 }
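// For reference, the operand layout assumed by the checks above for these
// load nodes is the usual X86 memory reference: operand 0 is the base
// register, 1 the scale, 2 the index register, 3 the displacement, 4 the
// segment register, and the final operand is the chain. X86::AddrNumOperands
// covers the five address operands; two loads share a base pointer when only
// their displacements (operand 3) differ.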
5873 
5874 bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
5875  int64_t Offset1, int64_t Offset2,
5876  unsigned NumLoads) const {
5877  assert(Offset2 > Offset1);
5878  if ((Offset2 - Offset1) / 8 > 64)
5879  return false;
5880 
5881  unsigned Opc1 = Load1->getMachineOpcode();
5882  unsigned Opc2 = Load2->getMachineOpcode();
5883  if (Opc1 != Opc2)
5884  return false; // FIXME: overly conservative?
5885 
5886  switch (Opc1) {
5887  default: break;
5888  case X86::LD_Fp32m:
5889  case X86::LD_Fp64m:
5890  case X86::LD_Fp80m:
5891  case X86::MMX_MOVD64rm:
5892  case X86::MMX_MOVQ64rm:
5893  return false;
5894  }
5895 
5896  EVT VT = Load1->getValueType(0);
5897  switch (VT.getSimpleVT().SimpleTy) {
5898  default:
5899  // XMM registers. In 64-bit mode we can be a bit more aggressive since we
5900  // have 16 of them to play with.
5901  if (Subtarget.is64Bit()) {
5902  if (NumLoads >= 3)
5903  return false;
5904  } else if (NumLoads) {
5905  return false;
5906  }
5907  break;
5908  case MVT::i8:
5909  case MVT::i16:
5910  case MVT::i32:
5911  case MVT::i64:
5912  case MVT::f32:
5913  case MVT::f64:
5914  if (NumLoads)
5915  return false;
5916  break;
5917  }
5918 
5919  return true;
5920 }
5921 
5922 bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
5923  MachineInstr *Second) const {
5924  // Check if this processor supports macro-fusion. Since this is a minor
5925  // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
5926  // proxy for SandyBridge+.
5927  if (!Subtarget.hasAVX())
5928  return false;
5929 
5930  enum {
5931  FuseTest,
5932  FuseCmp,
5933  FuseInc
5934  } FuseKind;
5935 
5936  switch(Second->getOpcode()) {
5937  default:
5938  return false;
5939  case X86::JE_1:
5940  case X86::JNE_1:
5941  case X86::JL_1:
5942  case X86::JLE_1:
5943  case X86::JG_1:
5944  case X86::JGE_1:
5945  FuseKind = FuseInc;
5946  break;
5947  case X86::JB_1:
5948  case X86::JBE_1:
5949  case X86::JA_1:
5950  case X86::JAE_1:
5951  FuseKind = FuseCmp;
5952  break;
5953  case X86::JS_1:
5954  case X86::JNS_1:
5955  case X86::JP_1:
5956  case X86::JNP_1:
5957  case X86::JO_1:
5958  case X86::JNO_1:
5959  FuseKind = FuseTest;
5960  break;
5961  }
5962  switch (First->getOpcode()) {
5963  default:
5964  return false;
5965  case X86::TEST8rr:
5966  case X86::TEST16rr:
5967  case X86::TEST32rr:
5968  case X86::TEST64rr:
5969  case X86::TEST8ri:
5970  case X86::TEST16ri:
5971  case X86::TEST32ri:
5972  case X86::TEST32i32:
5973  case X86::TEST64i32:
5974  case X86::TEST64ri32:
5975  case X86::TEST8rm:
5976  case X86::TEST16rm:
5977  case X86::TEST32rm:
5978  case X86::TEST64rm:
5979  case X86::TEST8ri_NOREX:
5980  case X86::AND16i16:
5981  case X86::AND16ri:
5982  case X86::AND16ri8:
5983  case X86::AND16rm:
5984  case X86::AND16rr:
5985  case X86::AND32i32:
5986  case X86::AND32ri:
5987  case X86::AND32ri8:
5988  case X86::AND32rm:
5989  case X86::AND32rr:
5990  case X86::AND64i32:
5991  case X86::AND64ri32:
5992  case X86::AND64ri8:
5993  case X86::AND64rm:
5994  case X86::AND64rr:
5995  case X86::AND8i8:
5996  case X86::AND8ri:
5997  case X86::AND8rm:
5998  case X86::AND8rr:
5999  return true;
6000  case X86::CMP16i16:
6001  case X86::CMP16ri:
6002  case X86::CMP16ri8:
6003  case X86::CMP16rm:
6004  case X86::CMP16rr:
6005  case X86::CMP32i32:
6006  case X86::CMP32ri:
6007  case X86::CMP32ri8:
6008  case X86::CMP32rm:
6009  case X86::CMP32rr:
6010  case X86::CMP64i32:
6011  case X86::CMP64ri32:
6012  case X86::CMP64ri8:
6013  case X86::CMP64rm:
6014  case X86::CMP64rr:
6015  case X86::CMP8i8:
6016  case X86::CMP8ri:
6017  case X86::CMP8rm:
6018  case X86::CMP8rr:
6019  case X86::ADD16i16:
6020  case X86::ADD16ri:
6021  case X86::ADD16ri8:
6022  case X86::ADD16ri8_DB:
6023  case X86::ADD16ri_DB:
6024  case X86::ADD16rm:
6025  case X86::ADD16rr:
6026  case X86::ADD16rr_DB:
6027  case X86::ADD32i32:
6028  case X86::ADD32ri:
6029  case X86::ADD32ri8:
6030  case X86::ADD32ri8_DB:
6031  case X86::ADD32ri_DB:
6032  case X86::ADD32rm:
6033  case X86::ADD32rr:
6034  case X86::ADD32rr_DB:
6035  case X86::ADD64i32:
6036  case X86::ADD64ri32:
6037  case X86::ADD64ri32_DB:
6038  case X86::ADD64ri8:
6039  case X86::ADD64ri8_DB:
6040  case X86::ADD64rm:
6041  case X86::ADD64rr:
6042  case X86::ADD64rr_DB:
6043  case X86::ADD8i8:
6044  case X86::ADD8mi:
6045  case X86::ADD8mr:
6046  case X86::ADD8ri:
6047  case X86::ADD8rm:
6048  case X86::ADD8rr:
6049  case X86::SUB16i16:
6050  case X86::SUB16ri:
6051  case X86::SUB16ri8:
6052  case X86::SUB16rm:
6053  case X86::SUB16rr:
6054  case X86::SUB32i32:
6055  case X86::SUB32ri:
6056  case X86::SUB32ri8:
6057  case X86::SUB32rm:
6058  case X86::SUB32rr:
6059  case X86::SUB64i32:
6060  case X86::SUB64ri32:
6061  case X86::SUB64ri8:
6062  case X86::SUB64rm:
6063  case X86::SUB64rr:
6064  case X86::SUB8i8:
6065  case X86::SUB8ri:
6066  case X86::SUB8rm:
6067  case X86::SUB8rr:
6068  return FuseKind == FuseCmp || FuseKind == FuseInc;
6069  case X86::INC16r:
6070  case X86::INC32r:
6071  case X86::INC64r:
6072  case X86::INC8r:
6073  case X86::DEC16r:
6074  case X86::DEC32r:
6075  case X86::DEC64r:
6076  case X86::DEC8r:
6077  return FuseKind == FuseInc;
6078  }
6079 }
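// A small illustration of the macro-fusion modeled above (the instruction
// pair is only an example, not taken from this file): on cores where hasAVX()
// holds, a flag-setting instruction immediately followed by a conditional
// branch, e.g.
//
//   cmp  eax, 8
//   jl   .Ltarget
//
// can decode as a single fused micro-op. INC/DEC leave CF unchanged, which is
// why they only pair with the FuseInc branch kinds listed in the switch, while
// CMP/ADD/SUB also pair with the carry-based FuseCmp branches.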
6080 
6081 bool X86InstrInfo::
6082 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
6083  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
6084  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
6085  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
6086  return true;
6087  Cond[0].setImm(GetOppositeBranchCondition(CC));
6088  return false;
6089 }
6090 
6091 bool X86InstrInfo::
6092 isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
6093  // FIXME: Return false for x87 stack register classes for now. We can't
6094  // allow any loads of these registers before FpGet_ST0_80.
6095  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
6096  RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
6097 }
6098 
6099 /// Return a virtual register initialized with the
6100 /// global base register value. Output instructions required to
6101 /// initialize the register in the function entry block, if necessary.
6102 ///
6103 /// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
6104 ///
6105 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
6106  assert(!Subtarget.is64Bit() &&
6107  "X86-64 PIC uses RIP relative addressing");
6108 
6109  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
6110  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
6111  if (GlobalBaseReg != 0)
6112  return GlobalBaseReg;
6113 
6114  // Create the register. The code to initialize it is inserted
6115  // later, by the CGBR pass (below).
6116  MachineRegisterInfo &RegInfo = MF->getRegInfo();
6117  GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
6118  X86FI->setGlobalBaseReg(GlobalBaseReg);
6119  return GlobalBaseReg;
6120 }
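// For a 32-bit PIC function that needs this register, the CGBR pass further
// down in this file materializes it in the entry block roughly as
//
//   %vreg = MOVPC32r 0                                // get the PC
//   %vreg = ADD32ri %vreg, $_GLOBAL_OFFSET_TABLE_     // GOT-style PIC only
//
// so every later use simply reads the virtual register returned here. This is
// a sketch of the sequence built below, not additional code.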
6121 
6122 // These are the replaceable SSE instructions. Some of these have Int variants
6123 // that we don't include here. We don't want to replace instructions selected
6124 // by intrinsics.
6125 static const uint16_t ReplaceableInstrs[][3] = {
6126  //PackedSingle PackedDouble PackedInt
6127  { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
6128  { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
6129  { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
6130  { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
6131  { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
6132  { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
6133  { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
6134  { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
6135  { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
6136  { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
6137  { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
6138  { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
6139  { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
6140  { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
6141  { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
6142  // AVX 128-bit support
6143  { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
6144  { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
6145  { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
6146  { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
6147  { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
6148  { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
6149  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
6150  { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
6151  { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
6152  { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
6153  { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
6154  { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
6155  { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
6156  { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
6157  { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
6158  // AVX 256-bit support
6159  { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
6160  { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
6161  { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
6162  { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
6163  { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
6164  { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
6165 };
6166 
6167 static const uint16_t ReplaceableInstrsAVX2[][3] = {
6168  //PackedSingle PackedDouble PackedInt
6169  { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
6170  { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
6171  { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
6172  { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
6173  { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
6174  { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
6175  { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
6176  { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
6177  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
6178  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
6179  { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
6180  { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
6181  { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
6182  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
6183  { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
6184  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
6185  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
6186  { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
6187  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
6188  { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}
6189 };
6190 
6191 // FIXME: Some shuffle and unpack instructions have equivalents in different
6192 // domains, but they require a bit more work than just switching opcodes.
6193 
6194 static const uint16_t *lookup(unsigned opcode, unsigned domain) {
6195  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
6196  if (ReplaceableInstrs[i][domain-1] == opcode)
6197  return ReplaceableInstrs[i];
6198  return nullptr;
6199 }
6200 
6201 static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
6202  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
6203  if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
6204  return ReplaceableInstrsAVX2[i];
6205  return nullptr;
6206 }
6207 
6208 std::pair<uint16_t, uint16_t>
6210  uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
6211  bool hasAVX2 = Subtarget.hasAVX2();
6212  uint16_t validDomains = 0;
6213  if (domain && lookup(MI->getOpcode(), domain))
6214  validDomains = 0xe;
6215  else if (domain && lookupAVX2(MI->getOpcode(), domain))
6216  validDomains = hasAVX2 ? 0xe : 0x6;
6217  return std::make_pair(domain, validDomains);
6218 }
6219 
6220 void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
6221  assert(Domain>0 && Domain<4 && "Invalid execution domain");
6222  uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
6223  assert(dom && "Not an SSE instruction");
6224  const uint16_t *table = lookup(MI->getOpcode(), dom);
6225  if (!table) { // try the other table
6226  assert((Subtarget.hasAVX2() || Domain < 3) &&
6227  "256-bit vector operations only available in AVX2");
6228  table = lookupAVX2(MI->getOpcode(), dom);
6229  }
6230  assert(table && "Cannot change domain");
6231  MI->setDesc(get(table[Domain-1]));
6232 }
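// A small usage sketch for the two hooks above (illustrative opcodes only):
// XORPSrr, XORPDrr and PXORrr perform the same bitwise operation but execute
// in different SIMD domains. For an instruction found in the row
//   { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }
// getExecutionDomain() reports validDomains = 0xe (PackedSingle, PackedDouble
// and PackedInt all legal), and the execution-dependency fixup pass can then
// call setExecutionDomain(MI, 3) to rewrite an XORPSrr into PXORrr when its
// neighbours are integer instructions, avoiding a domain-crossing penalty.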
6233 
6234 /// Return the noop instruction to use for a noop.
6235 void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
6236  NopInst.setOpcode(X86::NOOP);
6237 }
6238 
6239 // This code must remain in sync with getJumpInstrTableEntryBound in this class!
6240 // In particular, getJumpInstrTableEntryBound must always return an upper bound
6241 // on the encoding lengths of the instructions generated by
6242 // getUnconditionalBranch and getTrap.
6243 void X86InstrInfo::getUnconditionalBranch(
6244  MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
6245  Branch.setOpcode(X86::JMP_1);
6246  Branch.addOperand(MCOperand::createExpr(BranchTarget));
6247 }
6248 
6249 // This code must remain in sync with getJumpInstrTableEntryBound in this class!
6250 // In particular, getJumpInstrTableEntryBound must always return an upper bound
6251 // on the encoding lengths of the instructions generated by
6252 // getUnconditionalBranch and getTrap.
6253 void X86InstrInfo::getTrap(MCInst &MI) const {
6254  MI.setOpcode(X86::TRAP);
6255 }
6256 
6257 // See getTrap and getUnconditionalBranch for conditions on the value returned
6258 // by this function.
6259 unsigned X86InstrInfo::getJumpInstrTableEntryBound() const {
6260  // 5 bytes suffice: JMP_4 Symbol@PLT uses 1 byte (E9) for the JMP_4 and 4
6261  // bytes for the symbol offset. And TRAP is ud2, which is two bytes (0F 0B).
6262  return 5;
6263 }
6264 
6265 bool X86InstrInfo::isHighLatencyDef(int opc) const {
6266  switch (opc) {
6267  default: return false;
6268  case X86::DIVSDrm:
6269  case X86::DIVSDrm_Int:
6270  case X86::DIVSDrr:
6271  case X86::DIVSDrr_Int:
6272  case X86::DIVSSrm:
6273  case X86::DIVSSrm_Int:
6274  case X86::DIVSSrr:
6275  case X86::DIVSSrr_Int:
6276  case X86::SQRTPDm:
6277  case X86::SQRTPDr:
6278  case X86::SQRTPSm:
6279  case X86::SQRTPSr:
6280  case X86::SQRTSDm:
6281  case X86::SQRTSDm_Int:
6282  case X86::SQRTSDr:
6283  case X86::SQRTSDr_Int:
6284  case X86::SQRTSSm:
6285  case X86::SQRTSSm_Int:
6286  case X86::SQRTSSr:
6287  case X86::SQRTSSr_Int:
6288  // AVX instructions with high latency
6289  case X86::VDIVSDrm:
6290  case X86::VDIVSDrm_Int:
6291  case X86::VDIVSDrr:
6292  case X86::VDIVSDrr_Int:
6293  case X86::VDIVSSrm:
6294  case X86::VDIVSSrm_Int:
6295  case X86::VDIVSSrr:
6296  case X86::VDIVSSrr_Int:
6297  case X86::VSQRTPDm:
6298  case X86::VSQRTPDr:
6299  case X86::VSQRTPSm:
6300  case X86::VSQRTPSr:
6301  case X86::VSQRTSDm:
6302  case X86::VSQRTSDm_Int:
6303  case X86::VSQRTSDr:
6304  case X86::VSQRTSSm:
6305  case X86::VSQRTSSm_Int:
6306  case X86::VSQRTSSr:
6307  case X86::VSQRTPDZm:
6308  case X86::VSQRTPDZr:
6309  case X86::VSQRTPSZm:
6310  case X86::VSQRTPSZr:
6311  case X86::VSQRTSDZm:
6312  case X86::VSQRTSDZm_Int:
6313  case X86::VSQRTSDZr:
6314  case X86::VSQRTSSZm_Int:
6315  case X86::VSQRTSSZr:
6316  case X86::VSQRTSSZm:
6317  case X86::VDIVSDZrm:
6318  case X86::VDIVSDZrr:
6319  case X86::VDIVSSZrm:
6320  case X86::VDIVSSZrr:
6321 
6322  case X86::VGATHERQPSZrm:
6323  case X86::VGATHERQPDZrm:
6324  case X86::VGATHERDPDZrm:
6325  case X86::VGATHERDPSZrm:
6326  case X86::VPGATHERQDZrm:
6327  case X86::VPGATHERQQZrm:
6328  case X86::VPGATHERDDZrm:
6329  case X86::VPGATHERDQZrm:
6330  case X86::VSCATTERQPDZmr:
6331  case X86::VSCATTERQPSZmr:
6332  case X86::VSCATTERDPDZmr:
6333  case X86::VSCATTERDPSZmr:
6334  case X86::VPSCATTERQDZmr:
6335  case X86::VPSCATTERQQZmr:
6336  case X86::VPSCATTERDDZmr:
6337  case X86::VPSCATTERDQZmr:
6338  return true;
6339  }
6340 }
6341 
6342 bool X86InstrInfo::
6343 hasHighOperandLatency(const TargetSchedModel &SchedModel,
6344  const MachineRegisterInfo *MRI,
6345  const MachineInstr *DefMI, unsigned DefIdx,
6346  const MachineInstr *UseMI, unsigned UseIdx) const {
6347  return isHighLatencyDef(DefMI->getOpcode());
6348 }
6349 
6350 static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
6351  const MachineBasicBlock *MBB) {
6352  assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
6353  const MachineOperand &Op1 = Inst.getOperand(1);
6354  const MachineOperand &Op2 = Inst.getOperand(2);
6355  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
6356 
6357  // We need virtual register definitions.
6358  MachineInstr *MI1 = nullptr;
6359  MachineInstr *MI2 = nullptr;
6360  if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
6361  MI1 = MRI.getUniqueVRegDef(Op1.getReg());
6362  if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
6363  MI2 = MRI.getUniqueVRegDef(Op2.getReg());
6364 
6365  // And they need to be in the trace (otherwise, they won't have a depth).
6366  if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
6367  return true;
6368 
6369  return false;
6370 }
6371 
6372 static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
6373  const MachineBasicBlock *MBB = Inst.getParent();
6374  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
6375  MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
6376  MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
6377  unsigned AssocOpcode = Inst.getOpcode();
6378 
6379  // If only one operand has the same opcode and it's the second source operand,
6380  // the operands must be commuted.
6381  Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
6382  if (Commuted)
6383  std::swap(MI1, MI2);
6384 
6385  // 1. The previous instruction must be the same type as Inst.
6386  // 2. The previous instruction must have virtual register definitions for its
6387  // operands in the same basic block as Inst.
6388  // 3. The previous instruction's result must only be used by Inst.
6389  if (MI1->getOpcode() == AssocOpcode &&
6390  hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
6391  MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
6392  return true;
6393 
6394  return false;
6395 }
6396 
6397 // TODO: There are many more machine instruction opcodes to match:
6398 // 1. Other data types (integer, vectors)
6399 // 2. Other math / logic operations (and, or)
6400 static bool isAssociativeAndCommutative(unsigned Opcode) {
6401  switch (Opcode) {
6402  case X86::ADDSDrr:
6403  case X86::ADDSSrr:
6404  case X86::VADDSDrr:
6405  case X86::VADDSSrr:
6406  case X86::MULSDrr:
6407  case X86::MULSSrr:
6408  case X86::VMULSDrr:
6409  case X86::VMULSSrr:
6410  return true;
6411  default:
6412  return false;
6413  }
6414 }
6415 
6416 /// Return true if the input instruction is part of a chain of dependent ops
6417 /// that are suitable for reassociation, otherwise return false.
6418 /// If the instruction's operands must be commuted to have a previous
6419 /// instruction of the same type define the first source operand, Commuted will
6420 /// be set to true.
6421 static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
6422  // 1. The operation must be associative and commutative.
6423  // 2. The instruction must have virtual register definitions for its
6424  // operands in the same basic block.
6425  // 3. The instruction must have a reassociable sibling.
6426  if (isAssociativeAndCommutative(Inst.getOpcode()) &&
6427  hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
6428  hasReassocSibling(Inst, Commuted))
6429  return true;
6430 
6431  return false;
6432 }
6433 
6434 // FIXME: This has the potential to be expensive (compile time) while not
6435 // improving the code at all. Some ways to limit the overhead:
6436 // 1. Track successful transforms; bail out if hit rate gets too low.
6437 // 2. Only enable at -O3 or some other non-default optimization level.
6438 // 3. Pre-screen pattern candidates here: if an operand of the previous
6439 // instruction is known to not increase the critical path, then don't match
6440 // that pattern.
6441 bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
6442  SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
6443  if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
6444  return false;
6445 
6446  // TODO: There is nothing x86-specific here except the instruction type.
6447  // This logic could be hoisted into the machine combiner pass itself.
6448 
6449  // Look for this reassociation pattern:
6450  // B = A op X (Prev)
6451  // C = B op Y (Root)
6452 
6453  bool Commute;
6454  if (isReassocCandidate(Root, Commute)) {
6455  // We found a sequence of instructions that may be suitable for a
6456  // reassociation of operands to increase ILP. Specify each commutation
6457  // possibility for the Prev instruction in the sequence and let the
6458  // machine combiner decide if changing the operands is worthwhile.
6459  if (Commute) {
6460  Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
6461  Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
6462  } else {
6463  Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
6464  Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
6465  }
6466  return true;
6467  }
6468 
6469  return false;
6470 }
6471 
6472 /// Attempt the following reassociation to reduce critical path length:
6473 /// B = A op X (Prev)
6474 /// C = B op Y (Root)
6475 /// ===>
6476 /// B = X op Y
6477 /// C = A op B
6478 static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
6479  MachineCombinerPattern::MC_PATTERN Pattern,
6480  SmallVectorImpl<MachineInstr *> &InsInstrs,
6481  SmallVectorImpl<MachineInstr *> &DelInstrs,
6482  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6483  MachineFunction *MF = Root.getParent()->getParent();
6484  MachineRegisterInfo &MRI = MF->getRegInfo();
6485  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
6486  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6487  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
6488 
6489  // This array encodes the operand index for each parameter because the
6490  // operands may be commuted. Each row corresponds to a pattern value,
6491  // and each column specifies the index of A, B, X, Y.
6492  unsigned OpIdx[4][4] = {
6493  { 1, 1, 2, 2 },
6494  { 1, 2, 2, 1 },
6495  { 2, 1, 1, 2 },
6496  { 2, 2, 1, 1 }
6497  };
6498 
6499  MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
6500  MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
6501  MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
6502  MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
6503  MachineOperand &OpC = Root.getOperand(0);
6504 
6505  unsigned RegA = OpA.getReg();
6506  unsigned RegB = OpB.getReg();
6507  unsigned RegX = OpX.getReg();
6508  unsigned RegY = OpY.getReg();
6509  unsigned RegC = OpC.getReg();
6510 
6511  if (TargetRegisterInfo::isVirtualRegister(RegA))
6512  MRI.constrainRegClass(RegA, RC);
6513  if (TargetRegisterInfo::isVirtualRegister(RegB))
6514  MRI.constrainRegClass(RegB, RC);
6515  if (TargetRegisterInfo::isVirtualRegister(RegX))
6516  MRI.constrainRegClass(RegX, RC);
6517  if (TargetRegisterInfo::isVirtualRegister(RegY))
6518  MRI.constrainRegClass(RegY, RC);
6519  if (TargetRegisterInfo::isVirtualRegister(RegC))
6520  MRI.constrainRegClass(RegC, RC);
6521 
6522  // Create a new virtual register for the result of (X op Y) instead of
6523  // recycling RegB because the MachineCombiner's computation of the critical
6524  // path requires a new register definition rather than an existing one.
6525  unsigned NewVR = MRI.createVirtualRegister(RC);
6526  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6527 
6528  unsigned Opcode = Root.getOpcode();
6529  bool KillA = OpA.isKill();
6530  bool KillX = OpX.isKill();
6531  bool KillY = OpY.isKill();
6532 
6533  // Create new instructions for insertion.
6534  MachineInstrBuilder MIB1 =
6535  BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
6536  .addReg(RegX, getKillRegState(KillX))
6537  .addReg(RegY, getKillRegState(KillY));
6538  InsInstrs.push_back(MIB1);
6539 
6540  MachineInstrBuilder MIB2 =
6541  BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
6542  .addReg(RegA, getKillRegState(KillA))
6543  .addReg(NewVR, getKillRegState(true));
6544  InsInstrs.push_back(MIB2);
6545 
6546  // Record old instructions for deletion.
6547  DelInstrs.push_back(&Prev);
6548  DelInstrs.push_back(&Root);
6549 }
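// A concrete instance of the transform performed by reassociateOps(), using
// hypothetical virtual registers:
//
//   %t1 = ADDSSrr %a, %x      // Prev
//   %t2 = ADDSSrr %t1, %y     // Root
//
// becomes
//
//   %n  = ADDSSrr %x, %y      // new independent computation (NewVR)
//   %t2 = ADDSSrr %a, %n      // RegC is reused for the final result
//
// If %a is produced late (for example by a long-latency divide), %x op %y can
// now execute while waiting for %a, shortening the critical path.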
6550 
6551 void X86InstrInfo::genAlternativeCodeSequence(
6552  MachineInstr &Root,
6553  MachineCombinerPattern::MC_PATTERN Pattern,
6554  SmallVectorImpl<MachineInstr *> &InsInstrs,
6555  SmallVectorImpl<MachineInstr *> &DelInstrs,
6556  DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
6557  MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
6558 
6559  // Select the previous instruction in the sequence based on the input pattern.
6560  MachineInstr *Prev = nullptr;
6561  switch (Pattern) {
6562  case MachineCombinerPattern::MC_REASSOC_AX_BY:
6563  case MachineCombinerPattern::MC_REASSOC_XA_BY:
6564  Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6565  break;
6566  case MachineCombinerPattern::MC_REASSOC_AX_YB:
6567  case MachineCombinerPattern::MC_REASSOC_XA_YB:
6568  Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6569  }
6570  assert(Prev && "Unknown pattern for machine combiner");
6571 
6572  reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
6573  return;
6574 }
6575 
6576 namespace {
6577  /// Create Global Base Reg pass. This initializes the PIC
6578  /// global base register for x86-32.
6579  struct CGBR : public MachineFunctionPass {
6580  static char ID;
6581  CGBR() : MachineFunctionPass(ID) {}
6582 
6583  bool runOnMachineFunction(MachineFunction &MF) override {
6584  const X86TargetMachine *TM =
6585  static_cast<const X86TargetMachine *>(&MF.getTarget());
6586  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
6587 
6588  // Don't do anything if this is 64-bit as 64-bit PIC
6589  // uses RIP relative addressing.
6590  if (STI.is64Bit())
6591  return false;
6592 
6593  // Only emit a global base reg in PIC mode.
6594  if (TM->getRelocationModel() != Reloc::PIC_)
6595  return false;
6596 
6597  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
6598  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
6599 
6600  // If we didn't need a GlobalBaseReg, don't insert code.
6601  if (GlobalBaseReg == 0)
6602  return false;
6603 
6604  // Insert the set of GlobalBaseReg into the first MBB of the function
6605  MachineBasicBlock &FirstMBB = MF.front();
6606  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
6607  DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
6608  MachineRegisterInfo &RegInfo = MF.getRegInfo();
6609  const X86InstrInfo *TII = STI.getInstrInfo();
6610 
6611  unsigned PC;
6612  if (STI.isPICStyleGOT())
6613  PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
6614  else
6615  PC = GlobalBaseReg;
6616 
6617  // Operand of MovePCtoStack is completely ignored by asm printer. It's
6618  // only used in JIT code emission as displacement to pc.
6619  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
6620 
6621  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
6622  // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
6623  if (STI.isPICStyleGOT()) {
6624  // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
6625  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
6626  .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
6627  X86II::MO_GOT_ABSOLUTE_ADDRESS);
6628  }
6629 
6630  return true;
6631  }
6632 
6633  const char *getPassName() const override {
6634  return "X86 PIC Global Base Reg Initialization";
6635  }
6636 
6637  void getAnalysisUsage(AnalysisUsage &AU) const override {
6638  AU.setPreservesCFG();
6639  MachineFunctionPass::getAnalysisUsage(AU);
6640  }
6641  };
6642 }
6643 
6644 char CGBR::ID = 0;
6645 FunctionPass*
6646 llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
6647 
6648 namespace {
6649  struct LDTLSCleanup : public MachineFunctionPass {
6650  static char ID;
6651  LDTLSCleanup() : MachineFunctionPass(ID) {}
6652 
6653  bool runOnMachineFunction(MachineFunction &MF) override {
6654  X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
6655  if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
6656  // No point folding accesses if there aren't at least two.
6657  return false;
6658  }
6659 
6660  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
6661  return VisitNode(DT->getRootNode(), 0);
6662  }
6663 
6664  // Visit the dominator subtree rooted at Node in pre-order.
6665  // If TLSBaseAddrReg is non-null, then use that to replace any
6666  // TLS_base_addr instructions. Otherwise, create the register
6667  // when the first such instruction is seen, and then use it
6668  // as we encounter more instructions.
6669  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
6670  MachineBasicBlock *BB = Node->getBlock();
6671  bool Changed = false;
6672 
6673  // Traverse the current block.
6674  for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
6675  ++I) {
6676  switch (I->getOpcode()) {
6677  case X86::TLS_base_addr32:
6678  case X86::TLS_base_addr64:
6679  if (TLSBaseAddrReg)
6680  I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
6681  else
6682  I = SetRegister(I, &TLSBaseAddrReg);
6683  Changed = true;
6684  break;
6685  default:
6686  break;
6687  }
6688  }
6689 
6690  // Visit the children of this block in the dominator tree.
6691  for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
6692  I != E; ++I) {
6693  Changed |= VisitNode(*I, TLSBaseAddrReg);
6694  }
6695 
6696  return Changed;
6697  }
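// The effect of VisitNode/ReplaceTLSBaseAddrCall can be sketched with a
// hypothetical function that reads two local-dynamic TLS variables: instead
// of emitting two TLS_base_addr64 pseudo-instructions (each expanding to a
// call to __tls_get_addr), the first call's result is copied into a virtual
// register and every dominated occurrence is replaced by a plain COPY from
// that register, so the runtime call happens only once along each path.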
6698 
6699  // Replace the TLS_base_addr instruction I with a copy from
6700  // TLSBaseAddrReg, returning the new instruction.
6701  MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
6702  unsigned TLSBaseAddrReg) {
6703  MachineFunction *MF = I->getParent()->getParent();
6704  const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
6705  const bool is64Bit = STI.is64Bit();
6706  const X86InstrInfo *TII = STI.getInstrInfo();
6707 
6708  // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
6709  MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
6710  TII->get(TargetOpcode::COPY),
6711  is64Bit ? X86::RAX : X86::EAX)
6712  .addReg(TLSBaseAddrReg);
6713 
6714  // Erase the TLS_base_addr instruction.
6715  I->eraseFromParent();
6716 
6717  return Copy;
6718  }
6719 
6720  // Create a virtual register in *TLSBaseAddrReg, and populate it by
6721  // inserting a copy instruction after I. Returns the new instruction.
6722  MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
6723  MachineFunction *MF = I->getParent()->getParent();
6724  const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
6725  const bool is64Bit = STI.is64Bit();
6726  const X86InstrInfo *TII = STI.getInstrInfo();
6727 
6728  // Create a virtual register for the TLS base address.
6729  MachineRegisterInfo &RegInfo = MF->getRegInfo();
6730  *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
6731  ? &X86::GR64RegClass
6732  : &X86::GR32RegClass);
6733 
6734  // Insert a copy from RAX/EAX to TLSBaseAddrReg.
6735  MachineInstr *Next = I->getNextNode();
6736  MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
6737  TII->get(TargetOpcode::COPY),
6738  *TLSBaseAddrReg)
6739  .addReg(is64Bit ? X86::RAX : X86::EAX);
6740 
6741  return Copy;
6742  }
6743 
6744  const char *getPassName() const override {
6745  return "Local Dynamic TLS Access Clean-up";
6746  }
6747 
6748  void getAnalysisUsage(AnalysisUsage &AU) const override {
6749  AU.setPreservesCFG();
6750  AU.addRequired<MachineDominatorTree>();
6751  MachineFunctionPass::getAnalysisUsage(AU);
6752  }
6753  };
6754 }
6755 
6756 char LDTLSCleanup::ID = 0;
6757 FunctionPass*
6758 llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
static MachineInstr * MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr *MI)
const X86RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: X86InstrInfo.h:195
bool isImplicit() const
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
unsigned GetCondBranchFromCond(CondCode CC)
The memory access reads data.
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
static bool hasUndefRegUpdate(unsigned Opcode)
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:229
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo &TRI) const override
static MachineInstr * FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr *MI, const TargetInstrInfo &TII)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI)
Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:427
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl< MachineOperand > &Addr, const TargetRegisterClass *RC, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl< MachineInstr * > &NewMIs) const
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, const X86Subtarget &Subtarget)
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:191
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:262
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:129
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const override
CondCode getCondFromCMovOpc(unsigned Opc)
Return condition code of a CMov opcode.
MachineDomTreeNode * getRootNode() const
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs)
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:263
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:138
static MachineInstr * FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr *MI, const TargetInstrInfo &TII)
bool isHighLatencyDef(int opc) const override
bool isDead() const
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const override
bool isPredicable(QueryType Type=AllInBundle) const
Return true if this instruction has a predicate operand that controls execution.
Definition: MachineInstr.h:457
bool readsVirtualRegister(unsigned Reg) const
Return true if the MachineInstr reads the specified virtual register.
Definition: MachineInstr.h:844
static cl::opt< bool > ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden)
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
static X86::CondCode getCondFromBranchOpc(unsigned BrOpc)
static unsigned getLoadRegOpcode(unsigned DestReg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI)
static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, MachineCombinerPattern::MC_PATTERN Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Attempt the following reassociation to reduce critical path length: B = A op X (Prev) C = B op Y (Roo...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:264
unsigned getNumOperands() const
Return the number of values used by this operation.
int getOperandBias(const MCInstrDesc &Desc)
getOperandBias - compute any additional adjustment needed to the offset to the start of the memory op...
Definition: X86BaseInfo.h:630
A debug info location.
Definition: DebugLoc.h:34
const SDValue & getOperand(unsigned Num) const
void setIsDead(bool Val=true)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:87
unsigned getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum, const TargetRegisterInfo *TRI) const override
Inform the ExeDepsFix pass how many idle instructions we would like before certain undef register rea...
int getSPAdjust(const MachineInstr *MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction. ...
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:419
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern::MC_PATTERN > &P) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
iterator_range< succ_iterator > successors()
static Constant * getNullValue(Type *Ty)
Definition: Constants.cpp:178
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
COPY - Target-independent register copy.
Definition: TargetOpcodes.h:86
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:169
MachineInstr * optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
optimizeLoadInstr - Try to remove the load by folding it to a register operand at the use...
unsigned getSize() const
getSize - Return the size of the register in bytes, which is also the size of a stack slot allocated ...
AnalysisUsage & addRequired()
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing function and deletes it...
MachineMemOperand - A description of a memory reference used in the backend.
unsigned getJumpInstrTableEntryBound() const override
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI, bool load)
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
std::vector< DomTreeNodeBase< NodeT > * >::iterator iterator
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:228
NodeTy * getNextNode()
Get the next node, or 0 for the list tail.
Definition: ilist_node.h:80
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isUnpredicatedTerminator(const MachineInstr *MI) const override
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
void getTrap(MCInst &MI) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setImplicit(bool Val=true)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static bool hasPartialRegUpdate(unsigned Opcode)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:566
const TargetRegisterClass * getRegClass(unsigned Reg) const
getRegClass - Return the register class of the specified virtual register.
std::vector< MachineBasicBlock * >::iterator succ_iterator
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
MachineMemOperand ** mmo_iterator
Definition: MachineInstr.h:53
void replaceKillInstruction(unsigned Reg, MachineInstr *OldMI, MachineInstr *NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one...
unsigned getNumLocalDynamicTLSAccesses() const
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:159
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
std::pair< MachineInstr::mmo_iterator, MachineInstr::mmo_iterator > extractLoadMemRefs(MachineInstr::mmo_iterator Begin, MachineInstr::mmo_iterator End)
extractLoadMemRefs - Allocate an array and populate it with just the load information from the given ...
bool isUndef() const
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:317
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction...
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
isSafeToMoveRegClassDefs - Return true if it's safe to move a machine instruction that defines the sp...
const MachineInstrBuilder & addImm(int64_t Val) const
addImm - Add a new immediate operand.
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const override
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:271
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
void RemoveOperand(unsigned i)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
const MachineBasicBlock & front() const
bool isKill() const
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:42
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well...
static X86::CondCode getCondFromSETOpc(unsigned Opc)
Return condition code of a SET opcode.
bool canRealignStack(const MachineFunction &MF) const
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to determine (in conjunction w...
virtual MachineInstr * commuteInstruction(MachineInstr *MI, bool NewMI=false) const
If a target has any instructions that are commutable but require converting to different instructions...
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:294
static def_instr_iterator def_instr_end()
bool getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, unsigned &Offset, const TargetRegisterInfo *TRI) const override
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
uint64_t getAlignment() const
getAlignment - Return the minimum known alignment in bytes of the actual memory reference.
bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Check if there exists an earlier instruction that operates on the same source ...
int64_t getImm() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, bool High)
Returns the sub or super register of a specific X86 register.
unsigned getUndefRegState(bool B)
reverse_iterator rend()
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been <def>ined and not <kill>ed as of just before Before...
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:134
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
reverse_iterator rbegin()
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:150
unsigned getKillRegState(bool B)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:97
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:120
TargetInstrInfo - Interface to description of machine instruction set.
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:247
static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, const MachineBasicBlock *MBB)
unsigned getDeadRegState(bool B)
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:341
unsigned getDefRegState(bool B)
void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds a pattern, this function generates the instructions that coul...
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
bundle_iterator< MachineInstr, instr_iterator > iterator
static bool isFrameLoadOpcode(int Opcode)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
unsigned getTargetFlags() const
const MachineInstrBuilder & setMemRefs(MachineInstr::mmo_iterator b, MachineInstr::mmo_iterator e) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
static bool isAssociativeAndCommutative(unsigned Opcode)
bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
static bool isDefConvertible(MachineInstr *MI)
Check whether the definition can be converted to remove a comparison against zero.
void getUnconditionalBranch(MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const override
bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr *MI) const override
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [...
Definition: X86BaseInfo.h:60
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned char TargetFlags=0)
CodeModel::Model getCodeModel() const
Returns the code model.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static const uint16_t ReplaceableInstrs[][3]
FunctionPass * createX86GlobalBaseRegPass()
createX86GlobalBaseRegPass - This pass initializes a global base register for PIC on x86-32...
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well...
This is an important base class in LLVM.
Definition: Constant.h:41
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static bool is64Bit(const char *name)
DebugLoc findDebugLoc(instr_iterator MBBI)
findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE instructions...
bool hasAVX2() const
Definition: X86Subtarget.h:330
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
unsigned get512BitSuperRegister(unsigned Reg)
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to"" fuse, but the X86 backend currently can't"), cl::Hidden)
This pseudo-instruction loads the stack guard value.
bool isCopy() const
Definition: MachineInstr.h:778
Represent the analysis usage information of a pass.
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset]...
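As a hedged illustration only (the helper name emitLoadRegPlus8 is hypothetical, and the sketch assumes the includes already present in X86InstrInfo.cpp), this is the usual pairing of addRegOffset with BuildMI to form a [Reg + Offset] load:

static void emitLoadRegPlus8(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, DebugLoc DL,
                             const TargetInstrInfo &TII, unsigned DestReg,
                             unsigned BaseReg) {
  // MOV32rm DestReg, [BaseReg + 8]; addRegOffset fills in the five X86
  // address operands (base, scale, index, displacement, segment).
  addRegOffset(BuildMI(MBB, I, DL, TII.get(X86::MOV32rm), DestReg),
               BaseReg, /*isKill=*/false, /*Offset=*/8);
}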
X86InstrInfo(X86Subtarget &STI)
static bool MaskRegClassContains(unsigned Reg)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:352
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:644
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
Definition: MachineInstr.h:881
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr * > &NewMIs) const override
unfoldMemoryOperand - Separate a single instruction which folded a load or a store or a load and a store into two or more instructions.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:129
static unsigned getTruncatedShiftCount(MachineInstr *MI, unsigned ShiftAmtOperandIdx)
Check whether the shift count for a machine operand is non-zero.
bool isInvariantLoad(AliasAnalysis *AA) const
Return true if this instruction is loading from a location whose value is invariant across the function.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
static Constant * getAllOnesValue(Type *Ty)
Get the all ones value.
Definition: Constants.cpp:230
unsigned RemoveBranch(MachineBasicBlock &MBB) const override
bool classifyLEAReg(MachineInstr *MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc, bool &isKill, bool &isUndef, MachineOperand &ImplicitOp) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a particular kind of LEA instruction.
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
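A minimal sketch of the builder interface (MF, MBB, DL, TII and DestReg are assumed to exist in the surrounding pass; nothing here is taken from this file): create an uninserted MOV32ri of the constant 42, then attach it to a block.

MachineInstr *MI =
    BuildMI(MF, DL, TII.get(X86::MOV32ri), DestReg).addImm(42);
MBB.push_back(MI); // the builder converts implicitly to MachineInstr*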
Register is known to be live.
bool isPICStyleGOT() const
Definition: X86Subtarget.h:434
static unsigned GetCondBranchFromCond(XCore::CondCode CC)
GetCondBranchFromCond - Return the Branch instruction opcode that matches the cc. ...
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:416
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand=false)
Return a set opcode for the given condition and whether it has a memory operand.
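For example, assuming the usual X86 opcode naming, the register form for "not equal" would be obtained as:

unsigned SetOpc = X86::getSETFromCond(X86::COND_NE, /*HasMemoryOperand=*/false);
// SetOpc should be X86::SETNEr.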
unsigned getSubReg() const
EVT - Extended Value Type.
Definition: ValueTypes.h:31
VarInfo & getVarInfo(unsigned RegIdx)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned char NumDefs
Definition: MCInstrDesc.h:142
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:162
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
The memory access is invariant.
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
void setIsKill(bool Val=true)
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const
isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that would clobber the EFLAGS condition register.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
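A sketch of the typical use together with BuildMI (MBB, MI, DL, TII, DestReg and FrameIdx are assumed to be in scope): reload a 32-bit value from a stack slot.

addFrameReference(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), DestReg),
                  FrameIdx);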
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
static unsigned getRegClass(bool IsVgpr, unsigned RegWidth)
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool shouldScheduleAdjacent(MachineInstr *First, MachineInstr *Second) const override
std::vector< MachineInstr * > Kills
Kills - List of MachineInstrs which are the last use of this virtual register (kill it) in their basic block.
Definition: LiveVariables.h:89
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:836
virtual const TargetFrameLowering * getFrameLowering() const
static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg, unsigned SrcReg2, int ImmValue, MachineInstr *OI)
Check whether the first instruction, whose only purpose is to update flags, can be made redundant...
void setOpcode(unsigned Op)
Definition: MCInst.h:158
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessary.
virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
If specified MI is commutable, return the two operand indices that would swap value.
bool canFoldMemoryOperand(const MachineInstr *, ArrayRef< unsigned >) const override
canFoldMemoryOperand - Returns true for the specified load / store if folding is possible.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
CloneMachineInstr - Create a new MachineInstr which is a copy of the 'Orig' instruction, identical in all ways except the instruction has no parent, prev, or next.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
static unsigned copyPhysRegOpcode_AVX512(unsigned &DestReg, unsigned &SrcReg)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g. turning COND_E into COND_NE.
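A small sketch of inverting a condition when a branch is reversed (assumed context only):

X86::CondCode CC = X86::COND_E;
X86::CondCode Inverted = X86::GetOppositeBranchCondition(CC);
// Inverted is X86::COND_NE.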
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
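A hedged sketch (all register names assumed): computing DestReg = Reg1 + Reg2 with a 32-bit LEA that uses the [Reg + Reg] addressing form.

addRegReg(BuildMI(MBB, MI, DL, TII.get(X86::LEA32r), DestReg),
          Reg1, /*isKill1=*/false, Reg2, /*isKill2=*/false);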
Information about stack frame layout on the target.
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the global base register value...
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, DebugLoc DL) const override
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition: Pass.cpp:263
Represents one node in the SelectionDAG.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
void getNoopForMachoTarget(MCInst &NopInst) const override
Return the noop instruction to use for a noop.
MachineInstr * commuteInstruction(MachineInstr *MI, bool NewMI) const override
commuteInstruction - We have a few instructions that must be hacked on to commute them...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
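A sketch of the usual query, e.g. to check whether a call preserves EFLAGS (MO is assumed to be a register-mask operand taken from a call instruction):

if (MO.isRegMask() &&
    MachineOperand::clobbersPhysReg(MO.getRegMask(), X86::EFLAGS)) {
  // EFLAGS is not preserved across this point.
}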
const TargetRegisterClass * getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
Compute the static register class constraint for operand OpIdx.
MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_AD...
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can't fold into MI because the latter uses content...
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given condition, register size in bytes, and operand type...
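For example, assuming the usual X86 opcode naming, the 32-bit register-register form for "equal" would be:

unsigned CMovOpc = X86::getCMovFromCond(X86::COND_E, /*RegBytes=*/4,
                                        /*HasMemoryOperand=*/false);
// CMovOpc should be X86::CMOVE32rr.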
NodeT * getBlock() const
bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI)
bool isUnalignedMemAccessFast() const
Definition: X86Subtarget.h:360
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
bool hasCMov() const
Definition: X86Subtarget.h:321
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, unsigned, unsigned, int &, int &, int &) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
static MachinePointerInfo getGOT()
getGOT - Return a MachinePointerInfo record that refers to a GOT entry.
Representation of each machine instruction.
Definition: MachineInstr.h:51
uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:552
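A worked example of the value it computes, namely the largest power of two that divides both inputs:

uint64_t A = MinAlign(16, 8);   // 8: a 16-byte aligned base plus offset 8
uint64_t B = MinAlign(16, 12);  // 4: offset 12 only guarantees 4-byte alignment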
bundle_iterator< const MachineInstr, const_instr_iterator > const_iterator
static const uint16_t * lookup(unsigned opcode, unsigned domain)
static bool isPhysicalRegister(unsigned Reg)
isPhysicalRegister - Return true if the specified register number is in the physical register namespace.
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
bool AnalyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
def_instr_iterator def_instr_begin(unsigned RegNo) const
bool isLiveIn(unsigned Reg) const
isLiveIn - Return true if the specified register is in the live in set.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
static unsigned getStoreRegOpcode(unsigned SrcReg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI)
Register liveness not decidable from local neighborhood.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the ExeDepsFix pass how many idle instructions we would like before a partial register update...
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified register.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:403
void setSubReg(unsigned subReg)
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
bool needsStackRealignment(const MachineFunction &MF) const override
bool hasAVX512() const
Definition: X86Subtarget.h:331
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
virtual bool canFoldMemoryOperand(const MachineInstr *MI, ArrayRef< unsigned > Ops) const
Returns true for the specified load / store if folding is possible.
void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl< MachineOperand > &Addr, const TargetRegisterClass *RC, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl< MachineInstr * > &NewMIs) const
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
unsigned getReg() const
getReg - Returns the register number.
std::pair< MachineInstr::mmo_iterator, MachineInstr::mmo_iterator > extractStoreMemRefs(MachineInstr::mmo_iterator Begin, MachineInstr::mmo_iterator End)
extractStoreMemRefs - Allocate an array and populate it with just the store information from the give...
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
Definition: MachineInstr.h:857
static X86::CondCode isUseDefConvertible(MachineInstr *MI)
Check whether the use can be converted to remove a comparison against zero.
static const uint16_t ReplaceableInstrsAVX2[][3]
int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode)
getMemoryOperandNo - The function returns the MCInst operand # for the first field of the memory oper...
Definition: X86BaseInfo.h:660
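A sketch of the usual lookup (Opcode and TII are assumed to be in scope); a negative result means the instruction has no memory operand:

const MCInstrDesc &Desc = TII.get(Opcode);
int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode);
if (MemOpNo < 0) {
  // This instruction form has no memory operand.
}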
virtual const TargetInstrInfo * getInstrInfo() const
std::reverse_iterator< iterator > reverse_iterator
mop_iterator operands_begin()
Definition: MachineInstr.h:289
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct an VectorType.
Definition: Type.cpp:713
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:185
static bool hasLiveCondCodeDef(MachineInstr *MI)
True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
BasicBlockListType::iterator iterator
#define DEBUG(X)
Definition: Debug.h:92
bool addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool callRegIndirect() const
Definition: X86Subtarget.h:368
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void addOperand(const MCOperand &Op)
Definition: MCInst.h:168
vt_iterator vt_begin() const
vt_begin / vt_end - Loop over all of the value types that can be represented by values in this regist...
static bool isFrameStoreOpcode(int Opcode)
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
static const uint16_t * lookupAVX2(unsigned opcode, unsigned domain)
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such that flags are set by MI(b,a).
static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted)
Return true if the input instruction is part of a chain of dependent ops that are suitable for reassociation.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
isLayoutSuccessor - Return true if the specified MBB will be emitted immediately after this block...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
FunctionPass * createCleanupLocalDynamicTLSPass()
createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses to local-dynamic TLS variab...
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new instruction after load / store ar...
static MachineOperand CreateFI(int Idx)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
Definition: MachineInstr.h:874
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:410
bool hasAVX() const
Definition: X86Subtarget.h:329
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"))
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const override
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:340
bool ReverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...