Bug Summary

File: llvm/lib/Target/X86/X86InstrInfo.cpp
Warning: line 1966, column 13
Assigned value is garbage or undefined
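
The "Assigned value is garbage or undefined" diagnostic comes from the analyzer's core checkers and fires when a value that is uninitialized on some feasible path is copied into another location. The snippet below is a minimal, hypothetical illustration of the pattern; it is not the code at line 1966 (which lies outside the excerpt reproduced below) and assumes an ordinary standalone translation unit analyzed with clang --analyze.

// Minimal sketch of the diagnostic: Value stays uninitialized on the path
// where UseDefault is false, so the later assignment reads garbage.
// Analyze with: clang --analyze repro.cpp
int pickValue(bool UseDefault) {
  int Value;        // not initialized on every path
  if (UseDefault)
    Value = 42;
  int Result = 0;
  Result = Value;   // warning: Assigned value is garbage or undefined
  return Result;
}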

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name X86InstrInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/llvm/lib/Target/X86 -I include -I /build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-25-232935-20746-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220125101009+ceec4383681c/llvm/lib/Target/X86/X86InstrInfo.cpp
1//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86InstrInfo.h"
14#include "X86.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrFoldTables.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Sequence.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/LivePhysRegs.h"
24#include "llvm/CodeGen/LiveVariables.h"
25#include "llvm/CodeGen/MachineConstantPool.h"
26#include "llvm/CodeGen/MachineDominators.h"
27#include "llvm/CodeGen/MachineFrameInfo.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
29#include "llvm/CodeGen/MachineModuleInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Function.h"
35#include "llvm/MC/MCAsmInfo.h"
36#include "llvm/MC/MCExpr.h"
37#include "llvm/MC/MCInst.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/ErrorHandling.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/Target/TargetOptions.h"
43
44using namespace llvm;
45
46#define DEBUG_TYPE "x86-instr-info"
47
48#define GET_INSTRINFO_CTOR_DTOR
49#include "X86GenInstrInfo.inc"
50
51static cl::opt<bool>
52 NoFusing("disable-spill-fusing",
53 cl::desc("Disable fusing of spill code into instructions"),
54 cl::Hidden);
55static cl::opt<bool>
56PrintFailedFusing("print-failed-fuse-candidates",
57 cl::desc("Print instructions that the allocator wants to"
58 " fuse, but the X86 backend currently can't"),
59 cl::Hidden);
60static cl::opt<bool>
61ReMatPICStubLoad("remat-pic-stub-load",
62 cl::desc("Re-materialize load from stub in PIC mode"),
63 cl::init(false), cl::Hidden);
64static cl::opt<unsigned>
65PartialRegUpdateClearance("partial-reg-update-clearance",
66 cl::desc("Clearance between two register writes "
67 "for inserting XOR to avoid partial "
68 "register update"),
69 cl::init(64), cl::Hidden);
70static cl::opt<unsigned>
71UndefRegClearance("undef-reg-clearance",
72 cl::desc("How many idle instructions we would like before "
73 "certain undef register reads"),
74 cl::init(128), cl::Hidden);
75
76
77// Pin the vtable to this file.
78void X86InstrInfo::anchor() {}
79
80X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
81 : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
82 : X86::ADJCALLSTACKDOWN32),
83 (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
84 : X86::ADJCALLSTACKUP32),
85 X86::CATCHRET,
86 (STI.is64Bit() ? X86::RET64 : X86::RET32)),
87 Subtarget(STI), RI(STI.getTargetTriple()) {
88}
89
90bool
91X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
92 Register &SrcReg, Register &DstReg,
93 unsigned &SubIdx) const {
94 switch (MI.getOpcode()) {
95 default: break;
96 case X86::MOVSX16rr8:
97 case X86::MOVZX16rr8:
98 case X86::MOVSX32rr8:
99 case X86::MOVZX32rr8:
100 case X86::MOVSX64rr8:
101 if (!Subtarget.is64Bit())
102 // It's not always legal to reference the low 8-bit of the larger
103 // register in 32-bit mode.
104 return false;
105 LLVM_FALLTHROUGH;
106 case X86::MOVSX32rr16:
107 case X86::MOVZX32rr16:
108 case X86::MOVSX64rr16:
109 case X86::MOVSX64rr32: {
110 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
111 // Be conservative.
112 return false;
113 SrcReg = MI.getOperand(1).getReg();
114 DstReg = MI.getOperand(0).getReg();
115 switch (MI.getOpcode()) {
116 default: llvm_unreachable("Unreachable!");
117 case X86::MOVSX16rr8:
118 case X86::MOVZX16rr8:
119 case X86::MOVSX32rr8:
120 case X86::MOVZX32rr8:
121 case X86::MOVSX64rr8:
122 SubIdx = X86::sub_8bit;
123 break;
124 case X86::MOVSX32rr16:
125 case X86::MOVZX32rr16:
126 case X86::MOVSX64rr16:
127 SubIdx = X86::sub_16bit;
128 break;
129 case X86::MOVSX64rr32:
130 SubIdx = X86::sub_32bit;
131 break;
132 }
133 return true;
134 }
135 }
136 return false;
137}
138
139bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
140 switch (MI.getOpcode()) {
141 default:
142 // By default, assume that the instruction is not data invariant.
143 return false;
144
145 // Some target-independent operations that trivially lower to data-invariant
146 // instructions.
147 case TargetOpcode::COPY:
148 case TargetOpcode::INSERT_SUBREG:
149 case TargetOpcode::SUBREG_TO_REG:
150 return true;
151
152 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
153 // However, they set flags and are perhaps the most surprisingly constant
154 // time operations so we call them out here separately.
155 case X86::IMUL16rr:
156 case X86::IMUL16rri8:
157 case X86::IMUL16rri:
158 case X86::IMUL32rr:
159 case X86::IMUL32rri8:
160 case X86::IMUL32rri:
161 case X86::IMUL64rr:
162 case X86::IMUL64rri32:
163 case X86::IMUL64rri8:
164
165 // Bit scanning and counting instructions that are somewhat surprisingly
166 // constant time as they scan across bits and do other fairly complex
167 // operations like popcnt, but are believed to be constant time on x86.
168 // However, these set flags.
169 case X86::BSF16rr:
170 case X86::BSF32rr:
171 case X86::BSF64rr:
172 case X86::BSR16rr:
173 case X86::BSR32rr:
174 case X86::BSR64rr:
175 case X86::LZCNT16rr:
176 case X86::LZCNT32rr:
177 case X86::LZCNT64rr:
178 case X86::POPCNT16rr:
179 case X86::POPCNT32rr:
180 case X86::POPCNT64rr:
181 case X86::TZCNT16rr:
182 case X86::TZCNT32rr:
183 case X86::TZCNT64rr:
184
185 // Bit manipulation instructions are effectively combinations of basic
186 // arithmetic ops, and should still execute in constant time. These also
187 // set flags.
188 case X86::BLCFILL32rr:
189 case X86::BLCFILL64rr:
190 case X86::BLCI32rr:
191 case X86::BLCI64rr:
192 case X86::BLCIC32rr:
193 case X86::BLCIC64rr:
194 case X86::BLCMSK32rr:
195 case X86::BLCMSK64rr:
196 case X86::BLCS32rr:
197 case X86::BLCS64rr:
198 case X86::BLSFILL32rr:
199 case X86::BLSFILL64rr:
200 case X86::BLSI32rr:
201 case X86::BLSI64rr:
202 case X86::BLSIC32rr:
203 case X86::BLSIC64rr:
204 case X86::BLSMSK32rr:
205 case X86::BLSMSK64rr:
206 case X86::BLSR32rr:
207 case X86::BLSR64rr:
208 case X86::TZMSK32rr:
209 case X86::TZMSK64rr:
210
211 // Bit extracting and clearing instructions should execute in constant time,
212 // and set flags.
213 case X86::BEXTR32rr:
214 case X86::BEXTR64rr:
215 case X86::BEXTRI32ri:
216 case X86::BEXTRI64ri:
217 case X86::BZHI32rr:
218 case X86::BZHI64rr:
219
220 // Shift and rotate.
221 case X86::ROL8r1:
222 case X86::ROL16r1:
223 case X86::ROL32r1:
224 case X86::ROL64r1:
225 case X86::ROL8rCL:
226 case X86::ROL16rCL:
227 case X86::ROL32rCL:
228 case X86::ROL64rCL:
229 case X86::ROL8ri:
230 case X86::ROL16ri:
231 case X86::ROL32ri:
232 case X86::ROL64ri:
233 case X86::ROR8r1:
234 case X86::ROR16r1:
235 case X86::ROR32r1:
236 case X86::ROR64r1:
237 case X86::ROR8rCL:
238 case X86::ROR16rCL:
239 case X86::ROR32rCL:
240 case X86::ROR64rCL:
241 case X86::ROR8ri:
242 case X86::ROR16ri:
243 case X86::ROR32ri:
244 case X86::ROR64ri:
245 case X86::SAR8r1:
246 case X86::SAR16r1:
247 case X86::SAR32r1:
248 case X86::SAR64r1:
249 case X86::SAR8rCL:
250 case X86::SAR16rCL:
251 case X86::SAR32rCL:
252 case X86::SAR64rCL:
253 case X86::SAR8ri:
254 case X86::SAR16ri:
255 case X86::SAR32ri:
256 case X86::SAR64ri:
257 case X86::SHL8r1:
258 case X86::SHL16r1:
259 case X86::SHL32r1:
260 case X86::SHL64r1:
261 case X86::SHL8rCL:
262 case X86::SHL16rCL:
263 case X86::SHL32rCL:
264 case X86::SHL64rCL:
265 case X86::SHL8ri:
266 case X86::SHL16ri:
267 case X86::SHL32ri:
268 case X86::SHL64ri:
269 case X86::SHR8r1:
270 case X86::SHR16r1:
271 case X86::SHR32r1:
272 case X86::SHR64r1:
273 case X86::SHR8rCL:
274 case X86::SHR16rCL:
275 case X86::SHR32rCL:
276 case X86::SHR64rCL:
277 case X86::SHR8ri:
278 case X86::SHR16ri:
279 case X86::SHR32ri:
280 case X86::SHR64ri:
281 case X86::SHLD16rrCL:
282 case X86::SHLD32rrCL:
283 case X86::SHLD64rrCL:
284 case X86::SHLD16rri8:
285 case X86::SHLD32rri8:
286 case X86::SHLD64rri8:
287 case X86::SHRD16rrCL:
288 case X86::SHRD32rrCL:
289 case X86::SHRD64rrCL:
290 case X86::SHRD16rri8:
291 case X86::SHRD32rri8:
292 case X86::SHRD64rri8:
293
294 // Basic arithmetic is constant time on the input but does set flags.
295 case X86::ADC8rr:
296 case X86::ADC8ri:
297 case X86::ADC16rr:
298 case X86::ADC16ri:
299 case X86::ADC16ri8:
300 case X86::ADC32rr:
301 case X86::ADC32ri:
302 case X86::ADC32ri8:
303 case X86::ADC64rr:
304 case X86::ADC64ri8:
305 case X86::ADC64ri32:
306 case X86::ADD8rr:
307 case X86::ADD8ri:
308 case X86::ADD16rr:
309 case X86::ADD16ri:
310 case X86::ADD16ri8:
311 case X86::ADD32rr:
312 case X86::ADD32ri:
313 case X86::ADD32ri8:
314 case X86::ADD64rr:
315 case X86::ADD64ri8:
316 case X86::ADD64ri32:
317 case X86::AND8rr:
318 case X86::AND8ri:
319 case X86::AND16rr:
320 case X86::AND16ri:
321 case X86::AND16ri8:
322 case X86::AND32rr:
323 case X86::AND32ri:
324 case X86::AND32ri8:
325 case X86::AND64rr:
326 case X86::AND64ri8:
327 case X86::AND64ri32:
328 case X86::OR8rr:
329 case X86::OR8ri:
330 case X86::OR16rr:
331 case X86::OR16ri:
332 case X86::OR16ri8:
333 case X86::OR32rr:
334 case X86::OR32ri:
335 case X86::OR32ri8:
336 case X86::OR64rr:
337 case X86::OR64ri8:
338 case X86::OR64ri32:
339 case X86::SBB8rr:
340 case X86::SBB8ri:
341 case X86::SBB16rr:
342 case X86::SBB16ri:
343 case X86::SBB16ri8:
344 case X86::SBB32rr:
345 case X86::SBB32ri:
346 case X86::SBB32ri8:
347 case X86::SBB64rr:
348 case X86::SBB64ri8:
349 case X86::SBB64ri32:
350 case X86::SUB8rr:
351 case X86::SUB8ri:
352 case X86::SUB16rr:
353 case X86::SUB16ri:
354 case X86::SUB16ri8:
355 case X86::SUB32rr:
356 case X86::SUB32ri:
357 case X86::SUB32ri8:
358 case X86::SUB64rr:
359 case X86::SUB64ri8:
360 case X86::SUB64ri32:
361 case X86::XOR8rr:
362 case X86::XOR8ri:
363 case X86::XOR16rr:
364 case X86::XOR16ri:
365 case X86::XOR16ri8:
366 case X86::XOR32rr:
367 case X86::XOR32ri:
368 case X86::XOR32ri8:
369 case X86::XOR64rr:
370 case X86::XOR64ri8:
371 case X86::XOR64ri32:
372 // Arithmetic with just 32-bit and 64-bit variants and no immediates.
373 case X86::ADCX32rr:
374 case X86::ADCX64rr:
375 case X86::ADOX32rr:
376 case X86::ADOX64rr:
377 case X86::ANDN32rr:
378 case X86::ANDN64rr:
379 // Unary arithmetic operations.
380 case X86::DEC8r:
381 case X86::DEC16r:
382 case X86::DEC32r:
383 case X86::DEC64r:
384 case X86::INC8r:
385 case X86::INC16r:
386 case X86::INC32r:
387 case X86::INC64r:
388 case X86::NEG8r:
389 case X86::NEG16r:
390 case X86::NEG32r:
391 case X86::NEG64r:
392
393 // Unlike other arithmetic, NOT doesn't set EFLAGS.
394 case X86::NOT8r:
395 case X86::NOT16r:
396 case X86::NOT32r:
397 case X86::NOT64r:
398
399 // Various move instructions used to zero or sign extend things. Note that we
400 // intentionally don't support the _NOREX variants as we can't handle that
401 // register constraint anyways.
402 case X86::MOVSX16rr8:
403 case X86::MOVSX32rr8:
404 case X86::MOVSX32rr16:
405 case X86::MOVSX64rr8:
406 case X86::MOVSX64rr16:
407 case X86::MOVSX64rr32:
408 case X86::MOVZX16rr8:
409 case X86::MOVZX32rr8:
410 case X86::MOVZX32rr16:
411 case X86::MOVZX64rr8:
412 case X86::MOVZX64rr16:
413 case X86::MOV32rr:
414
415 // Arithmetic instructions that are both constant time and don't set flags.
416 case X86::RORX32ri:
417 case X86::RORX64ri:
418 case X86::SARX32rr:
419 case X86::SARX64rr:
420 case X86::SHLX32rr:
421 case X86::SHLX64rr:
422 case X86::SHRX32rr:
423 case X86::SHRX64rr:
424
425 // LEA doesn't actually access memory, and its arithmetic is constant time.
426 case X86::LEA16r:
427 case X86::LEA32r:
428 case X86::LEA64_32r:
429 case X86::LEA64r:
430 return true;
431 }
432}
433
434bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
435 switch (MI.getOpcode()) {
436 default:
437 // By default, assume that the load will immediately leak.
438 return false;
439
440 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
441 // However, they set flags and are perhaps the most surprisingly constant
442 // time operations so we call them out here separately.
443 case X86::IMUL16rm:
444 case X86::IMUL16rmi8:
445 case X86::IMUL16rmi:
446 case X86::IMUL32rm:
447 case X86::IMUL32rmi8:
448 case X86::IMUL32rmi:
449 case X86::IMUL64rm:
450 case X86::IMUL64rmi32:
451 case X86::IMUL64rmi8:
452
453 // Bit scanning and counting instructions that are somewhat surprisingly
454 // constant time as they scan across bits and do other fairly complex
455 // operations like popcnt, but are believed to be constant time on x86.
456 // However, these set flags.
457 case X86::BSF16rm:
458 case X86::BSF32rm:
459 case X86::BSF64rm:
460 case X86::BSR16rm:
461 case X86::BSR32rm:
462 case X86::BSR64rm:
463 case X86::LZCNT16rm:
464 case X86::LZCNT32rm:
465 case X86::LZCNT64rm:
466 case X86::POPCNT16rm:
467 case X86::POPCNT32rm:
468 case X86::POPCNT64rm:
469 case X86::TZCNT16rm:
470 case X86::TZCNT32rm:
471 case X86::TZCNT64rm:
472
473 // Bit manipulation instructions are effectively combinations of basic
474 // arithmetic ops, and should still execute in constant time. These also
475 // set flags.
476 case X86::BLCFILL32rm:
477 case X86::BLCFILL64rm:
478 case X86::BLCI32rm:
479 case X86::BLCI64rm:
480 case X86::BLCIC32rm:
481 case X86::BLCIC64rm:
482 case X86::BLCMSK32rm:
483 case X86::BLCMSK64rm:
484 case X86::BLCS32rm:
485 case X86::BLCS64rm:
486 case X86::BLSFILL32rm:
487 case X86::BLSFILL64rm:
488 case X86::BLSI32rm:
489 case X86::BLSI64rm:
490 case X86::BLSIC32rm:
491 case X86::BLSIC64rm:
492 case X86::BLSMSK32rm:
493 case X86::BLSMSK64rm:
494 case X86::BLSR32rm:
495 case X86::BLSR64rm:
496 case X86::TZMSK32rm:
497 case X86::TZMSK64rm:
498
499 // Bit extracting and clearing instructions should execute in constant time,
500 // and set flags.
501 case X86::BEXTR32rm:
502 case X86::BEXTR64rm:
503 case X86::BEXTRI32mi:
504 case X86::BEXTRI64mi:
505 case X86::BZHI32rm:
506 case X86::BZHI64rm:
507
508 // Basic arithmetic is constant time on the input but does set flags.
509 case X86::ADC8rm:
510 case X86::ADC16rm:
511 case X86::ADC32rm:
512 case X86::ADC64rm:
513 case X86::ADCX32rm:
514 case X86::ADCX64rm:
515 case X86::ADD8rm:
516 case X86::ADD16rm:
517 case X86::ADD32rm:
518 case X86::ADD64rm:
519 case X86::ADOX32rm:
520 case X86::ADOX64rm:
521 case X86::AND8rm:
522 case X86::AND16rm:
523 case X86::AND32rm:
524 case X86::AND64rm:
525 case X86::ANDN32rm:
526 case X86::ANDN64rm:
527 case X86::OR8rm:
528 case X86::OR16rm:
529 case X86::OR32rm:
530 case X86::OR64rm:
531 case X86::SBB8rm:
532 case X86::SBB16rm:
533 case X86::SBB32rm:
534 case X86::SBB64rm:
535 case X86::SUB8rm:
536 case X86::SUB16rm:
537 case X86::SUB32rm:
538 case X86::SUB64rm:
539 case X86::XOR8rm:
540 case X86::XOR16rm:
541 case X86::XOR32rm:
542 case X86::XOR64rm:
543
544 // Integer multiply w/o affecting flags is still believed to be constant
545 // time on x86. Called out separately as this is among the most surprising
546 // instructions to exhibit that behavior.
547 case X86::MULX32rm:
548 case X86::MULX64rm:
549
550 // Arithmetic instructions that are both constant time and don't set flags.
551 case X86::RORX32mi:
552 case X86::RORX64mi:
553 case X86::SARX32rm:
554 case X86::SARX64rm:
555 case X86::SHLX32rm:
556 case X86::SHLX64rm:
557 case X86::SHRX32rm:
558 case X86::SHRX64rm:
559
560 // Conversions are believed to be constant time and don't set flags.
561 case X86::CVTTSD2SI64rm:
562 case X86::VCVTTSD2SI64rm:
563 case X86::VCVTTSD2SI64Zrm:
564 case X86::CVTTSD2SIrm:
565 case X86::VCVTTSD2SIrm:
566 case X86::VCVTTSD2SIZrm:
567 case X86::CVTTSS2SI64rm:
568 case X86::VCVTTSS2SI64rm:
569 case X86::VCVTTSS2SI64Zrm:
570 case X86::CVTTSS2SIrm:
571 case X86::VCVTTSS2SIrm:
572 case X86::VCVTTSS2SIZrm:
573 case X86::CVTSI2SDrm:
574 case X86::VCVTSI2SDrm:
575 case X86::VCVTSI2SDZrm:
576 case X86::CVTSI2SSrm:
577 case X86::VCVTSI2SSrm:
578 case X86::VCVTSI2SSZrm:
579 case X86::CVTSI642SDrm:
580 case X86::VCVTSI642SDrm:
581 case X86::VCVTSI642SDZrm:
582 case X86::CVTSI642SSrm:
583 case X86::VCVTSI642SSrm:
584 case X86::VCVTSI642SSZrm:
585 case X86::CVTSS2SDrm:
586 case X86::VCVTSS2SDrm:
587 case X86::VCVTSS2SDZrm:
588 case X86::CVTSD2SSrm:
589 case X86::VCVTSD2SSrm:
590 case X86::VCVTSD2SSZrm:
591 // AVX512 added unsigned integer conversions.
592 case X86::VCVTTSD2USI64Zrm:
593 case X86::VCVTTSD2USIZrm:
594 case X86::VCVTTSS2USI64Zrm:
595 case X86::VCVTTSS2USIZrm:
596 case X86::VCVTUSI2SDZrm:
597 case X86::VCVTUSI642SDZrm:
598 case X86::VCVTUSI2SSZrm:
599 case X86::VCVTUSI642SSZrm:
600
601 // Loads to register don't set flags.
602 case X86::MOV8rm:
603 case X86::MOV8rm_NOREX:
604 case X86::MOV16rm:
605 case X86::MOV32rm:
606 case X86::MOV64rm:
607 case X86::MOVSX16rm8:
608 case X86::MOVSX32rm16:
609 case X86::MOVSX32rm8:
610 case X86::MOVSX32rm8_NOREX:
611 case X86::MOVSX64rm16:
612 case X86::MOVSX64rm32:
613 case X86::MOVSX64rm8:
614 case X86::MOVZX16rm8:
615 case X86::MOVZX32rm16:
616 case X86::MOVZX32rm8:
617 case X86::MOVZX32rm8_NOREX:
618 case X86::MOVZX64rm16:
619 case X86::MOVZX64rm8:
620 return true;
621 }
622}
623
624int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
625 const MachineFunction *MF = MI.getParent()->getParent();
626 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
627
628 if (isFrameInstr(MI)) {
629 int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
630 SPAdj -= getFrameAdjustment(MI);
631 if (!isFrameSetup(MI))
632 SPAdj = -SPAdj;
633 return SPAdj;
634 }
635
636 // To know whether a call adjusts the stack, we need information
637 // that is bound to the following ADJCALLSTACKUP pseudo.
638 // Look for the next ADJCALLSTACKUP that follows the call.
639 if (MI.isCall()) {
640 const MachineBasicBlock *MBB = MI.getParent();
641 auto I = ++MachineBasicBlock::const_iterator(MI);
642 for (auto E = MBB->end(); I != E; ++I) {
643 if (I->getOpcode() == getCallFrameDestroyOpcode() ||
644 I->isCall())
645 break;
646 }
647
648 // If we could not find a frame destroy opcode, then it has already
649 // been simplified, so we don't care.
650 if (I->getOpcode() != getCallFrameDestroyOpcode())
651 return 0;
652
653 return -(I->getOperand(1).getImm());
654 }
655
656 // Currently handle only PUSHes we can reasonably expect to see
657 // in call sequences
658 switch (MI.getOpcode()) {
659 default:
660 return 0;
661 case X86::PUSH32i8:
662 case X86::PUSH32r:
663 case X86::PUSH32rmm:
664 case X86::PUSH32rmr:
665 case X86::PUSHi32:
666 return 4;
667 case X86::PUSH64i8:
668 case X86::PUSH64r:
669 case X86::PUSH64rmm:
670 case X86::PUSH64rmr:
671 case X86::PUSH64i32:
672 return 8;
673 }
674}
675
676/// Return true and the FrameIndex if the specified
677/// operand and following operands form a reference to the stack frame.
678bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
679 int &FrameIndex) const {
680 if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
681 MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
682 MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
683 MI.getOperand(Op + X86::AddrDisp).isImm() &&
684 MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
685 MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
686 MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
687 FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
688 return true;
689 }
690 return false;
691}
692
693static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
694 switch (Opcode) {
695 default:
696 return false;
697 case X86::MOV8rm:
698 case X86::KMOVBkm:
699 MemBytes = 1;
700 return true;
701 case X86::MOV16rm:
702 case X86::KMOVWkm:
703 case X86::VMOVSHZrm:
704 case X86::VMOVSHZrm_alt:
705 MemBytes = 2;
706 return true;
707 case X86::MOV32rm:
708 case X86::MOVSSrm:
709 case X86::MOVSSrm_alt:
710 case X86::VMOVSSrm:
711 case X86::VMOVSSrm_alt:
712 case X86::VMOVSSZrm:
713 case X86::VMOVSSZrm_alt:
714 case X86::KMOVDkm:
715 MemBytes = 4;
716 return true;
717 case X86::MOV64rm:
718 case X86::LD_Fp64m:
719 case X86::MOVSDrm:
720 case X86::MOVSDrm_alt:
721 case X86::VMOVSDrm:
722 case X86::VMOVSDrm_alt:
723 case X86::VMOVSDZrm:
724 case X86::VMOVSDZrm_alt:
725 case X86::MMX_MOVD64rm:
726 case X86::MMX_MOVQ64rm:
727 case X86::KMOVQkm:
728 MemBytes = 8;
729 return true;
730 case X86::MOVAPSrm:
731 case X86::MOVUPSrm:
732 case X86::MOVAPDrm:
733 case X86::MOVUPDrm:
734 case X86::MOVDQArm:
735 case X86::MOVDQUrm:
736 case X86::VMOVAPSrm:
737 case X86::VMOVUPSrm:
738 case X86::VMOVAPDrm:
739 case X86::VMOVUPDrm:
740 case X86::VMOVDQArm:
741 case X86::VMOVDQUrm:
742 case X86::VMOVAPSZ128rm:
743 case X86::VMOVUPSZ128rm:
744 case X86::VMOVAPSZ128rm_NOVLX:
745 case X86::VMOVUPSZ128rm_NOVLX:
746 case X86::VMOVAPDZ128rm:
747 case X86::VMOVUPDZ128rm:
748 case X86::VMOVDQU8Z128rm:
749 case X86::VMOVDQU16Z128rm:
750 case X86::VMOVDQA32Z128rm:
751 case X86::VMOVDQU32Z128rm:
752 case X86::VMOVDQA64Z128rm:
753 case X86::VMOVDQU64Z128rm:
754 MemBytes = 16;
755 return true;
756 case X86::VMOVAPSYrm:
757 case X86::VMOVUPSYrm:
758 case X86::VMOVAPDYrm:
759 case X86::VMOVUPDYrm:
760 case X86::VMOVDQAYrm:
761 case X86::VMOVDQUYrm:
762 case X86::VMOVAPSZ256rm:
763 case X86::VMOVUPSZ256rm:
764 case X86::VMOVAPSZ256rm_NOVLX:
765 case X86::VMOVUPSZ256rm_NOVLX:
766 case X86::VMOVAPDZ256rm:
767 case X86::VMOVUPDZ256rm:
768 case X86::VMOVDQU8Z256rm:
769 case X86::VMOVDQU16Z256rm:
770 case X86::VMOVDQA32Z256rm:
771 case X86::VMOVDQU32Z256rm:
772 case X86::VMOVDQA64Z256rm:
773 case X86::VMOVDQU64Z256rm:
774 MemBytes = 32;
775 return true;
776 case X86::VMOVAPSZrm:
777 case X86::VMOVUPSZrm:
778 case X86::VMOVAPDZrm:
779 case X86::VMOVUPDZrm:
780 case X86::VMOVDQU8Zrm:
781 case X86::VMOVDQU16Zrm:
782 case X86::VMOVDQA32Zrm:
783 case X86::VMOVDQU32Zrm:
784 case X86::VMOVDQA64Zrm:
785 case X86::VMOVDQU64Zrm:
786 MemBytes = 64;
787 return true;
788 }
789}
790
791static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
792 switch (Opcode) {
793 default:
794 return false;
795 case X86::MOV8mr:
796 case X86::KMOVBmk:
797 MemBytes = 1;
798 return true;
799 case X86::MOV16mr:
800 case X86::KMOVWmk:
801 case X86::VMOVSHZmr:
802 MemBytes = 2;
803 return true;
804 case X86::MOV32mr:
805 case X86::MOVSSmr:
806 case X86::VMOVSSmr:
807 case X86::VMOVSSZmr:
808 case X86::KMOVDmk:
809 MemBytes = 4;
810 return true;
811 case X86::MOV64mr:
812 case X86::ST_FpP64m:
813 case X86::MOVSDmr:
814 case X86::VMOVSDmr:
815 case X86::VMOVSDZmr:
816 case X86::MMX_MOVD64mr:
817 case X86::MMX_MOVQ64mr:
818 case X86::MMX_MOVNTQmr:
819 case X86::KMOVQmk:
820 MemBytes = 8;
821 return true;
822 case X86::MOVAPSmr:
823 case X86::MOVUPSmr:
824 case X86::MOVAPDmr:
825 case X86::MOVUPDmr:
826 case X86::MOVDQAmr:
827 case X86::MOVDQUmr:
828 case X86::VMOVAPSmr:
829 case X86::VMOVUPSmr:
830 case X86::VMOVAPDmr:
831 case X86::VMOVUPDmr:
832 case X86::VMOVDQAmr:
833 case X86::VMOVDQUmr:
834 case X86::VMOVUPSZ128mr:
835 case X86::VMOVAPSZ128mr:
836 case X86::VMOVUPSZ128mr_NOVLX:
837 case X86::VMOVAPSZ128mr_NOVLX:
838 case X86::VMOVUPDZ128mr:
839 case X86::VMOVAPDZ128mr:
840 case X86::VMOVDQA32Z128mr:
841 case X86::VMOVDQU32Z128mr:
842 case X86::VMOVDQA64Z128mr:
843 case X86::VMOVDQU64Z128mr:
844 case X86::VMOVDQU8Z128mr:
845 case X86::VMOVDQU16Z128mr:
846 MemBytes = 16;
847 return true;
848 case X86::VMOVUPSYmr:
849 case X86::VMOVAPSYmr:
850 case X86::VMOVUPDYmr:
851 case X86::VMOVAPDYmr:
852 case X86::VMOVDQUYmr:
853 case X86::VMOVDQAYmr:
854 case X86::VMOVUPSZ256mr:
855 case X86::VMOVAPSZ256mr:
856 case X86::VMOVUPSZ256mr_NOVLX:
857 case X86::VMOVAPSZ256mr_NOVLX:
858 case X86::VMOVUPDZ256mr:
859 case X86::VMOVAPDZ256mr:
860 case X86::VMOVDQU8Z256mr:
861 case X86::VMOVDQU16Z256mr:
862 case X86::VMOVDQA32Z256mr:
863 case X86::VMOVDQU32Z256mr:
864 case X86::VMOVDQA64Z256mr:
865 case X86::VMOVDQU64Z256mr:
866 MemBytes = 32;
867 return true;
868 case X86::VMOVUPSZmr:
869 case X86::VMOVAPSZmr:
870 case X86::VMOVUPDZmr:
871 case X86::VMOVAPDZmr:
872 case X86::VMOVDQU8Zmr:
873 case X86::VMOVDQU16Zmr:
874 case X86::VMOVDQA32Zmr:
875 case X86::VMOVDQU32Zmr:
876 case X86::VMOVDQA64Zmr:
877 case X86::VMOVDQU64Zmr:
878 MemBytes = 64;
879 return true;
880 }
881 return false;
882}
883
884unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
885 int &FrameIndex) const {
886 unsigned Dummy;
887 return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
888}
889
890unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
891 int &FrameIndex,
892 unsigned &MemBytes) const {
893 if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
894 if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
895 return MI.getOperand(0).getReg();
896 return 0;
897}
898
899unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
900 int &FrameIndex) const {
901 unsigned Dummy;
902 if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
903 unsigned Reg;
904 if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
905 return Reg;
906 // Check for post-frame index elimination operations
907 SmallVector<const MachineMemOperand *, 1> Accesses;
908 if (hasLoadFromStackSlot(MI, Accesses)) {
909 FrameIndex =
910 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
911 ->getFrameIndex();
912 return MI.getOperand(0).getReg();
913 }
914 }
915 return 0;
916}
917
918unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
919 int &FrameIndex) const {
920 unsigned Dummy;
921 return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
922}
923
924unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
925 int &FrameIndex,
926 unsigned &MemBytes) const {
927 if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
928 if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
929 isFrameOperand(MI, 0, FrameIndex))
930 return MI.getOperand(X86::AddrNumOperands).getReg();
931 return 0;
932}
933
934unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
935 int &FrameIndex) const {
936 unsigned Dummy;
937 if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
938 unsigned Reg;
939 if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
940 return Reg;
941 // Check for post-frame index elimination operations
942 SmallVector<const MachineMemOperand *, 1> Accesses;
943 if (hasStoreToStackSlot(MI, Accesses)) {
944 FrameIndex =
945 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
946 ->getFrameIndex();
947 return MI.getOperand(X86::AddrNumOperands).getReg();
948 }
949 }
950 return 0;
951}
952
953/// Return true if register is PIC base, e.g. defined by X86::MOVPC32r.
954static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
955 // Don't waste compile time scanning use-def chains of physregs.
956 if (!BaseReg.isVirtual())
957 return false;
958 bool isPICBase = false;
959 for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
960 E = MRI.def_instr_end(); I != E; ++I) {
961 MachineInstr *DefMI = &*I;
962 if (DefMI->getOpcode() != X86::MOVPC32r)
963 return false;
964 assert(!isPICBase && "More than one PIC base?");
965 isPICBase = true;
966 }
967 return isPICBase;
968}
969
970bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
971 AAResults *AA) const {
972 switch (MI.getOpcode()) {
973 default:
974 // This function should only be called for opcodes with the ReMaterializable
975 // flag set.
976 llvm_unreachable("Unknown rematerializable operation!")::llvm::llvm_unreachable_internal("Unknown rematerializable operation!"
, "llvm/lib/Target/X86/X86InstrInfo.cpp", 976)
;
977 break;
978
979 case X86::LOAD_STACK_GUARD:
980 case X86::AVX1_SETALLONES:
981 case X86::AVX2_SETALLONES:
982 case X86::AVX512_128_SET0:
983 case X86::AVX512_256_SET0:
984 case X86::AVX512_512_SET0:
985 case X86::AVX512_512_SETALLONES:
986 case X86::AVX512_FsFLD0SD:
987 case X86::AVX512_FsFLD0SH:
988 case X86::AVX512_FsFLD0SS:
989 case X86::AVX512_FsFLD0F128:
990 case X86::AVX_SET0:
991 case X86::FsFLD0SD:
992 case X86::FsFLD0SS:
993 case X86::FsFLD0F128:
994 case X86::KSET0D:
995 case X86::KSET0Q:
996 case X86::KSET0W:
997 case X86::KSET1D:
998 case X86::KSET1Q:
999 case X86::KSET1W:
1000 case X86::MMX_SET0:
1001 case X86::MOV32ImmSExti8:
1002 case X86::MOV32r0:
1003 case X86::MOV32r1:
1004 case X86::MOV32r_1:
1005 case X86::MOV32ri64:
1006 case X86::MOV64ImmSExti8:
1007 case X86::V_SET0:
1008 case X86::V_SETALLONES:
1009 case X86::MOV16ri:
1010 case X86::MOV32ri:
1011 case X86::MOV64ri:
1012 case X86::MOV64ri32:
1013 case X86::MOV8ri:
1014 case X86::PTILEZEROV:
1015 return true;
1016
1017 case X86::MOV8rm:
1018 case X86::MOV8rm_NOREX:
1019 case X86::MOV16rm:
1020 case X86::MOV32rm:
1021 case X86::MOV64rm:
1022 case X86::MOVSSrm:
1023 case X86::MOVSSrm_alt:
1024 case X86::MOVSDrm:
1025 case X86::MOVSDrm_alt:
1026 case X86::MOVAPSrm:
1027 case X86::MOVUPSrm:
1028 case X86::MOVAPDrm:
1029 case X86::MOVUPDrm:
1030 case X86::MOVDQArm:
1031 case X86::MOVDQUrm:
1032 case X86::VMOVSSrm:
1033 case X86::VMOVSSrm_alt:
1034 case X86::VMOVSDrm:
1035 case X86::VMOVSDrm_alt:
1036 case X86::VMOVAPSrm:
1037 case X86::VMOVUPSrm:
1038 case X86::VMOVAPDrm:
1039 case X86::VMOVUPDrm:
1040 case X86::VMOVDQArm:
1041 case X86::VMOVDQUrm:
1042 case X86::VMOVAPSYrm:
1043 case X86::VMOVUPSYrm:
1044 case X86::VMOVAPDYrm:
1045 case X86::VMOVUPDYrm:
1046 case X86::VMOVDQAYrm:
1047 case X86::VMOVDQUYrm:
1048 case X86::MMX_MOVD64rm:
1049 case X86::MMX_MOVQ64rm:
1050 // AVX-512
1051 case X86::VMOVSSZrm:
1052 case X86::VMOVSSZrm_alt:
1053 case X86::VMOVSDZrm:
1054 case X86::VMOVSDZrm_alt:
1055 case X86::VMOVSHZrm:
1056 case X86::VMOVSHZrm_alt:
1057 case X86::VMOVAPDZ128rm:
1058 case X86::VMOVAPDZ256rm:
1059 case X86::VMOVAPDZrm:
1060 case X86::VMOVAPSZ128rm:
1061 case X86::VMOVAPSZ256rm:
1062 case X86::VMOVAPSZ128rm_NOVLX:
1063 case X86::VMOVAPSZ256rm_NOVLX:
1064 case X86::VMOVAPSZrm:
1065 case X86::VMOVDQA32Z128rm:
1066 case X86::VMOVDQA32Z256rm:
1067 case X86::VMOVDQA32Zrm:
1068 case X86::VMOVDQA64Z128rm:
1069 case X86::VMOVDQA64Z256rm:
1070 case X86::VMOVDQA64Zrm:
1071 case X86::VMOVDQU16Z128rm:
1072 case X86::VMOVDQU16Z256rm:
1073 case X86::VMOVDQU16Zrm:
1074 case X86::VMOVDQU32Z128rm:
1075 case X86::VMOVDQU32Z256rm:
1076 case X86::VMOVDQU32Zrm:
1077 case X86::VMOVDQU64Z128rm:
1078 case X86::VMOVDQU64Z256rm:
1079 case X86::VMOVDQU64Zrm:
1080 case X86::VMOVDQU8Z128rm:
1081 case X86::VMOVDQU8Z256rm:
1082 case X86::VMOVDQU8Zrm:
1083 case X86::VMOVUPDZ128rm:
1084 case X86::VMOVUPDZ256rm:
1085 case X86::VMOVUPDZrm:
1086 case X86::VMOVUPSZ128rm:
1087 case X86::VMOVUPSZ256rm:
1088 case X86::VMOVUPSZ128rm_NOVLX:
1089 case X86::VMOVUPSZ256rm_NOVLX:
1090 case X86::VMOVUPSZrm: {
1091 // Loads from constant pools are trivially rematerializable.
1092 if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
1093 MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
1094 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
1095 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
1096 MI.isDereferenceableInvariantLoad(AA)) {
1097 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
1098 if (BaseReg == 0 || BaseReg == X86::RIP)
1099 return true;
1100 // Allow re-materialization of PIC load.
1101 if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
1102 return false;
1103 const MachineFunction &MF = *MI.getParent()->getParent();
1104 const MachineRegisterInfo &MRI = MF.getRegInfo();
1105 return regIsPICBase(BaseReg, MRI);
1106 }
1107 return false;
1108 }
1109
1110 case X86::LEA32r:
1111 case X86::LEA64r: {
1112 if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
1113 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
1114 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
1115 !MI.getOperand(1 + X86::AddrDisp).isReg()) {
1116 // lea fi#, lea GV, etc. are all rematerializable.
1117 if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
1118 return true;
1119 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
1120 if (BaseReg == 0)
1121 return true;
1122 // Allow re-materialization of lea PICBase + x.
1123 const MachineFunction &MF = *MI.getParent()->getParent();
1124 const MachineRegisterInfo &MRI = MF.getRegInfo();
1125 return regIsPICBase(BaseReg, MRI);
1126 }
1127 return false;
1128 }
1129 }
1130}
1131
1132void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
1133 MachineBasicBlock::iterator I,
1134 Register DestReg, unsigned SubIdx,
1135 const MachineInstr &Orig,
1136 const TargetRegisterInfo &TRI) const {
1137 bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
1138 if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
1139 MachineBasicBlock::LQR_Dead) {
1140 // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
1141 // effects.
1142 int Value;
1143 switch (Orig.getOpcode()) {
1144 case X86::MOV32r0: Value = 0; break;
1145 case X86::MOV32r1: Value = 1; break;
1146 case X86::MOV32r_1: Value = -1; break;
1147 default:
1148 llvm_unreachable("Unexpected instruction!")::llvm::llvm_unreachable_internal("Unexpected instruction!", "llvm/lib/Target/X86/X86InstrInfo.cpp"
, 1148)
;
1149 }
1150
1151 const DebugLoc &DL = Orig.getDebugLoc();
1152 BuildMI(MBB, I, DL, get(X86::MOV32ri))
1153 .add(Orig.getOperand(0))
1154 .addImm(Value);
1155 } else {
1156 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1157 MBB.insert(I, MI);
1158 }
1159
1160 MachineInstr &NewMI = *std::prev(I);
1161 NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1162}
1163
1164/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
1165bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
1166 for (const MachineOperand &MO : MI.operands()) {
1167 if (MO.isReg() && MO.isDef() &&
1168 MO.getReg() == X86::EFLAGS && !MO.isDead()) {
1169 return true;
1170 }
1171 }
1172 return false;
1173}
1174
1175/// Return the shift count for a machine operand, truncated to the width the hardware actually uses.
1176inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
1177 unsigned ShiftAmtOperandIdx) {
1178 // The shift count is six bits with the REX.W prefix and five bits without.
1179 unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
1180 unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
1181 return Imm & ShiftCountMask;
1182}
1183
1184/// Check whether the given shift count can be represented
1185/// by a LEA instruction.
1186inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
1187 // Left shift instructions can be transformed into load-effective-address
1188 // instructions if we can encode them appropriately.
1189 // A LEA instruction utilizes a SIB byte to encode its scale factor.
1190 // The SIB.scale field is two bits wide which means that we can encode any
1191 // shift amount less than 4.
1192 return ShAmt < 4 && ShAmt > 0;
1193}
1194
1195bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
1196 unsigned Opc, bool AllowSP, Register &NewSrc,
1197 bool &isKill, MachineOperand &ImplicitOp,
1198 LiveVariables *LV, LiveIntervals *LIS) const {
1199 MachineFunction &MF = *MI.getParent()->getParent();
1200 const TargetRegisterClass *RC;
1201 if (AllowSP) {
1202 RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
1203 } else {
1204 RC = Opc != X86::LEA32r ?
1205 &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
1206 }
1207 Register SrcReg = Src.getReg();
1208 isKill = MI.killsRegister(SrcReg);
1209
1210 // For both LEA64 and LEA32 the register already has essentially the right
1211 // type (32-bit or 64-bit) we may just need to forbid SP.
1212 if (Opc != X86::LEA64_32r) {
1213 NewSrc = SrcReg;
1214 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1215
1216 if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
1217 return false;
1218
1219 return true;
1220 }
1221
1222 // This is for an LEA64_32r and incoming registers are 32-bit. One way or
1223 // another we need to add 64-bit registers to the final MI.
1224 if (SrcReg.isPhysical()) {
1225 ImplicitOp = Src;
1226 ImplicitOp.setImplicit();
1227
1228 NewSrc = getX86SubSuperRegister(SrcReg, 64);
1229 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1230 } else {
1231 // Virtual register of the wrong class, we have to create a temporary 64-bit
1232 // vreg to feed into the LEA.
1233 NewSrc = MF.getRegInfo().createVirtualRegister(RC);
1234 MachineInstr *Copy =
1235 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1236 .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
1237 .addReg(SrcReg, getKillRegState(isKill));
1238
1239 // Which is obviously going to be dead after we're done with it.
1240 isKill = true;
1241
1242 if (LV)
1243 LV->replaceKillInstruction(SrcReg, MI, *Copy);
1244
1245 if (LIS) {
1246 SlotIndex CopyIdx = LIS->InsertMachineInstrInMaps(*Copy);
1247 SlotIndex Idx = LIS->getInstructionIndex(MI);
1248 LiveInterval &LI = LIS->getInterval(SrcReg);
1249 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
1250 if (S->end.getBaseIndex() == Idx)
1251 S->end = CopyIdx.getRegSlot();
1252 }
1253 }
1254
1255 // We've set all the parameters without issue.
1256 return true;
1257}
1258
1259MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
1260 MachineInstr &MI,
1261 LiveVariables *LV,
1262 LiveIntervals *LIS,
1263 bool Is8BitOp) const {
1264 // We handle 8-bit adds and various 16-bit opcodes in the switch below.
1265 MachineBasicBlock &MBB = *MI.getParent();
1266 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
1267 assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
1268 *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
1269 "Unexpected type for LEA transform");
1270
1271 // TODO: For a 32-bit target, we need to adjust the LEA variables with
1272 // something like this:
1273 // Opcode = X86::LEA32r;
1274 // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1275 // OutRegLEA =
1276 // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
1277 // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
1278 if (!Subtarget.is64Bit())
1279 return nullptr;
1280
1281 unsigned Opcode = X86::LEA64_32r;
1282 Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1283 Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
1284 Register InRegLEA2;
1285
1286 // Build and insert into an implicit UNDEF value. This is OK because
1287 // we will be shifting and then extracting the lower 8/16-bits.
1288 // This has the potential to cause partial register stall. e.g.
1289 // movw (%rbp,%rcx,2), %dx
1290 // leal -65(%rdx), %esi
1291 // But testing has shown this *does* help performance in 64-bit mode (at
1292 // least on modern x86 machines).
1293 MachineBasicBlock::iterator MBBI = MI.getIterator();
1294 Register Dest = MI.getOperand(0).getReg();
1295 Register Src = MI.getOperand(1).getReg();
1296 Register Src2;
1297 bool IsDead = MI.getOperand(0).isDead();
1298 bool IsKill = MI.getOperand(1).isKill();
1299 unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
1300 assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
1301 MachineInstr *ImpDef =
1302 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
1303 MachineInstr *InsMI =
1304 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1305 .addReg(InRegLEA, RegState::Define, SubReg)
1306 .addReg(Src, getKillRegState(IsKill));
1307 MachineInstr *ImpDef2 = nullptr;
1308 MachineInstr *InsMI2 = nullptr;
1309
1310 MachineInstrBuilder MIB =
1311 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
1312 switch (MIOpc) {
1313 default: llvm_unreachable("Unreachable!");
1314 case X86::SHL8ri:
1315 case X86::SHL16ri: {
1316 unsigned ShAmt = MI.getOperand(2).getImm();
1317 MIB.addReg(0).addImm(1ULL << ShAmt)
1318 .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
1319 break;
1320 }
1321 case X86::INC8r:
1322 case X86::INC16r:
1323 addRegOffset(MIB, InRegLEA, true, 1);
1324 break;
1325 case X86::DEC8r:
1326 case X86::DEC16r:
1327 addRegOffset(MIB, InRegLEA, true, -1);
1328 break;
1329 case X86::ADD8ri:
1330 case X86::ADD8ri_DB:
1331 case X86::ADD16ri:
1332 case X86::ADD16ri8:
1333 case X86::ADD16ri_DB:
1334 case X86::ADD16ri8_DB:
1335 addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
1336 break;
1337 case X86::ADD8rr:
1338 case X86::ADD8rr_DB:
1339 case X86::ADD16rr:
1340 case X86::ADD16rr_DB: {
1341 Src2 = MI.getOperand(2).getReg();
1342 bool IsKill2 = MI.getOperand(2).isKill();
1343 assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
1344 if (Src == Src2) {
1345 // ADD8rr/ADD16rr killed %reg1028, %reg1028
1346 // just a single insert_subreg.
1347 addRegReg(MIB, InRegLEA, true, InRegLEA, false);
1348 } else {
1349 if (Subtarget.is64Bit())
1350 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1351 else
1352 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1353 // Build and insert into an implicit UNDEF value. This is OK because
1354 // we will be shifting and then extracting the lower 8/16-bits.
1355 ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
1356 InRegLEA2);
1357 InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
1358 .addReg(InRegLEA2, RegState::Define, SubReg)
1359 .addReg(Src2, getKillRegState(IsKill2));
1360 addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
1361 }
1362 if (LV && IsKill2 && InsMI2)
1363 LV->replaceKillInstruction(Src2, MI, *InsMI2);
1364 break;
1365 }
1366 }
1367
1368 MachineInstr *NewMI = MIB;
1369 MachineInstr *ExtMI =
1370 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1371 .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
1372 .addReg(OutRegLEA, RegState::Kill, SubReg);
1373
1374 if (LV) {
1375 // Update live variables.
1376 LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
1377 LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
1378 if (IsKill)
1379 LV->replaceKillInstruction(Src, MI, *InsMI);
1380 if (IsDead)
1381 LV->replaceKillInstruction(Dest, MI, *ExtMI);
1382 }
1383
1384 if (LIS) {
1385 LIS->InsertMachineInstrInMaps(*ImpDef);
1386 SlotIndex InsIdx = LIS->InsertMachineInstrInMaps(*InsMI);
1387 if (ImpDef2)
1388 LIS->InsertMachineInstrInMaps(*ImpDef2);
1389 SlotIndex Ins2Idx;
1390 if (InsMI2)
1391 Ins2Idx = LIS->InsertMachineInstrInMaps(*InsMI2);
1392 SlotIndex NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
1393 SlotIndex ExtIdx = LIS->InsertMachineInstrInMaps(*ExtMI);
1394 LIS->getInterval(InRegLEA);
1395 LIS->getInterval(OutRegLEA);
1396 if (InRegLEA2)
1397 LIS->getInterval(InRegLEA2);
1398
1399 // Move the use of Src up to InsMI.
1400 LiveInterval &SrcLI = LIS->getInterval(Src);
1401 LiveRange::Segment *SrcSeg = SrcLI.getSegmentContaining(NewIdx);
1402 if (SrcSeg->end == NewIdx.getRegSlot())
1403 SrcSeg->end = InsIdx.getRegSlot();
1404
1405 if (InsMI2) {
1406 // Move the use of Src2 up to InsMI2.
1407 LiveInterval &Src2LI = LIS->getInterval(Src2);
1408 LiveRange::Segment *Src2Seg = Src2LI.getSegmentContaining(NewIdx);
1409 if (Src2Seg->end == NewIdx.getRegSlot())
1410 Src2Seg->end = Ins2Idx.getRegSlot();
1411 }
1412
1413 // Move the definition of Dest down to ExtMI.
1414 LiveInterval &DestLI = LIS->getInterval(Dest);
1415 LiveRange::Segment *DestSeg =
1416 DestLI.getSegmentContaining(NewIdx.getRegSlot());
1417 assert(DestSeg->start == NewIdx.getRegSlot() &&
1418 DestSeg->valno->def == NewIdx.getRegSlot());
1419 DestSeg->start = ExtIdx.getRegSlot();
1420 DestSeg->valno->def = ExtIdx.getRegSlot();
1421 }
1422
1423 return ExtMI;
1424}
1425
1426/// This method must be implemented by targets that
1427/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
1428/// may be able to convert a two-address instruction into a true
1429/// three-address instruction on demand. This allows the X86 target (for
1430/// example) to convert ADD and SHL instructions into LEA instructions if they
1431/// would require register copies due to two-addressness.
1432///
1433/// This method returns a null pointer if the transformation cannot be
1434/// performed, otherwise it returns the new instruction.
1435///
1436MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
1437 LiveVariables *LV,
1438 LiveIntervals *LIS) const {
1439 // The following opcodes also set the condition code register(s). Only
1440 // convert them to equivalent lea if the condition code register def's
1441 // are dead!
1442 if (hasLiveCondCodeDef(MI))
1443 return nullptr;
1444
1445 MachineFunction &MF = *MI.getParent()->getParent();
1446 // All instructions input are two-addr instructions. Get the known operands.
1447 const MachineOperand &Dest = MI.getOperand(0);
1448 const MachineOperand &Src = MI.getOperand(1);
1449
1450 // Ideally, operations with undef should be folded before we get here, but we
1451 // can't guarantee it. Bail out because optimizing undefs is a waste of time.
1452 // Without this, we have to forward undef state to new register operands to
1453 // avoid machine verifier errors.
1454 if (Src.isUndef())
1455 return nullptr;
1456 if (MI.getNumOperands() > 2)
1457 if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
1458 return nullptr;
1459
1460 MachineInstr *NewMI = nullptr;
1461 Register SrcReg, SrcReg2;
1462 bool Is64Bit = Subtarget.is64Bit();
1463
1464 bool Is8BitOp = false;
1465 unsigned MIOpc = MI.getOpcode();
1466 switch (MIOpc) {
1467 default: llvm_unreachable("Unreachable!");
1468 case X86::SHL64ri: {
1469 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1470 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1471 if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
1472
1473 // LEA can't handle RSP.
1474 if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
1475 Src.getReg(), &X86::GR64_NOSPRegClass))
1476 return nullptr;
1477
1478 NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
1479 .add(Dest)
1480 .addReg(0)
1481 .addImm(1ULL << ShAmt)
1482 .add(Src)
1483 .addImm(0)
1484 .addReg(0);
1485 break;
1486 }
1487 case X86::SHL32ri: {
1488 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1489 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1490 if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
1491
1492 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1493
1494 // LEA can't handle ESP.
1495 bool isKill;
1496 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1497 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1498 ImplicitOp, LV, LIS))
1499 return nullptr;
1500
1501 MachineInstrBuilder MIB =
1502 BuildMI(MF, MI.getDebugLoc(), get(Opc))
1503 .add(Dest)
1504 .addReg(0)
1505 .addImm(1ULL << ShAmt)
1506 .addReg(SrcReg, getKillRegState(isKill))
1507 .addImm(0)
1508 .addReg(0);
1509 if (ImplicitOp.getReg() != 0)
1510 MIB.add(ImplicitOp);
1511 NewMI = MIB;
1512
1513 break;
1514 }
1515 case X86::SHL8ri:
1516 Is8BitOp = true;
1517 LLVM_FALLTHROUGH;
1518 case X86::SHL16ri: {
1519 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1520 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1521 if (!isTruncatedShiftCountForLEA(ShAmt))
1522 return nullptr;
1523 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1524 }
1525 case X86::INC64r:
1526 case X86::INC32r: {
1527 assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
1528 unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
1529 (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1530 bool isKill;
1531 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1532 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1533 ImplicitOp, LV, LIS))
1534 return nullptr;
1535
1536 MachineInstrBuilder MIB =
1537 BuildMI(MF, MI.getDebugLoc(), get(Opc))
1538 .add(Dest)
1539 .addReg(SrcReg, getKillRegState(isKill));
1540 if (ImplicitOp.getReg() != 0)
1541 MIB.add(ImplicitOp);
1542
1543 NewMI = addOffset(MIB, 1);
1544 break;
1545 }
1546 case X86::DEC64r:
1547 case X86::DEC32r: {
1548 assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
1549 unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
1550 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1551
1552 bool isKill;
1553 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1554 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1555 ImplicitOp, LV, LIS))
1556 return nullptr;
1557
1558 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1559 .add(Dest)
1560 .addReg(SrcReg, getKillRegState(isKill));
1561 if (ImplicitOp.getReg() != 0)
1562 MIB.add(ImplicitOp);
1563
1564 NewMI = addOffset(MIB, -1);
1565
1566 break;
1567 }
1568 case X86::DEC8r:
1569 case X86::INC8r:
1570 Is8BitOp = true;
1571 LLVM_FALLTHROUGH;
1572 case X86::DEC16r:
1573 case X86::INC16r:
1574 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1575 case X86::ADD64rr:
1576 case X86::ADD64rr_DB:
1577 case X86::ADD32rr:
1578 case X86::ADD32rr_DB: {
1579 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1580 unsigned Opc;
1581 if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
1582 Opc = X86::LEA64r;
1583 else
1584 Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1585
1586 const MachineOperand &Src2 = MI.getOperand(2);
1587 bool isKill2;
1588 MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
1589 if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
1590 ImplicitOp2, LV, LIS))
1591 return nullptr;
1592
1593 bool isKill;
1594 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1595 if (Src.getReg() == Src2.getReg()) {
1596 // Don't call classifyLEAReg a second time on the same register, in case
1597 // the first call inserted a COPY from Src2 and marked it as killed.
1598 isKill = isKill2;
1599 SrcReg = SrcReg2;
1600 } else {
1601 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1602 ImplicitOp, LV, LIS))
1603 return nullptr;
1604 }
1605
1606 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
1607 if (ImplicitOp.getReg() != 0)
1608 MIB.add(ImplicitOp);
1609 if (ImplicitOp2.getReg() != 0)
1610 MIB.add(ImplicitOp2);
1611
1612 NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
1613 if (LV && Src2.isKill())
1614 LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
1615 break;
1616 }
1617 case X86::ADD8rr:
1618 case X86::ADD8rr_DB:
1619 Is8BitOp = true;
1620 LLVM_FALLTHROUGH;
1621 case X86::ADD16rr:
1622 case X86::ADD16rr_DB:
1623 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1624 case X86::ADD64ri32:
1625 case X86::ADD64ri8:
1626 case X86::ADD64ri32_DB:
1627 case X86::ADD64ri8_DB:
1628 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1629 NewMI = addOffset(
1630 BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
1631 MI.getOperand(2));
1632 break;
1633 case X86::ADD32ri:
1634 case X86::ADD32ri8:
1635 case X86::ADD32ri_DB:
1636 case X86::ADD32ri8_DB: {
1637 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1638 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1639
1640 bool isKill;
1641 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1642 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1643 ImplicitOp, LV, LIS))
1644 return nullptr;
1645
1646 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1647 .add(Dest)
1648 .addReg(SrcReg, getKillRegState(isKill));
1649 if (ImplicitOp.getReg() != 0)
1650 MIB.add(ImplicitOp);
1651
1652 NewMI = addOffset(MIB, MI.getOperand(2));
1653 break;
1654 }
1655 case X86::ADD8ri:
1656 case X86::ADD8ri_DB:
1657 Is8BitOp = true;
1658 LLVM_FALLTHROUGH;
1659 case X86::ADD16ri:
1660 case X86::ADD16ri8:
1661 case X86::ADD16ri_DB:
1662 case X86::ADD16ri8_DB:
1663 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1664 case X86::SUB8ri:
1665 case X86::SUB16ri8:
1666 case X86::SUB16ri:
1667 /// FIXME: Support these similar to ADD8ri/ADD16ri*.
1668 return nullptr;
1669 case X86::SUB32ri8:
1670 case X86::SUB32ri: {
1671 if (!MI.getOperand(2).isImm())
1672 return nullptr;
1673 int64_t Imm = MI.getOperand(2).getImm();
1674 if (!isInt<32>(-Imm))
1675 return nullptr;
1676
1677 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1678 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1679
1680 bool isKill;
1681 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1682 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1683 ImplicitOp, LV, LIS))
1684 return nullptr;
1685
1686 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1687 .add(Dest)
1688 .addReg(SrcReg, getKillRegState(isKill));
1689 if (ImplicitOp.getReg() != 0)
1690 MIB.add(ImplicitOp);
1691
1692 NewMI = addOffset(MIB, -Imm);
1693 break;
1694 }
1695
1696 case X86::SUB64ri8:
1697 case X86::SUB64ri32: {
1698 if (!MI.getOperand(2).isImm())
1699 return nullptr;
1700 int64_t Imm = MI.getOperand(2).getImm();
1701 if (!isInt<32>(-Imm))
1702 return nullptr;
1703
1704 assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
1705
1706 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
1707 get(X86::LEA64r)).add(Dest).add(Src);
1708 NewMI = addOffset(MIB, -Imm);
1709 break;
1710 }
1711
1712 case X86::VMOVDQU8Z128rmk:
1713 case X86::VMOVDQU8Z256rmk:
1714 case X86::VMOVDQU8Zrmk:
1715 case X86::VMOVDQU16Z128rmk:
1716 case X86::VMOVDQU16Z256rmk:
1717 case X86::VMOVDQU16Zrmk:
1718 case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
1719 case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
1720 case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
1721 case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
1722 case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
1723 case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
1724 case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
1725 case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
1726 case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
1727 case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
1728 case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
1729 case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
1730 case X86::VBROADCASTSDZ256rmk:
1731 case X86::VBROADCASTSDZrmk:
1732 case X86::VBROADCASTSSZ128rmk:
1733 case X86::VBROADCASTSSZ256rmk:
1734 case X86::VBROADCASTSSZrmk:
1735 case X86::VPBROADCASTDZ128rmk:
1736 case X86::VPBROADCASTDZ256rmk:
1737 case X86::VPBROADCASTDZrmk:
1738 case X86::VPBROADCASTQZ128rmk:
1739 case X86::VPBROADCASTQZ256rmk:
1740 case X86::VPBROADCASTQZrmk: {
1741 unsigned Opc;
1742 switch (MIOpc) {
1743 default: llvm_unreachable("Unreachable!");
1744 case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
1745 case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
1746 case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
1747 case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
1748 case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
1749 case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
1750 case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
1751 case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
1752 case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
1753 case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
1754 case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
1755 case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
1756 case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
1757 case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
1758 case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
1759 case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
1760 case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
1761 case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
1762 case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
1763 case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
1764 case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
1765 case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
1766 case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
1767 case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
1768 case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
1769 case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
1770 case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
1771 case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
1772 case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
1773 case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
1774 case X86::VBROADCASTSDZ256rmk: Opc = X86::VBLENDMPDZ256rmbk; break;
1775 case X86::VBROADCASTSDZrmk: Opc = X86::VBLENDMPDZrmbk; break;
1776 case X86::VBROADCASTSSZ128rmk: Opc = X86::VBLENDMPSZ128rmbk; break;
1777 case X86::VBROADCASTSSZ256rmk: Opc = X86::VBLENDMPSZ256rmbk; break;
1778 case X86::VBROADCASTSSZrmk: Opc = X86::VBLENDMPSZrmbk; break;
1779 case X86::VPBROADCASTDZ128rmk: Opc = X86::VPBLENDMDZ128rmbk; break;
1780 case X86::VPBROADCASTDZ256rmk: Opc = X86::VPBLENDMDZ256rmbk; break;
1781 case X86::VPBROADCASTDZrmk: Opc = X86::VPBLENDMDZrmbk; break;
1782 case X86::VPBROADCASTQZ128rmk: Opc = X86::VPBLENDMQZ128rmbk; break;
1783 case X86::VPBROADCASTQZ256rmk: Opc = X86::VPBLENDMQZ256rmbk; break;
1784 case X86::VPBROADCASTQZrmk: Opc = X86::VPBLENDMQZrmbk; break;
1785 }
1786
1787 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1788 .add(Dest)
1789 .add(MI.getOperand(2))
1790 .add(Src)
1791 .add(MI.getOperand(3))
1792 .add(MI.getOperand(4))
1793 .add(MI.getOperand(5))
1794 .add(MI.getOperand(6))
1795 .add(MI.getOperand(7));
1796 break;
1797 }
1798
1799 case X86::VMOVDQU8Z128rrk:
1800 case X86::VMOVDQU8Z256rrk:
1801 case X86::VMOVDQU8Zrrk:
1802 case X86::VMOVDQU16Z128rrk:
1803 case X86::VMOVDQU16Z256rrk:
1804 case X86::VMOVDQU16Zrrk:
1805 case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
1806 case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
1807 case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
1808 case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
1809 case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
1810 case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
1811 case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
1812 case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
1813 case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
1814 case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
1815 case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
1816 case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
1817 unsigned Opc;
1818 switch (MIOpc) {
1819 default: llvm_unreachable("Unreachable!");
1820 case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
1821 case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
1822 case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
1823 case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
1824 case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
1825 case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
1826 case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
1827 case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
1828 case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
1829 case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
1830 case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
1831 case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
1832 case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
1833 case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
1834 case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
1835 case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
1836 case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
1837 case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
1838 case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
1839 case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
1840 case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
1841 case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
1842 case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
1843 case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
1844 case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
1845 case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
1846 case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
1847 case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
1848 case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
1849 case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
1850 }
1851
1852 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1853 .add(Dest)
1854 .add(MI.getOperand(2))
1855 .add(Src)
1856 .add(MI.getOperand(3));
1857 break;
1858 }
1859 }
1860
1861 if (!NewMI) return nullptr;
1862
1863 if (LV) { // Update live variables
1864 if (Src.isKill())
1865 LV->replaceKillInstruction(Src.getReg(), MI, *NewMI);
1866 if (Dest.isDead())
1867 LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
1868 }
1869
1870 MachineBasicBlock &MBB = *MI.getParent();
1871 MBB.insert(MI.getIterator(), NewMI); // Insert the new inst
1872
1873 if (LIS) {
1874 LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
1875 if (SrcReg)
1876 LIS->getInterval(SrcReg);
1877 if (SrcReg2)
1878 LIS->getInterval(SrcReg2);
1879 }
1880
1881 return NewMI;
1882}
1883
1884/// This determines which of three possible cases of a three source commute
1885/// the source indexes correspond to taking into account any mask operands.
1886 /// All cases prevent commuting a passthru operand. Returns -1 if the commute isn't
1887/// possible.
1888/// Case 0 - Possible to commute the first and second operands.
1889/// Case 1 - Possible to commute the first and third operands.
1890/// Case 2 - Possible to commute the second and third operands.
1891static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
1892 unsigned SrcOpIdx2) {
1893 // Put the lowest index to SrcOpIdx1 to simplify the checks below.
1894 if (SrcOpIdx1 > SrcOpIdx2)
1895 std::swap(SrcOpIdx1, SrcOpIdx2);
1896
1897 unsigned Op1 = 1, Op2 = 2, Op3 = 3;
1898 if (X86II::isKMasked(TSFlags)) {
1899 Op2++;
1900 Op3++;
1901 }
1902
1903 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
1904 return 0;
1905 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
1906 return 1;
1907 if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
1908 return 2;
1909 llvm_unreachable("Unknown three src commute case.");
1910}
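
The case numbering above depends on where the k-mask sits: for an unmasked instruction the three sources occupy operand indices 1, 2 and 3, while a k-masked form keeps the first source at index 1 but pushes the other two to 3 and 4 because the mask register takes index 2. A self-contained sketch of the same selection logic (a hypothetical helper written for illustration, not part of this file):

    #include <utility>

    // Mirrors getThreeSrcCommuteCase under the operand layout described above:
    // sources at 1/2/3, or at 1/3/4 when a k-mask occupies operand 2.
    static int threeSrcCommuteCase(bool KMasked, unsigned Idx1, unsigned Idx2) {
      if (Idx1 > Idx2)
        std::swap(Idx1, Idx2);
      unsigned Op1 = 1, Op2 = KMasked ? 3u : 2u, Op3 = KMasked ? 4u : 3u;
      if (Idx1 == Op1 && Idx2 == Op2) return 0; // commute 1st and 2nd sources
      if (Idx1 == Op1 && Idx2 == Op3) return 1; // commute 1st and 3rd sources
      if (Idx1 == Op2 && Idx2 == Op3) return 2; // commute 2nd and 3rd sources
      return -1;                                // no recognized pairing
    }
    // e.g. threeSrcCommuteCase(/*KMasked=*/true, 4, 3) == 2
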
1911
1912unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
1913 const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
1914 const X86InstrFMA3Group &FMA3Group) const {
1915
1916 unsigned Opc = MI.getOpcode();
1917
1918 // TODO: Commuting the 1st operand of FMA*_Int requires some additional
1919 // analysis. The commute optimization is legal only if all users of FMA*_Int
1920 // use only the lowest element of the FMA*_Int instruction. Such analysis is
1921 // not implemented yet. So, just return 0 in that case.
1922 // When such analysis is available this place will be the right place for
1923 // calling it.
1924 assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
1925 "Intrinsic instructions can't commute operand 1");
7. '?' condition is true
1926
1927 // Determine which case this commute is or if it can't be done.
1928 unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
1929 SrcOpIdx2);
1930 assert(Case < 3 && "Unexpected case number!");
8. '?' condition is true
1931
1932 // Define the FMA forms mapping array that helps to map input FMA form
1933 // to output FMA form to preserve the operation semantics after
1934 // commuting the operands.
1935 const unsigned Form132Index = 0;
1936 const unsigned Form213Index = 1;
1937 const unsigned Form231Index = 2;
1938 static const unsigned FormMapping[][3] = {
1939 // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
1940 // FMA132 A, C, b; ==> FMA231 C, A, b;
1941 // FMA213 B, A, c; ==> FMA213 A, B, c;
1942 // FMA231 C, A, b; ==> FMA132 A, C, b;
1943 { Form231Index, Form213Index, Form132Index },
1944 // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
1945 // FMA132 A, c, B; ==> FMA132 B, c, A;
1946 // FMA213 B, a, C; ==> FMA231 C, a, B;
1947 // FMA231 C, a, B; ==> FMA213 B, a, C;
1948 { Form132Index, Form231Index, Form213Index },
1949 // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
1950 // FMA132 a, C, B; ==> FMA213 a, B, C;
1951 // FMA213 b, A, C; ==> FMA132 b, C, A;
1952 // FMA231 c, A, B; ==> FMA231 c, B, A;
1953 { Form213Index, Form132Index, Form231Index }
1954 };
1955
1956 unsigned FMAForms[3];
1957 FMAForms[0] = FMA3Group.get132Opcode();
1958 FMAForms[1] = FMA3Group.get213Opcode();
1959 FMAForms[2] = FMA3Group.get231Opcode();
1960 unsigned FormIndex;
1961 for (FormIndex = 0; FormIndex < 3; FormIndex++)
9. Loop condition is true. Entering loop body
12. Loop condition is true. Entering loop body
15. Loop condition is true. Entering loop body
18. The value 3 is assigned to 'FormIndex'
19. Loop condition is false. Execution continues on line 1966
1962 if (Opc == FMAForms[FormIndex])
10. Assuming the condition is false
11. Taking false branch
13. Assuming the condition is false
14. Taking false branch
16. Assuming the condition is false
17. Taking false branch
1963 break;
1964
1965 // Everything is ready, just adjust the FMA opcode and return it.
1966 FormIndex = FormMapping[Case][FormIndex];
20. Assigned value is garbage or undefined
1967 return FMAForms[FormIndex];
1968}
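
This is the function the analyzer is walking: if Opc matches none of FMAForms[0..2], the loop at line 1961 falls through with FormIndex == 3 (step 18), and line 1966 then reads FormMapping[Case][3], one element past the end of a three-entry row, which is what step 20 ("Assigned value is garbage or undefined") reports. In practice the FMA3 group lookup should guarantee that Opc is one of the 132/213/231 forms, so the path may be a false positive, but the indexing itself is unguarded. A minimal sketch of a bounds-checked variant, assuming the same locals (Opc, FMAForms, FormMapping, Case) and that returning 0 is an acceptable "cannot commute" result, as the comment at lines 1918-1921 suggests; this is an illustration, not the upstream fix:

    // Bounds-checked form of the lookup at lines 1960-1967 (sketch).
    unsigned FormIndex = 0;
    while (FormIndex < 3 && Opc != FMAForms[FormIndex])
      ++FormIndex;
    if (FormIndex == 3) // Opc is none of the 132/213/231 forms.
      return 0;         // Never reads FormMapping[Case][3].
    return FMAForms[FormMapping[Case][FormIndex]];
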
1969
1970static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
1971 unsigned SrcOpIdx2) {
1972 // Determine which case this commute is or if it can't be done.
1973 unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
1974 SrcOpIdx2);
1975 assert(Case < 3 && "Unexpected case value!");
1976
1977 // For each case we need to swap two pairs of bits in the final immediate.
1978 static const uint8_t SwapMasks[3][4] = {
1979 { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
1980 { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
1981 { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
1982 };
1983
1984 uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
1985 // Clear out the bits we are swapping.
1986 uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
1987 SwapMasks[Case][2] | SwapMasks[Case][3]);
1988 // If the immediate had a bit of the pair set, then set the opposite bit.
1989 if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
1990 if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
1991 if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
1992 if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
1993 MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
1994}
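
The bit pairs above follow from the VPTERNLOG encoding: immediate bit (4*src1 + 2*src2 + src3) holds the result for that input combination, so exchanging sources 1 and 2 exchanges table entries 2<->4 (binary 010<->100) and 3<->5 (011<->101) while entries 0, 1, 6 and 7 stay put; the other two cases permute the index bits analogously. A small sketch applying the case-0 masks to a concrete immediate (hypothetical helper, illustration only): 0xCA encodes "a ? b : c" and becomes 0xE2, i.e. "b ? a : c".

    #include <cstdint>

    static uint8_t swapTernlogSrc12(uint8_t Imm) { // SwapMasks[0] applied by hand
      uint8_t NewImm = Imm & ~uint8_t(0x04 | 0x10 | 0x08 | 0x20);
      if (Imm & 0x04) NewImm |= 0x10;   // bit 2 -> bit 4
      if (Imm & 0x10) NewImm |= 0x04;   // bit 4 -> bit 2
      if (Imm & 0x08) NewImm |= 0x20;   // bit 3 -> bit 5
      if (Imm & 0x20) NewImm |= 0x08;   // bit 5 -> bit 3
      return NewImm;
    }
    // swapTernlogSrc12(0xCA) == 0xE2
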
1995
1996// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
1997// commuted.
1998static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
1999#define VPERM_CASES(Suffix) \
2000 case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
2001 case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
2002 case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
2003 case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
2004 case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
2005 case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
2006 case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
2007 case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
2008 case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
2009 case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
2010 case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
2011 case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
2012
2013#define VPERM_CASES_BROADCAST(Suffix) \
2014 VPERM_CASES(Suffix) \
2015 case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
2016 case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
2017 case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
2018 case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
2019 case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
2020 case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
2021
2022 switch (Opcode) {
2023 default: return false;
2024 VPERM_CASES(B)
2025 VPERM_CASES_BROADCAST(D)
2026 VPERM_CASES_BROADCAST(PD)
2027 VPERM_CASES_BROADCAST(PS)
2028 VPERM_CASES_BROADCAST(Q)
2029 VPERM_CASES(W)
2030 return true;
2031 }
2032#undef VPERM_CASES_BROADCAST
2033#undef VPERM_CASES
2034}
2035
2036// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
2037// from the I opcode to the T opcode and vice versa.
2038static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
2039#define VPERM_CASES(Orig, New) \
2040 case X86::Orig##128rr: return X86::New##128rr; \
2041 case X86::Orig##128rrkz: return X86::New##128rrkz; \
2042 case X86::Orig##128rm: return X86::New##128rm; \
2043 case X86::Orig##128rmkz: return X86::New##128rmkz; \
2044 case X86::Orig##256rr: return X86::New##256rr; \
2045 case X86::Orig##256rrkz: return X86::New##256rrkz; \
2046 case X86::Orig##256rm: return X86::New##256rm; \
2047 case X86::Orig##256rmkz: return X86::New##256rmkz; \
2048 case X86::Orig##rr: return X86::New##rr; \
2049 case X86::Orig##rrkz: return X86::New##rrkz; \
2050 case X86::Orig##rm: return X86::New##rm; \
2051 case X86::Orig##rmkz: return X86::New##rmkz;
2052
2053#define VPERM_CASES_BROADCAST(Orig, New) \
2054 VPERM_CASES(Orig, New) \
2055 case X86::Orig##128rmb: return X86::New##128rmb; \
2056 case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
2057 case X86::Orig##256rmb: return X86::New##256rmb; \
2058 case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
2059 case X86::Orig##rmb: return X86::New##rmb; \
2060 case X86::Orig##rmbkz: return X86::New##rmbkz;
2061
2062 switch (Opcode) {
2063 VPERM_CASES(VPERMI2B, VPERMT2B)
2064 VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
2065 VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
2066 VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
2067 VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
2068 VPERM_CASES(VPERMI2W, VPERMT2W)
2069 VPERM_CASES(VPERMT2B, VPERMI2B)
2070 VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
2071 VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
2072 VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
2073 VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
2074 VPERM_CASES(VPERMT2W, VPERMI2W)
2075 }
2076
2077 llvm_unreachable("Unreachable!");
2078#undef VPERM_CASES_BROADCAST
2079#undef VPERM_CASES
2080}
2081
2082MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2083 unsigned OpIdx1,
2084 unsigned OpIdx2) const {
2085 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
2086 if (NewMI)
2087 return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
2088 return MI;
2089 };
2090
2091 switch (MI.getOpcode()) {
1. Control jumps to the 'default' case at line 2408
2092 case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
2093 case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
2094 case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
2095 case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
2096 case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
2097 case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
2098 unsigned Opc;
2099 unsigned Size;
2100 switch (MI.getOpcode()) {
2101 default: llvm_unreachable("Unreachable!");
2102 case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
2103 case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
2104 case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
2105 case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
2106 case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
2107 case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
2108 }
2109 unsigned Amt = MI.getOperand(3).getImm();
2110 auto &WorkingMI = cloneIfNew(MI);
2111 WorkingMI.setDesc(get(Opc));
2112 WorkingMI.getOperand(3).setImm(Size - Amt);
2113 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2114 OpIdx1, OpIdx2);
2115 }
2116 case X86::PFSUBrr:
2117 case X86::PFSUBRrr: {
2118 // PFSUB x, y: x = x - y
2119 // PFSUBR x, y: x = y - x
2120 unsigned Opc =
2121 (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
2122 auto &WorkingMI = cloneIfNew(MI);
2123 WorkingMI.setDesc(get(Opc));
2124 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2125 OpIdx1, OpIdx2);
2126 }
2127 case X86::BLENDPDrri:
2128 case X86::BLENDPSrri:
2129 case X86::VBLENDPDrri:
2130 case X86::VBLENDPSrri:
2131 // If we're optimizing for size, try to use MOVSD/MOVSS.
2132 if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
2133 unsigned Mask, Opc;
2134 switch (MI.getOpcode()) {
2135 default: llvm_unreachable("Unreachable!");
2136 case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break;
2137 case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break;
2138 case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
2139 case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
2140 }
2141 if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
2142 auto &WorkingMI = cloneIfNew(MI);
2143 WorkingMI.setDesc(get(Opc));
2144 WorkingMI.RemoveOperand(3);
2145 return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
2146 /*NewMI=*/false,
2147 OpIdx1, OpIdx2);
2148 }
2149 }
2150 LLVM_FALLTHROUGH;
2151 case X86::PBLENDWrri:
2152 case X86::VBLENDPDYrri:
2153 case X86::VBLENDPSYrri:
2154 case X86::VPBLENDDrri:
2155 case X86::VPBLENDWrri:
2156 case X86::VPBLENDDYrri:
2157 case X86::VPBLENDWYrri:{
2158 int8_t Mask;
2159 switch (MI.getOpcode()) {
2160 default: llvm_unreachable("Unreachable!");
2161 case X86::BLENDPDrri: Mask = (int8_t)0x03; break;
2162 case X86::BLENDPSrri: Mask = (int8_t)0x0F; break;
2163 case X86::PBLENDWrri: Mask = (int8_t)0xFF; break;
2164 case X86::VBLENDPDrri: Mask = (int8_t)0x03; break;
2165 case X86::VBLENDPSrri: Mask = (int8_t)0x0F; break;
2166 case X86::VBLENDPDYrri: Mask = (int8_t)0x0F; break;
2167 case X86::VBLENDPSYrri: Mask = (int8_t)0xFF; break;
2168 case X86::VPBLENDDrri: Mask = (int8_t)0x0F; break;
2169 case X86::VPBLENDWrri: Mask = (int8_t)0xFF; break;
2170 case X86::VPBLENDDYrri: Mask = (int8_t)0xFF; break;
2171 case X86::VPBLENDWYrri: Mask = (int8_t)0xFF; break;
2172 }
2173 // Only the least significant bits of Imm are used.
2174 // Using int8_t to ensure it will be sign extended to the int64_t that
2175 // setImm takes in order to match isel behavior.
2176 int8_t Imm = MI.getOperand(3).getImm() & Mask;
2177 auto &WorkingMI = cloneIfNew(MI);
2178 WorkingMI.getOperand(3).setImm(Mask ^ Imm);
2179 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2180 OpIdx1, OpIdx2);
2181 }
2182 case X86::INSERTPSrr:
2183 case X86::VINSERTPSrr:
2184 case X86::VINSERTPSZrr: {
2185 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
2186 unsigned ZMask = Imm & 15;
2187 unsigned DstIdx = (Imm >> 4) & 3;
2188 unsigned SrcIdx = (Imm >> 6) & 3;
2189
2190 // We can commute insertps if we zero 2 of the elements, the insertion is
2191 // "inline" and we don't override the insertion with a zero.
2192 if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
2193 countPopulation(ZMask) == 2) {
2194 unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
2195 assert(AltIdx < 4 && "Illegal insertion index");
2196 unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
2197 auto &WorkingMI = cloneIfNew(MI);
2198 WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm);
2199 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2200 OpIdx1, OpIdx2);
2201 }
2202 return nullptr;
2203 }
2204 case X86::MOVSDrr:
2205 case X86::MOVSSrr:
2206 case X86::VMOVSDrr:
2207 case X86::VMOVSSrr:{
2208 // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
2209 if (Subtarget.hasSSE41()) {
2210 unsigned Mask, Opc;
2211 switch (MI.getOpcode()) {
2212 default: llvm_unreachable("Unreachable!");
2213 case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
2214 case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
2215 case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
2216 case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
2217 }
2218
2219 auto &WorkingMI = cloneIfNew(MI);
2220 WorkingMI.setDesc(get(Opc));
2221 WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
2222 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2223 OpIdx1, OpIdx2);
2224 }
2225
2226 // Convert to SHUFPD.
2227 assert(MI.getOpcode() == X86::MOVSDrr &&
2228 "Can only commute MOVSDrr without SSE4.1");
2229
2230 auto &WorkingMI = cloneIfNew(MI);
2231 WorkingMI.setDesc(get(X86::SHUFPDrri));
2232 WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
2233 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2234 OpIdx1, OpIdx2);
2235 }
2236 case X86::SHUFPDrri: {
2237 // Commute to MOVSD.
2238 assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
2239 auto &WorkingMI = cloneIfNew(MI);
2240 WorkingMI.setDesc(get(X86::MOVSDrr));
2241 WorkingMI.RemoveOperand(3);
2242 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2243 OpIdx1, OpIdx2);
2244 }
2245 case X86::PCLMULQDQrr:
2246 case X86::VPCLMULQDQrr:
2247 case X86::VPCLMULQDQYrr:
2248 case X86::VPCLMULQDQZrr:
2249 case X86::VPCLMULQDQZ128rr:
2250 case X86::VPCLMULQDQZ256rr: {
2251 // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
2252 // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
2253 unsigned Imm = MI.getOperand(3).getImm();
2254 unsigned Src1Hi = Imm & 0x01;
2255 unsigned Src2Hi = Imm & 0x10;
2256 auto &WorkingMI = cloneIfNew(MI);
2257 WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
2258 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2259 OpIdx1, OpIdx2);
2260 }
2261 case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
2262 case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
2263 case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
2264 case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
2265 case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
2266 case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
2267 case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
2268 case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
2269 case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
2270 case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
2271 case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
2272 case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
2273 case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
2274 case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
2275 case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
2276 case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
2277 case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
2278 case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
2279 case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
2280 case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
2281 case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
2282 case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
2283 case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
2284 case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
2285 // Flip comparison mode immediate (if necessary).
2286 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
2287 Imm = X86::getSwappedVPCMPImm(Imm);
2288 auto &WorkingMI = cloneIfNew(MI);
2289 WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
2290 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2291 OpIdx1, OpIdx2);
2292 }
2293 case X86::VPCOMBri: case X86::VPCOMUBri:
2294 case X86::VPCOMDri: case X86::VPCOMUDri:
2295 case X86::VPCOMQri: case X86::VPCOMUQri:
2296 case X86::VPCOMWri: case X86::VPCOMUWri: {
2297 // Flip comparison mode immediate (if necessary).
2298 unsigned Imm = MI.getOperand(3).getImm() & 0x7;
2299 Imm = X86::getSwappedVPCOMImm(Imm);
2300 auto &WorkingMI = cloneIfNew(MI);
2301 WorkingMI.getOperand(3).setImm(Imm);
2302 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2303 OpIdx1, OpIdx2);
2304 }
2305 case X86::VCMPSDZrr:
2306 case X86::VCMPSSZrr:
2307 case X86::VCMPPDZrri:
2308 case X86::VCMPPSZrri:
2309 case X86::VCMPSHZrr:
2310 case X86::VCMPPHZrri:
2311 case X86::VCMPPHZ128rri:
2312 case X86::VCMPPHZ256rri:
2313 case X86::VCMPPDZ128rri:
2314 case X86::VCMPPSZ128rri:
2315 case X86::VCMPPDZ256rri:
2316 case X86::VCMPPSZ256rri:
2317 case X86::VCMPPDZrrik:
2318 case X86::VCMPPSZrrik:
2319 case X86::VCMPPDZ128rrik:
2320 case X86::VCMPPSZ128rrik:
2321 case X86::VCMPPDZ256rrik:
2322 case X86::VCMPPSZ256rrik: {
2323 unsigned Imm =
2324 MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f;
2325 Imm = X86::getSwappedVCMPImm(Imm);
2326 auto &WorkingMI = cloneIfNew(MI);
2327 WorkingMI.getOperand(MI.getNumExplicitOperands() - 1).setImm(Imm);
2328 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2329 OpIdx1, OpIdx2);
2330 }
2331 case X86::VPERM2F128rr:
2332 case X86::VPERM2I128rr: {
2333 // Flip permute source immediate.
2334 // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
2335 // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
2336 int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
2337 auto &WorkingMI = cloneIfNew(MI);
2338 WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
2339 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2340 OpIdx1, OpIdx2);
2341 }
2342 case X86::MOVHLPSrr:
2343 case X86::UNPCKHPDrr:
2344 case X86::VMOVHLPSrr:
2345 case X86::VUNPCKHPDrr:
2346 case X86::VMOVHLPSZrr:
2347 case X86::VUNPCKHPDZ128rr: {
2348 assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
2349
2350 unsigned Opc = MI.getOpcode();
2351 switch (Opc) {
2352 default: llvm_unreachable("Unreachable!");
2353 case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
2354 case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
2355 case X86::VMOVHLPSrr: Opc = X86::VUNPCKHPDrr; break;
2356 case X86::VUNPCKHPDrr: Opc = X86::VMOVHLPSrr; break;
2357 case X86::VMOVHLPSZrr: Opc = X86::VUNPCKHPDZ128rr; break;
2358 case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr; break;
2359 }
2360 auto &WorkingMI = cloneIfNew(MI);
2361 WorkingMI.setDesc(get(Opc));
2362 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2363 OpIdx1, OpIdx2);
2364 }
2365 case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: {
2366 auto &WorkingMI = cloneIfNew(MI);
2367 unsigned OpNo = MI.getDesc().getNumOperands() - 1;
2368 X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
2369 WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
2370 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2371 OpIdx1, OpIdx2);
2372 }
2373 case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
2374 case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
2375 case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
2376 case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
2377 case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
2378 case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
2379 case X86::VPTERNLOGDZrrik:
2380 case X86::VPTERNLOGDZ128rrik:
2381 case X86::VPTERNLOGDZ256rrik:
2382 case X86::VPTERNLOGQZrrik:
2383 case X86::VPTERNLOGQZ128rrik:
2384 case X86::VPTERNLOGQZ256rrik:
2385 case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
2386 case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
2387 case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
2388 case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
2389 case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
2390 case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
2391 case X86::VPTERNLOGDZ128rmbi:
2392 case X86::VPTERNLOGDZ256rmbi:
2393 case X86::VPTERNLOGDZrmbi:
2394 case X86::VPTERNLOGQZ128rmbi:
2395 case X86::VPTERNLOGQZ256rmbi:
2396 case X86::VPTERNLOGQZrmbi:
2397 case X86::VPTERNLOGDZ128rmbikz:
2398 case X86::VPTERNLOGDZ256rmbikz:
2399 case X86::VPTERNLOGDZrmbikz:
2400 case X86::VPTERNLOGQZ128rmbikz:
2401 case X86::VPTERNLOGQZ256rmbikz:
2402 case X86::VPTERNLOGQZrmbikz: {
2403 auto &WorkingMI = cloneIfNew(MI);
2404 commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2);
2405 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2406 OpIdx1, OpIdx2);
2407 }
2408 default: {
2409 if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
2. Assuming the condition is false
3. Taking false branch
2410 unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
2411 auto &WorkingMI = cloneIfNew(MI);
2412 WorkingMI.setDesc(get(Opc));
2413 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2414 OpIdx1, OpIdx2);
2415 }
2416
2417 const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
2418 MI.getDesc().TSFlags);
2419 if (FMA3Group) {
4. Assuming 'FMA3Group' is non-null
5. Taking true branch
2420 unsigned Opc =
2421 getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
6. Calling 'X86InstrInfo::getFMA3OpcodeToCommuteOperands'
2422 auto &WorkingMI = cloneIfNew(MI);
2423 WorkingMI.setDesc(get(Opc));
2424 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2425 OpIdx1, OpIdx2);
2426 }
2427
2428 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2429 }
2430 }
2431}
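
The SHRD/SHLD commute handled at lines 2092-2114 rests on the identity that a double shift right of B filled from C by I equals a double shift left of C filled from B by (Size - I). A quick self-contained check on 32-bit values (reference models of my own, valid for 0 < I < 32; not code from this file):

    #include <cassert>
    #include <cstdint>

    static uint32_t shrd32(uint32_t Dst, uint32_t Src, unsigned I) {
      return (Dst >> I) | (Src << (32 - I)); // SHRD: shift Dst right, fill from Src
    }
    static uint32_t shld32(uint32_t Dst, uint32_t Src, unsigned I) {
      return (Dst << I) | (Src >> (32 - I)); // SHLD: shift Dst left, fill from Src
    }

    int main() {
      uint32_t B = 0x12345678, C = 0x9ABCDEF0;
      for (unsigned I = 1; I < 32; ++I)
        assert(shrd32(B, C, I) == shld32(C, B, 32 - I)); // the commuted form
    }
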
2432
2433bool
2434X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
2435 unsigned &SrcOpIdx1,
2436 unsigned &SrcOpIdx2,
2437 bool IsIntrinsic) const {
2438 uint64_t TSFlags = MI.getDesc().TSFlags;
2439
2440 unsigned FirstCommutableVecOp = 1;
2441 unsigned LastCommutableVecOp = 3;
2442 unsigned KMaskOp = -1U;
2443 if (X86II::isKMasked(TSFlags)) {
2444 // For k-zero-masked operations it is Ok to commute the first vector
2445 // operand. Unless this is an intrinsic instruction.
2446 // For regular k-masked operations a conservative choice is done as the
2447 // elements of the first vector operand, for which the corresponding bit
2448 // in the k-mask operand is set to 0, are copied to the result of the
2449 // instruction.
2450 // TODO/FIXME: The commute still may be legal if it is known that the
2451 // k-mask operand is set to either all ones or all zeroes.
2452 // It is also Ok to commute the 1st operand if all users of MI use only
2453 // the elements enabled by the k-mask operand. For example,
2454 // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
2455 // : v1[i];
2456 // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
2457 // // Ok, to commute v1 in FMADD213PSZrk.
2458
2459 // The k-mask operand has index = 2 for masked and zero-masked operations.
2460 KMaskOp = 2;
2461
2462 // The operand with index = 1 is used as a source for those elements for
2463 // which the corresponding bit in the k-mask is set to 0.
2464 if (X86II::isKMergeMasked(TSFlags) || IsIntrinsic)
2465 FirstCommutableVecOp = 3;
2466
2467 LastCommutableVecOp++;
2468 } else if (IsIntrinsic) {
2469 // Commuting the first operand of an intrinsic instruction isn't possible
2470 // unless we can prove that only the lowest element of the result is used.
2471 FirstCommutableVecOp = 2;
2472 }
2473
2474 if (isMem(MI, LastCommutableVecOp))
2475 LastCommutableVecOp--;
2476
2477 // Only the first RegOpsNum operands are commutable.
2478 // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
2479 // that the operand is not specified/fixed.
2480 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2481 (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
2482 SrcOpIdx1 == KMaskOp))
2483 return false;
2484 if (SrcOpIdx2 != CommuteAnyOperandIndex &&
2485 (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
2486 SrcOpIdx2 == KMaskOp))
2487 return false;
2488
2489 // Look for two different register operands assumed to be commutable
2490 // regardless of the FMA opcode. The FMA opcode is adjusted later.
2491 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2492 SrcOpIdx2 == CommuteAnyOperandIndex) {
2493 unsigned CommutableOpIdx2 = SrcOpIdx2;
2494
2495 // At least one of operands to be commuted is not specified and
2496 // this method is free to choose appropriate commutable operands.
2497 if (SrcOpIdx1 == SrcOpIdx2)
2498 // Both of operands are not fixed. By default set one of commutable
2499 // operands to the last register operand of the instruction.
2500 CommutableOpIdx2 = LastCommutableVecOp;
2501 else if (SrcOpIdx2 == CommuteAnyOperandIndex)
2502 // Only one of operands is not fixed.
2503 CommutableOpIdx2 = SrcOpIdx1;
2504
2505 // CommutableOpIdx2 is well defined now. Let's choose another commutable
2506 // operand and assign its index to CommutableOpIdx1.
2507 Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
2508
2509 unsigned CommutableOpIdx1;
2510 for (CommutableOpIdx1 = LastCommutableVecOp;
2511 CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
2512 // Just ignore and skip the k-mask operand.
2513 if (CommutableOpIdx1 == KMaskOp)
2514 continue;
2515
2516 // The commuted operands must have different registers.
2517 // Otherwise, the commute transformation does not change anything and
2518 // is useless then.
2519 if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
2520 break;
2521 }
2522
2523 // No appropriate commutable operands were found.
2524 if (CommutableOpIdx1 < FirstCommutableVecOp)
2525 return false;
2526
2527 // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
2528 // to return those values.
2529 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2530 CommutableOpIdx1, CommutableOpIdx2))
2531 return false;
2532 }
2533
2534 return true;
2535}
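
Summarizing the index bookkeeping above for the register forms: without a mask the commutable vector operands are 1 through 3; with a k-mask the mask takes index 2 and the last commutable operand becomes 4, and for merge-masked (or intrinsic) forms the first commutable operand moves up to 3 because operand 1 supplies the preserved (or observable low) elements. A compact sketch of that bookkeeping, ignoring the memory-operand adjustment (illustration only):

    struct CommutableRange { unsigned First, Last, KMaskOp; };

    static CommutableRange commutableVecOps(bool KMasked, bool MergeMasked,
                                            bool IsIntrinsic) {
      CommutableRange R{1, 3, ~0u};
      if (KMasked) {
        R.KMaskOp = 2;                 // mask sits between the def and the sources
        R.Last = 4;                    // remaining sources shift up by one
        if (MergeMasked || IsIntrinsic)
          R.First = 3;                 // operand 1 feeds the preserved elements
      } else if (IsIntrinsic) {
        R.First = 2;                   // low element of operand 1 is observable
      }
      return R;
    }
    // zero-masked FMA:  {First = 1, Last = 4, KMaskOp = 2}
    // merge-masked FMA: {First = 3, Last = 4, KMaskOp = 2}
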
2536
2537bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2538 unsigned &SrcOpIdx1,
2539 unsigned &SrcOpIdx2) const {
2540 const MCInstrDesc &Desc = MI.getDesc();
2541 if (!Desc.isCommutable())
2542 return false;
2543
2544 switch (MI.getOpcode()) {
2545 case X86::CMPSDrr:
2546 case X86::CMPSSrr:
2547 case X86::CMPPDrri:
2548 case X86::CMPPSrri:
2549 case X86::VCMPSDrr:
2550 case X86::VCMPSSrr:
2551 case X86::VCMPPDrri:
2552 case X86::VCMPPSrri:
2553 case X86::VCMPPDYrri:
2554 case X86::VCMPPSYrri:
2555 case X86::VCMPSDZrr:
2556 case X86::VCMPSSZrr:
2557 case X86::VCMPPDZrri:
2558 case X86::VCMPPSZrri:
2559 case X86::VCMPSHZrr:
2560 case X86::VCMPPHZrri:
2561 case X86::VCMPPHZ128rri:
2562 case X86::VCMPPHZ256rri:
2563 case X86::VCMPPDZ128rri:
2564 case X86::VCMPPSZ128rri:
2565 case X86::VCMPPDZ256rri:
2566 case X86::VCMPPSZ256rri:
2567 case X86::VCMPPDZrrik:
2568 case X86::VCMPPSZrrik:
2569 case X86::VCMPPDZ128rrik:
2570 case X86::VCMPPSZ128rrik:
2571 case X86::VCMPPDZ256rrik:
2572 case X86::VCMPPSZ256rrik: {
2573 unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;
2574
2575 // Float comparison can be safely commuted for
2576 // Ordered/Unordered/Equal/NotEqual tests
2577 unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
2578 switch (Imm) {
2579 default:
2580 // EVEX versions can be commuted.
2581 if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
2582 break;
2583 return false;
2584 case 0x00: // EQUAL
2585 case 0x03: // UNORDERED
2586 case 0x04: // NOT EQUAL
2587 case 0x07: // ORDERED
2588 break;
2589 }
2590
2591 // The indices of the commutable operands are 1 and 2 (or 2 and 3
2592 // when masked).
2593 // Assign them to the returned operand indices here.
2594 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
2595 2 + OpOffset);
2596 }
2597 case X86::MOVSSrr:
2598 // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
2599 // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
2600 // AVX implies sse4.1.
2601 if (Subtarget.hasSSE41())
2602 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2603 return false;
2604 case X86::SHUFPDrri:
2605 // We can commute this to MOVSD.
2606 if (MI.getOperand(3).getImm() == 0x02)
2607 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2608 return false;
2609 case X86::MOVHLPSrr:
2610 case X86::UNPCKHPDrr:
2611 case X86::VMOVHLPSrr:
2612 case X86::VUNPCKHPDrr:
2613 case X86::VMOVHLPSZrr:
2614 case X86::VUNPCKHPDZ128rr:
2615 if (Subtarget.hasSSE2())
2616 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2617 return false;
2618 case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
2619 case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
2620 case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
2621 case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
2622 case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
2623 case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
2624 case X86::VPTERNLOGDZrrik:
2625 case X86::VPTERNLOGDZ128rrik:
2626 case X86::VPTERNLOGDZ256rrik:
2627 case X86::VPTERNLOGQZrrik:
2628 case X86::VPTERNLOGQZ128rrik:
2629 case X86::VPTERNLOGQZ256rrik:
2630 case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
2631 case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
2632 case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
2633 case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
2634 case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
2635 case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
2636 case X86::VPTERNLOGDZ128rmbi:
2637 case X86::VPTERNLOGDZ256rmbi:
2638 case X86::VPTERNLOGDZrmbi:
2639 case X86::VPTERNLOGQZ128rmbi:
2640 case X86::VPTERNLOGQZ256rmbi:
2641 case X86::VPTERNLOGQZrmbi:
2642 case X86::VPTERNLOGDZ128rmbikz:
2643 case X86::VPTERNLOGDZ256rmbikz:
2644 case X86::VPTERNLOGDZrmbikz:
2645 case X86::VPTERNLOGQZ128rmbikz:
2646 case X86::VPTERNLOGQZ256rmbikz:
2647 case X86::VPTERNLOGQZrmbikz:
2648 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2649 case X86::VPDPWSSDYrr:
2650 case X86::VPDPWSSDrr:
2651 case X86::VPDPWSSDSYrr:
2652 case X86::VPDPWSSDSrr:
2653 case X86::VPDPWSSDZ128r:
2654 case X86::VPDPWSSDZ128rk:
2655 case X86::VPDPWSSDZ128rkz:
2656 case X86::VPDPWSSDZ256r:
2657 case X86::VPDPWSSDZ256rk:
2658 case X86::VPDPWSSDZ256rkz:
2659 case X86::VPDPWSSDZr:
2660 case X86::VPDPWSSDZrk:
2661 case X86::VPDPWSSDZrkz:
2662 case X86::VPDPWSSDSZ128r:
2663 case X86::VPDPWSSDSZ128rk:
2664 case X86::VPDPWSSDSZ128rkz:
2665 case X86::VPDPWSSDSZ256r:
2666 case X86::VPDPWSSDSZ256rk:
2667 case X86::VPDPWSSDSZ256rkz:
2668 case X86::VPDPWSSDSZr:
2669 case X86::VPDPWSSDSZrk:
2670 case X86::VPDPWSSDSZrkz:
2671 case X86::VPMADD52HUQZ128r:
2672 case X86::VPMADD52HUQZ128rk:
2673 case X86::VPMADD52HUQZ128rkz:
2674 case X86::VPMADD52HUQZ256r:
2675 case X86::VPMADD52HUQZ256rk:
2676 case X86::VPMADD52HUQZ256rkz:
2677 case X86::VPMADD52HUQZr:
2678 case X86::VPMADD52HUQZrk:
2679 case X86::VPMADD52HUQZrkz:
2680 case X86::VPMADD52LUQZ128r:
2681 case X86::VPMADD52LUQZ128rk:
2682 case X86::VPMADD52LUQZ128rkz:
2683 case X86::VPMADD52LUQZ256r:
2684 case X86::VPMADD52LUQZ256rk:
2685 case X86::VPMADD52LUQZ256rkz:
2686 case X86::VPMADD52LUQZr:
2687 case X86::VPMADD52LUQZrk:
2688 case X86::VPMADD52LUQZrkz:
2689 case X86::VFMADDCPHZr:
2690 case X86::VFMADDCPHZrk:
2691 case X86::VFMADDCPHZrkz:
2692 case X86::VFMADDCPHZ128r:
2693 case X86::VFMADDCPHZ128rk:
2694 case X86::VFMADDCPHZ128rkz:
2695 case X86::VFMADDCPHZ256r:
2696 case X86::VFMADDCPHZ256rk:
2697 case X86::VFMADDCPHZ256rkz:
2698 case X86::VFMADDCSHZr:
2699 case X86::VFMADDCSHZrk:
2700 case X86::VFMADDCSHZrkz: {
2701 unsigned CommutableOpIdx1 = 2;
2702 unsigned CommutableOpIdx2 = 3;
2703 if (X86II::isKMasked(Desc.TSFlags)) {
2704 // Skip the mask register.
2705 ++CommutableOpIdx1;
2706 ++CommutableOpIdx2;
2707 }
2708 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2709 CommutableOpIdx1, CommutableOpIdx2))
2710 return false;
2711 if (!MI.getOperand(SrcOpIdx1).isReg() ||
2712 !MI.getOperand(SrcOpIdx2).isReg())
2713 // No idea.
2714 return false;
2715 return true;
2716 }
2717
2718 default:
2719 const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
2720 MI.getDesc().TSFlags);
2721 if (FMA3Group)
2722 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
2723 FMA3Group->isIntrinsic());
2724
2725 // Handle masked instructions, since we need to skip over the mask input
2726 // and the preserved input.
2727 if (X86II::isKMasked(Desc.TSFlags)) {
2728 // First assume that the first input is the mask operand and skip past it.
2729 unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
2730 unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
2731 // Check if the first input is tied. If there isn't one then we only
2732 // need to skip the mask operand which we did above.
2733 if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
2734 MCOI::TIED_TO) != -1)) {
2735 // If this is zero masking instruction with a tied operand, we need to
2736 // move the first index back to the first input since this must
2737 // be a 3 input instruction and we want the first two non-mask inputs.
2738 // Otherwise this is a 2 input instruction with a preserved input and
2739 // mask, so we need to move the indices to skip one more input.
2740 if (X86II::isKMergeMasked(Desc.TSFlags)) {
2741 ++CommutableOpIdx1;
2742 ++CommutableOpIdx2;
2743 } else {
2744 --CommutableOpIdx1;
2745 }
2746 }
2747
2748 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2749 CommutableOpIdx1, CommutableOpIdx2))
2750 return false;
2751
2752 if (!MI.getOperand(SrcOpIdx1).isReg() ||
2753 !MI.getOperand(SrcOpIdx2).isReg())
2754 // No idea.
2755 return false;
2756 return true;
2757 }
2758
2759 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2760 }
2761 return false;
2762}
2763
2764static bool isConvertibleLEA(MachineInstr *MI) {
2765 unsigned Opcode = MI->getOpcode();
2766 if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
2767 Opcode != X86::LEA64_32r)
2768 return false;
2769
2770 const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
2771 const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
2772 const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
2773
2774 if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
2775 Scale.getImm() > 1)
2776 return false;
2777
2778 return true;
2779}
2780
2781bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
2782 // Currently we're interested in following sequence only.
2783 // r3 = lea r1, r2
2784 // r5 = add r3, r4
2785 // Both r3 and r4 are killed in add, we hope the add instruction has the
2786 // operand order
2787 // r5 = add r4, r3
2788 // So later in X86FixupLEAs the lea instruction can be rewritten as add.
2789 unsigned Opcode = MI.getOpcode();
2790 if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
2791 return false;
2792
2793 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2794 Register Reg1 = MI.getOperand(1).getReg();
2795 Register Reg2 = MI.getOperand(2).getReg();
2796
2797 // Check if Reg1 comes from LEA in the same MBB.
2798 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
2799 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
2800 Commute = true;
2801 return true;
2802 }
2803 }
2804
2805 // Check if Reg2 comes from LEA in the same MBB.
2806 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
2807 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
2808 Commute = false;
2809 return true;
2810 }
2811 }
2812
2813 return false;
2814}
2815
2816X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
2817 switch (MI.getOpcode()) {
2818 default: return X86::COND_INVALID;
2819 case X86::JCC_1:
2820 return static_cast<X86::CondCode>(
2821 MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
2822 }
2823}
2824
2825/// Return condition code of a SETCC opcode.
2826X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
2827 switch (MI.getOpcode()) {
2828 default: return X86::COND_INVALID;
2829 case X86::SETCCr: case X86::SETCCm:
2830 return static_cast<X86::CondCode>(
2831 MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
2832 }
2833}
2834
2835/// Return condition code of a CMov opcode.
2836X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
2837 switch (MI.getOpcode()) {
2838 default: return X86::COND_INVALID;
2839 case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr:
2840 case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm:
2841 return static_cast<X86::CondCode>(
2842 MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
2843 }
2844}
2845
2846/// Return the inverse of the specified condition,
2847/// e.g. turning COND_E to COND_NE.
2848X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
2849 switch (CC) {
2850 default: llvm_unreachable("Illegal condition code!");
2851 case X86::COND_E: return X86::COND_NE;
2852 case X86::COND_NE: return X86::COND_E;
2853 case X86::COND_L: return X86::COND_GE;
2854 case X86::COND_LE: return X86::COND_G;
2855 case X86::COND_G: return X86::COND_LE;
2856 case X86::COND_GE: return X86::COND_L;
2857 case X86::COND_B: return X86::COND_AE;
2858 case X86::COND_BE: return X86::COND_A;
2859 case X86::COND_A: return X86::COND_BE;
2860 case X86::COND_AE: return X86::COND_B;
2861 case X86::COND_S: return X86::COND_NS;
2862 case X86::COND_NS: return X86::COND_S;
2863 case X86::COND_P: return X86::COND_NP;
2864 case X86::COND_NP: return X86::COND_P;
2865 case X86::COND_O: return X86::COND_NO;
2866 case X86::COND_NO: return X86::COND_O;
2867 case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
2868 case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
2869 }
2870}
2871
2872/// Assuming the flags are set by MI(a,b), return the condition code if we
2873/// modify the instructions such that flags are set by MI(b,a).
2874static X86::CondCode getSwappedCondition(X86::CondCode CC) {
2875 switch (CC) {
2876 default: return X86::COND_INVALID;
2877 case X86::COND_E: return X86::COND_E;
2878 case X86::COND_NE: return X86::COND_NE;
2879 case X86::COND_L: return X86::COND_G;
2880 case X86::COND_LE: return X86::COND_GE;
2881 case X86::COND_G: return X86::COND_L;
2882 case X86::COND_GE: return X86::COND_LE;
2883 case X86::COND_B: return X86::COND_A;
2884 case X86::COND_BE: return X86::COND_AE;
2885 case X86::COND_A: return X86::COND_B;
2886 case X86::COND_AE: return X86::COND_BE;
2887 }
2888}
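// A minimal standalone sketch (not part of X86InstrInfo.cpp) illustrating the
// difference between GetOppositeBranchCondition and getSwappedCondition for a
// signed comparison: negating "a < b" gives "a >= b" (COND_L -> COND_GE),
// while swapping the compared operands gives "b > a" (COND_L -> COND_G). All
// helper names below are illustrative assumptions.
#include <cassert>
static bool condL(int a, int b) { return a < b; }   // COND_L after cmp(a, b)
static bool condGE(int a, int b) { return a >= b; } // COND_GE, the opposite condition
static bool condG(int a, int b) { return a > b; }   // COND_G, the swapped condition
static void checkConditionMappings() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b) {
      // Opposite condition: same operand order, negated outcome.
      assert(condL(a, b) == !condGE(a, b));
      // Swapped condition: operands exchanged, same outcome.
      assert(condL(a, b) == condG(b, a));
    }
}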
2889
2890std::pair<X86::CondCode, bool>
2891X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
2892 X86::CondCode CC = X86::COND_INVALID;
2893 bool NeedSwap = false;
2894 switch (Predicate) {
2895 default: break;
2896 // Floating-point Predicates
2897 case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
2898 case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
2899 case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
2900 case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
2901 case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
2902 case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
2903 case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
2904 case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
2905 case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
2906 case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
2907 case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
2908 case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
2909 case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
2910 case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
2911
2912 // Integer Predicates
2913 case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
2914 case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
2915 case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
2916 case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
2917 case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
2918 case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
2919 case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
2920 case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
2921 case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
2922 case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
2923 }
2924
2925 return std::make_pair(CC, NeedSwap);
2926}
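// A standalone sketch (not part of X86InstrInfo.cpp) of what the NeedSwap flag
// returned above means for a caller: for FCMP_OLT the table yields COND_A with
// NeedSwap = true, i.e. "compare the operands in swapped order and test
// 'above'", because an ordered less-than of (a, b) holds exactly when an
// ordered greater-than of (b, a) holds. Helper names are illustrative.
#include <cassert>
#include <cmath>
static void checkOltEqualsSwappedOgt(double a, double b) {
  // std::isless and std::isgreater are ordered comparisons (false if either
  // operand is NaN), matching the O-prefixed FCMP predicates.
  assert(std::isless(a, b) == std::isgreater(b, a));
}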
2927
2928/// Return a cmov opcode for the given register size in bytes, and operand type.
2929unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
2930 switch(RegBytes) {
2931 default: llvm_unreachable("Illegal register size!");
2932 case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr;
2933 case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr;
2934 case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr;
2935 }
2936}
2937
2938/// Get the VPCMP immediate for the given condition.
2939unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
2940 switch (CC) {
2941 default: llvm_unreachable("Unexpected SETCC condition");
2942 case ISD::SETNE: return 4;
2943 case ISD::SETEQ: return 0;
2944 case ISD::SETULT:
2945 case ISD::SETLT: return 1;
2946 case ISD::SETUGT:
2947 case ISD::SETGT: return 6;
2948 case ISD::SETUGE:
2949 case ISD::SETGE: return 5;
2950 case ISD::SETULE:
2951 case ISD::SETLE: return 2;
2952 }
2953}
2954
2955/// Get the VPCMP immediate if the operands are swapped.
2956unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
2957 switch (Imm) {
2958 default: llvm_unreachable("Unreachable!");
2959 case 0x01: Imm = 0x06; break; // LT -> NLE
2960 case 0x02: Imm = 0x05; break; // LE -> NLT
2961 case 0x05: Imm = 0x02; break; // NLT -> LE
2962 case 0x06: Imm = 0x01; break; // NLE -> LT
2963 case 0x00: // EQ
2964 case 0x03: // FALSE
2965 case 0x04: // NE
2966 case 0x07: // TRUE
2967 break;
2968 }
2969
2970 return Imm;
2971}
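// A standalone sketch (not part of X86InstrInfo.cpp) checking the VPCMP
// predicate swap above with plain signed integers: if "a PRED b" uses
// immediate Imm, then "b SWAPPED(Imm) a" must give the same result. The
// encodings mirror the switch above (0 EQ, 1 LT, 2 LE, 4 NE, 5 NLT, 6 NLE);
// the helper names are illustrative assumptions.
#include <cassert>
#include <initializer_list>
static bool vpcmpPredicate(unsigned Imm, int a, int b) {
  switch (Imm) {
  case 0: return a == b; // EQ
  case 1: return a <  b; // LT
  case 2: return a <= b; // LE
  case 4: return a != b; // NE
  case 5: return a >= b; // NLT
  case 6: return a >  b; // NLE
  default: return false;
  }
}
static unsigned swappedVPCMPImm(unsigned Imm) {
  switch (Imm) {
  case 1: return 6; case 2: return 5; case 5: return 2; case 6: return 1;
  default: return Imm; // EQ and NE are symmetric under operand swap.
  }
}
static void checkSwappedVPCMPImm() {
  for (unsigned Imm : {0u, 1u, 2u, 4u, 5u, 6u})
    for (int a = -1; a <= 1; ++a)
      for (int b = -1; b <= 1; ++b)
        assert(vpcmpPredicate(Imm, a, b) == vpcmpPredicate(swappedVPCMPImm(Imm), b, a));
}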
2972
2973/// Get the VPCOM immediate if the operands are swapped.
2974unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
2975 switch (Imm) {
2976 default: llvm_unreachable("Unreachable!");
2977 case 0x00: Imm = 0x02; break; // LT -> GT
2978 case 0x01: Imm = 0x03; break; // LE -> GE
2979 case 0x02: Imm = 0x00; break; // GT -> LT
2980 case 0x03: Imm = 0x01; break; // GE -> LE
2981 case 0x04: // EQ
2982 case 0x05: // NE
2983 case 0x06: // FALSE
2984 case 0x07: // TRUE
2985 break;
2986 }
2987
2988 return Imm;
2989}
2990
2991/// Get the VCMP immediate if the operands are swapped.
2992unsigned X86::getSwappedVCMPImm(unsigned Imm) {
2993 // Only need the lower 2 bits to distinguish.
2994 switch (Imm & 0x3) {
2995 default: llvm_unreachable("Unreachable!");
2996 case 0x00: case 0x03:
2997 // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
2998 break;
2999 case 0x01: case 0x02:
3000 // Need to toggle bits 3:0. Bit 4 stays the same.
3001 Imm ^= 0xf;
3002 break;
3003 }
3004
3005 return Imm;
3006}
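// A standalone sketch (not part of X86InstrInfo.cpp) of the bit trick used
// above: in the AVX VCMP predicate encoding, the signed comparisons come in
// pairs that differ only in bits 3:0, so XOR-ing with 0xf maps LT_OS (0x01)
// to GT_OS (0x0e) and LE_OS (0x02) to GE_OS (0x0d), while EQ/NE/ORD/UNORD/
// TRUE/FALSE encodings are symmetric and keep their immediate. The helper is
// a self-contained copy of the same logic for checking.
#include <cassert>
static unsigned swappedVCMPImm(unsigned Imm) {
  unsigned LowBits = Imm & 0x3;
  if (LowBits == 0x1 || LowBits == 0x2)
    Imm ^= 0xf; // Toggle bits 3:0; bit 4 stays the same.
  return Imm;
}
static void checkSwappedVCMPImm() {
  assert(swappedVCMPImm(0x01) == 0x0e); // LT_OS -> GT_OS
  assert(swappedVCMPImm(0x02) == 0x0d); // LE_OS -> GE_OS
  assert(swappedVCMPImm(0x00) == 0x00); // EQ_OQ unchanged
  assert(swappedVCMPImm(0x11) == 0x1e); // LT_OQ -> GT_OQ, bit 4 preserved
}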
3007
3008 /// Return true if Reg is an X87 register.
3009static bool isX87Reg(unsigned Reg) {
3010 return (Reg == X86::FPCW || Reg == X86::FPSW ||
3011 (Reg >= X86::ST0 && Reg <= X86::ST7));
3012}
3013
3014 /// Check whether the instruction is an X87 instruction.
3015bool X86::isX87Instruction(MachineInstr &MI) {
3016 for (const MachineOperand &MO : MI.operands()) {
3017 if (!MO.isReg())
3018 continue;
3019 if (isX87Reg(MO.getReg()))
3020 return true;
3021 }
3022 return false;
3023}
3024
3025bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
3026 switch (MI.getOpcode()) {
3027 case X86::TCRETURNdi:
3028 case X86::TCRETURNri:
3029 case X86::TCRETURNmi:
3030 case X86::TCRETURNdi64:
3031 case X86::TCRETURNri64:
3032 case X86::TCRETURNmi64:
3033 return true;
3034 default:
3035 return false;
3036 }
3037}
3038
3039bool X86InstrInfo::canMakeTailCallConditional(
3040 SmallVectorImpl<MachineOperand> &BranchCond,
3041 const MachineInstr &TailCall) const {
3042 if (TailCall.getOpcode() != X86::TCRETURNdi &&
3043 TailCall.getOpcode() != X86::TCRETURNdi64) {
3044 // Only direct calls can be done with a conditional branch.
3045 return false;
3046 }
3047
3048 const MachineFunction *MF = TailCall.getParent()->getParent();
3049 if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
3050 // Conditional tail calls confuse the Win64 unwinder.
3051 return false;
3052 }
3053
3054 assert(BranchCond.size() == 1);
3055 if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
3056 // Can't make a conditional tail call with this condition.
3057 return false;
3058 }
3059
3060 const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
3061 if (X86FI->getTCReturnAddrDelta() != 0 ||
3062 TailCall.getOperand(1).getImm() != 0) {
3063 // A conditional tail call cannot do any stack adjustment.
3064 return false;
3065 }
3066
3067 return true;
3068}
3069
3070void X86InstrInfo::replaceBranchWithTailCall(
3071 MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
3072 const MachineInstr &TailCall) const {
3073 assert(canMakeTailCallConditional(BranchCond, TailCall));
3074
3075 MachineBasicBlock::iterator I = MBB.end();
3076 while (I != MBB.begin()) {
3077 --I;
3078 if (I->isDebugInstr())
3079 continue;
3080 if (!I->isBranch())
3081 assert(0 && "Can't find the branch to replace!");
3082
3083 X86::CondCode CC = X86::getCondFromBranch(*I);
3084 assert(BranchCond.size() == 1);
3085 if (CC != BranchCond[0].getImm())
3086 continue;
3087
3088 break;
3089 }
3090
3091 unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
3092 : X86::TCRETURNdi64cc;
3093
3094 auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
3095 MIB->addOperand(TailCall.getOperand(0)); // Destination.
3096 MIB.addImm(0); // Stack offset (not used).
3097 MIB->addOperand(BranchCond[0]); // Condition.
3098 MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
3099
3100 // Add implicit uses and defs of all live regs potentially clobbered by the
3101 // call. This way they still appear live across the call.
3102 LivePhysRegs LiveRegs(getRegisterInfo());
3103 LiveRegs.addLiveOuts(MBB);
3104 SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
3105 LiveRegs.stepForward(*MIB, Clobbers);
3106 for (const auto &C : Clobbers) {
3107 MIB.addReg(C.first, RegState::Implicit);
3108 MIB.addReg(C.first, RegState::Implicit | RegState::Define);
3109 }
3110
3111 I->eraseFromParent();
3112}
3113
3114// Given an MBB and its TBB, find the FBB which was a fallthrough MBB (it may
3115// not be a fallthrough MBB now due to layout changes). Return nullptr if the
3116// fallthrough MBB cannot be identified.
3117static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
3118 MachineBasicBlock *TBB) {
3119 // Look for non-EHPad successors other than TBB. If we find exactly one, it
3120 // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
3121 // and fallthrough MBB. If we find more than one, we cannot identify the
3122 // fallthrough MBB and should return nullptr.
3123 MachineBasicBlock *FallthroughBB = nullptr;
3124 for (MachineBasicBlock *Succ : MBB->successors()) {
3125 if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
3126 continue;
3127 // Return a nullptr if we found more than one fallthrough successor.
3128 if (FallthroughBB && FallthroughBB != TBB)
3129 return nullptr;
3130 FallthroughBB = Succ;
3131 }
3132 return FallthroughBB;
3133}
3134
3135bool X86InstrInfo::AnalyzeBranchImpl(
3136 MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
3137 SmallVectorImpl<MachineOperand> &Cond,
3138 SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
3139
3140 // Start from the bottom of the block and work up, examining the
3141 // terminator instructions.
3142 MachineBasicBlock::iterator I = MBB.end();
3143 MachineBasicBlock::iterator UnCondBrIter = MBB.end();
3144 while (I != MBB.begin()) {
3145 --I;
3146 if (I->isDebugInstr())
3147 continue;
3148
3149 // Working from the bottom, when we see a non-terminator instruction, we're
3150 // done.
3151 if (!isUnpredicatedTerminator(*I))
3152 break;
3153
3154 // A terminator that isn't a branch can't easily be handled by this
3155 // analysis.
3156 if (!I->isBranch())
3157 return true;
3158
3159 // Handle unconditional branches.
3160 if (I->getOpcode() == X86::JMP_1) {
3161 UnCondBrIter = I;
3162
3163 if (!AllowModify) {
3164 TBB = I->getOperand(0).getMBB();
3165 continue;
3166 }
3167
3168 // If the block has any instructions after a JMP, delete them.
3169 while (std::next(I) != MBB.end())
3170 std::next(I)->eraseFromParent();
3171
3172 Cond.clear();
3173 FBB = nullptr;
3174
3175 // Delete the JMP if it's equivalent to a fall-through.
3176 if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
3177 TBB = nullptr;
3178 I->eraseFromParent();
3179 I = MBB.end();
3180 UnCondBrIter = MBB.end();
3181 continue;
3182 }
3183
3184 // TBB is used to indicate the unconditional destination.
3185 TBB = I->getOperand(0).getMBB();
3186 continue;
3187 }
3188
3189 // Handle conditional branches.
3190 X86::CondCode BranchCode = X86::getCondFromBranch(*I);
3191 if (BranchCode == X86::COND_INVALID)
3192 return true; // Can't handle indirect branch.
3193
3194 // In practice we should never have an undef EFLAGS operand; if we do,
3195 // abort here, as we are not prepared to preserve the flag.
3196 if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef())
3197 return true;
3198
3199 // Working from the bottom, handle the first conditional branch.
3200 if (Cond.empty()) {
3201 MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
3202 if (AllowModify && UnCondBrIter != MBB.end() &&
3203 MBB.isLayoutSuccessor(TargetBB)) {
3204 // If we can modify the code and it ends in something like:
3205 //
3206 // jCC L1
3207 // jmp L2
3208 // L1:
3209 // ...
3210 // L2:
3211 //
3212 // Then we can change this to:
3213 //
3214 // jnCC L2
3215 // L1:
3216 // ...
3217 // L2:
3218 //
3219 // Which is a bit more efficient.
3220 // We conditionally jump to the fall-through block.
3221 BranchCode = GetOppositeBranchCondition(BranchCode);
3222 MachineBasicBlock::iterator OldInst = I;
3223
3224 BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JCC_1))
3225 .addMBB(UnCondBrIter->getOperand(0).getMBB())
3226 .addImm(BranchCode);
3227 BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
3228 .addMBB(TargetBB);
3229
3230 OldInst->eraseFromParent();
3231 UnCondBrIter->eraseFromParent();
3232
3233 // Restart the analysis.
3234 UnCondBrIter = MBB.end();
3235 I = MBB.end();
3236 continue;
3237 }
3238
3239 FBB = TBB;
3240 TBB = I->getOperand(0).getMBB();
3241 Cond.push_back(MachineOperand::CreateImm(BranchCode));
3242 CondBranches.push_back(&*I);
3243 continue;
3244 }
3245
3246 // Handle subsequent conditional branches. Only handle the case where all
3247 // conditional branches branch to the same destination and their condition
3248 // opcodes fit one of the special multi-branch idioms.
3249 assert(Cond.size() == 1);
3250 assert(TBB);
3251
3252 // If the conditions are the same, we can leave them alone.
3253 X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
3254 auto NewTBB = I->getOperand(0).getMBB();
3255 if (OldBranchCode == BranchCode && TBB == NewTBB)
3256 continue;
3257
3258 // If they differ, see if they fit one of the known patterns. Theoretically,
3259 // we could handle more patterns here, but we shouldn't expect to see them
3260 // if instruction selection has done a reasonable job.
3261 if (TBB == NewTBB &&
3262 ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
3263 (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
3264 BranchCode = X86::COND_NE_OR_P;
3265 } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
3266 (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
3267 if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
3268 return true;
3269
3270 // X86::COND_E_AND_NP usually has two different branch destinations.
3271 //
3272 // JP B1
3273 // JE B2
3274 // JMP B1
3275 // B1:
3276 // B2:
3277 //
3278 // Here this condition branches to B2 only if NP && E. It has another
3279 // equivalent form:
3280 //
3281 // JNE B1
3282 // JNP B2
3283 // JMP B1
3284 // B1:
3285 // B2:
3286 //
3287 // Similarly it branches to B2 only if E && NP. That is why this condition
3288 // is named with COND_E_AND_NP.
3289 BranchCode = X86::COND_E_AND_NP;
3290 } else
3291 return true;
3292
3293 // Update the MachineOperand.
3294 Cond[0].setImm(BranchCode);
3295 CondBranches.push_back(&*I);
3296 }
3297
3298 return false;
3299}
3300
3301bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
3302 MachineBasicBlock *&TBB,
3303 MachineBasicBlock *&FBB,
3304 SmallVectorImpl<MachineOperand> &Cond,
3305 bool AllowModify) const {
3306 SmallVector<MachineInstr *, 4> CondBranches;
3307 return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
3308}
3309
3310bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
3311 MachineBranchPredicate &MBP,
3312 bool AllowModify) const {
3313 using namespace std::placeholders;
3314
3315 SmallVector<MachineOperand, 4> Cond;
3316 SmallVector<MachineInstr *, 4> CondBranches;
3317 if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
3318 AllowModify))
3319 return true;
3320
3321 if (Cond.size() != 1)
3322 return true;
3323
3324 assert(MBP.TrueDest && "expected!");
3325
3326 if (!MBP.FalseDest)
3327 MBP.FalseDest = MBB.getNextNode();
3328
3329 const TargetRegisterInfo *TRI = &getRegisterInfo();
3330
3331 MachineInstr *ConditionDef = nullptr;
3332 bool SingleUseCondition = true;
3333
3334 for (MachineInstr &MI : llvm::drop_begin(llvm::reverse(MBB))) {
3335 if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
3336 ConditionDef = &MI;
3337 break;
3338 }
3339
3340 if (MI.readsRegister(X86::EFLAGS, TRI))
3341 SingleUseCondition = false;
3342 }
3343
3344 if (!ConditionDef)
3345 return true;
3346
3347 if (SingleUseCondition) {
3348 for (auto *Succ : MBB.successors())
3349 if (Succ->isLiveIn(X86::EFLAGS))
3350 SingleUseCondition = false;
3351 }
3352
3353 MBP.ConditionDef = ConditionDef;
3354 MBP.SingleUseCondition = SingleUseCondition;
3355
3356 // Currently we only recognize the simple pattern:
3357 //
3358 // test %reg, %reg
3359 // je %label
3360 //
3361 const unsigned TestOpcode =
3362 Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
3363
3364 if (ConditionDef->getOpcode() == TestOpcode &&
3365 ConditionDef->getNumOperands() == 3 &&
3366 ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
3367 (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
3368 MBP.LHS = ConditionDef->getOperand(0);
3369 MBP.RHS = MachineOperand::CreateImm(0);
3370 MBP.Predicate = Cond[0].getImm() == X86::COND_NE
3371 ? MachineBranchPredicate::PRED_NE
3372 : MachineBranchPredicate::PRED_EQ;
3373 return false;
3374 }
3375
3376 return true;
3377}
3378
3379unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
3380 int *BytesRemoved) const {
3381 assert(!BytesRemoved && "code size not handled");
3382
3383 MachineBasicBlock::iterator I = MBB.end();
3384 unsigned Count = 0;
3385
3386 while (I != MBB.begin()) {
3387 --I;
3388 if (I->isDebugInstr())
3389 continue;
3390 if (I->getOpcode() != X86::JMP_1 &&
3391 X86::getCondFromBranch(*I) == X86::COND_INVALID)
3392 break;
3393 // Remove the branch.
3394 I->eraseFromParent();
3395 I = MBB.end();
3396 ++Count;
3397 }
3398
3399 return Count;
3400}
3401
3402unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
3403 MachineBasicBlock *TBB,
3404 MachineBasicBlock *FBB,
3405 ArrayRef<MachineOperand> Cond,
3406 const DebugLoc &DL,
3407 int *BytesAdded) const {
3408 // Shouldn't be a fall through.
3409 assert(TBB && "insertBranch must not be told to insert a fallthrough");
3410 assert((Cond.size() == 1 || Cond.size() == 0) &&
3411 "X86 branch conditions have one component!");
3412 assert(!BytesAdded && "code size not handled");
3413
3414 if (Cond.empty()) {
3415 // Unconditional branch?
3416 assert(!FBB && "Unconditional branch with multiple successors!");
3417 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
3418 return 1;
3419 }
3420
3421 // If FBB is null, it is implied to be a fall-through block.
3422 bool FallThru = FBB == nullptr;
3423
3424 // Conditional branch.
3425 unsigned Count = 0;
3426 X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
3427 switch (CC) {
3428 case X86::COND_NE_OR_P:
3429 // Synthesize NE_OR_P with two branches.
3430 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
3431 ++Count;
3432 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
3433 ++Count;
3434 break;
3435 case X86::COND_E_AND_NP:
3436 // Use the next block of MBB as FBB if it is null.
3437 if (FBB == nullptr) {
3438 FBB = getFallThroughMBB(&MBB, TBB);
3439 assert(FBB && "MBB cannot be the last block in function when the false "
3440 "body is a fall-through.");
3441 }
3442 // Synthesize COND_E_AND_NP with two branches.
3443 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
3444 ++Count;
3445 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
3446 ++Count;
3447 break;
3448 default: {
3449 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
3450 ++Count;
3451 }
3452 }
3453 if (!FallThru) {
3454 // Two-way Conditional branch. Insert the second branch.
3455 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
3456 ++Count;
3457 }
3458 return Count;
3459}
3460
3461bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
3462 ArrayRef<MachineOperand> Cond,
3463 Register DstReg, Register TrueReg,
3464 Register FalseReg, int &CondCycles,
3465 int &TrueCycles, int &FalseCycles) const {
3466 // Not all subtargets have cmov instructions.
3467 if (!Subtarget.hasCMov())
3468 return false;
3469 if (Cond.size() != 1)
3470 return false;
3471 // We cannot do the composite conditions, at least not in SSA form.
3472 if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
3473 return false;
3474
3475 // Check register classes.
3476 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3477 const TargetRegisterClass *RC =
3478 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
3479 if (!RC)
3480 return false;
3481
3482 // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
3483 if (X86::GR16RegClass.hasSubClassEq(RC) ||
3484 X86::GR32RegClass.hasSubClassEq(RC) ||
3485 X86::GR64RegClass.hasSubClassEq(RC)) {
3486 // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
3487 // Bridge. Probably Ivy Bridge as well.
3488 CondCycles = 2;
3489 TrueCycles = 2;
3490 FalseCycles = 2;
3491 return true;
3492 }
3493
3494 // Can't do vectors.
3495 return false;
3496}
3497
3498void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
3499 MachineBasicBlock::iterator I,
3500 const DebugLoc &DL, Register DstReg,
3501 ArrayRef<MachineOperand> Cond, Register TrueReg,
3502 Register FalseReg) const {
3503 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3504 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3505 const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
3506 assert(Cond.size() == 1 && "Invalid Cond array");
3507 unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
3508 false /*HasMemoryOperand*/);
3509 BuildMI(MBB, I, DL, get(Opc), DstReg)
3510 .addReg(FalseReg)
3511 .addReg(TrueReg)
3512 .addImm(Cond[0].getImm());
3513}
3514
3515/// Test if the given register is a physical h register.
3516static bool isHReg(unsigned Reg) {
3517 return X86::GR8_ABCD_HRegClass.contains(Reg);
3518}
3519
3520// Try and copy between VR128/VR64 and GR64 registers.
3521static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
3522 const X86Subtarget &Subtarget) {
3523 bool HasAVX = Subtarget.hasAVX();
3524 bool HasAVX512 = Subtarget.hasAVX512();
3525
3526 // SrcReg(MaskReg) -> DestReg(GR64)
3527 // SrcReg(MaskReg) -> DestReg(GR32)
3528
3529 // All KMASK RegClasses hold the same k registers, so the check can be done against any one of them.
3530 if (X86::VK16RegClass.contains(SrcReg)) {
3531 if (X86::GR64RegClass.contains(DestReg)) {
3532 assert(Subtarget.hasBWI());
3533 return X86::KMOVQrk;
3534 }
3535 if (X86::GR32RegClass.contains(DestReg))
3536 return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
3537 }
3538
3539 // SrcReg(GR64) -> DestReg(MaskReg)
3540 // SrcReg(GR32) -> DestReg(MaskReg)
3541
3542 // All KMASK RegClasses hold the same k registers, so the check can be done against any one of them.
3543 if (X86::VK16RegClass.contains(DestReg)) {
3544 if (X86::GR64RegClass.contains(SrcReg)) {
3545 assert(Subtarget.hasBWI());
3546 return X86::KMOVQkr;
3547 }
3548 if (X86::GR32RegClass.contains(SrcReg))
3549 return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
3550 }
3551
3552
3553 // SrcReg(VR128) -> DestReg(GR64)
3554 // SrcReg(VR64) -> DestReg(GR64)
3555 // SrcReg(GR64) -> DestReg(VR128)
3556 // SrcReg(GR64) -> DestReg(VR64)
3557
3558 if (X86::GR64RegClass.contains(DestReg)) {
3559 if (X86::VR128XRegClass.contains(SrcReg))
3560 // Copy from a VR128 register to a GR64 register.
3561 return HasAVX512 ? X86::VMOVPQIto64Zrr :
3562 HasAVX ? X86::VMOVPQIto64rr :
3563 X86::MOVPQIto64rr;
3564 if (X86::VR64RegClass.contains(SrcReg))
3565 // Copy from a VR64 register to a GR64 register.
3566 return X86::MMX_MOVD64from64rr;
3567 } else if (X86::GR64RegClass.contains(SrcReg)) {
3568 // Copy from a GR64 register to a VR128 register.
3569 if (X86::VR128XRegClass.contains(DestReg))
3570 return HasAVX512 ? X86::VMOV64toPQIZrr :
3571 HasAVX ? X86::VMOV64toPQIrr :
3572 X86::MOV64toPQIrr;
3573 // Copy from a GR64 register to a VR64 register.
3574 if (X86::VR64RegClass.contains(DestReg))
3575 return X86::MMX_MOVD64to64rr;
3576 }
3577
3578 // SrcReg(VR128) -> DestReg(GR32)
3579 // SrcReg(GR32) -> DestReg(VR128)
3580
3581 if (X86::GR32RegClass.contains(DestReg) &&
3582 X86::VR128XRegClass.contains(SrcReg))
3583 // Copy from a VR128 register to a GR32 register.
3584 return HasAVX512 ? X86::VMOVPDI2DIZrr :
3585 HasAVX ? X86::VMOVPDI2DIrr :
3586 X86::MOVPDI2DIrr;
3587
3588 if (X86::VR128XRegClass.contains(DestReg) &&
3589 X86::GR32RegClass.contains(SrcReg))
3590 // Copy from a GR32 register to a VR128 register.
3591 return HasAVX512 ? X86::VMOVDI2PDIZrr :
3592 HasAVX ? X86::VMOVDI2PDIrr :
3593 X86::MOVDI2PDIrr;
3594 return 0;
3595}
3596
3597void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
3598 MachineBasicBlock::iterator MI,
3599 const DebugLoc &DL, MCRegister DestReg,
3600 MCRegister SrcReg, bool KillSrc) const {
3601 // First deal with the normal symmetric copies.
3602 bool HasAVX = Subtarget.hasAVX();
3603 bool HasVLX = Subtarget.hasVLX();
3604 unsigned Opc = 0;
3605 if (X86::GR64RegClass.contains(DestReg, SrcReg))
3606 Opc = X86::MOV64rr;
3607 else if (X86::GR32RegClass.contains(DestReg, SrcReg))
3608 Opc = X86::MOV32rr;
3609 else if (X86::GR16RegClass.contains(DestReg, SrcReg))
3610 Opc = X86::MOV16rr;
3611 else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
3612 // Copying to or from a physical H register on x86-64 requires a NOREX
3613 // move. Otherwise use a normal move.
3614 if ((isHReg(DestReg) || isHReg(SrcReg)) &&
3615 Subtarget.is64Bit()) {
3616 Opc = X86::MOV8rr_NOREX;
3617 // Both operands must be encodable without an REX prefix.
3618 assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
3619 "8-bit H register can not be copied outside GR8_NOREX");
3620 } else
3621 Opc = X86::MOV8rr;
3622 }
3623 else if (X86::VR64RegClass.contains(DestReg, SrcReg))
3624 Opc = X86::MMX_MOVQ64rr;
3625 else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
3626 if (HasVLX)
3627 Opc = X86::VMOVAPSZ128rr;
3628 else if (X86::VR128RegClass.contains(DestReg, SrcReg))
3629 Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
3630 else {
3631 // If this is an extended register and we don't have VLX, we need to use a
3632 // 512-bit move.
3633 Opc = X86::VMOVAPSZrr;
3634 const TargetRegisterInfo *TRI = &getRegisterInfo();
3635 DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm,
3636 &X86::VR512RegClass);
3637 SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm,
3638 &X86::VR512RegClass);
3639 }
3640 } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
3641 if (HasVLX)
3642 Opc = X86::VMOVAPSZ256rr;
3643 else if (X86::VR256RegClass.contains(DestReg, SrcReg))
3644 Opc = X86::VMOVAPSYrr;
3645 else {
3646 // If this is an extended register and we don't have VLX, we need to use a
3647 // 512-bit move.
3648 Opc = X86::VMOVAPSZrr;
3649 const TargetRegisterInfo *TRI = &getRegisterInfo();
3650 DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm,
3651 &X86::VR512RegClass);
3652 SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm,
3653 &X86::VR512RegClass);
3654 }
3655 } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
3656 Opc = X86::VMOVAPSZrr;
3657 // All KMASK RegClasses hold the same k registers, so the check can be done against any one of them.
3658 else if (X86::VK16RegClass.contains(DestReg, SrcReg))
3659 Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
3660 if (!Opc)
3661 Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
3662
3663 if (Opc) {
3664 BuildMI(MBB, MI, DL, get(Opc), DestReg)
3665 .addReg(SrcReg, getKillRegState(KillSrc));
3666 return;
3667 }
3668
3669 if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
3670 // FIXME: We use a fatal error here because historically LLVM has tried
3671 // lower some of these physreg copies and we want to ensure we get
3672 // reasonable bug reports if someone encounters a case no other testing
3673 // found. This path should be removed after the LLVM 7 release.
3674 report_fatal_error("Unable to copy EFLAGS physical register!");
3675 }
3676
3677 LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to "
3678 << RI.getName(DestReg) << '\n');
3679 report_fatal_error("Cannot emit physreg copy instruction");
3680}
3681
3682Optional<DestSourcePair>
3683X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
3684 if (MI.isMoveReg())
3685 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
3686 return None;
3687}
3688
3689static unsigned getLoadStoreRegOpcode(Register Reg,
3690 const TargetRegisterClass *RC,
3691 bool IsStackAligned,
3692 const X86Subtarget &STI, bool load) {
3693 bool HasAVX = STI.hasAVX();
3694 bool HasAVX512 = STI.hasAVX512();
3695 bool HasVLX = STI.hasVLX();
3696
3697 switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
3698 default:
3699 llvm_unreachable("Unknown spill size");
3700 case 1:
3701 assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
3702 if (STI.is64Bit())
3703 // Copying to or from a physical H register on x86-64 requires a NOREX
3704 // move. Otherwise use a normal move.
3705 if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
3706 return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
3707 return load ? X86::MOV8rm : X86::MOV8mr;
3708 case 2:
3709 if (X86::VK16RegClass.hasSubClassEq(RC))
3710 return load ? X86::KMOVWkm : X86::KMOVWmk;
3711 if (X86::FR16XRegClass.hasSubClassEq(RC)) {
3712 assert(STI.hasFP16());
3713 return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
3714 }
3715 assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
3716 return load ? X86::MOV16rm : X86::MOV16mr;
3717 case 4:
3718 if (X86::GR32RegClass.hasSubClassEq(RC))
3719 return load ? X86::MOV32rm : X86::MOV32mr;
3720 if (X86::FR32XRegClass.hasSubClassEq(RC))
3721 return load ?
3722 (HasAVX512 ? X86::VMOVSSZrm_alt :
3723 HasAVX ? X86::VMOVSSrm_alt :
3724 X86::MOVSSrm_alt) :
3725 (HasAVX512 ? X86::VMOVSSZmr :
3726 HasAVX ? X86::VMOVSSmr :
3727 X86::MOVSSmr);
3728 if (X86::RFP32RegClass.hasSubClassEq(RC))
3729 return load ? X86::LD_Fp32m : X86::ST_Fp32m;
3730 if (X86::VK32RegClass.hasSubClassEq(RC)) {
3731 assert(STI.hasBWI() && "KMOVD requires BWI");
3732 return load ? X86::KMOVDkm : X86::KMOVDmk;
3733 }
3734 // All of these mask pair classes have the same spill size, so the same kind
3735 // of kmov instructions can be used with all of them.
3736 if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
3737 X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
3738 X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
3739 X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
3740 X86::VK16PAIRRegClass.hasSubClassEq(RC))
3741 return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
3742 llvm_unreachable("Unknown 4-byte regclass");
3743 case 8:
3744 if (X86::GR64RegClass.hasSubClassEq(RC))
3745 return load ? X86::MOV64rm : X86::MOV64mr;
3746 if (X86::FR64XRegClass.hasSubClassEq(RC))
3747 return load ?
3748 (HasAVX512 ? X86::VMOVSDZrm_alt :
3749 HasAVX ? X86::VMOVSDrm_alt :
3750 X86::MOVSDrm_alt) :
3751 (HasAVX512 ? X86::VMOVSDZmr :
3752 HasAVX ? X86::VMOVSDmr :
3753 X86::MOVSDmr);
3754 if (X86::VR64RegClass.hasSubClassEq(RC))
3755 return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
3756 if (X86::RFP64RegClass.hasSubClassEq(RC))
3757 return load ? X86::LD_Fp64m : X86::ST_Fp64m;
3758 if (X86::VK64RegClass.hasSubClassEq(RC)) {
3759 assert(STI.hasBWI() && "KMOVQ requires BWI");
3760 return load ? X86::KMOVQkm : X86::KMOVQmk;
3761 }
3762 llvm_unreachable("Unknown 8-byte regclass");
3763 case 10:
3764 assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
3765 return load ? X86::LD_Fp80m : X86::ST_FpP80m;
3766 case 16: {
3767 if (X86::VR128XRegClass.hasSubClassEq(RC)) {
3768 // If stack is realigned we can use aligned stores.
3769 if (IsStackAligned)
3770 return load ?
3771 (HasVLX ? X86::VMOVAPSZ128rm :
3772 HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
3773 HasAVX ? X86::VMOVAPSrm :
3774 X86::MOVAPSrm):
3775 (HasVLX ? X86::VMOVAPSZ128mr :
3776 HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
3777 HasAVX ? X86::VMOVAPSmr :
3778 X86::MOVAPSmr);
3779 else
3780 return load ?
3781 (HasVLX ? X86::VMOVUPSZ128rm :
3782 HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
3783 HasAVX ? X86::VMOVUPSrm :
3784 X86::MOVUPSrm):
3785 (HasVLX ? X86::VMOVUPSZ128mr :
3786 HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
3787 HasAVX ? X86::VMOVUPSmr :
3788 X86::MOVUPSmr);
3789 }
3790 llvm_unreachable("Unknown 16-byte regclass");
3791 }
3792 case 32:
3793 assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
3794 // If stack is realigned we can use aligned stores.
3795 if (IsStackAligned)
3796 return load ?
3797 (HasVLX ? X86::VMOVAPSZ256rm :
3798 HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
3799 X86::VMOVAPSYrm) :
3800 (HasVLX ? X86::VMOVAPSZ256mr :
3801 HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX :
3802 X86::VMOVAPSYmr);
3803 else
3804 return load ?
3805 (HasVLX ? X86::VMOVUPSZ256rm :
3806 HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX :
3807 X86::VMOVUPSYrm) :
3808 (HasVLX ? X86::VMOVUPSZ256mr :
3809 HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX :
3810 X86::VMOVUPSYmr);
3811 case 64:
3812 assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
3813 assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
3814 if (IsStackAligned)
3815 return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
3816 else
3817 return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
3818 }
3819}
3820
3821Optional<ExtAddrMode>
3822X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3823 const TargetRegisterInfo *TRI) const {
3824 const MCInstrDesc &Desc = MemI.getDesc();
3825 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3826 if (MemRefBegin < 0)
3827 return None;
3828
3829 MemRefBegin += X86II::getOperandBias(Desc);
3830
3831 auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
3832 if (!BaseOp.isReg()) // Can be an MO_FrameIndex
3833 return None;
3834
3835 const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
3836 // Displacement can be symbolic
3837 if (!DispMO.isImm())
3838 return None;
3839
3840 ExtAddrMode AM;
3841 AM.BaseReg = BaseOp.getReg();
3842 AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
3843 AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
3844 AM.Displacement = DispMO.getImm();
3845 return AM;
3846}
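// A standalone sketch (not part of X86InstrInfo.cpp) of what the ExtAddrMode
// fields filled in above describe for an x86 memory operand: the effective
// address is BaseReg + Scale * ScaledReg + Displacement. The struct and the
// example values below are illustrative assumptions only.
#include <cassert>
#include <cstdint>
struct SimpleAddrMode {
  uint64_t Base;        // value held by AM.BaseReg
  uint64_t Index;       // value held by AM.ScaledReg
  int64_t Scale;        // AM.Scale (1, 2, 4 or 8 on x86)
  int64_t Displacement; // AM.Displacement
};
static uint64_t effectiveAddress(const SimpleAddrMode &AM) {
  return AM.Base + static_cast<uint64_t>(AM.Scale) * AM.Index +
         static_cast<uint64_t>(AM.Displacement);
}
static void checkEffectiveAddress() {
  // movl 16(%rdi,%rcx,4), %eax with %rdi = 0x1000 and %rcx = 3.
  SimpleAddrMode AM{0x1000, 3, 4, 16};
  assert(effectiveAddress(AM) == 0x1000 + 4 * 3 + 16);
}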
3847
3848bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
3849 const Register Reg,
3850 int64_t &ImmVal) const {
3851 if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri)
3852 return false;
3853 // Mov Src can be a global address.
3854 if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg)
3855 return false;
3856 ImmVal = MI.getOperand(1).getImm();
3857 return true;
3858}
3859
3860bool X86InstrInfo::preservesZeroValueInReg(
3861 const MachineInstr *MI, const Register NullValueReg,
3862 const TargetRegisterInfo *TRI) const {
3863 if (!MI->modifiesRegister(NullValueReg, TRI))
3864 return true;
3865 switch (MI->getOpcode()) {
3866 // Shifting a null value right/left onto itself still yields a null, i.e.
3867 // rax = shl rax, X.
3868 case X86::SHR64ri:
3869 case X86::SHR32ri:
3870 case X86::SHL64ri:
3871 case X86::SHL32ri:
3872 assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
3873 "expected for shift opcode!");
3874 return MI->getOperand(0).getReg() == NullValueReg &&
3875 MI->getOperand(1).getReg() == NullValueReg;
3876 // Zero extend of a sub-reg of NullValueReg into itself does not change the
3877 // null value.
3878 case X86::MOV32rr:
3879 return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
3880 return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
3881 });
3882 default:
3883 return false;
3884 }
3885 llvm_unreachable("Should be handled above!");
3886}
3887
3888bool X86InstrInfo::getMemOperandsWithOffsetWidth(
3889 const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
3890 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
3891 const TargetRegisterInfo *TRI) const {
3892 const MCInstrDesc &Desc = MemOp.getDesc();
3893 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3894 if (MemRefBegin < 0)
3895 return false;
3896
3897 MemRefBegin += X86II::getOperandBias(Desc);
3898
3899 const MachineOperand *BaseOp =
3900 &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
3901 if (!BaseOp->isReg()) // Can be an MO_FrameIndex
3902 return false;
3903
3904 if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
3905 return false;
3906
3907 if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
3908 X86::NoRegister)
3909 return false;
3910
3911 const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
3912
3913 // Displacement can be symbolic
3914 if (!DispMO.isImm())
3915 return false;
3916
3917 Offset = DispMO.getImm();
3918
3919 if (!BaseOp->isReg())
3920 return false;
3921
3922 OffsetIsScalable = false;
3923 // FIXME: Relying on memoperands() may not be the right thing to do here. Check
3924 // with the X86 maintainers, and fix it accordingly. For now, it is ok, since
3925 // `Width` is not used by the X86 back-end at the moment.
3926 Width =
3927 !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
3928 BaseOps.push_back(BaseOp);
3929 return true;
3930}
3931
3932static unsigned getStoreRegOpcode(Register SrcReg,
3933 const TargetRegisterClass *RC,
3934 bool IsStackAligned,
3935 const X86Subtarget &STI) {
3936 return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
3937}
3938
3939static unsigned getLoadRegOpcode(Register DestReg,
3940 const TargetRegisterClass *RC,
3941 bool IsStackAligned, const X86Subtarget &STI) {
3942 return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
3943}
3944
3945void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
3946 MachineBasicBlock::iterator MI,
3947 Register SrcReg, bool isKill, int FrameIdx,
3948 const TargetRegisterClass *RC,
3949 const TargetRegisterInfo *TRI) const {
3950 const MachineFunction &MF = *MBB.getParent();
3951 const MachineFrameInfo &MFI = MF.getFrameInfo();
3952 assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
3953 "Stack slot too small for store");
3954 if (RC->getID() == X86::TILERegClassID) {
3955 unsigned Opc = X86::TILESTORED;
3956 // tilestored %tmm, (%sp, %idx)
3957 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
3958 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
3959 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
3960 MachineInstr *NewMI =
3961 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
3962 .addReg(SrcReg, getKillRegState(isKill));
3963 MachineOperand &MO = NewMI->getOperand(2);
3964 MO.setReg(VirtReg);
3965 MO.setIsKill(true);
3966 } else {
3967 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
3968 bool isAligned =
3969 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
3970 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
3971 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
3972 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
3973 .addReg(SrcReg, getKillRegState(isKill));
3974 }
3975}
3976
3977void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
3978 MachineBasicBlock::iterator MI,
3979 Register DestReg, int FrameIdx,
3980 const TargetRegisterClass *RC,
3981 const TargetRegisterInfo *TRI) const {
3982 if (RC->getID() == X86::TILERegClassID) {
3983 unsigned Opc = X86::TILELOADD;
3984 // tileloadd (%sp, %idx), %tmm
3985 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
3986 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
3987 MachineInstr *NewMI =
3988 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
3989 NewMI = addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
3990 FrameIdx);
3991 MachineOperand &MO = NewMI->getOperand(3);
3992 MO.setReg(VirtReg);
3993 MO.setIsKill(true);
3994 } else {
3995 const MachineFunction &MF = *MBB.getParent();
3996 const MachineFrameInfo &MFI = MF.getFrameInfo();
3997 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
3998 bool isAligned =
3999 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
4000 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
4001 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
4002 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
4003 FrameIdx);
4004 }
4005}
4006
4007bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
4008 Register &SrcReg2, int64_t &CmpMask,
4009 int64_t &CmpValue) const {
4010 switch (MI.getOpcode()) {
4011 default: break;
4012 case X86::CMP64ri32:
4013 case X86::CMP64ri8:
4014 case X86::CMP32ri:
4015 case X86::CMP32ri8:
4016 case X86::CMP16ri:
4017 case X86::CMP16ri8:
4018 case X86::CMP8ri:
4019 SrcReg = MI.getOperand(0).getReg();
4020 SrcReg2 = 0;
4021 if (MI.getOperand(1).isImm()) {
4022 CmpMask = ~0;
4023 CmpValue = MI.getOperand(1).getImm();
4024 } else {
4025 CmpMask = CmpValue = 0;
4026 }
4027 return true;
4028 // A SUB can be used to perform comparison.
4029 case X86::SUB64rm:
4030 case X86::SUB32rm:
4031 case X86::SUB16rm:
4032 case X86::SUB8rm:
4033 SrcReg = MI.getOperand(1).getReg();
4034 SrcReg2 = 0;
4035 CmpMask = 0;
4036 CmpValue = 0;
4037 return true;
4038 case X86::SUB64rr:
4039 case X86::SUB32rr:
4040 case X86::SUB16rr:
4041 case X86::SUB8rr:
4042 SrcReg = MI.getOperand(1).getReg();
4043 SrcReg2 = MI.getOperand(2).getReg();
4044 CmpMask = 0;
4045 CmpValue = 0;
4046 return true;
4047 case X86::SUB64ri32:
4048 case X86::SUB64ri8:
4049 case X86::SUB32ri:
4050 case X86::SUB32ri8:
4051 case X86::SUB16ri:
4052 case X86::SUB16ri8:
4053 case X86::SUB8ri:
4054 SrcReg = MI.getOperand(1).getReg();
4055 SrcReg2 = 0;
4056 if (MI.getOperand(2).isImm()) {
4057 CmpMask = ~0;
4058 CmpValue = MI.getOperand(2).getImm();
4059 } else {
4060 CmpMask = CmpValue = 0;
4061 }
4062 return true;
4063 case X86::CMP64rr:
4064 case X86::CMP32rr:
4065 case X86::CMP16rr:
4066 case X86::CMP8rr:
4067 SrcReg = MI.getOperand(0).getReg();
4068 SrcReg2 = MI.getOperand(1).getReg();
4069 CmpMask = 0;
4070 CmpValue = 0;
4071 return true;
4072 case X86::TEST8rr:
4073 case X86::TEST16rr:
4074 case X86::TEST32rr:
4075 case X86::TEST64rr:
4076 SrcReg = MI.getOperand(0).getReg();
4077 if (MI.getOperand(1).getReg() != SrcReg)
4078 return false;
4079 // Compare against zero.
4080 SrcReg2 = 0;
4081 CmpMask = ~0;
4082 CmpValue = 0;
4083 return true;
4084 }
4085 return false;
4086}
4087
4088bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
4089 Register SrcReg, Register SrcReg2,
4090 int64_t ImmMask, int64_t ImmValue,
4091 const MachineInstr &OI, bool *IsSwapped,
4092 int64_t *ImmDelta) const {
4093 switch (OI.getOpcode()) {
4094 case X86::CMP64rr:
4095 case X86::CMP32rr:
4096 case X86::CMP16rr:
4097 case X86::CMP8rr:
4098 case X86::SUB64rr:
4099 case X86::SUB32rr:
4100 case X86::SUB16rr:
4101 case X86::SUB8rr: {
4102 Register OISrcReg;
4103 Register OISrcReg2;
4104 int64_t OIMask;
4105 int64_t OIValue;
4106 if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
4107 OIMask != ImmMask || OIValue != ImmValue)
4108 return false;
4109 if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
4110 *IsSwapped = false;
4111 return true;
4112 }
4113 if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
4114 *IsSwapped = true;
4115 return true;
4116 }
4117 return false;
4118 }
4119 case X86::CMP64ri32:
4120 case X86::CMP64ri8:
4121 case X86::CMP32ri:
4122 case X86::CMP32ri8:
4123 case X86::CMP16ri:
4124 case X86::CMP16ri8:
4125 case X86::CMP8ri:
4126 case X86::SUB64ri32:
4127 case X86::SUB64ri8:
4128 case X86::SUB32ri:
4129 case X86::SUB32ri8:
4130 case X86::SUB16ri:
4131 case X86::SUB16ri8:
4132 case X86::SUB8ri:
4133 case X86::TEST64rr:
4134 case X86::TEST32rr:
4135 case X86::TEST16rr:
4136 case X86::TEST8rr: {
4137 if (ImmMask != 0) {
4138 Register OISrcReg;
4139 Register OISrcReg2;
4140 int64_t OIMask;
4141 int64_t OIValue;
4142 if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
4143 SrcReg == OISrcReg && ImmMask == OIMask) {
4144 if (OIValue == ImmValue) {
4145 *ImmDelta = 0;
4146 return true;
4147 } else if (static_cast<uint64_t>(ImmValue) ==
4148 static_cast<uint64_t>(OIValue) - 1) {
4149 *ImmDelta = -1;
4150 return true;
4151 } else if (static_cast<uint64_t>(ImmValue) ==
4152 static_cast<uint64_t>(OIValue) + 1) {
4153 *ImmDelta = 1;
4154 return true;
4155 } else {
4156 return false;
4157 }
4158 }
4159 }
4160 return FlagI.isIdenticalTo(OI);
4161 }
4162 default:
4163 return false;
4164 }
4165}
4166
4167/// Check whether the definition can be converted
4168/// to remove a comparison against zero.
4169inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
4170 bool &ClearsOverflowFlag) {
4171 NoSignFlag = false;
4172 ClearsOverflowFlag = false;
4173
4174 switch (MI.getOpcode()) {
4175 default: return false;
4176
4177 // The shift instructions only modify ZF if their shift count is non-zero.
4178 // N.B.: The processor truncates the shift count depending on the encoding.
4179 case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri:
4180 case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri:
4181 return getTruncatedShiftCount(MI, 2) != 0;
4182
4183 // Some left shift instructions can be turned into LEA instructions but only
4184 // if their flags aren't used. Avoid transforming such instructions.
4185 case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{
4186 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
4187 if (isTruncatedShiftCountForLEA(ShAmt)) return false;
4188 return ShAmt != 0;
4189 }
4190
4191 case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
4192 case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
4193 return getTruncatedShiftCount(MI, 3) != 0;
4194
4195 case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
4196 case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
4197 case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
4198 case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
4199 case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
4200 case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
4201 case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
4202 case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
4203 case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
4204 case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
4205 case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
4206 case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
4207 case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
4208 case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8:
4209 case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr:
4210 case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm:
4211 case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm:
4212 case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri:
4213 case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8:
4214 case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr:
4215 case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm:
4216 case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm:
4217 case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
4218 case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
4219 case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
4220 case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
4221 case X86::LZCNT16rr: case X86::LZCNT16rm:
4222 case X86::LZCNT32rr: case X86::LZCNT32rm:
4223 case X86::LZCNT64rr: case X86::LZCNT64rm:
4224 case X86::POPCNT16rr:case X86::POPCNT16rm:
4225 case X86::POPCNT32rr:case X86::POPCNT32rm:
4226 case X86::POPCNT64rr:case X86::POPCNT64rm:
4227 case X86::TZCNT16rr: case X86::TZCNT16rm:
4228 case X86::TZCNT32rr: case X86::TZCNT32rm:
4229 case X86::TZCNT64rr: case X86::TZCNT64rm:
4230 return true;
4231 case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
4232 case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
4233 case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
4234 case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
4235 case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
4236 case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
4237 case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
4238 case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
4239 case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
4240 case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
4241 case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
4242 case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
4243 case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
4244 case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
4245 case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
4246 case X86::ANDN32rr: case X86::ANDN32rm:
4247 case X86::ANDN64rr: case X86::ANDN64rm:
4248 case X86::BLSI32rr: case X86::BLSI32rm:
4249 case X86::BLSI64rr: case X86::BLSI64rm:
4250 case X86::BLSMSK32rr: case X86::BLSMSK32rm:
4251 case X86::BLSMSK64rr: case X86::BLSMSK64rm:
4252 case X86::BLSR32rr: case X86::BLSR32rm:
4253 case X86::BLSR64rr: case X86::BLSR64rm:
4254 case X86::BLCFILL32rr: case X86::BLCFILL32rm:
4255 case X86::BLCFILL64rr: case X86::BLCFILL64rm:
4256 case X86::BLCI32rr: case X86::BLCI32rm:
4257 case X86::BLCI64rr: case X86::BLCI64rm:
4258 case X86::BLCIC32rr: case X86::BLCIC32rm:
4259 case X86::BLCIC64rr: case X86::BLCIC64rm:
4260 case X86::BLCMSK32rr: case X86::BLCMSK32rm:
4261 case X86::BLCMSK64rr: case X86::BLCMSK64rm:
4262 case X86::BLCS32rr: case X86::BLCS32rm:
4263 case X86::BLCS64rr: case X86::BLCS64rm:
4264 case X86::BLSFILL32rr: case X86::BLSFILL32rm:
4265 case X86::BLSFILL64rr: case X86::BLSFILL64rm:
4266 case X86::BLSIC32rr: case X86::BLSIC32rm:
4267 case X86::BLSIC64rr: case X86::BLSIC64rm:
4268 case X86::BZHI32rr: case X86::BZHI32rm:
4269 case X86::BZHI64rr: case X86::BZHI64rm:
4270 case X86::T1MSKC32rr: case X86::T1MSKC32rm:
4271 case X86::T1MSKC64rr: case X86::T1MSKC64rm:
4272 case X86::TZMSK32rr: case X86::TZMSK32rm:
4273 case X86::TZMSK64rr: case X86::TZMSK64rm:
4274 // These instructions clear the overflow flag just like TEST.
4275 // FIXME: These are not the only instructions in this switch that clear the
4276 // overflow flag.
4277 ClearsOverflowFlag = true;
4278 return true;
4279 case X86::BEXTR32rr: case X86::BEXTR64rr:
4280 case X86::BEXTR32rm: case X86::BEXTR64rm:
4281 case X86::BEXTRI32ri: case X86::BEXTRI32mi:
4282 case X86::BEXTRI64ri: case X86::BEXTRI64mi:
4283 // BEXTR doesn't update the sign flag so we can't use it. It does clear
4284 // the overflow flag, but that's not useful without the sign flag.
4285 NoSignFlag = true;
4286 return true;
4287 }
4288}
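// Annotation (added for exposition; not part of the original source): the
// shift-count checks above matter because a shift by zero, e.g.
//   %eax = SHR32ri %eax, 0
// leaves EFLAGS untouched, so a following TEST against zero could not reuse
// its flags; only a non-zero (truncated) count makes the def convertible.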
4289
4290/// Check whether the use can be converted to remove a comparison against zero.
4291static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
4292 switch (MI.getOpcode()) {
4293 default: return X86::COND_INVALID;
4294 case X86::NEG8r:
4295 case X86::NEG16r:
4296 case X86::NEG32r:
4297 case X86::NEG64r:
4298 return X86::COND_AE;
4299 case X86::LZCNT16rr:
4300 case X86::LZCNT32rr:
4301 case X86::LZCNT64rr:
4302 return X86::COND_B;
4303 case X86::POPCNT16rr:
4304 case X86::POPCNT32rr:
4305 case X86::POPCNT64rr:
4306 return X86::COND_E;
4307 case X86::TZCNT16rr:
4308 case X86::TZCNT32rr:
4309 case X86::TZCNT64rr:
4310 return X86::COND_B;
4311 case X86::BSF16rr:
4312 case X86::BSF32rr:
4313 case X86::BSF64rr:
4314 case X86::BSR16rr:
4315 case X86::BSR32rr:
4316 case X86::BSR64rr:
4317 return X86::COND_E;
4318 case X86::BLSI32rr:
4319 case X86::BLSI64rr:
4320 return X86::COND_AE;
4321 case X86::BLSR32rr:
4322 case X86::BLSR64rr:
4323 case X86::BLSMSK32rr:
4324 case X86::BLSMSK64rr:
4325 return X86::COND_B;
4326 // TODO: TBM instructions.
4327 }
4328}
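// Annotation (added for exposition; not part of the original source): for
// POPCNT the mapping above means a sequence such as
//   %ecx = POPCNT32rr %eax
//   ...
//   TEST32rr %eax, %eax
//   JCC_1 %bb, COND_E
// can drop the TEST and keep branching on COND_E, because POPCNT sets ZF
// exactly when its source operand is zero.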
4329
4330/// Check if there exists an earlier instruction that
4331/// operates on the same source operands and sets flags in the same way as
4332/// Compare; remove Compare if possible.
4333bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
4334 Register SrcReg2, int64_t CmpMask,
4335 int64_t CmpValue,
4336 const MachineRegisterInfo *MRI) const {
4337 // Check whether we can replace SUB with CMP.
4338 switch (CmpInstr.getOpcode()) {
4339 default: break;
4340 case X86::SUB64ri32:
4341 case X86::SUB64ri8:
4342 case X86::SUB32ri:
4343 case X86::SUB32ri8:
4344 case X86::SUB16ri:
4345 case X86::SUB16ri8:
4346 case X86::SUB8ri:
4347 case X86::SUB64rm:
4348 case X86::SUB32rm:
4349 case X86::SUB16rm:
4350 case X86::SUB8rm:
4351 case X86::SUB64rr:
4352 case X86::SUB32rr:
4353 case X86::SUB16rr:
4354 case X86::SUB8rr: {
4355 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
4356 return false;
4357     // There is no use of the destination register, so we can replace SUB with CMP.
4358 unsigned NewOpcode = 0;
4359 switch (CmpInstr.getOpcode()) {
4360     default: llvm_unreachable("Unreachable!");
4361 case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
4362 case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
4363 case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
4364 case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
4365 case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
4366 case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
4367 case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
4368 case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
4369 case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
4370 case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
4371 case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
4372 case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
4373 case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
4374 case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
4375 case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
4376 }
4377 CmpInstr.setDesc(get(NewOpcode));
4378 CmpInstr.RemoveOperand(0);
4379 // Mutating this instruction invalidates any debug data associated with it.
4380 CmpInstr.dropDebugNumber();
4381 // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
4382 if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
4383 NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
4384 return false;
4385 }
4386 }
4387
4388 // The following code tries to remove the comparison by re-using EFLAGS
4389 // from earlier instructions.
4390
4391 bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
4392
4393 // Transformation currently requires SSA values.
4394 if (SrcReg2.isPhysical())
4395 return false;
4396 MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
4397   assert(SrcRegDef && "Must have a definition (SSA)");
4398
4399 MachineInstr *MI = nullptr;
4400 MachineInstr *Sub = nullptr;
4401 MachineInstr *Movr0Inst = nullptr;
4402 bool NoSignFlag = false;
4403 bool ClearsOverflowFlag = false;
4404 bool ShouldUpdateCC = false;
4405 bool IsSwapped = false;
4406 X86::CondCode NewCC = X86::COND_INVALID;
4407 int64_t ImmDelta = 0;
4408
4409 // Search backward from CmpInstr for the next instruction defining EFLAGS.
4410 const TargetRegisterInfo *TRI = &getRegisterInfo();
4411 MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
4412 MachineBasicBlock::reverse_iterator From =
4413 std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
4414 for (MachineBasicBlock *MBB = &CmpMBB;;) {
4415 for (MachineInstr &Inst : make_range(From, MBB->rend())) {
4416 // Try to use EFLAGS from the instruction defining %SrcReg. Example:
4417 // %eax = addl ...
4418 // ... // EFLAGS not changed
4419 // testl %eax, %eax // <-- can be removed
4420 if (&Inst == SrcRegDef) {
4421 if (IsCmpZero &&
4422 isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
4423 MI = &Inst;
4424 break;
4425 }
4426 // Cannot find other candidates before definition of SrcReg.
4427 return false;
4428 }
4429
4430 if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
4431 // Try to use EFLAGS produced by an instruction reading %SrcReg.
4432 // Example:
4433 // %eax = ...
4434 // ...
4435 // popcntl %eax
4436 // ... // EFLAGS not changed
4437 // testl %eax, %eax // <-- can be removed
4438 if (IsCmpZero) {
4439 NewCC = isUseDefConvertible(Inst);
4440 if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
4441 Inst.getOperand(1).getReg() == SrcReg) {
4442 ShouldUpdateCC = true;
4443 MI = &Inst;
4444 break;
4445 }
4446 }
4447
4448 // Try to use EFLAGS from an instruction with similar flag results.
4449 // Example:
4450 // sub x, y or cmp x, y
4451 // ... // EFLAGS not changed
4452 // cmp x, y // <-- can be removed
4453 if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
4454 Inst, &IsSwapped, &ImmDelta)) {
4455 Sub = &Inst;
4456 break;
4457 }
4458
4459         // MOV32r0 is implemented with xor, which clobbers the condition codes.
4460         // It is safe to move it up if the definition of EFLAGS is dead and
4461         // earlier instructions do not read or write EFLAGS.
4462 if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
4463 Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
4464 Movr0Inst = &Inst;
4465 continue;
4466 }
4467
4468 // Cannot do anything for any other EFLAG changes.
4469 return false;
4470 }
4471 }
4472
4473 if (MI || Sub)
4474 break;
4475
4476     // Reached the beginning of the basic block. Continue in the predecessor
4477     // if there is exactly one.
4478 if (MBB->pred_size() != 1)
4479 return false;
4480 MBB = *MBB->pred_begin();
4481 From = MBB->rbegin();
4482 }
4483
4484 // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
4485 // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
4486 // If we are done with the basic block, we need to check whether EFLAGS is
4487 // live-out.
4488 bool FlagsMayLiveOut = true;
4489 SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
4490 MachineBasicBlock::iterator AfterCmpInstr =
4491 std::next(MachineBasicBlock::iterator(CmpInstr));
4492 for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
4493 bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
4494 bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
4495     // We need to check the usage if this instruction both uses and updates EFLAGS.
4496 if (!UseEFLAGS && ModifyEFLAGS) {
4497 // It is safe to remove CmpInstr if EFLAGS is updated again.
4498 FlagsMayLiveOut = false;
4499 break;
4500 }
4501 if (!UseEFLAGS && !ModifyEFLAGS)
4502 continue;
4503
4504 // EFLAGS is used by this instruction.
4505 X86::CondCode OldCC = X86::COND_INVALID;
4506 if (MI || IsSwapped || ImmDelta != 0) {
4507 // We decode the condition code from opcode.
4508 if (Instr.isBranch())
4509 OldCC = X86::getCondFromBranch(Instr);
4510 else {
4511 OldCC = X86::getCondFromSETCC(Instr);
4512 if (OldCC == X86::COND_INVALID)
4513 OldCC = X86::getCondFromCMov(Instr);
4514 }
4515 if (OldCC == X86::COND_INVALID) return false;
4516 }
4517 X86::CondCode ReplacementCC = X86::COND_INVALID;
4518 if (MI) {
4519 switch (OldCC) {
4520 default: break;
4521 case X86::COND_A: case X86::COND_AE:
4522 case X86::COND_B: case X86::COND_BE:
4523 // CF is used, we can't perform this optimization.
4524 return false;
4525 case X86::COND_G: case X86::COND_GE:
4526 case X86::COND_L: case X86::COND_LE:
4527 case X86::COND_O: case X86::COND_NO:
4528 // If OF is used, the instruction needs to clear it like CmpZero does.
4529 if (!ClearsOverflowFlag)
4530 return false;
4531 break;
4532 case X86::COND_S: case X86::COND_NS:
4533 // If SF is used, but the instruction doesn't update the SF, then we
4534 // can't do the optimization.
4535 if (NoSignFlag)
4536 return false;
4537 break;
4538 }
4539
4540       // If we're updating the condition code, check if we have to reverse the
4541       // condition.
4542 if (ShouldUpdateCC)
4543 switch (OldCC) {
4544 default:
4545 return false;
4546 case X86::COND_E:
4547 ReplacementCC = NewCC;
4548 break;
4549 case X86::COND_NE:
4550 ReplacementCC = GetOppositeBranchCondition(NewCC);
4551 break;
4552 }
4553 } else if (IsSwapped) {
4554 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
4555 // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
4556 // We swap the condition code and synthesize the new opcode.
4557 ReplacementCC = getSwappedCondition(OldCC);
4558 if (ReplacementCC == X86::COND_INVALID)
4559 return false;
4560 ShouldUpdateCC = true;
4561 } else if (ImmDelta != 0) {
4562 unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
4563       // Register width in bits, used to form the signed/unsigned min/max
4564       // constants that guard the immediate adjustments below.
4565 switch (OldCC) {
4566 case X86::COND_L: // x <s (C + 1) --> x <=s C
4567 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
4568 return false;
4569 ReplacementCC = X86::COND_LE;
4570 break;
4571 case X86::COND_B: // x <u (C + 1) --> x <=u C
4572 if (ImmDelta != 1 || CmpValue == 0)
4573 return false;
4574 ReplacementCC = X86::COND_BE;
4575 break;
4576 case X86::COND_GE: // x >=s (C + 1) --> x >s C
4577 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
4578 return false;
4579 ReplacementCC = X86::COND_G;
4580 break;
4581 case X86::COND_AE: // x >=u (C + 1) --> x >u C
4582 if (ImmDelta != 1 || CmpValue == 0)
4583 return false;
4584 ReplacementCC = X86::COND_A;
4585 break;
4586 case X86::COND_G: // x >s (C - 1) --> x >=s C
4587 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
4588 return false;
4589 ReplacementCC = X86::COND_GE;
4590 break;
4591 case X86::COND_A: // x >u (C - 1) --> x >=u C
4592 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
4593 return false;
4594 ReplacementCC = X86::COND_AE;
4595 break;
4596 case X86::COND_LE: // x <=s (C - 1) --> x <s C
4597 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
4598 return false;
4599 ReplacementCC = X86::COND_L;
4600 break;
4601 case X86::COND_BE: // x <=u (C - 1) --> x <u C
4602 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
4603 return false;
4604 ReplacementCC = X86::COND_B;
4605 break;
4606 default:
4607 return false;
4608 }
4609 ShouldUpdateCC = true;
4610 }
4611
4612 if (ShouldUpdateCC && ReplacementCC != OldCC) {
4613 // Push the MachineInstr to OpsToUpdate.
4614 // If it is safe to remove CmpInstr, the condition code of these
4615 // instructions will be modified.
4616 OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
4617 }
4618 if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
4619 // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
4620 FlagsMayLiveOut = false;
4621 break;
4622 }
4623 }
4624
4625   // If we have to update users but EFLAGS is live-out, abort, since we cannot
4626   // easily find all of the users.
4627 if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {
4628 for (MachineBasicBlock *Successor : CmpMBB.successors())
4629 if (Successor->isLiveIn(X86::EFLAGS))
4630 return false;
4631 }
4632
4633 // The instruction to be updated is either Sub or MI.
4634   assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
4635 Sub = MI != nullptr ? MI : Sub;
4636 MachineBasicBlock *SubBB = Sub->getParent();
4637 // Move Movr0Inst to the appropriate place before Sub.
4638 if (Movr0Inst) {
4639 // Only move within the same block so we don't accidentally move to a
4640 // block with higher execution frequency.
4641 if (&CmpMBB != SubBB)
4642 return false;
4643 // Look backwards until we find a def that doesn't use the current EFLAGS.
4644 MachineBasicBlock::reverse_iterator InsertI = Sub,
4645 InsertE = Sub->getParent()->rend();
4646 for (; InsertI != InsertE; ++InsertI) {
4647 MachineInstr *Instr = &*InsertI;
4648 if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
4649 Instr->modifiesRegister(X86::EFLAGS, TRI)) {
4650 Movr0Inst->getParent()->remove(Movr0Inst);
4651 Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
4652 Movr0Inst);
4653 break;
4654 }
4655 }
4656 if (InsertI == InsertE)
4657 return false;
4658 }
4659
4660 // Make sure Sub instruction defines EFLAGS and mark the def live.
4661 MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS);
4662   assert(FlagDef && "Unable to locate a def EFLAGS operand");
4663 FlagDef->setIsDead(false);
4664
4665 CmpInstr.eraseFromParent();
4666
4667 // Modify the condition code of instructions in OpsToUpdate.
4668 for (auto &Op : OpsToUpdate) {
4669 Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
4670 .setImm(Op.second);
4671 }
4672 // Add EFLAGS to block live-ins between CmpBB and block of flags producer.
4673 for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
4674 MBB = *MBB->pred_begin()) {
4675     assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
4676 if (!MBB->isLiveIn(X86::EFLAGS))
4677 MBB->addLiveIn(X86::EFLAGS);
4678 }
4679 return true;
4680}
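// Annotation (added for exposition; not part of the original source): a
// sketch of the ImmDelta rewrite performed above:
//   CMP32ri %eax, 4           ; earlier flags producer
//   ...                       ; EFLAGS not clobbered
//   CMP32ri %eax, 5           ; <-- removable (immediates differ by 1)
//   JCC_1 %bb, COND_L         ; x <s 5
// becomes a branch on COND_LE (x <=s 4) against the surviving compare,
// subject to the signed/unsigned boundary checks in the code above.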
4681
4682/// Try to remove the load by folding it to a register
4683/// operand at the use. We fold the load if it defines a virtual register,
4684/// the virtual register is used once in the same BB, and the instructions
4685/// in between do not load or store and have no side effects.
4686MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
4687 const MachineRegisterInfo *MRI,
4688 Register &FoldAsLoadDefReg,
4689 MachineInstr *&DefMI) const {
4690 // Check whether we can move DefMI here.
4691 DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
4692   assert(DefMI);
4693 bool SawStore = false;
4694 if (!DefMI->isSafeToMove(nullptr, SawStore))
4695 return nullptr;
4696
4697 // Collect information about virtual register operands of MI.
4698 SmallVector<unsigned, 1> SrcOperandIds;
4699 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
4700 MachineOperand &MO = MI.getOperand(i);
4701 if (!MO.isReg())
4702 continue;
4703 Register Reg = MO.getReg();
4704 if (Reg != FoldAsLoadDefReg)
4705 continue;
4706 // Do not fold if we have a subreg use or a def.
4707 if (MO.getSubReg() || MO.isDef())
4708 return nullptr;
4709 SrcOperandIds.push_back(i);
4710 }
4711 if (SrcOperandIds.empty())
4712 return nullptr;
4713
4714 // Check whether we can fold the def into SrcOperandId.
4715 if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
4716 FoldAsLoadDefReg = 0;
4717 return FoldMI;
4718 }
4719
4720 return nullptr;
4721}
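// Annotation (added for exposition; not part of the original source): a
// typical fold performed here, in rough MIR form:
//   %1 = MOV32rm <addr>       ; load with a single use below
//   %2 = ADD32rr %0, %1
// becomes
//   %2 = ADD32rm %0, <addr>
// provided nothing in between loads, stores, or has other side effects.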
4722
4723/// Expand a single-def pseudo instruction to a two-addr
4724/// instruction with two undef reads of the register being defined.
4725/// This is used for mapping:
4726/// %xmm4 = V_SET0
4727/// to:
4728/// %xmm4 = PXORrr undef %xmm4, undef %xmm4
4729///
4730static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
4731 const MCInstrDesc &Desc) {
4732   assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
4733 Register Reg = MIB.getReg(0);
4734 MIB->setDesc(Desc);
4735
4736 // MachineInstr::addOperand() will insert explicit operands before any
4737 // implicit operands.
4738 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
4739 // But we don't trust that.
4740   assert(MIB.getReg(1) == Reg &&
4741          MIB.getReg(2) == Reg && "Misplaced operand");
4742 return true;
4743}
4744
4745/// Expand a single-def pseudo instruction to a two-addr
4746/// instruction with two %k0 reads.
4747/// This is used for mapping:
4748/// %k4 = K_SET1
4749/// to:
4750/// %k4 = KXNORrr %k0, %k0
4751static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
4752 Register Reg) {
4753   assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
4754 MIB->setDesc(Desc);
4755 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
4756 return true;
4757}
4758
4759static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
4760 bool MinusOne) {
4761 MachineBasicBlock &MBB = *MIB->getParent();
4762 const DebugLoc &DL = MIB->getDebugLoc();
4763 Register Reg = MIB.getReg(0);
4764
4765 // Insert the XOR.
4766 BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
4767 .addReg(Reg, RegState::Undef)
4768 .addReg(Reg, RegState::Undef);
4769
4770 // Turn the pseudo into an INC or DEC.
4771 MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
4772 MIB.addReg(Reg);
4773
4774 return true;
4775}
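// Annotation (added for exposition; not part of the original source): the
// expansion above turns the MOV32r1 / MOV32r_1 pseudos into
//   xorl %eax, %eax
//   incl %eax                 ; decl %eax for the -1 variant
// a short, dependence-breaking way to materialize +/-1, at the cost of
// clobbering EFLAGS.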
4776
4777static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
4778 const TargetInstrInfo &TII,
4779 const X86Subtarget &Subtarget) {
4780 MachineBasicBlock &MBB = *MIB->getParent();
4781 const DebugLoc &DL = MIB->getDebugLoc();
4782 int64_t Imm = MIB->getOperand(1).getImm();
4783   assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
4784 MachineBasicBlock::iterator I = MIB.getInstr();
4785
4786 int StackAdjustment;
4787
4788 if (Subtarget.is64Bit()) {
4789     assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
4790            MIB->getOpcode() == X86::MOV32ImmSExti8);
4791
4792 // Can't use push/pop lowering if the function might write to the red zone.
4793 X86MachineFunctionInfo *X86FI =
4794 MBB.getParent()->getInfo<X86MachineFunctionInfo>();
4795 if (X86FI->getUsesRedZone()) {
4796 MIB->setDesc(TII.get(MIB->getOpcode() ==
4797 X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
4798 return true;
4799 }
4800
4801 // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
4802 // widen the register if necessary.
4803 StackAdjustment = 8;
4804 BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
4805 MIB->setDesc(TII.get(X86::POP64r));
4806 MIB->getOperand(0)
4807 .setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
4808 } else {
4809     assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
4810 StackAdjustment = 4;
4811 BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
4812 MIB->setDesc(TII.get(X86::POP32r));
4813 }
4814 MIB->RemoveOperand(1);
4815 MIB->addImplicitDefUseOperands(*MBB.getParent());
4816
4817 // Build CFI if necessary.
4818 MachineFunction &MF = *MBB.getParent();
4819 const X86FrameLowering *TFL = Subtarget.getFrameLowering();
4820 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
4821 bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
4822 bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
4823 if (EmitCFI) {
4824 TFL->BuildCFI(MBB, I, DL,
4825 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
4826 TFL->BuildCFI(MBB, std::next(I), DL,
4827 MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
4828 }
4829
4830 return true;
4831}
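// Annotation (added for exposition; not part of the original source): with
// the push/pop lowering above,
//   %rax = MOV64ImmSExti8 -1
// becomes
//   pushq $-1
//   popq %rax
// plus the CFI adjustments emitted when frame moves are required; the
// red-zone check forces a plain MOV instead when pushing below RSP is unsafe.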
4832
4833// LoadStackGuard has so far only been implemented for 64-bit MachO. A
4834// different code sequence is needed for other targets.
4835static void expandLoadStackGuard(MachineInstrBuilder &MIB,
4836 const TargetInstrInfo &TII) {
4837 MachineBasicBlock &MBB = *MIB->getParent();
4838 const DebugLoc &DL = MIB->getDebugLoc();
4839 Register Reg = MIB.getReg(0);
4840 const GlobalValue *GV =
4841 cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
4842 auto Flags = MachineMemOperand::MOLoad |
4843 MachineMemOperand::MODereferenceable |
4844 MachineMemOperand::MOInvariant;
4845 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4846 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
4847 MachineBasicBlock::iterator I = MIB.getInstr();
4848
4849 BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
4850 .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
4851 .addMemOperand(MMO);
4852 MIB->setDebugLoc(DL);
4853 MIB->setDesc(TII.get(X86::MOV64rm));
4854 MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
4855}
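// Annotation (added for exposition; not part of the original source):
// LOAD_STACK_GUARD expands to two loads, roughly
//   movq <guard-symbol>@GOTPCREL(%rip), %reg
//   movq (%reg), %reg
// where the guard symbol comes from the pseudo's memory operand; the symbol
// itself is target-provided and not spelled out here.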
4856
4857static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
4858 MachineBasicBlock &MBB = *MIB->getParent();
4859 MachineFunction &MF = *MBB.getParent();
4860 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
4861 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4862 unsigned XorOp =
4863 MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
4864 MIB->setDesc(TII.get(XorOp));
4865 MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
4866 return true;
4867}
4868
4869// This is used to handle spills for 128/256-bit registers when we have AVX512,
4870// but not VLX. If the spill uses an extended register, we need an instruction
4871// that loads the lower 128/256 bits but is available with only AVX512F.
4872static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
4873 const TargetRegisterInfo *TRI,
4874 const MCInstrDesc &LoadDesc,
4875 const MCInstrDesc &BroadcastDesc,
4876 unsigned SubIdx) {
4877 Register DestReg = MIB.getReg(0);
4878 // Check if DestReg is XMM16-31 or YMM16-31.
4879 if (TRI->getEncodingValue(DestReg) < 16) {
4880 // We can use a normal VEX encoded load.
4881 MIB->setDesc(LoadDesc);
4882 } else {
4883 // Use a 128/256-bit VBROADCAST instruction.
4884 MIB->setDesc(BroadcastDesc);
4885 // Change the destination to a 512-bit register.
4886 DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
4887 MIB->getOperand(0).setReg(DestReg);
4888 }
4889 return true;
4890}
4891
4892// This is used to handle spills for 128/256-bit registers when we have AVX512,
4893// but not VLX. If the spill uses an extended register, we need an instruction
4894// that stores the lower 128/256 bits but is available with only AVX512F.
4895static bool expandNOVLXStore(MachineInstrBuilder &MIB,
4896 const TargetRegisterInfo *TRI,
4897 const MCInstrDesc &StoreDesc,
4898 const MCInstrDesc &ExtractDesc,
4899 unsigned SubIdx) {
4900 Register SrcReg = MIB.getReg(X86::AddrNumOperands);
4901   // Check if SrcReg is XMM16-31 or YMM16-31.
4902 if (TRI->getEncodingValue(SrcReg) < 16) {
4903 // We can use a normal VEX encoded store.
4904 MIB->setDesc(StoreDesc);
4905 } else {
4906 // Use a VEXTRACTF instruction.
4907 MIB->setDesc(ExtractDesc);
4908     // Change the source to a 512-bit register.
4909 SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
4910 MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
4911 MIB.addImm(0x0); // Append immediate to extract from the lower bits.
4912 }
4913
4914 return true;
4915}
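// Annotation (added for exposition; not part of the original source): for an
// extended register without VLX, e.g.
//   VMOVAPSZ128mr_NOVLX <addr>, %xmm16
// the store above is rewritten as
//   VEXTRACTF32x4Zmr <addr>, %zmm16, 0
// i.e. an extract of the low 128 bits, which is encodable with AVX512F alone.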
4916
4917static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
4918 MIB->setDesc(Desc);
4919 int64_t ShiftAmt = MIB->getOperand(2).getImm();
4920 // Temporarily remove the immediate so we can add another source register.
4921 MIB->RemoveOperand(2);
4922 // Add the register. Don't copy the kill flag if there is one.
4923 MIB.addReg(MIB.getReg(1),
4924 getUndefRegState(MIB->getOperand(1).isUndef()));
4925 // Add back the immediate.
4926 MIB.addImm(ShiftAmt);
4927 return true;
4928}
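// Annotation (added for exposition; not part of the original source): the
// SHXDROT pseudos become a double shift with the same register on both
// inputs, e.g.
//   %eax = SHLD32rri8 %eax, %eax, 5
// which is the classic shld/shrd idiom for a rotate by the given amount.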
4929
4930bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
4931 bool HasAVX = Subtarget.hasAVX();
4932 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4933 switch (MI.getOpcode()) {
4934 case X86::MOV32r0:
4935 return Expand2AddrUndef(MIB, get(X86::XOR32rr));
4936 case X86::MOV32r1:
4937 return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
4938 case X86::MOV32r_1:
4939 return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
4940 case X86::MOV32ImmSExti8:
4941 case X86::MOV64ImmSExti8:
4942 return ExpandMOVImmSExti8(MIB, *this, Subtarget);
4943 case X86::SETB_C32r:
4944 return Expand2AddrUndef(MIB, get(X86::SBB32rr));
4945 case X86::SETB_C64r:
4946 return Expand2AddrUndef(MIB, get(X86::SBB64rr));
4947 case X86::MMX_SET0:
4948 return Expand2AddrUndef(MIB, get(X86::MMX_PXORrr));
4949 case X86::V_SET0:
4950 case X86::FsFLD0SS:
4951 case X86::FsFLD0SD:
4952 case X86::FsFLD0F128:
4953 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
4954 case X86::AVX_SET0: {
4955     assert(HasAVX && "AVX not supported");
4956 const TargetRegisterInfo *TRI = &getRegisterInfo();
4957 Register SrcReg = MIB.getReg(0);
4958 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
4959 MIB->getOperand(0).setReg(XReg);
4960 Expand2AddrUndef(MIB, get(X86::VXORPSrr));
4961 MIB.addReg(SrcReg, RegState::ImplicitDefine);
4962 return true;
4963 }
4964 case X86::AVX512_128_SET0:
4965 case X86::AVX512_FsFLD0SH:
4966 case X86::AVX512_FsFLD0SS:
4967 case X86::AVX512_FsFLD0SD:
4968 case X86::AVX512_FsFLD0F128: {
4969 bool HasVLX = Subtarget.hasVLX();
4970 Register SrcReg = MIB.getReg(0);
4971 const TargetRegisterInfo *TRI = &getRegisterInfo();
4972 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
4973 return Expand2AddrUndef(MIB,
4974 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
4975 // Extended register without VLX. Use a larger XOR.
4976 SrcReg =
4977 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
4978 MIB->getOperand(0).setReg(SrcReg);
4979 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
4980 }
4981 case X86::AVX512_256_SET0:
4982 case X86::AVX512_512_SET0: {
4983 bool HasVLX = Subtarget.hasVLX();
4984 Register SrcReg = MIB.getReg(0);
4985 const TargetRegisterInfo *TRI = &getRegisterInfo();
4986 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
4987 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
4988 MIB->getOperand(0).setReg(XReg);
4989 Expand2AddrUndef(MIB,
4990 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
4991 MIB.addReg(SrcReg, RegState::ImplicitDefine);
4992 return true;
4993 }
4994 if (MI.getOpcode() == X86::AVX512_256_SET0) {
4995 // No VLX so we must reference a zmm.
4996 unsigned ZReg =
4997 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
4998 MIB->getOperand(0).setReg(ZReg);
4999 }
5000 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
5001 }
5002 case X86::V_SETALLONES:
5003 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
5004 case X86::AVX2_SETALLONES:
5005 return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
5006 case X86::AVX1_SETALLONES: {
5007 Register Reg = MIB.getReg(0);
5008 // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
5009 MIB->setDesc(get(X86::VCMPPSYrri));
5010 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
5011 return true;
5012 }
5013 case X86::AVX512_512_SETALLONES: {
5014 Register Reg = MIB.getReg(0);
5015 MIB->setDesc(get(X86::VPTERNLOGDZrri));
5016 // VPTERNLOGD needs 3 register inputs and an immediate.
5017 // 0xff will return 1s for any input.
5018 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
5019 .addReg(Reg, RegState::Undef).addImm(0xff);
5020 return true;
5021 }
5022 case X86::AVX512_512_SEXT_MASK_32:
5023 case X86::AVX512_512_SEXT_MASK_64: {
5024 Register Reg = MIB.getReg(0);
5025 Register MaskReg = MIB.getReg(1);
5026 unsigned MaskState = getRegState(MIB->getOperand(1));
5027 unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
5028 X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
5029 MI.RemoveOperand(1);
5030 MIB->setDesc(get(Opc));
5031 // VPTERNLOG needs 3 register inputs and an immediate.
5032 // 0xff will return 1s for any input.
5033 MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
5034 .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
5035 return true;
5036 }
5037 case X86::VMOVAPSZ128rm_NOVLX:
5038 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
5039 get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
5040 case X86::VMOVUPSZ128rm_NOVLX:
5041 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
5042 get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
5043 case X86::VMOVAPSZ256rm_NOVLX:
5044 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
5045 get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
5046 case X86::VMOVUPSZ256rm_NOVLX:
5047 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
5048 get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
5049 case X86::VMOVAPSZ128mr_NOVLX:
5050 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
5051 get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
5052 case X86::VMOVUPSZ128mr_NOVLX:
5053 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
5054 get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
5055 case X86::VMOVAPSZ256mr_NOVLX:
5056 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
5057 get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
5058 case X86::VMOVUPSZ256mr_NOVLX:
5059 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
5060 get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
5061 case X86::MOV32ri64: {
5062 Register Reg = MIB.getReg(0);
5063 Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
5064 MI.setDesc(get(X86::MOV32ri));
5065 MIB->getOperand(0).setReg(Reg32);
5066 MIB.addReg(Reg, RegState::ImplicitDefine);
5067 return true;
5068 }
5069
5070 // KNL does not recognize dependency-breaking idioms for mask registers,
5071 // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
5072 // Using %k0 as the undef input register is a performance heuristic based
5073 // on the assumption that %k0 is used less frequently than the other mask
5074 // registers, since it is not usable as a write mask.
5075 // FIXME: A more advanced approach would be to choose the best input mask
5076 // register based on context.
5077 case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
5078 case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
5079 case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
5080 case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
5081 case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
5082 case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
5083 case TargetOpcode::LOAD_STACK_GUARD:
5084 expandLoadStackGuard(MIB, *this);
5085 return true;
5086 case X86::XOR64_FP:
5087 case X86::XOR32_FP:
5088 return expandXorFP(MIB, *this);
5089 case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
5090 case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
5091 case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
5092 case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
5093 case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break;
5094 case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break;
5095 case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); break;
5096 case X86::ADD64rr_DB: MIB->setDesc(get(X86::OR64rr)); break;
5097 case X86::ADD8ri_DB: MIB->setDesc(get(X86::OR8ri)); break;
5098 case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break;
5099 case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break;
5100 case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
5101 case X86::ADD16ri8_DB: MIB->setDesc(get(X86::OR16ri8)); break;
5102 case X86::ADD32ri8_DB: MIB->setDesc(get(X86::OR32ri8)); break;
5103 case X86::ADD64ri8_DB: MIB->setDesc(get(X86::OR64ri8)); break;
5104 }
5105 return false;
5106}
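// Annotation (added for exposition; not part of the original source): the
// ADD*_DB ("disjoint bits") pseudos at the end of the switch are simply
// retargeted to OR; when the operands are known to share no set bits, OR
// computes the same value as ADD, which is presumably why the pseudos exist.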
5107
5108/// Return true for all instructions that only update
5109/// the first 32 or 64 bits of the destination register and leave the rest
5110/// unmodified. This can be used to avoid folding loads if the instructions
5111/// only update part of the destination register, and the non-updated part is
5112/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
5113/// instructions breaks the partial register dependency, which can improve
5114/// performance. e.g.:
5115///
5116/// movss (%rdi), %xmm0
5117/// cvtss2sd %xmm0, %xmm0
5118///
5119/// Instead of
5120/// cvtss2sd (%rdi), %xmm0
5121///
5122/// FIXME: This should be turned into a TSFlags.
5123///
5124static bool hasPartialRegUpdate(unsigned Opcode,
5125 const X86Subtarget &Subtarget,
5126 bool ForLoadFold = false) {
5127 switch (Opcode) {
5128 case X86::CVTSI2SSrr:
5129 case X86::CVTSI2SSrm:
5130 case X86::CVTSI642SSrr:
5131 case X86::CVTSI642SSrm:
5132 case X86::CVTSI2SDrr:
5133 case X86::CVTSI2SDrm:
5134 case X86::CVTSI642SDrr:
5135 case X86::CVTSI642SDrm:
5136     // Load folding won't affect the undef register update since the input is
5137 // a GPR.
5138 return !ForLoadFold;
5139 case X86::CVTSD2SSrr:
5140 case X86::CVTSD2SSrm:
5141 case X86::CVTSS2SDrr:
5142 case X86::CVTSS2SDrm:
5143 case X86::MOVHPDrm:
5144 case X86::MOVHPSrm:
5145 case X86::MOVLPDrm:
5146 case X86::MOVLPSrm:
5147 case X86::RCPSSr:
5148 case X86::RCPSSm:
5149 case X86::RCPSSr_Int:
5150 case X86::RCPSSm_Int:
5151 case X86::ROUNDSDr:
5152 case X86::ROUNDSDm:
5153 case X86::ROUNDSSr:
5154 case X86::ROUNDSSm:
5155 case X86::RSQRTSSr:
5156 case X86::RSQRTSSm:
5157 case X86::RSQRTSSr_Int:
5158 case X86::RSQRTSSm_Int:
5159 case X86::SQRTSSr:
5160 case X86::SQRTSSm:
5161 case X86::SQRTSSr_Int:
5162 case X86::SQRTSSm_Int:
5163 case X86::SQRTSDr:
5164 case X86::SQRTSDm:
5165 case X86::SQRTSDr_Int:
5166 case X86::SQRTSDm_Int:
5167 return true;
5168 // GPR
5169 case X86::POPCNT32rm:
5170 case X86::POPCNT32rr:
5171 case X86::POPCNT64rm:
5172 case X86::POPCNT64rr:
5173 return Subtarget.hasPOPCNTFalseDeps();
5174 case X86::LZCNT32rm:
5175 case X86::LZCNT32rr:
5176 case X86::LZCNT64rm:
5177 case X86::LZCNT64rr:
5178 case X86::TZCNT32rm:
5179 case X86::TZCNT32rr:
5180 case X86::TZCNT64rm:
5181 case X86::TZCNT64rr:
5182 return Subtarget.hasLZCNTFalseDeps();
5183 }
5184
5185 return false;
5186}
5187
5188/// Inform the BreakFalseDeps pass how many idle
5189/// instructions we would like before a partial register update.
5190unsigned X86InstrInfo::getPartialRegUpdateClearance(
5191 const MachineInstr &MI, unsigned OpNum,
5192 const TargetRegisterInfo *TRI) const {
5193 if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
5194 return 0;
5195
5196 // If MI is marked as reading Reg, the partial register update is wanted.
5197 const MachineOperand &MO = MI.getOperand(0);
5198 Register Reg = MO.getReg();
5199 if (Reg.isVirtual()) {
5200 if (MO.readsReg() || MI.readsVirtualRegister(Reg))
5201 return 0;
5202 } else {
5203 if (MI.readsRegister(Reg, TRI))
5204 return 0;
5205 }
5206
5207 // If any instructions in the clearance range are reading Reg, insert a
5208 // dependency breaking instruction, which is inexpensive and is likely to
5209   // be hidden in other instructions' cycles.
5210 return PartialRegUpdateClearance;
5211}
5212
5213// Return true for any instruction that copies the high bits of the first source
5214// operand into the unused high bits of the destination operand.
5215// Also returns true for instructions that have two inputs where one may
5216// be undef and we want it to use the same register as the other input.
5217static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
5218 bool ForLoadFold = false) {
5219 // Set the OpNum parameter to the first source operand.
5220 switch (Opcode) {
5221 case X86::MMX_PUNPCKHBWrr:
5222 case X86::MMX_PUNPCKHWDrr:
5223 case X86::MMX_PUNPCKHDQrr:
5224 case X86::MMX_PUNPCKLBWrr:
5225 case X86::MMX_PUNPCKLWDrr:
5226 case X86::MMX_PUNPCKLDQrr:
5227 case X86::MOVHLPSrr:
5228 case X86::PACKSSWBrr:
5229 case X86::PACKUSWBrr:
5230 case X86::PACKSSDWrr:
5231 case X86::PACKUSDWrr:
5232 case X86::PUNPCKHBWrr:
5233 case X86::PUNPCKLBWrr:
5234 case X86::PUNPCKHWDrr:
5235 case X86::PUNPCKLWDrr:
5236 case X86::PUNPCKHDQrr:
5237 case X86::PUNPCKLDQrr:
5238 case X86::PUNPCKHQDQrr:
5239 case X86::PUNPCKLQDQrr:
5240 case X86::SHUFPDrri:
5241 case X86::SHUFPSrri:
5242 // These instructions are sometimes used with an undef first or second
5243 // source. Return true here so BreakFalseDeps will assign this source to the
5244 // same register as the first source to avoid a false dependency.
5245 // Operand 1 of these instructions is tied so they're separate from their
5246 // VEX counterparts.
5247 return OpNum == 2 && !ForLoadFold;
5248
5249 case X86::VMOVLHPSrr:
5250 case X86::VMOVLHPSZrr:
5251 case X86::VPACKSSWBrr:
5252 case X86::VPACKUSWBrr:
5253 case X86::VPACKSSDWrr:
5254 case X86::VPACKUSDWrr:
5255 case X86::VPACKSSWBZ128rr:
5256 case X86::VPACKUSWBZ128rr:
5257 case X86::VPACKSSDWZ128rr:
5258 case X86::VPACKUSDWZ128rr:
5259 case X86::VPERM2F128rr:
5260 case X86::VPERM2I128rr:
5261 case X86::VSHUFF32X4Z256rri:
5262 case X86::VSHUFF32X4Zrri:
5263 case X86::VSHUFF64X2Z256rri:
5264 case X86::VSHUFF64X2Zrri:
5265 case X86::VSHUFI32X4Z256rri:
5266 case X86::VSHUFI32X4Zrri:
5267 case X86::VSHUFI64X2Z256rri:
5268 case X86::VSHUFI64X2Zrri:
5269 case X86::VPUNPCKHBWrr:
5270 case X86::VPUNPCKLBWrr:
5271 case X86::VPUNPCKHBWYrr:
5272 case X86::VPUNPCKLBWYrr:
5273 case X86::VPUNPCKHBWZ128rr:
5274 case X86::VPUNPCKLBWZ128rr:
5275 case X86::VPUNPCKHBWZ256rr:
5276 case X86::VPUNPCKLBWZ256rr:
5277 case X86::VPUNPCKHBWZrr:
5278 case X86::VPUNPCKLBWZrr:
5279 case X86::VPUNPCKHWDrr:
5280 case X86::VPUNPCKLWDrr:
5281 case X86::VPUNPCKHWDYrr:
5282 case X86::VPUNPCKLWDYrr:
5283 case X86::VPUNPCKHWDZ128rr:
5284 case X86::VPUNPCKLWDZ128rr:
5285 case X86::VPUNPCKHWDZ256rr:
5286 case X86::VPUNPCKLWDZ256rr:
5287 case X86::VPUNPCKHWDZrr:
5288 case X86::VPUNPCKLWDZrr:
5289 case X86::VPUNPCKHDQrr:
5290 case X86::VPUNPCKLDQrr:
5291 case X86::VPUNPCKHDQYrr:
5292 case X86::VPUNPCKLDQYrr:
5293 case X86::VPUNPCKHDQZ128rr:
5294 case X86::VPUNPCKLDQZ128rr:
5295 case X86::VPUNPCKHDQZ256rr:
5296 case X86::VPUNPCKLDQZ256rr:
5297 case X86::VPUNPCKHDQZrr:
5298 case X86::VPUNPCKLDQZrr:
5299 case X86::VPUNPCKHQDQrr:
5300 case X86::VPUNPCKLQDQrr:
5301 case X86::VPUNPCKHQDQYrr:
5302 case X86::VPUNPCKLQDQYrr:
5303 case X86::VPUNPCKHQDQZ128rr:
5304 case X86::VPUNPCKLQDQZ128rr:
5305 case X86::VPUNPCKHQDQZ256rr:
5306 case X86::VPUNPCKLQDQZ256rr:
5307 case X86::VPUNPCKHQDQZrr:
5308 case X86::VPUNPCKLQDQZrr:
5309 // These instructions are sometimes used with an undef first or second
5310 // source. Return true here so BreakFalseDeps will assign this source to the
5311 // same register as the first source to avoid a false dependency.
5312 return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
5313
5314 case X86::VCVTSI2SSrr:
5315 case X86::VCVTSI2SSrm:
5316 case X86::VCVTSI2SSrr_Int:
5317 case X86::VCVTSI2SSrm_Int:
5318 case X86::VCVTSI642SSrr:
5319 case X86::VCVTSI642SSrm:
5320 case X86::VCVTSI642SSrr_Int:
5321 case X86::VCVTSI642SSrm_Int:
5322 case X86::VCVTSI2SDrr:
5323 case X86::VCVTSI2SDrm:
5324 case X86::VCVTSI2SDrr_Int:
5325 case X86::VCVTSI2SDrm_Int:
5326 case X86::VCVTSI642SDrr:
5327 case X86::VCVTSI642SDrm:
5328 case X86::VCVTSI642SDrr_Int:
5329 case X86::VCVTSI642SDrm_Int:
5330 // AVX-512
5331 case X86::VCVTSI2SSZrr:
5332 case X86::VCVTSI2SSZrm:
5333 case X86::VCVTSI2SSZrr_Int:
5334 case X86::VCVTSI2SSZrrb_Int:
5335 case X86::VCVTSI2SSZrm_Int:
5336 case X86::VCVTSI642SSZrr:
5337 case X86::VCVTSI642SSZrm:
5338 case X86::VCVTSI642SSZrr_Int:
5339 case X86::VCVTSI642SSZrrb_Int:
5340 case X86::VCVTSI642SSZrm_Int:
5341 case X86::VCVTSI2SDZrr:
5342 case X86::VCVTSI2SDZrm:
5343 case X86::VCVTSI2SDZrr_Int:
5344 case X86::VCVTSI2SDZrm_Int:
5345 case X86::VCVTSI642SDZrr:
5346 case X86::VCVTSI642SDZrm:
5347 case X86::VCVTSI642SDZrr_Int:
5348 case X86::VCVTSI642SDZrrb_Int:
5349 case X86::VCVTSI642SDZrm_Int:
5350 case X86::VCVTUSI2SSZrr:
5351 case X86::VCVTUSI2SSZrm:
5352 case X86::VCVTUSI2SSZrr_Int:
5353 case X86::VCVTUSI2SSZrrb_Int:
5354 case X86::VCVTUSI2SSZrm_Int:
5355 case X86::VCVTUSI642SSZrr:
5356 case X86::VCVTUSI642SSZrm:
5357 case X86::VCVTUSI642SSZrr_Int:
5358 case X86::VCVTUSI642SSZrrb_Int:
5359 case X86::VCVTUSI642SSZrm_Int:
5360 case X86::VCVTUSI2SDZrr:
5361 case X86::VCVTUSI2SDZrm:
5362 case X86::VCVTUSI2SDZrr_Int:
5363 case X86::VCVTUSI2SDZrm_Int:
5364 case X86::VCVTUSI642SDZrr:
5365 case X86::VCVTUSI642SDZrm:
5366 case X86::VCVTUSI642SDZrr_Int:
5367 case X86::VCVTUSI642SDZrrb_Int:
5368 case X86::VCVTUSI642SDZrm_Int:
5369 case X86::VCVTSI2SHZrr:
5370 case X86::VCVTSI2SHZrm:
5371 case X86::VCVTSI2SHZrr_Int:
5372 case X86::VCVTSI2SHZrrb_Int:
5373 case X86::VCVTSI2SHZrm_Int:
5374 case X86::VCVTSI642SHZrr:
5375 case X86::VCVTSI642SHZrm:
5376 case X86::VCVTSI642SHZrr_Int:
5377 case X86::VCVTSI642SHZrrb_Int:
5378 case X86::VCVTSI642SHZrm_Int:
5379 case X86::VCVTUSI2SHZrr:
5380 case X86::VCVTUSI2SHZrm:
5381 case X86::VCVTUSI2SHZrr_Int:
5382 case X86::VCVTUSI2SHZrrb_Int:
5383 case X86::VCVTUSI2SHZrm_Int:
5384 case X86::VCVTUSI642SHZrr:
5385 case X86::VCVTUSI642SHZrm:
5386 case X86::VCVTUSI642SHZrr_Int:
5387 case X86::VCVTUSI642SHZrrb_Int:
5388 case X86::VCVTUSI642SHZrm_Int:
5389     // Load folding won't affect the undef register update since the input is
5390 // a GPR.
5391 return OpNum == 1 && !ForLoadFold;
5392 case X86::VCVTSD2SSrr:
5393 case X86::VCVTSD2SSrm:
5394 case X86::VCVTSD2SSrr_Int:
5395 case X86::VCVTSD2SSrm_Int:
5396 case X86::VCVTSS2SDrr:
5397 case X86::VCVTSS2SDrm:
5398 case X86::VCVTSS2SDrr_Int:
5399 case X86::VCVTSS2SDrm_Int:
5400 case X86::VRCPSSr:
5401 case X86::VRCPSSr_Int:
5402 case X86::VRCPSSm:
5403 case X86::VRCPSSm_Int:
5404 case X86::VROUNDSDr:
5405 case X86::VROUNDSDm:
5406 case X86::VROUNDSDr_Int:
5407 case X86::VROUNDSDm_Int:
5408 case X86::VROUNDSSr:
5409 case X86::VROUNDSSm:
5410 case X86::VROUNDSSr_Int:
5411 case X86::VROUNDSSm_Int:
5412 case X86::VRSQRTSSr:
5413 case X86::VRSQRTSSr_Int:
5414 case X86::VRSQRTSSm:
5415 case X86::VRSQRTSSm_Int:
5416 case X86::VSQRTSSr:
5417 case X86::VSQRTSSr_Int:
5418 case X86::VSQRTSSm:
5419 case X86::VSQRTSSm_Int:
5420 case X86::VSQRTSDr:
5421 case X86::VSQRTSDr_Int:
5422 case X86::VSQRTSDm:
5423 case X86::VSQRTSDm_Int:
5424 // AVX-512
5425 case X86::VCVTSD2SSZrr:
5426 case X86::VCVTSD2SSZrr_Int:
5427 case X86::VCVTSD2SSZrrb_Int:
5428 case X86::VCVTSD2SSZrm:
5429 case X86::VCVTSD2SSZrm_Int:
5430 case X86::VCVTSS2SDZrr:
5431 case X86::VCVTSS2SDZrr_Int:
5432 case X86::VCVTSS2SDZrrb_Int:
5433 case X86::VCVTSS2SDZrm:
5434 case X86::VCVTSS2SDZrm_Int:
5435 case X86::VGETEXPSDZr:
5436 case X86::VGETEXPSDZrb:
5437 case X86::VGETEXPSDZm:
5438 case X86::VGETEXPSSZr:
5439 case X86::VGETEXPSSZrb:
5440 case X86::VGETEXPSSZm:
5441 case X86::VGETMANTSDZrri:
5442 case X86::VGETMANTSDZrrib:
5443 case X86::VGETMANTSDZrmi:
5444 case X86::VGETMANTSSZrri:
5445 case X86::VGETMANTSSZrrib:
5446 case X86::VGETMANTSSZrmi:
5447 case X86::VRNDSCALESDZr:
5448 case X86::VRNDSCALESDZr_Int:
5449 case X86::VRNDSCALESDZrb_Int:
5450 case X86::VRNDSCALESDZm:
5451 case X86::VRNDSCALESDZm_Int:
5452 case X86::VRNDSCALESSZr:
5453 case X86::VRNDSCALESSZr_Int:
5454 case X86::VRNDSCALESSZrb_Int:
5455 case X86::VRNDSCALESSZm:
5456 case X86::VRNDSCALESSZm_Int:
5457 case X86::VRCP14SDZrr:
5458 case X86::VRCP14SDZrm:
5459 case X86::VRCP14SSZrr:
5460 case X86::VRCP14SSZrm:
5461 case X86::VRCPSHZrr:
5462 case X86::VRCPSHZrm:
5463 case X86::VRSQRTSHZrr:
5464 case X86::VRSQRTSHZrm:
5465 case X86::VREDUCESHZrmi:
5466 case X86::VREDUCESHZrri:
5467 case X86::VREDUCESHZrrib:
5468 case X86::VGETEXPSHZr:
5469 case X86::VGETEXPSHZrb:
5470 case X86::VGETEXPSHZm:
5471 case X86::VGETMANTSHZrri:
5472 case X86::VGETMANTSHZrrib:
5473 case X86::VGETMANTSHZrmi:
5474 case X86::VRNDSCALESHZr:
5475 case X86::VRNDSCALESHZr_Int:
5476 case X86::VRNDSCALESHZrb_Int:
5477 case X86::VRNDSCALESHZm:
5478 case X86::VRNDSCALESHZm_Int:
5479 case X86::VSQRTSHZr:
5480 case X86::VSQRTSHZr_Int:
5481 case X86::VSQRTSHZrb_Int:
5482 case X86::VSQRTSHZm:
5483 case X86::VSQRTSHZm_Int:
5484 case X86::VRCP28SDZr:
5485 case X86::VRCP28SDZrb:
5486 case X86::VRCP28SDZm:
5487 case X86::VRCP28SSZr:
5488 case X86::VRCP28SSZrb:
5489 case X86::VRCP28SSZm:
5490 case X86::VREDUCESSZrmi:
5491 case X86::VREDUCESSZrri:
5492 case X86::VREDUCESSZrrib:
5493 case X86::VRSQRT14SDZrr:
5494 case X86::VRSQRT14SDZrm:
5495 case X86::VRSQRT14SSZrr:
5496 case X86::VRSQRT14SSZrm:
5497 case X86::VRSQRT28SDZr:
5498 case X86::VRSQRT28SDZrb:
5499 case X86::VRSQRT28SDZm:
5500 case X86::VRSQRT28SSZr:
5501 case X86::VRSQRT28SSZrb:
5502 case X86::VRSQRT28SSZm:
5503 case X86::VSQRTSSZr:
5504 case X86::VSQRTSSZr_Int:
5505 case X86::VSQRTSSZrb_Int:
5506 case X86::VSQRTSSZm:
5507 case X86::VSQRTSSZm_Int:
5508 case X86::VSQRTSDZr:
5509 case X86::VSQRTSDZr_Int:
5510 case X86::VSQRTSDZrb_Int:
5511 case X86::VSQRTSDZm:
5512 case X86::VSQRTSDZm_Int:
5513 case X86::VCVTSD2SHZrr:
5514 case X86::VCVTSD2SHZrr_Int:
5515 case X86::VCVTSD2SHZrrb_Int:
5516 case X86::VCVTSD2SHZrm:
5517 case X86::VCVTSD2SHZrm_Int:
5518 case X86::VCVTSS2SHZrr:
5519 case X86::VCVTSS2SHZrr_Int:
5520 case X86::VCVTSS2SHZrrb_Int:
5521 case X86::VCVTSS2SHZrm:
5522 case X86::VCVTSS2SHZrm_Int:
5523 case X86::VCVTSH2SDZrr:
5524 case X86::VCVTSH2SDZrr_Int:
5525 case X86::VCVTSH2SDZrrb_Int:
5526 case X86::VCVTSH2SDZrm:
5527 case X86::VCVTSH2SDZrm_Int:
5528 case X86::VCVTSH2SSZrr:
5529 case X86::VCVTSH2SSZrr_Int:
5530 case X86::VCVTSH2SSZrrb_Int:
5531 case X86::VCVTSH2SSZrm:
5532 case X86::VCVTSH2SSZrm_Int:
5533 return OpNum == 1;
5534 case X86::VMOVSSZrrk:
5535 case X86::VMOVSDZrrk:
5536 return OpNum == 3 && !ForLoadFold;
5537 case X86::VMOVSSZrrkz:
5538 case X86::VMOVSDZrrkz:
5539 return OpNum == 2 && !ForLoadFold;
5540 }
5541
5542 return false;
5543}
5544
5545/// Inform the BreakFalseDeps pass how many idle instructions we would like
5546/// before certain undef register reads.
5547///
5548/// This catches the VCVTSI2SD family of instructions:
5549///
5550/// vcvtsi2sdq %rax, undef %xmm0, %xmm14
5551///
5552/// We should be careful *not* to catch VXOR idioms, which are presumably
5553/// handled specially in the pipeline:
5554///
5555/// vxorps undef %xmm1, undef %xmm1, %xmm1
5556///
5557/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
5558/// high bits that are passed-through are not live.
5559unsigned
5560X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
5561 const TargetRegisterInfo *TRI) const {
5562 const MachineOperand &MO = MI.getOperand(OpNum);
5563 if (Register::isPhysicalRegister(MO.getReg()) &&
5564 hasUndefRegUpdate(MI.getOpcode(), OpNum))
5565 return UndefRegClearance;
5566
5567 return 0;
5568}
5569
5570void X86InstrInfo::breakPartialRegDependency(
5571 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5572 Register Reg = MI.getOperand(OpNum).getReg();
5573 // If MI kills this register, the false dependence is already broken.
5574 if (MI.killsRegister(Reg, TRI))
5575 return;
5576
5577 if (X86::VR128RegClass.contains(Reg)) {
5578 // These instructions are all floating point domain, so xorps is the best
5579 // choice.
5580 unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
5581 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
5582 .addReg(Reg, RegState::Undef)
5583 .addReg(Reg, RegState::Undef);
5584 MI.addRegisterKilled(Reg, TRI, true);
5585 } else if (X86::VR256RegClass.contains(Reg)) {
5586 // Use vxorps to clear the full ymm register.
5587 // It wants to read and write the xmm sub-register.
5588 Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
5589 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
5590 .addReg(XReg, RegState::Undef)
5591 .addReg(XReg, RegState::Undef)
5592 .addReg(Reg, RegState::ImplicitDefine);
5593 MI.addRegisterKilled(Reg, TRI, true);
5594 } else if (X86::GR64RegClass.contains(Reg)) {
5595    // Use XOR32rr because it has a shorter encoding and also zeroes the
5596    // upper 32 bits.
5597 Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
5598 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
5599 .addReg(XReg, RegState::Undef)
5600 .addReg(XReg, RegState::Undef)
5601 .addReg(Reg, RegState::ImplicitDefine);
5602 MI.addRegisterKilled(Reg, TRI, true);
5603 } else if (X86::GR32RegClass.contains(Reg)) {
5604 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
5605 .addReg(Reg, RegState::Undef)
5606 .addReg(Reg, RegState::Undef);
5607 MI.addRegisterKilled(Reg, TRI, true);
5608 }
5609}
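The two hooks above are intended to be driven together by a BreakFalseDeps-style pass. A short sketch of that protocol follows (the driver and its distance bookkeeping are assumptions, not code from this file): query the preferred clearance and, only if the undef read sits closer than that to the last write of the register, insert the dependency-breaking zeroing idiom.

static void maybeBreakFalseDep(const X86InstrInfo &TII,
                               const TargetRegisterInfo *TRI, MachineInstr &MI,
                               unsigned OpNum, unsigned InstrsSinceLastDef) {
  // Preferred number of instructions between the last def and this undef read.
  unsigned Pref = TII.getUndefRegClearance(MI, OpNum, TRI);
  // Too close to the previous writer: break the false dependence by zeroing
  // the register with an xor idiom immediately before MI.
  if (Pref != 0 && InstrsSinceLastDef < Pref)
    TII.breakPartialRegDependency(MI, OpNum, TRI);
}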
5610
5611static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
5612 int PtrOffset = 0) {
5613 unsigned NumAddrOps = MOs.size();
5614
5615 if (NumAddrOps < 4) {
5616    // FrameIndex only - add an immediate offset (whether it's zero or not).
5617 for (unsigned i = 0; i != NumAddrOps; ++i)
5618 MIB.add(MOs[i]);
5619 addOffset(MIB, PtrOffset);
5620 } else {
5621    // General memory addressing - fold any extra pointer offset into the
5622    // existing displacement.
5623    assert(MOs.size() == 5 && "Unexpected memory operand list length");
5624 for (unsigned i = 0; i != NumAddrOps; ++i) {
5625 const MachineOperand &MO = MOs[i];
5626 if (i == 3 && PtrOffset != 0) {
5627 MIB.addDisp(MO, PtrOffset);
5628 } else {
5629 MIB.add(MO);
5630 }
5631 }
5632 }
5633}
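addOperands() accepts two operand-list shapes. The sketch below (a hypothetical caller, not part of this file) builds both: a bare frame index, and a full five-element x86 memory reference consisting of base, scale, index, displacement and segment.

static void addExampleAddrOperands(MachineInstrBuilder &FrameMIB,
                                   MachineInstrBuilder &MemMIB, int FrameIndex,
                                   Register BaseReg) {
  // Frame-index form: fewer than four operands, so addOperands() appends the
  // pointer offset itself as a trailing immediate.
  SmallVector<MachineOperand, 5> FrameOps;
  FrameOps.push_back(MachineOperand::CreateFI(FrameIndex));
  addOperands(FrameMIB, FrameOps, /*PtrOffset=*/8);

  // General form: exactly five operands; any pointer offset is folded into the
  // existing displacement (operand index 3).
  SmallVector<MachineOperand, 5> MemOps;
  MemOps.push_back(MachineOperand::CreateReg(BaseReg, /*isDef=*/false)); // base
  MemOps.push_back(MachineOperand::CreateImm(1));                        // scale
  MemOps.push_back(MachineOperand::CreateReg(0, /*isDef=*/false));       // index
  MemOps.push_back(MachineOperand::CreateImm(0));                        // displacement
  MemOps.push_back(MachineOperand::CreateReg(0, /*isDef=*/false));       // segment
  addOperands(MemMIB, MemOps, /*PtrOffset=*/8);
}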
5634
5635static void updateOperandRegConstraints(MachineFunction &MF,
5636 MachineInstr &NewMI,
5637 const TargetInstrInfo &TII) {
5638 MachineRegisterInfo &MRI = MF.getRegInfo();
5639 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
5640
5641 for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
5642 MachineOperand &MO = NewMI.getOperand(Idx);
5643 // We only need to update constraints on virtual register operands.
5644 if (!MO.isReg())
5645 continue;
5646 Register Reg = MO.getReg();
5647 if (!Reg.isVirtual())
5648 continue;
5649
5650 auto *NewRC = MRI.constrainRegClass(
5651 Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
5652 if (!NewRC) {
5653      LLVM_DEBUG(
5654          dbgs() << "WARNING: Unable to update register constraint for operand "
5655                 << Idx << " of instruction:\n";
5656          NewMI.dump(); dbgs() << "\n");
5657 }
5658 }
5659}
5660
5661static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
5662 ArrayRef<MachineOperand> MOs,
5663 MachineBasicBlock::iterator InsertPt,
5664 MachineInstr &MI,
5665 const TargetInstrInfo &TII) {
5666 // Create the base instruction with the memory operand as the first part.
5667 // Omit the implicit operands, something BuildMI can't do.
5668 MachineInstr *NewMI =
5669 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
5670 MachineInstrBuilder MIB(MF, NewMI);
5671 addOperands(MIB, MOs);
5672
5673  // Copy the remaining register/immediate operands; the first two (the tied def/use pair) were replaced by the memory reference above.
5674 unsigned NumOps = MI.getDesc().getNumOperands() - 2;
5675 for (unsigned i = 0; i != NumOps; ++i) {
5676 MachineOperand &MO = MI.getOperand(i + 2);
5677 MIB.add(MO);
5678 }
5679 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2))
5680 MIB.add(MO);
5681
5682 updateOperandRegConstraints(MF, *NewMI, TII);
5683
5684 MachineBasicBlock *MBB = InsertPt->getParent();
5685 MBB->insert(InsertPt, NewMI);
5686
5687 return MIB;
5688}
5689
5690static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
5691 unsigned OpNo, ArrayRef<MachineOperand> MOs,
5692 MachineBasicBlock::iterator InsertPt,
5693 MachineInstr &MI, const TargetInstrInfo &TII,
5694 int PtrOffset = 0) {
5695 // Omit the implicit operands, something BuildMI can't do.
5696 MachineInstr *NewMI =
5697 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
5698 MachineInstrBuilder MIB(MF, NewMI);
5699
5700 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5701 MachineOperand &MO = MI.getOperand(i);
5702 if (i == OpNo) {
5703      assert(MO.isReg() && "Expected to fold into reg operand!");
5704 addOperands(MIB, MOs, PtrOffset);
5705 } else {
5706 MIB.add(MO);
5707 }
5708 }
5709
5710 updateOperandRegConstraints(MF, *NewMI, TII);
5711
5712 // Copy the NoFPExcept flag from the instruction we're fusing.
5713 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
5714 NewMI->setFlag(MachineInstr::MIFlag::NoFPExcept);
5715
5716 MachineBasicBlock *MBB = InsertPt->getParent();
5717 MBB->insert(InsertPt, NewMI);
5718
5719 return MIB;
5720}
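A sketch of how FuseInst() is typically used by the folding logic (the driver below is hypothetical; ADD32rr folding its second source into ADD32rm when that operand is reloaded from a stack slot is the classic case): replace register operand OpNo with a frame-index memory reference, switching to the memory form of the opcode.

static MachineInstr *foldReloadExample(MachineFunction &MF, MachineInstr &AddMI,
                                       int FrameIndex,
                                       MachineBasicBlock::iterator InsertPt,
                                       const TargetInstrInfo &TII) {
  // AddMI is assumed to be ADD32rr dst, src1, src2. Fold a reload of src2
  // (operand 2) from the stack slot, yielding ADD32rm dst, src1, <fi>.
  SmallVector<MachineOperand, 1> MOs;
  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
  return FuseInst(MF, X86::ADD32rm, /*OpNo=*/2, MOs, InsertPt, AddMI, TII);
}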
5721
5722static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
5723 ArrayRef<MachineOperand> MOs,
5724 MachineBasicBlock::iterator InsertPt,
5725 MachineInstr &MI) {
5726 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
5727 MI.getDebugLoc(), TII.get(Opcode));
5728 addOperands(MIB, MOs);
5729 return MIB.addImm(0);
5730}
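MakeM0Inst() builds the "memory, immediate 0" form used when a register-zeroing instruction is folded straight into a store. A minimal sketch follows (the wrapper is hypothetical; the MOV32r0 to MOV32mi pairing is the kind of fold this helper is meant for).

static MachineInstr *foldZeroStoreExample(const TargetInstrInfo &TII,
                                          ArrayRef<MachineOperand> MOs,
                                          MachineBasicBlock::iterator InsertPt,
                                          MachineInstr &ZeroMI) {
  // ZeroMI is assumed to be MOV32r0. Instead of materializing 0 in a register
  // and storing that register, store the immediate 0 directly to the address
  // described by MOs.
  return MakeM0Inst(TII, X86::MOV32mi, MOs, InsertPt, ZeroMI);
}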
5731
5732MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
5733 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
5734 ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
5735 unsigned Size, Align Alignment) const {
5736 switch (MI.getOpcode()) {
5737 case X86::INSERTPSrr:
5738 case X86::VINSERTPSrr:
5739 case X86::VINSERTPSZrr:
5740    // Attempt to convert the load of the inserted vector into a folded load
5741    // of a single float.
5742 if (OpNum == 2) {
5743 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
5744 unsigned ZMask = Imm & 15;
5745 unsigned DstIdx = (Imm >> 4) & 3;
5746 unsigned SrcIdx = (Imm >> 6) & 3;
5747
5748 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5749 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
5750 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
5751 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(4)) {
5752 int PtrOffset = SrcIdx * 4;
5753 unsigned NewImm = (DstIdx << 4) | ZMask;