LLVM 23.0.0git
X86FixupInstTuning.cpp
Go to the documentation of this file.
//===-- X86FixupInstTuning.cpp - replace instructions ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file does a tuning pass replacing slower machine instructions
10// with faster ones. We do this here, as opposed to during normal ISel, as
11// attempting to get the "right" instruction can break patterns. This pass
// is not meant to search for special cases where an instruction can be
// transformed to another; it is only meant to do transformations where the old
// instruction is always replaceable with the new instruction. For example:
15//
// `vpermq ymm` -> `vpshufd ymm`
// -- BAD, not always valid (lane cross/non-repeated mask)
//
// `vpermilps ymm` -> `vshufps ymm`
// -- GOOD, always replaceable
21//
22//===----------------------------------------------------------------------===//
23
24#include "X86.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
27#include "llvm/ADT/Statistic.h"
32#include "llvm/IR/Analysis.h"
33
34using namespace llvm;
35
36#define DEBUG_TYPE "x86-fixup-inst-tuning"
37
38STATISTIC(NumInstChanges, "Number of instructions changes");
39
40namespace {
41class X86FixupInstTuningImpl {
42public:
43 bool runOnMachineFunction(MachineFunction &MF);
44
45private:
46 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
48
49 const X86InstrInfo *TII = nullptr;
50 const X86Subtarget *ST = nullptr;
51 const MCSchedModel *SM = nullptr;
52};
53
54class X86FixupInstTuningLegacy : public MachineFunctionPass {
55public:
56 static char ID;
57
58 X86FixupInstTuningLegacy() : MachineFunctionPass(ID) {}
59
60 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
61
62 bool runOnMachineFunction(MachineFunction &MF) override;
63 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
65
66 // This pass runs after regalloc and doesn't support VReg operands.
67 MachineFunctionProperties getRequiredProperties() const override {
68 return MachineFunctionProperties().setNoVRegs();
69 }
70};
71} // end anonymous namespace
72
73char X86FixupInstTuningLegacy ::ID = 0;
74
75INITIALIZE_PASS(X86FixupInstTuningLegacy, DEBUG_TYPE, DEBUG_TYPE, false, false)
76
78 return new X86FixupInstTuningLegacy();
79}
80
/// Compare an optional metric of a candidate (\p NewVal) against the same
/// metric of the current choice (\p CurVal).
///
/// \returns true/false for `*NewVal < *CurVal` when both values are present
/// and differ; std::nullopt when either value is missing or the two are not
/// different (i.e. the metric cannot break the tie).
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  const bool BothKnown = NewVal.has_value() && CurVal.has_value();
  // A missing measurement, or identical measurements, decide nothing.
  if (!BothKnown || !(*NewVal != *CurVal))
    return std::nullopt;

  return *NewVal < *CurVal;
}
88
89bool X86FixupInstTuningImpl::processInstruction(
92 MachineInstr &MI = *I;
93 unsigned Opc = MI.getOpcode();
94 unsigned NumOperands = MI.getDesc().getNumOperands();
95 bool OptSize = MF.getFunction().hasOptSize();
96
97 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
98 // We already checked that SchedModel exists in `NewOpcPreferable`.
100 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
101 };
102
103 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
104 // We already checked that SchedModel exists in `NewOpcPreferable`.
106 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
107 };
108
109 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
110 if (unsigned Size = TII->get(Opcode).getSize())
111 return Size;
112 // Zero size means we where unable to compute it.
113 return std::nullopt;
114 };
115
116 auto NewOpcPreferable = [&](unsigned NewOpc,
117 bool ReplaceInTie = true) -> bool {
118 std::optional<bool> Res;
119 if (SM->hasInstrSchedModel()) {
120 // Compare tput -> lat -> code size.
121 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
122 if (Res.has_value())
123 return *Res;
124
125 Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
126 if (Res.has_value())
127 return *Res;
128 }
129
130 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
131 if (Res.has_value())
132 return *Res;
133
134 // We either have either were unable to get tput/lat/codesize or all values
135 // were equal. Return specified option for a tie.
136 return ReplaceInTie;
137 };
138
139 // `vpermilpd r, i` -> `vshufpd r, r, i`
140 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
141 // `vshufpd` is always as fast or faster than `vpermilpd` and takes
142 // 1 less byte of code size for VEX and EVEX encoding.
143 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
144 if (!NewOpcPreferable(NewOpc))
145 return false;
146 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
147 {
148 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
149 MI.removeOperand(NumOperands - 1);
150 MI.addOperand(MI.getOperand(NumOperands - 2));
151 MI.setDesc(TII->get(NewOpc));
152 MI.addOperand(MachineOperand::CreateImm(MaskImm));
153 }
154 LLVM_DEBUG(dbgs() << " With: " << MI);
155 return true;
156 };
157
158 // `vpermilps r, i` -> `vshufps r, r, i`
159 // `vpermilps r, i, k` -> `vshufps r, r, i, k`
160 // `vshufps` is always as fast or faster than `vpermilps` and takes
161 // 1 less byte of code size for VEX and EVEX encoding.
162 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
163 if (!NewOpcPreferable(NewOpc))
164 return false;
165 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
166 {
167 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
168 MI.removeOperand(NumOperands - 1);
169 MI.addOperand(MI.getOperand(NumOperands - 2));
170 MI.setDesc(TII->get(NewOpc));
171 MI.addOperand(MachineOperand::CreateImm(MaskImm));
172 }
173 LLVM_DEBUG(dbgs() << " With: " << MI);
174 return true;
175 };
176
177 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
178 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
179 // byte of code size.
180 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
181 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
182 // `vpshufd` saves a byte of code size.
183 if (!ST->hasNoDomainDelayShuffle() ||
184 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
185 return false;
186 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
187 {
188 MI.setDesc(TII->get(NewOpc));
189 }
190 LLVM_DEBUG(dbgs() << " With: " << MI);
191 return true;
192 };
193
194 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
195 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
196 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
197 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
198 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
199 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
200 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
201 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
202 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
203 // -> `vunpck{l|h}qdq`
204 // 2) If `vshufpd` faster than `vunpck{l|h}pd`
205 // -> `vshufpd`
206 //
207 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
208 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
209 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
210 return false;
211 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
212 {
213 MI.setDesc(TII->get(NewOpc));
214 MI.addOperand(MachineOperand::CreateImm(MaskImm));
215 }
216 LLVM_DEBUG(dbgs() << " With: " << MI);
217 return true;
218 };
219
220 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
221 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
222 // downside to the integer unpck, but if someone doesn't specify exact
223 // target we won't find it faster.
224 if (!ST->hasNoDomainDelayShuffle() ||
225 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
226 return false;
227 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
228 {
229 MI.setDesc(TII->get(NewOpc));
230 }
231 LLVM_DEBUG(dbgs() << " With: " << MI);
232 return true;
233 };
234
235 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
236 unsigned NewOpc) -> bool {
237 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
238 return true;
239 return ProcessUNPCK(NewOpc, 0x00);
240 };
241 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
242 unsigned NewOpc) -> bool {
243 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
244 return true;
245 return ProcessUNPCK(NewOpc, 0xff);
246 };
247
248 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
249 return ProcessUNPCKToIntDomain(NewOpcIntDomain);
250 };
251
252 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
253 return ProcessUNPCKToIntDomain(NewOpc);
254 };
255
256 auto ProcessBLENDWToBLENDD = [&](unsigned MovOpc, unsigned NumElts) -> bool {
257 if (!ST->hasAVX2() || !NewOpcPreferable(MovOpc))
258 return false;
259 // Convert to VPBLENDD if scaling the VPBLENDW mask down/up loses no bits.
260 APInt MaskW =
261 APInt(8, MI.getOperand(NumOperands - 1).getImm(), /*IsSigned=*/false);
262 APInt MaskD = APIntOps::ScaleBitMask(MaskW, 4, /*MatchAllBits=*/true);
263 if (MaskW != APIntOps::ScaleBitMask(MaskD, 8, /*MatchAllBits=*/true))
264 return false;
265 APInt NewMaskD = APInt::getSplat(NumElts, MaskD);
266 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
267 {
268 MI.setDesc(TII->get(MovOpc));
269 MI.removeOperand(NumOperands - 1);
270 MI.addOperand(MachineOperand::CreateImm(NewMaskD.getZExtValue()));
271 }
272 LLVM_DEBUG(dbgs() << " With: " << MI);
273 return true;
274 };
275
276 auto ProcessBLENDToMOV = [&](unsigned MovOpc, unsigned Mask,
277 unsigned MovImm) -> bool {
278 if ((MI.getOperand(NumOperands - 1).getImm() & Mask) != MovImm)
279 return false;
280 if (!OptSize && !NewOpcPreferable(MovOpc))
281 return false;
282 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
283 {
284 MI.setDesc(TII->get(MovOpc));
285 MI.removeOperand(NumOperands - 1);
286 }
287 LLVM_DEBUG(dbgs() << " With: " << MI);
288 return true;
289 };
290
291 // Is ADD(X,X) more efficient than SHL(X,1)?
292 auto ProcessShiftLeftToAdd = [&](unsigned AddOpc) -> bool {
293 if (MI.getOperand(NumOperands - 1).getImm() != 1)
294 return false;
295 if (!NewOpcPreferable(AddOpc, /*ReplaceInTie*/ true))
296 return false;
297 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
298 {
299 MI.setDesc(TII->get(AddOpc));
300 MI.removeOperand(NumOperands - 1);
301 MI.addOperand(MI.getOperand(NumOperands - 2));
302 }
303 LLVM_DEBUG(dbgs() << " With: " << MI);
304 return false;
305 };
306
307 switch (Opc) {
308 case X86::BLENDPDrri:
309 return ProcessBLENDToMOV(X86::MOVSDrr, 0x3, 0x1);
310 case X86::VBLENDPDrri:
311 return ProcessBLENDToMOV(X86::VMOVSDrr, 0x3, 0x1);
312
313 case X86::BLENDPSrri:
314 return ProcessBLENDToMOV(X86::MOVSSrr, 0xF, 0x1) ||
315 ProcessBLENDToMOV(X86::MOVSDrr, 0xF, 0x3);
316 case X86::VBLENDPSrri:
317 return ProcessBLENDToMOV(X86::VMOVSSrr, 0xF, 0x1) ||
318 ProcessBLENDToMOV(X86::VMOVSDrr, 0xF, 0x3);
319
320 case X86::VPBLENDWrri:
321 // TODO: Add X86::VPBLENDWrmi handling
322 // TODO: Add X86::VPBLENDWYrri handling
323 // TODO: Add X86::VPBLENDWYrmi handling
324 return ProcessBLENDWToBLENDD(X86::VPBLENDDrri, 4);
325
326 case X86::VPERMILPDri:
327 return ProcessVPERMILPDri(X86::VSHUFPDrri);
328 case X86::VPERMILPDYri:
329 return ProcessVPERMILPDri(X86::VSHUFPDYrri);
330 case X86::VPERMILPDZ128ri:
331 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
332 case X86::VPERMILPDZ256ri:
333 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
334 case X86::VPERMILPDZri:
335 return ProcessVPERMILPDri(X86::VSHUFPDZrri);
336 case X86::VPERMILPDZ128rikz:
337 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
338 case X86::VPERMILPDZ256rikz:
339 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
340 case X86::VPERMILPDZrikz:
341 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
342 case X86::VPERMILPDZ128rik:
343 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
344 case X86::VPERMILPDZ256rik:
345 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
346 case X86::VPERMILPDZrik:
347 return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
348
349 case X86::VPERMILPSri:
350 return ProcessVPERMILPSri(X86::VSHUFPSrri);
351 case X86::VPERMILPSYri:
352 return ProcessVPERMILPSri(X86::VSHUFPSYrri);
353 case X86::VPERMILPSZ128ri:
354 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
355 case X86::VPERMILPSZ256ri:
356 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
357 case X86::VPERMILPSZri:
358 return ProcessVPERMILPSri(X86::VSHUFPSZrri);
359 case X86::VPERMILPSZ128rikz:
360 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
361 case X86::VPERMILPSZ256rikz:
362 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
363 case X86::VPERMILPSZrikz:
364 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
365 case X86::VPERMILPSZ128rik:
366 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
367 case X86::VPERMILPSZ256rik:
368 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
369 case X86::VPERMILPSZrik:
370 return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
371 case X86::VPERMILPSmi:
372 return ProcessVPERMILPSmi(X86::VPSHUFDmi);
373 case X86::VPERMILPSYmi:
374 // TODO: See if there is a more generic way we can test if the replacement
375 // instruction is supported.
376 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
377 case X86::VPERMILPSZ128mi:
378 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
379 case X86::VPERMILPSZ256mi:
380 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
381 case X86::VPERMILPSZmi:
382 return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
383 case X86::VPERMILPSZ128mikz:
384 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
385 case X86::VPERMILPSZ256mikz:
386 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
387 case X86::VPERMILPSZmikz:
388 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
389 case X86::VPERMILPSZ128mik:
390 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
391 case X86::VPERMILPSZ256mik:
392 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
393 case X86::VPERMILPSZmik:
394 return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
395
396 case X86::MOVLHPSrr:
397 case X86::UNPCKLPDrr:
398 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
399 case X86::VMOVLHPSrr:
400 case X86::VUNPCKLPDrr:
401 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
402 case X86::VUNPCKLPDYrr:
403 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
404 // VMOVLHPS is always 128 bits.
405 case X86::VMOVLHPSZrr:
406 case X86::VUNPCKLPDZ128rr:
407 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
408 case X86::VUNPCKLPDZ256rr:
409 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
410 case X86::VUNPCKLPDZrr:
411 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
412 case X86::VUNPCKLPDZ128rrk:
413 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
414 case X86::VUNPCKLPDZ256rrk:
415 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
416 case X86::VUNPCKLPDZrrk:
417 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
418 case X86::VUNPCKLPDZ128rrkz:
419 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
420 case X86::VUNPCKLPDZ256rrkz:
421 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
422 case X86::VUNPCKLPDZrrkz:
423 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
424 case X86::UNPCKHPDrr:
425 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
426 case X86::VUNPCKHPDrr:
427 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
428 case X86::VUNPCKHPDYrr:
429 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
430 case X86::VUNPCKHPDZ128rr:
431 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
432 case X86::VUNPCKHPDZ256rr:
433 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
434 case X86::VUNPCKHPDZrr:
435 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
436 case X86::VUNPCKHPDZ128rrk:
437 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
438 case X86::VUNPCKHPDZ256rrk:
439 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
440 case X86::VUNPCKHPDZrrk:
441 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
442 case X86::VUNPCKHPDZ128rrkz:
443 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
444 case X86::VUNPCKHPDZ256rrkz:
445 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
446 case X86::VUNPCKHPDZrrkz:
447 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
448 case X86::UNPCKLPDrm:
449 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
450 case X86::VUNPCKLPDrm:
451 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
452 case X86::VUNPCKLPDYrm:
453 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
454 case X86::VUNPCKLPDZ128rm:
455 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
456 case X86::VUNPCKLPDZ256rm:
457 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
458 case X86::VUNPCKLPDZrm:
459 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
460 case X86::VUNPCKLPDZ128rmk:
461 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
462 case X86::VUNPCKLPDZ256rmk:
463 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
464 case X86::VUNPCKLPDZrmk:
465 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
466 case X86::VUNPCKLPDZ128rmkz:
467 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
468 case X86::VUNPCKLPDZ256rmkz:
469 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
470 case X86::VUNPCKLPDZrmkz:
471 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
472 case X86::UNPCKHPDrm:
473 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
474 case X86::VUNPCKHPDrm:
475 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
476 case X86::VUNPCKHPDYrm:
477 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
478 case X86::VUNPCKHPDZ128rm:
479 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
480 case X86::VUNPCKHPDZ256rm:
481 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
482 case X86::VUNPCKHPDZrm:
483 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
484 case X86::VUNPCKHPDZ128rmk:
485 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
486 case X86::VUNPCKHPDZ256rmk:
487 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
488 case X86::VUNPCKHPDZrmk:
489 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
490 case X86::VUNPCKHPDZ128rmkz:
491 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
492 case X86::VUNPCKHPDZ256rmkz:
493 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
494 case X86::VUNPCKHPDZrmkz:
495 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
496
497 case X86::UNPCKLPSrr:
498 return ProcessUNPCKPS(X86::PUNPCKLDQrr);
499 case X86::VUNPCKLPSrr:
500 return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
501 case X86::VUNPCKLPSYrr:
502 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
503 case X86::VUNPCKLPSZ128rr:
504 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
505 case X86::VUNPCKLPSZ256rr:
506 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
507 case X86::VUNPCKLPSZrr:
508 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
509 case X86::VUNPCKLPSZ128rrk:
510 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
511 case X86::VUNPCKLPSZ256rrk:
512 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
513 case X86::VUNPCKLPSZrrk:
514 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
515 case X86::VUNPCKLPSZ128rrkz:
516 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
517 case X86::VUNPCKLPSZ256rrkz:
518 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
519 case X86::VUNPCKLPSZrrkz:
520 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
521 case X86::UNPCKHPSrr:
522 return ProcessUNPCKPS(X86::PUNPCKHDQrr);
523 case X86::VUNPCKHPSrr:
524 return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
525 case X86::VUNPCKHPSYrr:
526 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
527 case X86::VUNPCKHPSZ128rr:
528 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
529 case X86::VUNPCKHPSZ256rr:
530 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
531 case X86::VUNPCKHPSZrr:
532 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
533 case X86::VUNPCKHPSZ128rrk:
534 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
535 case X86::VUNPCKHPSZ256rrk:
536 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
537 case X86::VUNPCKHPSZrrk:
538 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
539 case X86::VUNPCKHPSZ128rrkz:
540 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
541 case X86::VUNPCKHPSZ256rrkz:
542 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
543 case X86::VUNPCKHPSZrrkz:
544 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
545 case X86::UNPCKLPSrm:
546 return ProcessUNPCKPS(X86::PUNPCKLDQrm);
547 case X86::VUNPCKLPSrm:
548 return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
549 case X86::VUNPCKLPSYrm:
550 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
551 case X86::VUNPCKLPSZ128rm:
552 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
553 case X86::VUNPCKLPSZ256rm:
554 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
555 case X86::VUNPCKLPSZrm:
556 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
557 case X86::VUNPCKLPSZ128rmk:
558 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
559 case X86::VUNPCKLPSZ256rmk:
560 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
561 case X86::VUNPCKLPSZrmk:
562 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
563 case X86::VUNPCKLPSZ128rmkz:
564 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
565 case X86::VUNPCKLPSZ256rmkz:
566 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
567 case X86::VUNPCKLPSZrmkz:
568 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
569 case X86::UNPCKHPSrm:
570 return ProcessUNPCKPS(X86::PUNPCKHDQrm);
571 case X86::VUNPCKHPSrm:
572 return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
573 case X86::VUNPCKHPSYrm:
574 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
575 case X86::VUNPCKHPSZ128rm:
576 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
577 case X86::VUNPCKHPSZ256rm:
578 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
579 case X86::VUNPCKHPSZrm:
580 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
581 case X86::VUNPCKHPSZ128rmk:
582 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
583 case X86::VUNPCKHPSZ256rmk:
584 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
585 case X86::VUNPCKHPSZrmk:
586 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
587 case X86::VUNPCKHPSZ128rmkz:
588 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
589 case X86::VUNPCKHPSZ256rmkz:
590 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
591 case X86::VUNPCKHPSZrmkz:
592 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
593
594 case X86::PSLLWri:
595 return ProcessShiftLeftToAdd(X86::PADDWrr);
596 case X86::VPSLLWri:
597 return ProcessShiftLeftToAdd(X86::VPADDWrr);
598 case X86::VPSLLWYri:
599 return ProcessShiftLeftToAdd(X86::VPADDWYrr);
600 case X86::VPSLLWZ128ri:
601 return ProcessShiftLeftToAdd(X86::VPADDWZ128rr);
602 case X86::VPSLLWZ256ri:
603 return ProcessShiftLeftToAdd(X86::VPADDWZ256rr);
604 case X86::VPSLLWZri:
605 return ProcessShiftLeftToAdd(X86::VPADDWZrr);
606 case X86::PSLLDri:
607 return ProcessShiftLeftToAdd(X86::PADDDrr);
608 case X86::VPSLLDri:
609 return ProcessShiftLeftToAdd(X86::VPADDDrr);
610 case X86::VPSLLDYri:
611 return ProcessShiftLeftToAdd(X86::VPADDDYrr);
612 case X86::VPSLLDZ128ri:
613 return ProcessShiftLeftToAdd(X86::VPADDDZ128rr);
614 case X86::VPSLLDZ256ri:
615 return ProcessShiftLeftToAdd(X86::VPADDDZ256rr);
616 case X86::VPSLLDZri:
617 return ProcessShiftLeftToAdd(X86::VPADDDZrr);
618 case X86::PSLLQri:
619 return ProcessShiftLeftToAdd(X86::PADDQrr);
620 case X86::VPSLLQri:
621 return ProcessShiftLeftToAdd(X86::VPADDQrr);
622 case X86::VPSLLQYri:
623 return ProcessShiftLeftToAdd(X86::VPADDQYrr);
624 case X86::VPSLLQZ128ri:
625 return ProcessShiftLeftToAdd(X86::VPADDQZ128rr);
626 case X86::VPSLLQZ256ri:
627 return ProcessShiftLeftToAdd(X86::VPADDQZ256rr);
628 case X86::VPSLLQZri:
629 return ProcessShiftLeftToAdd(X86::VPADDQZrr);
630
631 default:
632 return false;
633 }
634}
635
636bool X86FixupInstTuningImpl::runOnMachineFunction(MachineFunction &MF) {
637 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
638 bool Changed = false;
639 ST = &MF.getSubtarget<X86Subtarget>();
640 TII = ST->getInstrInfo();
641 SM = &ST->getSchedModel();
642
643 for (MachineBasicBlock &MBB : MF) {
644 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
645 if (processInstruction(MF, MBB, I)) {
646 ++NumInstChanges;
647 Changed = true;
648 }
649 }
650 }
651 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
652 return Changed;
653}
654
655bool X86FixupInstTuningLegacy::runOnMachineFunction(MachineFunction &MF) {
656 X86FixupInstTuningImpl Impl;
657 return Impl.runOnMachineFunction(MF);
658}
659
660PreservedAnalyses
663 X86FixupInstTuningImpl Impl;
664 return Impl.runOnMachineFunction(MF)
668}
MachineBasicBlock & MBB
Function Alias Analysis false
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static std::optional< bool > CmpOptionals(T NewVal, T CurVal)
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
unsigned getSize(const MachineInstr &MI) const
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
static MachineOperand CreateImm(int64_t Val)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
bool hasNoDomainDelayShuffle() const
const X86InstrInfo * getInstrInfo() const override
bool hasAVX2() const
Changed
Pass manager infrastructure for declaring and invalidating analyses.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3020
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionPass * createX86FixupInstTuningLegacyPass()
Machine model for scheduling, bundling, and heuristics.
Definition MCSchedule.h:258
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition MCSchedule.h:366
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition MCSchedule.h:340
static LLVM_ABI int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)