33#define DEBUG_TYPE "x86-fixup-inst-tuning"
35STATISTIC(NumInstChanges,
"Number of instructions changes");
53 MachineFunctionProperties::Property::NoVRegs);
63char X86FixupInstTuningPass::ID = 0;
68 return new X86FixupInstTuningPass();
73 if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal)
74 return *NewVal < *CurVal;
79bool X86FixupInstTuningPass::processInstruction(
83 unsigned Opc =
MI.getOpcode();
84 unsigned NumOperands =
MI.getDesc().getNumOperands();
86 auto GetInstTput = [&](
unsigned Opcode) -> std::optional<double> {
89 *ST, *(SM->getSchedClassDesc(
TII->get(Opcode).getSchedClass())));
92 auto GetInstLat = [&](
unsigned Opcode) -> std::optional<double> {
95 *ST, *(SM->getSchedClassDesc(
TII->get(Opcode).getSchedClass())));
98 auto GetInstSize = [&](
unsigned Opcode) -> std::optional<unsigned> {
105 auto NewOpcPreferable = [&](
unsigned NewOpc,
106 bool ReplaceInTie =
true) ->
bool {
107 std::optional<bool> Res;
108 if (SM->hasInstrSchedModel()) {
110 Res =
CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
114 Res =
CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
119 Res =
CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
132 auto ProcessVPERMILPDri = [&](
unsigned NewOpc) ->
bool {
133 if (!NewOpcPreferable(NewOpc))
135 unsigned MaskImm =
MI.getOperand(NumOperands - 1).getImm();
136 MI.removeOperand(NumOperands - 1);
137 MI.addOperand(
MI.getOperand(NumOperands - 2));
138 MI.setDesc(
TII->get(NewOpc));
147 auto ProcessVPERMILPSri = [&](
unsigned NewOpc) ->
bool {
148 if (!NewOpcPreferable(NewOpc))
150 unsigned MaskImm =
MI.getOperand(NumOperands - 1).getImm();
151 MI.removeOperand(NumOperands - 1);
152 MI.addOperand(
MI.getOperand(NumOperands - 2));
153 MI.setDesc(
TII->get(NewOpc));
161 auto ProcessVPERMILPSmi = [&](
unsigned NewOpc) ->
bool {
164 if (!
ST->hasNoDomainDelayShuffle() ||
165 !NewOpcPreferable(NewOpc,
false))
167 MI.setDesc(
TII->get(NewOpc));
185 auto ProcessUNPCK = [&](
unsigned NewOpc,
unsigned MaskImm) ->
bool {
186 if (!NewOpcPreferable(NewOpc,
false))
189 MI.setDesc(
TII->get(NewOpc));
194 auto ProcessUNPCKToIntDomain = [&](
unsigned NewOpc) ->
bool {
198 if (!
ST->hasNoDomainDelayShuffle() ||
199 !NewOpcPreferable(NewOpc,
false))
201 MI.setDesc(
TII->get(NewOpc));
205 auto ProcessUNPCKLPDrr = [&](
unsigned NewOpcIntDomain,
206 unsigned NewOpc) ->
bool {
207 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
209 return ProcessUNPCK(NewOpc, 0x00);
211 auto ProcessUNPCKHPDrr = [&](
unsigned NewOpcIntDomain,
212 unsigned NewOpc) ->
bool {
213 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
215 return ProcessUNPCK(NewOpc, 0xff);
218 auto ProcessUNPCKPDrm = [&](
unsigned NewOpcIntDomain) ->
bool {
219 return ProcessUNPCKToIntDomain(NewOpcIntDomain);
222 auto ProcessUNPCKPS = [&](
unsigned NewOpc) ->
bool {
223 return ProcessUNPCKToIntDomain(NewOpc);
227 case X86::VPERMILPDri:
228 return ProcessVPERMILPDri(X86::VSHUFPDrri);
229 case X86::VPERMILPDYri:
230 return ProcessVPERMILPDri(X86::VSHUFPDYrri);
231 case X86::VPERMILPDZ128ri:
232 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
233 case X86::VPERMILPDZ256ri:
234 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
235 case X86::VPERMILPDZri:
236 return ProcessVPERMILPDri(X86::VSHUFPDZrri);
237 case X86::VPERMILPDZ128rikz:
238 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
239 case X86::VPERMILPDZ256rikz:
240 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
241 case X86::VPERMILPDZrikz:
242 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
243 case X86::VPERMILPDZ128rik:
244 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
245 case X86::VPERMILPDZ256rik:
246 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
247 case X86::VPERMILPDZrik:
248 return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
250 case X86::VPERMILPSri:
251 return ProcessVPERMILPSri(X86::VSHUFPSrri);
252 case X86::VPERMILPSYri:
253 return ProcessVPERMILPSri(X86::VSHUFPSYrri);
254 case X86::VPERMILPSZ128ri:
255 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
256 case X86::VPERMILPSZ256ri:
257 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
258 case X86::VPERMILPSZri:
259 return ProcessVPERMILPSri(X86::VSHUFPSZrri);
260 case X86::VPERMILPSZ128rikz:
261 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
262 case X86::VPERMILPSZ256rikz:
263 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
264 case X86::VPERMILPSZrikz:
265 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
266 case X86::VPERMILPSZ128rik:
267 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
268 case X86::VPERMILPSZ256rik:
269 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
270 case X86::VPERMILPSZrik:
271 return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
272 case X86::VPERMILPSmi:
273 return ProcessVPERMILPSmi(X86::VPSHUFDmi);
274 case X86::VPERMILPSYmi:
277 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) :
false;
278 case X86::VPERMILPSZ128mi:
279 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
280 case X86::VPERMILPSZ256mi:
281 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
282 case X86::VPERMILPSZmi:
283 return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
284 case X86::VPERMILPSZ128mikz:
285 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
286 case X86::VPERMILPSZ256mikz:
287 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
288 case X86::VPERMILPSZmikz:
289 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
290 case X86::VPERMILPSZ128mik:
291 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
292 case X86::VPERMILPSZ256mik:
293 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
294 case X86::VPERMILPSZmik:
295 return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
298 case X86::UNPCKLPDrr:
299 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
300 case X86::VMOVLHPSrr:
301 case X86::VUNPCKLPDrr:
302 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
303 case X86::VUNPCKLPDYrr:
304 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
306 case X86::VMOVLHPSZrr:
307 case X86::VUNPCKLPDZ128rr:
308 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
309 case X86::VUNPCKLPDZ256rr:
310 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
311 case X86::VUNPCKLPDZrr:
312 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
313 case X86::VUNPCKLPDZ128rrk:
314 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
315 case X86::VUNPCKLPDZ256rrk:
316 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
317 case X86::VUNPCKLPDZrrk:
318 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
319 case X86::VUNPCKLPDZ128rrkz:
320 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
321 case X86::VUNPCKLPDZ256rrkz:
322 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
323 case X86::VUNPCKLPDZrrkz:
324 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
325 case X86::UNPCKHPDrr:
326 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
327 case X86::VUNPCKHPDrr:
328 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
329 case X86::VUNPCKHPDYrr:
330 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
331 case X86::VUNPCKHPDZ128rr:
332 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
333 case X86::VUNPCKHPDZ256rr:
334 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
335 case X86::VUNPCKHPDZrr:
336 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
337 case X86::VUNPCKHPDZ128rrk:
338 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
339 case X86::VUNPCKHPDZ256rrk:
340 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
341 case X86::VUNPCKHPDZrrk:
342 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
343 case X86::VUNPCKHPDZ128rrkz:
344 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
345 case X86::VUNPCKHPDZ256rrkz:
346 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
347 case X86::VUNPCKHPDZrrkz:
348 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
349 case X86::UNPCKLPDrm:
350 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
351 case X86::VUNPCKLPDrm:
352 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
353 case X86::VUNPCKLPDYrm:
354 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
355 case X86::VUNPCKLPDZ128rm:
356 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
357 case X86::VUNPCKLPDZ256rm:
358 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
359 case X86::VUNPCKLPDZrm:
360 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
361 case X86::VUNPCKLPDZ128rmk:
362 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
363 case X86::VUNPCKLPDZ256rmk:
364 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
365 case X86::VUNPCKLPDZrmk:
366 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
367 case X86::VUNPCKLPDZ128rmkz:
368 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
369 case X86::VUNPCKLPDZ256rmkz:
370 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
371 case X86::VUNPCKLPDZrmkz:
372 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
373 case X86::UNPCKHPDrm:
374 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
375 case X86::VUNPCKHPDrm:
376 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
377 case X86::VUNPCKHPDYrm:
378 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
379 case X86::VUNPCKHPDZ128rm:
380 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
381 case X86::VUNPCKHPDZ256rm:
382 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
383 case X86::VUNPCKHPDZrm:
384 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
385 case X86::VUNPCKHPDZ128rmk:
386 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
387 case X86::VUNPCKHPDZ256rmk:
388 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
389 case X86::VUNPCKHPDZrmk:
390 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
391 case X86::VUNPCKHPDZ128rmkz:
392 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
393 case X86::VUNPCKHPDZ256rmkz:
394 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
395 case X86::VUNPCKHPDZrmkz:
396 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
398 case X86::UNPCKLPSrr:
399 return ProcessUNPCKPS(X86::PUNPCKLDQrr);
400 case X86::VUNPCKLPSrr:
401 return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
402 case X86::VUNPCKLPSYrr:
403 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
404 case X86::VUNPCKLPSZ128rr:
405 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
406 case X86::VUNPCKLPSZ256rr:
407 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
408 case X86::VUNPCKLPSZrr:
409 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
410 case X86::VUNPCKLPSZ128rrk:
411 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
412 case X86::VUNPCKLPSZ256rrk:
413 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
414 case X86::VUNPCKLPSZrrk:
415 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
416 case X86::VUNPCKLPSZ128rrkz:
417 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
418 case X86::VUNPCKLPSZ256rrkz:
419 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
420 case X86::VUNPCKLPSZrrkz:
421 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
422 case X86::UNPCKHPSrr:
423 return ProcessUNPCKPS(X86::PUNPCKHDQrr);
424 case X86::VUNPCKHPSrr:
425 return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
426 case X86::VUNPCKHPSYrr:
427 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
428 case X86::VUNPCKHPSZ128rr:
429 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
430 case X86::VUNPCKHPSZ256rr:
431 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
432 case X86::VUNPCKHPSZrr:
433 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
434 case X86::VUNPCKHPSZ128rrk:
435 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
436 case X86::VUNPCKHPSZ256rrk:
437 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
438 case X86::VUNPCKHPSZrrk:
439 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
440 case X86::VUNPCKHPSZ128rrkz:
441 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
442 case X86::VUNPCKHPSZ256rrkz:
443 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
444 case X86::VUNPCKHPSZrrkz:
445 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
446 case X86::UNPCKLPSrm:
447 return ProcessUNPCKPS(X86::PUNPCKLDQrm);
448 case X86::VUNPCKLPSrm:
449 return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
450 case X86::VUNPCKLPSYrm:
451 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
452 case X86::VUNPCKLPSZ128rm:
453 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
454 case X86::VUNPCKLPSZ256rm:
455 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
456 case X86::VUNPCKLPSZrm:
457 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
458 case X86::VUNPCKLPSZ128rmk:
459 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
460 case X86::VUNPCKLPSZ256rmk:
461 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
462 case X86::VUNPCKLPSZrmk:
463 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
464 case X86::VUNPCKLPSZ128rmkz:
465 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
466 case X86::VUNPCKLPSZ256rmkz:
467 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
468 case X86::VUNPCKLPSZrmkz:
469 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
470 case X86::UNPCKHPSrm:
471 return ProcessUNPCKPS(X86::PUNPCKHDQrm);
472 case X86::VUNPCKHPSrm:
473 return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
474 case X86::VUNPCKHPSYrm:
475 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
476 case X86::VUNPCKHPSZ128rm:
477 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
478 case X86::VUNPCKHPSZ256rm:
479 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
480 case X86::VUNPCKHPSZrm:
481 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
482 case X86::VUNPCKHPSZ128rmk:
483 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
484 case X86::VUNPCKHPSZ256rmk:
485 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
486 case X86::VUNPCKHPSZrmk:
487 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
488 case X86::VUNPCKHPSZ128rmkz:
489 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
490 case X86::VUNPCKHPSZ256rmkz:
491 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
492 case X86::VUNPCKHPSZrmkz:
493 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
499bool X86FixupInstTuningPass::runOnMachineFunction(
MachineFunction &MF) {
501 bool Changed =
false;
503 TII =
ST->getInstrInfo();
504 SM = &
ST->getSchedModel();
508 if (processInstruction(MF,
MBB,
I)) {
const HexagonInstrInfo * TII
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static std::optional< bool > CmpOptionals(T NewVal, T CurVal)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getSize(const MachineInstr &MI) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
static MachineOperand CreateImm(int64_t Val)
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
StringRef - Represent a constant reference to a string, i.e.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createX86FixupInstTuning()
Return a pass that replaces equivalent slower instructions with faster ones.
Machine model for scheduling, bundling, and heuristics.
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
static double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)