X86FlagsCopyLowering.cpp
1//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
11/// flag bits.
12///
13/// We have to do this by carefully analyzing and rewriting the usage of the
14/// copied EFLAGS register because there is no general way to rematerialize the
15/// entire EFLAGS register safely and efficiently. Using `popf` both forces
16/// dynamic stack adjustment and can create correctness issues due to IF, TF,
17/// and other non-status flags being overwritten. Sequences involving SAHF
18/// don't work on all x86 processors and are often quite slow compared to
19/// directly testing a single status flag preserved in its own GPR.
20///
21//===----------------------------------------------------------------------===//
22
23#include "X86.h"
24#include "X86InstrInfo.h"
25#include "X86Subtarget.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/ScopeExit.h"
32#include "llvm/ADT/Statistic.h"
48#include "llvm/IR/DebugLoc.h"
49#include "llvm/MC/MCSchedule.h"
50#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <utility>
57
58using namespace llvm;
59
60#define PASS_KEY "x86-flags-copy-lowering"
61#define DEBUG_TYPE PASS_KEY
62
63STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
64STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
65STATISTIC(NumTestsInserted, "Number of test instructions inserted");
66STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
67STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");
68
69namespace {
70
71// Convenient array type for storing registers associated with each condition.
72using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
73
74class X86FlagsCopyLoweringPass : public MachineFunctionPass {
75public:
76 X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}
77
78 StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
79 bool runOnMachineFunction(MachineFunction &MF) override;
80 void getAnalysisUsage(AnalysisUsage &AU) const override;
81
82 /// Pass identification, replacement for typeid.
83 static char ID;
84
85private:
86 MachineRegisterInfo *MRI = nullptr;
87 const X86Subtarget *Subtarget = nullptr;
88 const X86InstrInfo *TII = nullptr;
89 const TargetRegisterInfo *TRI = nullptr;
90 const TargetRegisterClass *PromoteRC = nullptr;
91 MachineDominatorTree *MDT = nullptr;
92
93 CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
94 MachineBasicBlock::iterator TestPos);
95
96 Register promoteCondToReg(MachineBasicBlock &MBB,
97 MachineBasicBlock::iterator TestPos,
98 const DebugLoc &TestLoc, X86::CondCode Cond);
99 std::pair<unsigned, bool> getCondOrInverseInReg(
100 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
101 const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
102 void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
103 const DebugLoc &Loc, unsigned Reg);
104
105 void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
106 const DebugLoc &Loc, MachineInstr &MI,
107 CondRegArray &CondRegs);
108 void rewriteArithmetic(MachineBasicBlock &MBB,
109 MachineBasicBlock::iterator Pos, const DebugLoc &Loc,
110 MachineInstr &MI, CondRegArray &CondRegs);
111 void rewriteMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
112 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
113};
114
115} // end anonymous namespace
116
117INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
118 "X86 EFLAGS copy lowering", false, false)
119INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
120 "X86 EFLAGS copy lowering", false, false)
121
122FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
123 return new X86FlagsCopyLoweringPass();
124}
125
126char X86FlagsCopyLoweringPass::ID = 0;
127
128void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
129 AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
130 MachineFunctionPass::getAnalysisUsage(AU);
131}
132
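// Match instructions that consume CF as an input (ADC/SBB/RCL/RCR and the
// SETB_C pseudos); rewriteArithmetic() below rematerializes CF for these.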
133static bool isArithmeticOp(unsigned Opc) {
134 return X86::isADC(Opc) || X86::isSBB(Opc) || X86::isRCL(Opc) ||
135 X86::isRCR(Opc) || (Opc == X86::SETB_C32r || Opc == X86::SETB_C64r);
136}
137
138static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
139 MachineInstr &SplitI,
140 const X86InstrInfo &TII) {
141 MachineFunction &MF = *MBB.getParent();
142
143 assert(SplitI.getParent() == &MBB &&
144 "Split instruction must be in the split block!");
145 assert(SplitI.isBranch() &&
146 "Only designed to split a tail of branch instructions!");
148 "Must split on an actual jCC instruction!");
149
150 // Dig out the previous instruction to the split point.
151 MachineInstr &PrevI = *std::prev(SplitI.getIterator());
152 assert(PrevI.isBranch() && "Must split after a branch!");
154 "Must split after an actual jCC instruction!");
155 assert(!std::prev(PrevI.getIterator())->isTerminator() &&
156 "Must only have this one terminator prior to the split!");
157
158 // Grab the one successor edge that will stay in `MBB`.
159 MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
160
161 // Analyze the original block to see if we are actually splitting an edge
162 // into two edges. This can happen when we have multiple conditional jumps to
163 // the same successor.
164 bool IsEdgeSplit =
165 std::any_of(SplitI.getIterator(), MBB.instr_end(),
166 [&](MachineInstr &MI) {
167 assert(MI.isTerminator() &&
168 "Should only have spliced terminators!");
169 return llvm::any_of(
170 MI.operands(), [&](MachineOperand &MOp) {
171 return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
172 });
173 }) ||
174 MBB.getFallThrough() == &UnsplitSucc;
175
176 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
177
178 // Insert the new block immediately after the current one. Any existing
179 // fallthrough will be sunk into this new block anyways.
180 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
181
182 // Splice the tail of instructions into the new block.
183 NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
184
185 // Copy the necessary successors (and their probability info) into the new
186 // block.
187 for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
188 if (IsEdgeSplit || *SI != &UnsplitSucc)
189 NewMBB.copySuccessor(&MBB, SI);
190 // Normalize the probabilities if we didn't end up splitting the edge.
191 if (!IsEdgeSplit)
192 NewMBB.normalizeSuccProbs();
193
194 // Now replace all of the moved successors in the original block with the new
195 // block. This will merge their probabilities.
196 for (MachineBasicBlock *Succ : NewMBB.successors())
197 if (Succ != &UnsplitSucc)
198 MBB.replaceSuccessor(Succ, &NewMBB);
199
200 // We should always end up replacing at least one successor.
201 assert(MBB.isSuccessor(&NewMBB) &&
202 "Failed to make the new block a successor!");
203
204 // Now update all the PHIs.
205 for (MachineBasicBlock *Succ : NewMBB.successors()) {
206 for (MachineInstr &MI : *Succ) {
207 if (!MI.isPHI())
208 break;
209
210 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
211 OpIdx += 2) {
212 MachineOperand &OpV = MI.getOperand(OpIdx);
213 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
214 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
215 if (OpMBB.getMBB() != &MBB)
216 continue;
217
218 // Replace the operand for unsplit successors
219 if (!IsEdgeSplit || Succ != &UnsplitSucc) {
220 OpMBB.setMBB(&NewMBB);
221
222 // We have to continue scanning as there may be multiple entries in
223 // the PHI.
224 continue;
225 }
226
227 // When we have split the edge append a new successor.
228 MI.addOperand(MF, OpV);
229 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
230 break;
231 }
232 }
233 }
234
235 return NewMBB;
236}
237
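// Classify how an instruction interacts with EFLAGS: it may not define the
// flags at all, it may define them dead with an NF (no-flags) variant
// available (an avoidable clobber), or it may clobber them unavoidably.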
238enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };
239
240static EFLAGSClobber getClobberType(const MachineInstr &MI) {
241 const MachineOperand *FlagDef =
242 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
243 if (!FlagDef)
244 return NoClobber;
245 if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))
246 return EvitableClobber;
247
248 return InevitableClobber;
249}
250
251bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
252 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
253 << " **********\n");
254
255 Subtarget = &MF.getSubtarget<X86Subtarget>();
256 MRI = &MF.getRegInfo();
257 TII = Subtarget->getInstrInfo();
258 TRI = Subtarget->getRegisterInfo();
259 PromoteRC = &X86::GR8RegClass;
260
261 if (MF.empty())
262 // Nothing to do for a degenerate empty function...
263 return false;
264
265 if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
266 return MI.getOpcode() == TargetOpcode::COPY;
267 }))
268 return false;
269
270 // We change the code, so we don't preserve the dominator tree anyway. If we
271 // got a valid MDT from the pass manager, use that, otherwise construct one
272 // now. This is an optimization that avoids unnecessary MDT construction for
273 // functions that have no flag copies.
274
275 auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
276 std::unique_ptr<MachineDominatorTree> OwnedMDT;
277 if (MDTWrapper) {
278 MDT = &MDTWrapper->getDomTree();
279 } else {
280 OwnedMDT = std::make_unique<MachineDominatorTree>(MF);
281 MDT = OwnedMDT.get();
282 }
283
284 // Collect the copies in RPO so that when there are chains where a copy is in
285 // turn copied again we visit the first one first. This ensures we can find
286 // viable locations for testing the original EFLAGS that dominate all the
287 // uses across complex CFGs.
288 SmallSetVector<MachineInstr *, 4> Copies;
289 ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
290 for (MachineBasicBlock *MBB : RPOT)
291 for (MachineInstr &MI : *MBB)
292 if (MI.getOpcode() == TargetOpcode::COPY &&
293 MI.getOperand(0).getReg() == X86::EFLAGS)
294 Copies.insert(&MI);
295
296 // Try to eliminate the copies by transforming the instructions between the
297 // copy and the copy's def into their NF (no flags update) variants, e.g.
298 //
299 // %1:gr64 = COPY $eflags
300 // OP1 implicit-def dead $eflags
301 // $eflags = COPY %1
302 // OP2 cc, implicit $eflags
303 //
304 // ->
305 //
306 // OP1_NF
307 // OP2 implicit $eflags
308 if (Subtarget->hasNF()) {
309 SmallSetVector<MachineInstr *, 4> RemovedCopies;
310 // CopyIIt may be invalidated by removing copies.
311 auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
312 while (CopyIIt != CopyIEnd) {
313 auto NCopyIIt = std::next(CopyIIt);
314 SmallSetVector<MachineInstr *, 4> EvitableClobbers;
315 MachineInstr *CopyI = *CopyIIt;
316 MachineOperand &VOp = CopyI->getOperand(1);
317 MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
318 MachineBasicBlock *CopyIMBB = CopyI->getParent();
319 MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
320 // Walk all basic blocks reachable in depth-first iteration on the inverse
321 // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
322 // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
323 // all execution paths, instructions from CopyDefI to CopyI (exclusive)
324 // have to be NF-convertible if they clobber flags.
325 for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
326 ++BI) {
327 MachineBasicBlock *MBB = *BI;
328 for (auto I = (MBB != CopyDefIMBB)
329 ? MBB->begin()
330 : std::next(MachineBasicBlock::iterator(CopyDefI)),
331 E = (MBB != CopyIMBB) ? MBB->end()
332 : MachineBasicBlock::iterator(CopyI);
333 I != E; ++I) {
334 MachineInstr &MI = *I;
335 EFLAGSClobber ClobberType = getClobberType(MI);
336 if (ClobberType == NoClobber)
337 continue;
338
339 if (ClobberType == InevitableClobber)
340 goto ProcessNextCopyI;
341
342 assert(ClobberType == EvitableClobber && "unexpected workflow");
343 EvitableClobbers.insert(&MI);
344 }
345 }
346 // Convert evitable clobbers into NF variants and remove the copies.
347 RemovedCopies.insert(CopyI);
348 CopyI->eraseFromParent();
349 if (MRI->use_nodbg_empty(CopyDefI->getOperand(0).getReg())) {
350 RemovedCopies.insert(CopyDefI);
351 CopyDefI->eraseFromParent();
352 }
353 ++NumCopiesEliminated;
354 for (auto *Clobber : EvitableClobbers) {
355 unsigned NewOpc = X86::getNFVariant(Clobber->getOpcode());
356 assert(NewOpc && "evitable clobber must have a NF variant");
357 Clobber->setDesc(TII->get(NewOpc));
358 Clobber->removeOperand(
359 Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
360 ->getOperandNo());
361 ++NumNFsConvertedTo;
362 }
363 // Update liveins for basic blocks in the path
364 for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
365 ++BI)
366 if (*BI != CopyDefIMBB)
367 BI->addLiveIn(X86::EFLAGS);
368 ProcessNextCopyI:
369 CopyIIt = NCopyIIt;
370 }
371 Copies.set_subtract(RemovedCopies);
372 }
373
374 // For the rest of the copies that cannot be eliminated by the NF transform,
375 // we use setcc to preserve the flags in a GPR before OP1, and recheck its value
376 // before using the flags, e.g.
377 //
378 // %1:gr64 = COPY $eflags
379 // OP1 implicit-def dead $eflags
380 // $eflags = COPY %1
381 // OP2 cc, implicit $eflags
382 //
383 // ->
384 //
385 // %1:gr8 = SETCCr cc, implicit $eflags
386 // OP1 implicit-def dead $eflags
387 // TEST8rr %1, %1, implicit-def $eflags
388 // OP2 ne, implicit $eflags
389 for (MachineInstr *CopyI : Copies) {
390 MachineBasicBlock &MBB = *CopyI->getParent();
391
392 MachineOperand &VOp = CopyI->getOperand(1);
393 assert(VOp.isReg() &&
394 "The input to the copy for EFLAGS should always be a register!");
395 MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
396 if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
397 // FIXME: The big likely candidate here are PHI nodes. We could in theory
398 // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
399 // enough that it is probably better to change every other part of LLVM
400 // to avoid creating them. The issue is that once we have PHIs we won't
401 // know which original EFLAGS value we need to capture with our setCCs
402 // below. The end result will be computing a complete set of setCCs that
403 // we *might* want, computing them in every place where we copy *out* of
404 // EFLAGS and then doing SSA formation on all of them to insert necessary
405 // PHI nodes and consume those here. Then hoping that somehow we DCE the
406 // unnecessary ones. This DCE seems very unlikely to be successful and so
407 // we will almost certainly end up with a glut of dead setCC
408 // instructions. Until we have a motivating test case and fail to avoid
409 // it by changing other parts of LLVM's lowering, we refuse to handle
410 // this complex case here.
412 dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
413 CopyDefI.dump());
415 "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
416 }
417
418 auto Cleanup = make_scope_exit([&] {
419 // All uses of the EFLAGS copy are now rewritten, kill the copy into
420 // eflags and if dead the copy from.
421 CopyI->eraseFromParent();
422 if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
423 CopyDefI.eraseFromParent();
424 ++NumCopiesEliminated;
425 });
426
427 MachineOperand &DOp = CopyI->getOperand(0);
428 assert(DOp.isDef() && "Expected register def!");
429 assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
430 if (DOp.isDead())
431 continue;
432
433 MachineBasicBlock *TestMBB = CopyDefI.getParent();
434 auto TestPos = CopyDefI.getIterator();
435 DebugLoc TestLoc = CopyDefI.getDebugLoc();
436
437 LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
438
439 // Walk up across live-in EFLAGS to find where they were actually def'ed.
440 //
441 // This copy's def may just be part of a region of blocks covered by
442 // a single def of EFLAGS and we want to find the top of that region where
443 // possible.
444 //
445 // This is essentially a search for a *candidate* reaching definition
446 // location. We don't need to ever find the actual reaching definition here,
447 // but we want to walk up the dominator tree to find the highest point which
448 // would be viable for such a definition.
449 auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
450 MachineBasicBlock::iterator End) {
451 // Scan backwards as we expect these to be relatively short and often find
452 // a clobber near the end.
453 return llvm::any_of(
454 llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
455 // Flag any instruction (other than the copy we are
456 // currently rewriting) that defs EFLAGS.
457 return &MI != CopyI &&
458 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
459 });
460 };
461 auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
462 MachineBasicBlock *EndMBB) {
463 assert(MDT->dominates(BeginMBB, EndMBB) &&
464 "Only support paths down the dominator tree!");
465 SmallPtrSet<MachineBasicBlock *, 4> Visited;
466 SmallVector<MachineBasicBlock *, 4> Worklist;
467 // We terminate at the beginning. No need to scan it.
468 Visited.insert(BeginMBB);
469 Worklist.push_back(EndMBB);
470 do {
471 auto *MBB = Worklist.pop_back_val();
472 for (auto *PredMBB : MBB->predecessors()) {
473 if (!Visited.insert(PredMBB).second)
474 continue;
475 if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
476 return true;
477 // Enqueue this block to walk its predecessors.
478 Worklist.push_back(PredMBB);
479 }
480 } while (!Worklist.empty());
481 // No clobber found along a path from the begin to end.
482 return false;
483 };
484 while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
485 !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
486 // Find the nearest common dominator of the predecessors, as
487 // that will be the best candidate to hoist into.
488 MachineBasicBlock *HoistMBB =
489 std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
490 *TestMBB->pred_begin(),
491 [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
492 return MDT->findNearestCommonDominator(LHS, RHS);
493 });
494
495 // Now we need to scan all predecessors that may be reached along paths to
496 // the hoist block. A clobber anywhere in any of these blocks prevents the hoist.
497 // Note that this even handles loops because we require *no* clobbers.
498 if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
499 break;
500
501 // We also need the terminators to not sneakily clobber flags.
502 if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
503 HoistMBB->instr_end()))
504 break;
505
506 // We found a viable location, hoist our test position to it.
507 TestMBB = HoistMBB;
508 TestPos = TestMBB->getFirstTerminator()->getIterator();
509 // Clear the debug location as it would just be confusing after hoisting.
510 TestLoc = DebugLoc();
511 }
512 LLVM_DEBUG({
513 auto DefIt = llvm::find_if(
514 llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
515 [&](MachineInstr &MI) {
516 return MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
517 });
518 if (DefIt.base() != TestMBB->instr_begin()) {
519 dbgs() << " Using EFLAGS defined by: ";
520 DefIt->dump();
521 } else {
522 dbgs() << " Using live-in flags for BB:\n";
523 TestMBB->dump();
524 }
525 });
526
527 // While rewriting uses, we buffer jumps and rewrite them in a second pass
528 // because doing so will perturb the CFG that we are walking to find the
529 // uses in the first place.
530 SmallVector<MachineInstr *, 4> JmpIs;
531
532 // Gather the condition flags that have already been preserved in
533 // registers. We do this from scratch each time as we expect there to be
534 // very few of them and we expect to not revisit the same copy definition
535 // many times. If either of those change sufficiently we could build a map
536 // of these up front instead.
537 CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
538
539 // Collect the basic blocks we need to scan. Typically this will just be
540 // a single basic block but we may have to scan multiple blocks if the
541 // EFLAGS copy lives into successors.
542 SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
543 SmallVector<MachineBasicBlock *, 2> Blocks;
544 Blocks.push_back(&MBB);
545
546 do {
547 MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
548
549 // Track if/when we find a kill of the flags in this block.
550 bool FlagsKilled = false;
551
552 // In most cases, we walk from the beginning to the end of the block. But
553 // when the block is the same block as the copy is from, we will visit it
554 // twice. The first time we start from the copy and go to the end. The
555 // second time we start from the beginning and go to the copy. This lets
556 // us handle copies inside of cycles.
557 // FIXME: This loop is *super* confusing. This is at least in part
558 // a symptom of all of this routine needing to be refactored into
559 // documentable components. Once done, there may be a better way to write
560 // this loop.
561 for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
562 ? std::next(CopyI->getIterator())
563 : UseMBB.instr_begin(),
564 MIE = UseMBB.instr_end();
565 MII != MIE;) {
566 MachineInstr &MI = *MII++;
567 // If we are in the original copy block and encounter either the copy
568 // def or the copy itself, break so that we don't re-process any part of
569 // the block or process the instructions in the range that was copied
570 // over.
571 if (&MI == CopyI || &MI == &CopyDefI) {
572 assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
573 "Should only encounter these on the second pass over the "
574 "original block.");
575 break;
576 }
577
578 MachineOperand *FlagUse =
579 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr);
580 FlagsKilled = MI.modifiesRegister(X86::EFLAGS, TRI);
581
582 if (!FlagUse && FlagsKilled)
583 break;
584 else if (!FlagUse)
585 continue;
586
587 LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());
588
589 // Check the kill flag before we rewrite as that may change it.
590 if (FlagUse->isKill())
591 FlagsKilled = true;
592
593 // Once we encounter a branch, the rest of the instructions must also be
594 // branches. We can't rewrite in place here, so we handle them below.
595 //
596 // Note that we don't have to handle tail calls here, even conditional
597 // tail calls, as those are not introduced into the X86 MI until post-RA
598 // branch folding or block placement. As a consequence, we get to deal
599 // with the simpler formulation of conditional branches followed by tail
600 // calls.
601 if (X86::isJCC(MI.getOpcode())) {
602 auto JmpIt = MI.getIterator();
603 do {
604 JmpIs.push_back(&*JmpIt);
605 ++JmpIt;
606 } while (JmpIt != UseMBB.instr_end() &&
607 X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
608 break;
609 }
610
611 // Otherwise we can just rewrite in-place.
612 unsigned Opc = MI.getOpcode();
613 if (Opc == TargetOpcode::COPY) {
614 // Just replace this copy with the original copy def.
615 MRI->replaceRegWith(MI.getOperand(0).getReg(),
616 CopyDefI.getOperand(0).getReg());
617 MI.eraseFromParent();
618 } else if (X86::isSETCC(Opc)) {
619 rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, CondRegs);
620 } else if (isArithmeticOp(Opc)) {
621 rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, CondRegs);
622 } else {
623 rewriteMI(*TestMBB, TestPos, TestLoc, MI, CondRegs);
624 }
625
626 // If this was the last use of the flags, we're done.
627 if (FlagsKilled)
628 break;
629 }
630
631 // If the flags were killed, we're done with this block.
632 if (FlagsKilled)
633 continue;
634
635 // Otherwise we need to scan successors for ones where the flags live-in
636 // and queue those up for processing.
637 for (MachineBasicBlock *SuccMBB : UseMBB.successors())
638 if (SuccMBB->isLiveIn(X86::EFLAGS) &&
639 VisitedBlocks.insert(SuccMBB).second) {
640 // We currently don't do any PHI insertion and so we require that the
641 // test basic block dominates all of the use basic blocks. Further, we
642 // can't have a cycle from the test block back to itself as that would
643 // create a cycle requiring a PHI to break it.
644 //
645 // We could in theory do PHI insertion here if it becomes useful by
646 // just taking undef values in along every edge that we don't trace
647 // this EFLAGS copy along. This isn't as bad as fully general PHI
648 // insertion, but still seems like a great deal of complexity.
649 //
650 // Because it is theoretically possible that some earlier MI pass or
651 // other lowering transformation could induce this to happen, we do
652 // a hard check even in non-debug builds here.
653 if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
654 LLVM_DEBUG({
655 dbgs()
656 << "ERROR: Encountered use that is not dominated by our test "
657 "basic block! Rewriting this would require inserting PHI "
658 "nodes to track the flag state across the CFG.\n\nTest "
659 "block:\n";
660 TestMBB->dump();
661 dbgs() << "Use block:\n";
662 SuccMBB->dump();
663 });
665 "Cannot lower EFLAGS copy when original copy def "
666 "does not dominate all uses.");
667 }
668
669 Blocks.push_back(SuccMBB);
670
671 // After this, EFLAGS will be recreated before each use.
672 SuccMBB->removeLiveIn(X86::EFLAGS);
673 }
674 } while (!Blocks.empty());
675
676 // Now rewrite the jumps that use the flags. These we handle specially
677 // because if there are multiple jumps in a single basic block we'll have
678 // to do surgery on the CFG.
679 MachineBasicBlock *LastJmpMBB = nullptr;
680 for (MachineInstr *JmpI : JmpIs) {
681 // Past the first jump within a basic block we need to split the blocks
682 // apart.
683 if (JmpI->getParent() == LastJmpMBB)
684 splitBlock(*JmpI->getParent(), *JmpI, *TII);
685 else
686 LastJmpMBB = JmpI->getParent();
687
688 rewriteMI(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
689 }
690
691 // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
692 // the copy's def operand is itself a kill.
693 }
694
695#ifndef NDEBUG
696 for (MachineBasicBlock &MBB : MF)
697 for (MachineInstr &MI : MBB)
698 if (MI.getOpcode() == TargetOpcode::COPY &&
699 (MI.getOperand(0).getReg() == X86::EFLAGS ||
700 MI.getOperand(1).getReg() == X86::EFLAGS)) {
701 LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
702 MI.dump());
703 llvm_unreachable("Unlowered EFLAGS copy!");
704 }
705#endif
706
707 return true;
708}
709
710/// Collect any conditions that have already been set in registers so that we
711/// can re-use them rather than adding duplicates.
712CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
713 MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
714 CondRegArray CondRegs = {};
715
716 // Scan backwards across the range of instructions with live EFLAGS.
717 for (MachineInstr &MI :
718 llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
719 X86::CondCode Cond = X86::getCondFromSETCC(MI);
720 if (Cond != X86::COND_INVALID && !MI.mayStore() &&
721 MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
722 assert(MI.getOperand(0).isDef() &&
723 "A non-storing SETcc should always define a register!");
724 CondRegs[Cond] = MI.getOperand(0).getReg();
725 }
726
727 // Stop scanning when we see the first definition of the EFLAGS as prior to
728 // this we would potentially capture the wrong flag state.
729 if (MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr))
730 break;
731 }
732 return CondRegs;
733}
734
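// Materialize the given condition into a GR8 virtual register by emitting a
// SETCC at the chosen test position, where the original EFLAGS are live.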
735Register X86FlagsCopyLoweringPass::promoteCondToReg(
736 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
737 const DebugLoc &TestLoc, X86::CondCode Cond) {
738 Register Reg = MRI->createVirtualRegister(PromoteRC);
739 auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
740 .addImm(Cond);
741 (void)SetI;
742 LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
743 ++NumSetCCsInserted;
744 return Reg;
745}
746
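// Return a register holding either the requested condition or its inverse,
// creating one with promoteCondToReg() if neither is cached yet; the bool
// result reports whether the returned register holds the inverted condition.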
747std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
748 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
749 const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
750 unsigned &CondReg = CondRegs[Cond];
751 unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
752 if (!CondReg && !InvCondReg)
753 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
754
755 if (CondReg)
756 return {CondReg, false};
757 else
758 return {InvCondReg, true};
759}
760
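// Emit a TEST8rr of the saved condition byte against itself so that ZF is
// clear exactly when the saved condition was true; users are then rewritten
// to check COND_NE (or COND_E for an inverted register).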
761void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
762 MachineBasicBlock::iterator Pos,
763 const DebugLoc &Loc, unsigned Reg) {
764 auto TestI =
765 BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
766 (void)TestI;
767 LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
768 ++NumTestsInserted;
769}
770
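// Rewrite a SETcc that used the copied EFLAGS to instead reuse (or create)
// the GR8 register holding that condition: register forms are replaced
// outright, store forms become a MOV8mr of the saved byte.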
771void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
772 MachineBasicBlock::iterator Pos,
773 const DebugLoc &Loc,
774 MachineInstr &MI,
775 CondRegArray &CondRegs) {
776 X86::CondCode Cond = X86::getCondFromSETCC(MI);
777 // Note that we can't usefully rewrite this to the inverse without complex
778 // analysis of the users of the setCC. Largely we rely on duplicates which
779 // could have been avoided already being avoided here.
780 unsigned &CondReg = CondRegs[Cond];
781 if (!CondReg)
782 CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
783
784 // Rewriting a register def is trivial: we just replace the register and
785 // remove the setcc.
786 if (!MI.mayStore()) {
787 assert(MI.getOperand(0).isReg() &&
788 "Cannot have a non-register defined operand to SETcc!");
789 Register OldReg = MI.getOperand(0).getReg();
790 // Drop Kill flags on the old register before replacing. CondReg may have
791 // a longer live range.
792 MRI->clearKillFlags(OldReg);
793 MRI->replaceRegWith(OldReg, CondReg);
794 MI.eraseFromParent();
795 return;
796 }
797
798 // Otherwise, we need to emit a store.
799 auto MIB = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
800 TII->get(X86::MOV8mr));
801 // Copy the address operands.
802 for (int i = 0; i < X86::AddrNumOperands; ++i)
803 MIB.add(MI.getOperand(i));
804
805 MIB.addReg(CondReg);
806 MIB.setMemRefs(MI.memoperands());
807 MI.eraseFromParent();
808}
809
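// Rewrite an instruction that consumes CF (see isArithmeticOp) so it takes its
// carry input from the saved condition byte rather than the clobbered EFLAGS.
// Adding 255 to a byte holding 0 or 1 carries exactly when the byte is 1, so a
// single ADD8ri rematerializes CF. Roughly (a sketch in the spirit of the
// examples above, with illustrative register names):
//
//   %cf:gr8 = SETCCr b, implicit $eflags   ; CF saved at the test point
//   OP1 implicit-def dead $eflags          ; original flags clobbered
//   dead %tmp:gr8 = ADD8ri %cf, 255, implicit-def $eflags
//   OP2 implicit $eflags                   ; e.g. an ADC now sees the saved CF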
810void X86FlagsCopyLoweringPass::rewriteArithmetic(
811 MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
812 const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
813 // Arithmetic is either reading CF or OF.
814 X86::CondCode Cond = X86::COND_B; // CF == 1
815 // The addend to use to reset CF or OF when added to the flag value.
816 // Use an addend of 255 so that adding a value of one to it carries, since
817 // there is no higher bit available in the byte.
818 int Addend = 255;
819
820 // Now get a register that contains the value of the flag input to the
821 // arithmetic. We require exactly this flag to simplify the arithmetic
822 // required to materialize it back into the flag.
823 unsigned &CondReg = CondRegs[Cond];
824 if (!CondReg)
825 CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
826
827 // Insert an instruction that will set the flag back to the desired value.
828 Register TmpReg = MRI->createVirtualRegister(PromoteRC);
829 auto AddI =
830 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
831 TII->get(Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
832 .addDef(TmpReg, RegState::Dead)
833 .addReg(CondReg)
834 .addImm(Addend);
835 (void)AddI;
836 LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
837 ++NumAddsInserted;
838 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
839}
840
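// The legacy X87 FP CMOV pseudos encode their condition in the opcode rather
// than in a condition-code operand; map each such opcode to the condition it
// implicitly tests.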
841static X86::CondCode getImplicitCondFromMI(unsigned Opc) {
842#define FROM_TO(A, B) \
843 case X86::CMOV##A##_Fp32: \
844 case X86::CMOV##A##_Fp64: \
845 case X86::CMOV##A##_Fp80: \
846 return X86::COND_##B;
847
848 switch (Opc) {
849 default:
850 return X86::COND_INVALID;
851 FROM_TO(B, B)
852 FROM_TO(E, E)
853 FROM_TO(P, P)
854 FROM_TO(BE, BE)
855 FROM_TO(NB, AE)
856 FROM_TO(NE, NE)
857 FROM_TO(NP, NP)
858 FROM_TO(NBE, A)
859 }
860#undef FROM_TO
861}
862
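// Select the FP CMOV pseudo opcode that tests COND_E or COND_NE for the same
// operand size as the original opcode; after the TEST rewrite those are the
// only two conditions we ever need.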
863static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
864 assert((CC == X86::COND_E || CC == X86::COND_NE) && "Unexpected CC");
865#define CASE(A) \
866 case X86::CMOVB_##A: \
867 case X86::CMOVE_##A: \
868 case X86::CMOVP_##A: \
869 case X86::CMOVBE_##A: \
870 case X86::CMOVNB_##A: \
871 case X86::CMOVNE_##A: \
872 case X86::CMOVNP_##A: \
873 case X86::CMOVNBE_##A: \
874 return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
875 switch (Opc) {
876 default:
877 llvm_unreachable("Unexpected opcode");
878 CASE(Fp32)
879 CASE(Fp64)
880 CASE(Fp80)
881 }
882#undef CASE
883}
884
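// Generic rewrite for any remaining EFLAGS user (CMOVs, branches, etc.):
// fetch or create a register holding the needed condition (or its inverse),
// re-test it right before the user, and retarget the user to COND_NE/COND_E.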
885void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
886 MachineBasicBlock::iterator Pos,
887 const DebugLoc &Loc, MachineInstr &MI,
888 CondRegArray &CondRegs) {
889 // First get the register containing this specific condition.
890 bool IsImplicitCC = false;
891 X86::CondCode CC = X86::getCondFromMI(MI);
892 if (CC == X86::COND_INVALID) {
893 CC = getImplicitCondFromMI(MI.getOpcode());
894 IsImplicitCC = true;
895 }
896 assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
897 unsigned CondReg;
898 bool Inverted;
899 std::tie(CondReg, Inverted) =
900 getCondOrInverseInReg(MBB, Pos, Loc, CC, CondRegs);
901
902 // Insert a direct test of the saved register.
903 insertTest(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), CondReg);
904
905 // Rewrite the instruction to use the !ZF flag from the test, and then kill
906 // its use of the flags afterward.
907 X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
908 if (IsImplicitCC)
909 MI.setDesc(TII->get(getOpcodeWithCC(MI.getOpcode(), NewCC)));
910 else
911 MI.getOperand(MI.getDesc().getNumOperands() - 1).setImm(NewCC);
912
913 MI.findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
914 LLVM_DEBUG(dbgs() << " fixed instruction: "; MI.dump());
915}