LLVM 20.0.0git
HexagonExpandCondsets.cpp
Go to the documentation of this file.
1//===- HexagonExpandCondsets.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Replace mux instructions with the corresponding legal instructions.
10// It is meant to work post-SSA, but still on virtual registers. It was
11// originally placed between register coalescing and machine instruction
12// scheduler.
13// In this place in the optimization sequence, live interval analysis had
14// been performed, and the live intervals should be preserved. A large part
15// of the code deals with preserving the liveness information.
16//
17// Liveness tracking aside, the main functionality of this pass is divided
18// into two steps. The first step is to replace an instruction
19// %0 = C2_mux %1, %2, %3
20// with a pair of conditional transfers
21// %0 = A2_tfrt %1, %2
22// %0 = A2_tfrf %1, %3
23// It is the intention that the execution of this pass could be terminated
24// after this step, and the code generated would be functionally correct.
25//
26// If the uses of the source values %1 and %2 are kills, and their
27// definitions are predicable, then in the second step, the conditional
28// transfers will then be rewritten as predicated instructions. E.g.
29// %0 = A2_or %1, %2
30// %3 = A2_tfrt %99, killed %0
31// will be rewritten as
32// %3 = A2_port %99, %1, %2
33//
34// This replacement has two variants: "up" and "down". Consider this case:
35// %0 = A2_or %1, %2
36// ... [intervening instructions] ...
37// %3 = A2_tfrt %99, killed %0
38// variant "up":
39// %3 = A2_port %99, %1, %2
40// ... [intervening instructions, %0->vreg3] ...
41// [deleted]
42// variant "down":
43// [deleted]
44// ... [intervening instructions] ...
45// %3 = A2_port %99, %1, %2
46//
47// Both, one or none of these variants may be valid, and checks are made
48// to rule out inapplicable variants.
49//
50// As an additional optimization, before either of the two steps above is
51// executed, the pass attempts to coalesce the target register with one of
52// the source registers, e.g. given an instruction
53// %3 = C2_mux %0, %1, %2
54// %3 will be coalesced with either %1 or %2. If this succeeds,
55// the instruction would then be (for example)
56// %3 = C2_mux %0, %3, %2
57// and, under certain circumstances, this could result in only one predicated
58// instruction:
59// %3 = A2_tfrf %0, %2
60//
61
62// Splitting a definition of a register into two predicated transfers
63// creates a complication in liveness tracking. Live interval computation
64// will see both instructions as actual definitions, and will mark the
65// first one as dead. The definition is not actually dead, and this
66// situation will need to be fixed. For example:
67// dead %1 = A2_tfrt ... ; marked as dead
68// %1 = A2_tfrf ...
69//
70// Since any of the individual predicated transfers may end up getting
71// removed (in case it is an identity copy), some pre-existing def may
72// be marked as dead after live interval recomputation:
73// dead %1 = ... ; marked as dead
74// ...
75// %1 = A2_tfrf ... ; if A2_tfrt is removed
76// This case happens if %1 was used as a source in A2_tfrt, which means
77// that is it actually live at the A2_tfrf, and so the now dead definition
78// of %1 will need to be updated to non-dead at some point.
79//
80// This issue could be remedied by adding implicit uses to the predicated
81// transfers, but this will create a problem with subsequent predication,
82// since the transfers will no longer be possible to reorder. To avoid
83// that, the initial splitting will not add any implicit uses. These
84// implicit uses will be added later, after predication. The extra price,
85// however, is that finding the locations where the implicit uses need
86// to be added, and updating the live ranges will be more involved.
87
88#include "HexagonInstrInfo.h"
89#include "HexagonRegisterInfo.h"
90#include "llvm/ADT/DenseMap.h"
91#include "llvm/ADT/SetVector.h"
93#include "llvm/ADT/StringRef.h"
107#include "llvm/IR/DebugLoc.h"
108#include "llvm/IR/Function.h"
110#include "llvm/MC/LaneBitmask.h"
111#include "llvm/Pass.h"
113#include "llvm/Support/Debug.h"
116#include <cassert>
117#include <iterator>
118#include <map>
119#include <set>
120#include <utility>
121
122#define DEBUG_TYPE "expand-condsets"
123
124using namespace llvm;
125
126static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
127 cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
128static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
129 cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
130
131namespace llvm {
132
135
136} // end namespace llvm
137
138namespace {
139
140 class HexagonExpandCondsets : public MachineFunctionPass {
141 public:
142 static char ID;
143
144 HexagonExpandCondsets() : MachineFunctionPass(ID) {
145 if (OptCoaLimit.getPosition())
146 CoaLimitActive = true, CoaLimit = OptCoaLimit;
147 if (OptTfrLimit.getPosition())
148 TfrLimitActive = true, TfrLimit = OptTfrLimit;
150 }
151
152 StringRef getPassName() const override { return "Hexagon Expand Condsets"; }
153
154 void getAnalysisUsage(AnalysisUsage &AU) const override {
161 }
162
163 bool runOnMachineFunction(MachineFunction &MF) override;
164
165 private:
166 const HexagonInstrInfo *HII = nullptr;
167 const TargetRegisterInfo *TRI = nullptr;
169 MachineRegisterInfo *MRI = nullptr;
170 LiveIntervals *LIS = nullptr;
171 bool CoaLimitActive = false;
172 bool TfrLimitActive = false;
173 unsigned CoaLimit;
174 unsigned TfrLimit;
175 unsigned CoaCounter = 0;
176 unsigned TfrCounter = 0;
177
178 // FIXME: Consolidate duplicate definitions of RegisterRef
179 struct RegisterRef {
180 RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
181 Sub(Op.getSubReg()) {}
182 RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
183
184 bool operator== (RegisterRef RR) const {
185 return Reg == RR.Reg && Sub == RR.Sub;
186 }
187 bool operator!= (RegisterRef RR) const { return !operator==(RR); }
188 bool operator< (RegisterRef RR) const {
189 return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
190 }
191
193 unsigned Sub;
194 };
195
196 using ReferenceMap = DenseMap<unsigned, unsigned>;
197 enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
198 enum { Exec_Then = 0x10, Exec_Else = 0x20 };
199
200 unsigned getMaskForSub(unsigned Sub);
201 bool isCondset(const MachineInstr &MI);
202 LaneBitmask getLaneMask(Register Reg, unsigned Sub);
203
204 void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
205 bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
206
207 void updateDeadsInRange(Register Reg, LaneBitmask LM, LiveRange &Range);
208 void updateKillFlags(Register Reg);
209 void updateDeadFlags(Register Reg);
210 void recalculateLiveInterval(Register Reg);
211 void removeInstr(MachineInstr &MI);
212 void updateLiveness(const std::set<Register> &RegSet, bool Recalc,
213 bool UpdateKills, bool UpdateDeads);
214 void distributeLiveIntervals(const std::set<Register> &Regs);
215
216 unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
217 MachineInstr *genCondTfrFor(MachineOperand &SrcOp,
218 MachineBasicBlock::iterator At, unsigned DstR,
219 unsigned DstSR, const MachineOperand &PredOp, bool PredSense,
220 bool ReadUndef, bool ImpUse);
221 bool split(MachineInstr &MI, std::set<Register> &UpdRegs);
222
223 bool isPredicable(MachineInstr *MI);
224 MachineInstr *getReachingDefForPred(RegisterRef RD,
225 MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
226 bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses);
227 bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown);
228 void predicateAt(const MachineOperand &DefOp, MachineInstr &MI,
230 const MachineOperand &PredOp, bool Cond,
231 std::set<Register> &UpdRegs);
232 void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
235 bool predicate(MachineInstr &TfrI, bool Cond, std::set<Register> &UpdRegs);
236 bool predicateInBlock(MachineBasicBlock &B, std::set<Register> &UpdRegs);
237
238 bool isIntReg(RegisterRef RR, unsigned &BW);
239 bool isIntraBlocks(LiveInterval &LI);
240 bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
241 bool coalesceSegments(const SmallVectorImpl<MachineInstr *> &Condsets,
242 std::set<Register> &UpdRegs);
243 };
244
245} // end anonymous namespace
246
247char HexagonExpandCondsets::ID = 0;
248
249namespace llvm {
250
251 char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID;
252
253} // end namespace llvm
254
255INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets",
256 "Hexagon Expand Condsets", false, false)
260INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets",
261 "Hexagon Expand Condsets", false, false)
262
263unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
264 switch (Sub) {
265 case Hexagon::isub_lo:
266 case Hexagon::vsub_lo:
267 return Sub_Low;
268 case Hexagon::isub_hi:
269 case Hexagon::vsub_hi:
270 return Sub_High;
271 case Hexagon::NoSubRegister:
272 return Sub_None;
273 }
274 llvm_unreachable("Invalid subregister");
275}
276
277bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) {
278 unsigned Opc = MI.getOpcode();
279 switch (Opc) {
280 case Hexagon::C2_mux:
281 case Hexagon::C2_muxii:
282 case Hexagon::C2_muxir:
283 case Hexagon::C2_muxri:
284 case Hexagon::PS_pselect:
285 return true;
286 break;
287 }
288 return false;
289}
290
291LaneBitmask HexagonExpandCondsets::getLaneMask(Register Reg, unsigned Sub) {
292 assert(Reg.isVirtual());
293 return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
294 : MRI->getMaxLaneMaskForVReg(Reg);
295}
296
297void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
298 unsigned Exec) {
299 unsigned Mask = getMaskForSub(RR.Sub) | Exec;
300 Map[RR.Reg] |= Mask;
301}
302
303bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
304 unsigned Exec) {
305 ReferenceMap::iterator F = Map.find(RR.Reg);
306 if (F == Map.end())
307 return false;
308 unsigned Mask = getMaskForSub(RR.Sub) | Exec;
309 if (Mask & F->second)
310 return true;
311 return false;
312}
313
314void HexagonExpandCondsets::updateKillFlags(Register Reg) {
315 auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void {
316 // Set the <kill> flag on a use of Reg whose lane mask is contained in LM.
317 MachineInstr *MI = LIS->getInstructionFromIndex(K);
318 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
319 MachineOperand &Op = MI->getOperand(i);
320 if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg ||
321 MI->isRegTiedToDefOperand(i))
322 continue;
323 LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg());
324 if ((SLM & LM) == SLM) {
325 // Only set the kill flag on the first encountered use of Reg in this
326 // instruction.
327 Op.setIsKill(true);
328 break;
329 }
330 }
331 };
332
333 LiveInterval &LI = LIS->getInterval(Reg);
334 for (auto I = LI.begin(), E = LI.end(); I != E; ++I) {
335 if (!I->end.isRegister())
336 continue;
337 // Do not mark the end of the segment as <kill>, if the next segment
338 // starts with a predicated instruction.
339 auto NextI = std::next(I);
340 if (NextI != E && NextI->start.isRegister()) {
341 MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start);
342 if (HII->isPredicated(*DefI))
343 continue;
344 }
345 bool WholeReg = true;
346 if (LI.hasSubRanges()) {
347 auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool {
348 LiveRange::iterator F = S.find(I->end);
349 return F != S.end() && I->end == F->end;
350 };
351 // Check if all subranges end at I->end. If so, make sure to kill
352 // the whole register.
353 for (LiveInterval::SubRange &S : LI.subranges()) {
354 if (EndsAtI(S))
355 KillAt(I->end, S.LaneMask);
356 else
357 WholeReg = false;
358 }
359 }
360 if (WholeReg)
361 KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg));
362 }
363}
364
365void HexagonExpandCondsets::updateDeadsInRange(Register Reg, LaneBitmask LM,
366 LiveRange &Range) {
367 assert(Reg.isVirtual());
368 if (Range.empty())
369 return;
370
371 // Return two booleans: { def-modifes-reg, def-covers-reg }.
372 auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> {
373 if (!Op.isReg() || !Op.isDef())
374 return { false, false };
375 Register DR = Op.getReg(), DSR = Op.getSubReg();
376 if (!DR.isVirtual() || DR != Reg)
377 return { false, false };
378 LaneBitmask SLM = getLaneMask(DR, DSR);
379 LaneBitmask A = SLM & LM;
380 return { A.any(), A == SLM };
381 };
382
383 // The splitting step will create pairs of predicated definitions without
384 // any implicit uses (since implicit uses would interfere with predication).
385 // This can cause the reaching defs to become dead after live range
386 // recomputation, even though they are not really dead.
387 // We need to identify predicated defs that need implicit uses, and
388 // dead defs that are not really dead, and correct both problems.
389
390 auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs,
391 MachineBasicBlock *Dest) -> bool {
392 for (MachineBasicBlock *D : Defs) {
393 if (D != Dest && MDT->dominates(D, Dest))
394 return true;
395 }
396 MachineBasicBlock *Entry = &Dest->getParent()->front();
397 SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end());
398 for (unsigned i = 0; i < Work.size(); ++i) {
399 MachineBasicBlock *B = Work[i];
400 if (Defs.count(B))
401 continue;
402 if (B == Entry)
403 return false;
404 for (auto *P : B->predecessors())
405 Work.insert(P);
406 }
407 return true;
408 };
409
410 // First, try to extend live range within individual basic blocks. This
411 // will leave us only with dead defs that do not reach any predicated
412 // defs in the same block.
415 for (auto &Seg : Range) {
416 if (!Seg.start.isRegister())
417 continue;
418 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
419 Defs.insert(DefI->getParent());
420 if (HII->isPredicated(*DefI))
421 PredDefs.push_back(Seg.start);
422 }
423
425 LiveInterval &LI = LIS->getInterval(Reg);
426 LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes());
427
428 for (auto &SI : PredDefs) {
429 MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
430 auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI);
431 if (P.first != nullptr || P.second)
432 SI = SlotIndex();
433 }
434
435 // Calculate reachability for those predicated defs that were not handled
436 // by the in-block extension.
438 for (auto &SI : PredDefs) {
439 if (!SI.isValid())
440 continue;
441 MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
442 if (BB->pred_empty())
443 continue;
444 // If the defs from this range reach SI via all predecessors, it is live.
445 // It can happen that SI is reached by the defs through some paths, but
446 // not all. In the IR coming into this optimization, SI would not be
447 // considered live, since the defs would then not jointly dominate SI.
448 // That means that SI is an overwriting def, and no implicit use is
449 // needed at this point. Do not add SI to the extension points, since
450 // extendToIndices will abort if there is no joint dominance.
451 // If the abort was avoided by adding extra undefs added to Undefs,
452 // extendToIndices could actually indicate that SI is live, contrary
453 // to the original IR.
454 if (Dominate(Defs, BB))
455 ExtTo.push_back(SI);
456 }
457
458 if (!ExtTo.empty())
459 LIS->extendToIndices(Range, ExtTo, Undefs);
460
461 // Remove <dead> flags from all defs that are not dead after live range
462 // extension, and collect all def operands. They will be used to generate
463 // the necessary implicit uses.
464 // At the same time, add <dead> flag to all defs that are actually dead.
465 // This can happen, for example, when a mux with identical inputs is
466 // replaced with a COPY: the use of the predicate register disappears and
467 // the dead can become dead.
468 std::set<RegisterRef> DefRegs;
469 for (auto &Seg : Range) {
470 if (!Seg.start.isRegister())
471 continue;
472 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
473 for (auto &Op : DefI->operands()) {
474 auto P = IsRegDef(Op);
475 if (P.second && Seg.end.isDead()) {
476 Op.setIsDead(true);
477 } else if (P.first) {
478 DefRegs.insert(Op);
479 Op.setIsDead(false);
480 }
481 }
482 }
483
484 // Now, add implicit uses to each predicated def that is reached
485 // by other defs.
486 for (auto &Seg : Range) {
487 if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot()))
488 continue;
489 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
490 if (!HII->isPredicated(*DefI))
491 continue;
492 // Construct the set of all necessary implicit uses, based on the def
493 // operands in the instruction. We need to tie the implicit uses to
494 // the corresponding defs.
495 std::map<RegisterRef,unsigned> ImpUses;
496 for (unsigned i = 0, e = DefI->getNumOperands(); i != e; ++i) {
497 MachineOperand &Op = DefI->getOperand(i);
498 if (!Op.isReg() || !DefRegs.count(Op))
499 continue;
500 if (Op.isDef()) {
501 // Tied defs will always have corresponding uses, so no extra
502 // implicit uses are needed.
503 if (!Op.isTied())
504 ImpUses.insert({Op, i});
505 } else {
506 // This function can be called for the same register with different
507 // lane masks. If the def in this instruction was for the whole
508 // register, we can get here more than once. Avoid adding multiple
509 // implicit uses (or adding an implicit use when an explicit one is
510 // present).
511 if (Op.isTied())
512 ImpUses.erase(Op);
513 }
514 }
515 if (ImpUses.empty())
516 continue;
517 MachineFunction &MF = *DefI->getParent()->getParent();
518 for (auto [R, DefIdx] : ImpUses) {
519 MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
520 DefI->tieOperands(DefIdx, DefI->getNumOperands()-1);
521 }
522 }
523}
524
525void HexagonExpandCondsets::updateDeadFlags(Register Reg) {
526 LiveInterval &LI = LIS->getInterval(Reg);
527 if (LI.hasSubRanges()) {
528 for (LiveInterval::SubRange &S : LI.subranges()) {
529 updateDeadsInRange(Reg, S.LaneMask, S);
530 LIS->shrinkToUses(S, Reg);
531 }
532 LI.clear();
533 LIS->constructMainRangeFromSubranges(LI);
534 } else {
535 updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI);
536 }
537}
538
539void HexagonExpandCondsets::recalculateLiveInterval(Register Reg) {
540 LIS->removeInterval(Reg);
541 LIS->createAndComputeVirtRegInterval(Reg);
542}
543
544void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
545 LIS->RemoveMachineInstrFromMaps(MI);
546 MI.eraseFromParent();
547}
548
549void HexagonExpandCondsets::updateLiveness(const std::set<Register> &RegSet,
550 bool Recalc, bool UpdateKills,
551 bool UpdateDeads) {
552 UpdateKills |= UpdateDeads;
553 for (Register R : RegSet) {
554 if (!R.isVirtual()) {
555 assert(R.isPhysical());
556 // There shouldn't be any physical registers as operands, except
557 // possibly reserved registers.
558 assert(MRI->isReserved(R));
559 continue;
560 }
561 if (Recalc)
562 recalculateLiveInterval(R);
563 if (UpdateKills)
564 MRI->clearKillFlags(R);
565 if (UpdateDeads)
566 updateDeadFlags(R);
567 // Fixing <dead> flags may extend live ranges, so reset <kill> flags
568 // after that.
569 if (UpdateKills)
570 updateKillFlags(R);
571 assert(LIS->getInterval(R).verify());
572 }
573}
574
575void HexagonExpandCondsets::distributeLiveIntervals(
576 const std::set<Register> &Regs) {
577 ConnectedVNInfoEqClasses EQC(*LIS);
578 for (Register R : Regs) {
579 if (!R.isVirtual())
580 continue;
581 LiveInterval &LI = LIS->getInterval(R);
582 unsigned NumComp = EQC.Classify(LI);
583 if (NumComp == 1)
584 continue;
585
587 const TargetRegisterClass *RC = MRI->getRegClass(LI.reg());
588 for (unsigned I = 1; I < NumComp; ++I) {
589 Register NewR = MRI->createVirtualRegister(RC);
590 NewLIs.push_back(&LIS->createEmptyInterval(NewR));
591 }
592 EQC.Distribute(LI, NewLIs.begin(), *MRI);
593 }
594}
595
596/// Get the opcode for a conditional transfer of the value in SO (source
597/// operand). The condition (true/false) is given in Cond.
598unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
599 bool IfTrue) {
600 if (SO.isReg()) {
601 MCRegister PhysR;
602 RegisterRef RS = SO;
603 if (RS.Reg.isVirtual()) {
604 const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
605 assert(VC->begin() != VC->end() && "Empty register class");
606 PhysR = *VC->begin();
607 } else {
608 PhysR = RS.Reg;
609 }
610 MCRegister PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
611 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
612 switch (TRI->getRegSizeInBits(*RC)) {
613 case 32:
614 return IfTrue ? Hexagon::A2_tfrt : Hexagon::A2_tfrf;
615 case 64:
616 return IfTrue ? Hexagon::A2_tfrpt : Hexagon::A2_tfrpf;
617 }
618 llvm_unreachable("Invalid register operand");
619 }
620 switch (SO.getType()) {
629 return IfTrue ? Hexagon::C2_cmoveit : Hexagon::C2_cmoveif;
630 default:
631 break;
632 }
633 llvm_unreachable("Unexpected source operand");
634}
635
636/// Generate a conditional transfer, copying the value SrcOp to the
637/// destination register DstR:DstSR, and using the predicate register from
638/// PredOp. The Cond argument specifies whether the predicate is to be
639/// if(PredOp), or if(!PredOp).
640MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
642 unsigned DstR, unsigned DstSR, const MachineOperand &PredOp,
643 bool PredSense, bool ReadUndef, bool ImpUse) {
644 MachineInstr *MI = SrcOp.getParent();
645 MachineBasicBlock &B = *At->getParent();
646 const DebugLoc &DL = MI->getDebugLoc();
647
648 // Don't avoid identity copies here (i.e. if the source and the destination
649 // are the same registers). It is actually better to generate them here,
650 // since this would cause the copy to potentially be predicated in the next
651 // step. The predication will remove such a copy if it is unable to
652 /// predicate.
653
654 unsigned Opc = getCondTfrOpcode(SrcOp, PredSense);
655 unsigned DstState = RegState::Define | (ReadUndef ? RegState::Undef : 0);
656 unsigned PredState = getRegState(PredOp) & ~RegState::Kill;
658
659 if (SrcOp.isReg()) {
660 unsigned SrcState = getRegState(SrcOp);
661 if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR))
662 SrcState &= ~RegState::Kill;
663 MIB = BuildMI(B, At, DL, HII->get(Opc))
664 .addReg(DstR, DstState, DstSR)
665 .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
666 .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg());
667 } else {
668 MIB = BuildMI(B, At, DL, HII->get(Opc))
669 .addReg(DstR, DstState, DstSR)
670 .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
671 .add(SrcOp);
672 }
673
674 LLVM_DEBUG(dbgs() << "created an initial copy: " << *MIB);
675 return &*MIB;
676}
677
678/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
679/// performs all necessary changes to complete the replacement.
680bool HexagonExpandCondsets::split(MachineInstr &MI,
681 std::set<Register> &UpdRegs) {
682 if (TfrLimitActive) {
683 if (TfrCounter >= TfrLimit)
684 return false;
685 TfrCounter++;
686 }
687 LLVM_DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent())
688 << ": " << MI);
689 MachineOperand &MD = MI.getOperand(0); // Definition
690 MachineOperand &MP = MI.getOperand(1); // Predicate register
691 assert(MD.isDef());
692 Register DR = MD.getReg(), DSR = MD.getSubReg();
693 bool ReadUndef = MD.isUndef();
695
696 auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void {
697 for (auto &Op : MI.operands()) {
698 if (Op.isReg())
699 UpdRegs.insert(Op.getReg());
700 }
701 };
702
703 // If this is a mux of the same register, just replace it with COPY.
704 // Ideally, this would happen earlier, so that register coalescing would
705 // see it.
706 MachineOperand &ST = MI.getOperand(2);
707 MachineOperand &SF = MI.getOperand(3);
708 if (ST.isReg() && SF.isReg()) {
709 RegisterRef RT(ST);
710 if (RT == RegisterRef(SF)) {
711 // Copy regs to update first.
712 updateRegs(MI);
713 MI.setDesc(HII->get(TargetOpcode::COPY));
714 unsigned S = getRegState(ST);
715 while (MI.getNumOperands() > 1)
716 MI.removeOperand(MI.getNumOperands()-1);
717 MachineFunction &MF = *MI.getParent()->getParent();
718 MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub);
719 return true;
720 }
721 }
722
723 // First, create the two invididual conditional transfers, and add each
724 // of them to the live intervals information. Do that first and then remove
725 // the old instruction from live intervals.
726 MachineInstr *TfrT =
727 genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false);
728 MachineInstr *TfrF =
729 genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true);
730 LIS->InsertMachineInstrInMaps(*TfrT);
731 LIS->InsertMachineInstrInMaps(*TfrF);
732
733 // Will need to recalculate live intervals for all registers in MI.
734 updateRegs(MI);
735
736 removeInstr(MI);
737 return true;
738}
739
740bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
741 if (HII->isPredicated(*MI) || !HII->isPredicable(*MI))
742 return false;
743 if (MI->hasUnmodeledSideEffects() || MI->mayStore())
744 return false;
745 // Reject instructions with multiple defs (e.g. post-increment loads).
746 bool HasDef = false;
747 for (auto &Op : MI->operands()) {
748 if (!Op.isReg() || !Op.isDef())
749 continue;
750 if (HasDef)
751 return false;
752 HasDef = true;
753 }
754 for (auto &Mo : MI->memoperands()) {
755 if (Mo->isVolatile() || Mo->isAtomic())
756 return false;
757 }
758 return true;
759}
760
761/// Find the reaching definition for a predicated use of RD. The RD is used
762/// under the conditions given by PredR and Cond, and this function will ignore
763/// definitions that set RD under the opposite conditions.
764MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
765 MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
766 MachineBasicBlock &B = *UseIt->getParent();
767 MachineBasicBlock::iterator I = UseIt, S = B.begin();
768 if (I == S)
769 return nullptr;
770
771 bool PredValid = true;
772 do {
773 --I;
774 MachineInstr *MI = &*I;
775 // Check if this instruction can be ignored, i.e. if it is predicated
776 // on the complementary condition.
777 if (PredValid && HII->isPredicated(*MI)) {
778 if (MI->readsRegister(PredR, /*TRI=*/nullptr) &&
779 (Cond != HII->isPredicatedTrue(*MI)))
780 continue;
781 }
782
783 // Check the defs. If the PredR is defined, invalidate it. If RD is
784 // defined, return the instruction or 0, depending on the circumstances.
785 for (auto &Op : MI->operands()) {
786 if (!Op.isReg() || !Op.isDef())
787 continue;
788 RegisterRef RR = Op;
789 if (RR.Reg == PredR) {
790 PredValid = false;
791 continue;
792 }
793 if (RR.Reg != RD.Reg)
794 continue;
795 // If the "Reg" part agrees, there is still the subregister to check.
796 // If we are looking for %1:loreg, we can skip %1:hireg, but
797 // not %1 (w/o subregisters).
798 if (RR.Sub == RD.Sub)
799 return MI;
800 if (RR.Sub == 0 || RD.Sub == 0)
801 return nullptr;
802 // We have different subregisters, so we can continue looking.
803 }
804 } while (I != S);
805
806 return nullptr;
807}
808
809/// Check if the instruction MI can be safely moved over a set of instructions
810/// whose side-effects (in terms of register defs and uses) are expressed in
811/// the maps Defs and Uses. These maps reflect the conditional defs and uses
812/// that depend on the same predicate register to allow moving instructions
813/// over instructions predicated on the opposite condition.
814bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs,
815 ReferenceMap &Uses) {
816 // In order to be able to safely move MI over instructions that define
817 // "Defs" and use "Uses", no def operand from MI can be defined or used
818 // and no use operand can be defined.
819 for (auto &Op : MI.operands()) {
820 if (!Op.isReg())
821 continue;
822 RegisterRef RR = Op;
823 // For physical register we would need to check register aliases, etc.
824 // and we don't want to bother with that. It would be of little value
825 // before the actual register rewriting (from virtual to physical).
826 if (!RR.Reg.isVirtual())
827 return false;
828 // No redefs for any operand.
829 if (isRefInMap(RR, Defs, Exec_Then))
830 return false;
831 // For defs, there cannot be uses.
832 if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then))
833 return false;
834 }
835 return true;
836}
837
838/// Check if the instruction accessing memory (TheI) can be moved to the
839/// location ToI.
840bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI,
841 bool IsDown) {
842 bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore();
843 if (!IsLoad && !IsStore)
844 return true;
845 if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
846 return true;
847 if (TheI.hasUnmodeledSideEffects())
848 return false;
849
850 MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI;
851 MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI;
852 bool Ordered = TheI.hasOrderedMemoryRef();
853
854 // Search for aliased memory reference in (StartI, EndI).
855 for (MachineInstr &MI : llvm::make_range(std::next(StartI), EndI)) {
856 if (MI.hasUnmodeledSideEffects())
857 return false;
858 bool L = MI.mayLoad(), S = MI.mayStore();
859 if (!L && !S)
860 continue;
861 if (Ordered && MI.hasOrderedMemoryRef())
862 return false;
863
864 bool Conflict = (L && IsStore) || S;
865 if (Conflict)
866 return false;
867 }
868 return true;
869}
870
871/// Generate a predicated version of MI (where the condition is given via
872/// PredR and Cond) at the point indicated by Where.
873void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
876 const MachineOperand &PredOp, bool Cond,
877 std::set<Register> &UpdRegs) {
878 // The problem with updating live intervals is that we can move one def
879 // past another def. In particular, this can happen when moving an A2_tfrt
880 // over an A2_tfrf defining the same register. From the point of view of
881 // live intervals, these two instructions are two separate definitions,
882 // and each one starts another live segment. LiveIntervals's "handleMove"
883 // does not allow such moves, so we need to handle it ourselves. To avoid
884 // invalidating liveness data while we are using it, the move will be
885 // implemented in 4 steps: (1) add a clone of the instruction MI at the
886 // target location, (2) update liveness, (3) delete the old instruction,
887 // and (4) update liveness again.
888
889 MachineBasicBlock &B = *MI.getParent();
890 DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction.
891 unsigned Opc = MI.getOpcode();
892 unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
893 MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
894 unsigned Ox = 0, NP = MI.getNumOperands();
895 // Skip all defs from MI first.
896 while (Ox < NP) {
897 MachineOperand &MO = MI.getOperand(Ox);
898 if (!MO.isReg() || !MO.isDef())
899 break;
900 Ox++;
901 }
902 // Add the new def, then the predicate register, then the rest of the
903 // operands.
904 MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg());
905 MB.addReg(PredOp.getReg(), PredOp.isUndef() ? RegState::Undef : 0,
906 PredOp.getSubReg());
907 while (Ox < NP) {
908 MachineOperand &MO = MI.getOperand(Ox);
909 if (!MO.isReg() || !MO.isImplicit())
910 MB.add(MO);
911 Ox++;
912 }
913 MB.cloneMemRefs(MI);
914
915 MachineInstr *NewI = MB;
916 NewI->clearKillInfo();
917 LIS->InsertMachineInstrInMaps(*NewI);
918
919 for (auto &Op : NewI->operands()) {
920 if (Op.isReg())
921 UpdRegs.insert(Op.getReg());
922 }
923}
924
925/// In the range [First, Last], rename all references to the "old" register RO
926/// to the "new" register RN, but only in instructions predicated on the given
927/// condition.
928void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
929 unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
933 // Do not touch instructions that are not predicated, or are predicated
934 // on the opposite condition.
935 if (!HII->isPredicated(MI))
936 continue;
937 if (!MI.readsRegister(PredR, /*TRI=*/nullptr) ||
938 (Cond != HII->isPredicatedTrue(MI)))
939 continue;
940
941 for (auto &Op : MI.operands()) {
942 if (!Op.isReg() || RO != RegisterRef(Op))
943 continue;
944 Op.setReg(RN.Reg);
945 Op.setSubReg(RN.Sub);
946 // In practice, this isn't supposed to see any defs.
947 assert(!Op.isDef() && "Not expecting a def");
948 }
949 }
950}
951
952/// For a given conditional copy, predicate the definition of the source of
953/// the copy under the given condition (using the same predicate register as
954/// the copy).
955bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
956 std::set<Register> &UpdRegs) {
957 // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
958 unsigned Opc = TfrI.getOpcode();
959 (void)Opc;
960 assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
961 LLVM_DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
962 << ": " << TfrI);
963
964 MachineOperand &MD = TfrI.getOperand(0);
965 MachineOperand &MP = TfrI.getOperand(1);
966 MachineOperand &MS = TfrI.getOperand(2);
967 // The source operand should be a <kill>. This is not strictly necessary,
968 // but it makes things a lot simpler. Otherwise, we would need to rename
969 // some registers, which would complicate the transformation considerably.
970 if (!MS.isKill())
971 return false;
972 // Avoid predicating instructions that define a subregister if subregister
973 // liveness tracking is not enabled.
974 if (MD.getSubReg() && !MRI->shouldTrackSubRegLiveness(MD.getReg()))
975 return false;
976
977 RegisterRef RT(MS);
978 Register PredR = MP.getReg();
979 MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
980 if (!DefI || !isPredicable(DefI))
981 return false;
982
983 LLVM_DEBUG(dbgs() << "Source def: " << *DefI);
984
985 // Collect the information about registers defined and used between the
986 // DefI and the TfrI.
987 // Map: reg -> bitmask of subregs
988 ReferenceMap Uses, Defs;
989 MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
990
991 // Check if the predicate register is valid between DefI and TfrI.
992 // If it is, we can then ignore instructions predicated on the negated
993 // conditions when collecting def and use information.
994 bool PredValid = true;
995 for (MachineInstr &MI : llvm::make_range(std::next(DefIt), TfrIt)) {
996 if (!MI.modifiesRegister(PredR, nullptr))
997 continue;
998 PredValid = false;
999 break;
1000 }
1001
1002 for (MachineInstr &MI : llvm::make_range(std::next(DefIt), TfrIt)) {
1003 // If this instruction is predicated on the same register, it could
1004 // potentially be ignored.
1005 // By default assume that the instruction executes on the same condition
1006 // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
1007 unsigned Exec = Exec_Then | Exec_Else;
1008 if (PredValid && HII->isPredicated(MI) &&
1009 MI.readsRegister(PredR, /*TRI=*/nullptr))
1010 Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
1011
1012 for (auto &Op : MI.operands()) {
1013 if (!Op.isReg())
1014 continue;
1015 // We don't want to deal with physical registers. The reason is that
1016 // they can be aliased with other physical registers. Aliased virtual
1017 // registers must share the same register number, and can only differ
1018 // in the subregisters, which we are keeping track of. Physical
1019 // registers ters no longer have subregisters---their super- and
1020 // subregisters are other physical registers, and we are not checking
1021 // that.
1022 RegisterRef RR = Op;
1023 if (!RR.Reg.isVirtual())
1024 return false;
1025
1026 ReferenceMap &Map = Op.isDef() ? Defs : Uses;
1027 if (Op.isDef() && Op.isUndef()) {
1028 assert(RR.Sub && "Expecting a subregister on <def,read-undef>");
1029 // If this is a <def,read-undef>, then it invalidates the non-written
1030 // part of the register. For the purpose of checking the validity of
1031 // the move, assume that it modifies the whole register.
1032 RR.Sub = 0;
1033 }
1034 addRefToMap(RR, Map, Exec);
1035 }
1036 }
1037
1038 // The situation:
1039 // RT = DefI
1040 // ...
1041 // RD = TfrI ..., RT
1042
1043 // If the register-in-the-middle (RT) is used or redefined between
1044 // DefI and TfrI, we may not be able proceed with this transformation.
1045 // We can ignore a def that will not execute together with TfrI, and a
1046 // use that will. If there is such a use (that does execute together with
1047 // TfrI), we will not be able to move DefI down. If there is a use that
1048 // executed if TfrI's condition is false, then RT must be available
1049 // unconditionally (cannot be predicated).
1050 // Essentially, we need to be able to rename RT to RD in this segment.
1051 if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
1052 return false;
1053 RegisterRef RD = MD;
1054 // If the predicate register is defined between DefI and TfrI, the only
1055 // potential thing to do would be to move the DefI down to TfrI, and then
1056 // predicate. The reaching def (DefI) must be movable down to the location
1057 // of the TfrI.
1058 // If the target register of the TfrI (RD) is not used or defined between
1059 // DefI and TfrI, consider moving TfrI up to DefI.
1060 bool CanUp = canMoveOver(TfrI, Defs, Uses);
1061 bool CanDown = canMoveOver(*DefI, Defs, Uses);
1062 // The TfrI does not access memory, but DefI could. Check if it's safe
1063 // to move DefI down to TfrI.
1064 if (DefI->mayLoadOrStore()) {
1065 if (!canMoveMemTo(*DefI, TfrI, true))
1066 CanDown = false;
1067 }
1068
1069 LLVM_DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
1070 << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
1071 MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
1072 if (CanUp)
1073 predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs);
1074 else if (CanDown)
1075 predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs);
1076 else
1077 return false;
1078
1079 if (RT != RD) {
1080 renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
1081 UpdRegs.insert(RT.Reg);
1082 }
1083
1084 removeInstr(TfrI);
1085 removeInstr(*DefI);
1086 return true;
1087}
1088
1089/// Predicate all cases of conditional copies in the specified block.
1090bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
1091 std::set<Register> &UpdRegs) {
1092 bool Changed = false;
1094 unsigned Opc = MI.getOpcode();
1095 if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
1096 bool Done = predicate(MI, (Opc == Hexagon::A2_tfrt), UpdRegs);
1097 if (!Done) {
1098 // If we didn't predicate I, we may need to remove it in case it is
1099 // an "identity" copy, e.g. %1 = A2_tfrt %2, %1.
1100 if (RegisterRef(MI.getOperand(0)) == RegisterRef(MI.getOperand(2))) {
1101 for (auto &Op : MI.operands()) {
1102 if (Op.isReg())
1103 UpdRegs.insert(Op.getReg());
1104 }
1105 removeInstr(MI);
1106 }
1107 }
1108 Changed |= Done;
1109 }
1110 }
1111 return Changed;
1112}
1113
1114bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
1115 if (!RR.Reg.isVirtual())
1116 return false;
1117 const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
1118 if (RC == &Hexagon::IntRegsRegClass) {
1119 BW = 32;
1120 return true;
1121 }
1122 if (RC == &Hexagon::DoubleRegsRegClass) {
1123 BW = (RR.Sub != 0) ? 32 : 64;
1124 return true;
1125 }
1126 return false;
1127}
1128
1129bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
1130 for (LiveRange::Segment &LR : LI) {
1131 // Range must start at a register...
1132 if (!LR.start.isRegister())
1133 return false;
1134 // ...and end in a register or in a dead slot.
1135 if (!LR.end.isRegister() && !LR.end.isDead())
1136 return false;
1137 }
1138 return true;
1139}
1140
1141bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
1142 if (CoaLimitActive) {
1143 if (CoaCounter >= CoaLimit)
1144 return false;
1145 CoaCounter++;
1146 }
1147 unsigned BW1, BW2;
1148 if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
1149 return false;
1150 if (MRI->isLiveIn(R1.Reg))
1151 return false;
1152 if (MRI->isLiveIn(R2.Reg))
1153 return false;
1154
1155 LiveInterval &L1 = LIS->getInterval(R1.Reg);
1156 LiveInterval &L2 = LIS->getInterval(R2.Reg);
1157 if (L2.empty())
1158 return false;
1159 if (L1.hasSubRanges() || L2.hasSubRanges())
1160 return false;
1161 bool Overlap = L1.overlaps(L2);
1162
1163 LLVM_DEBUG(dbgs() << "compatible registers: ("
1164 << (Overlap ? "overlap" : "disjoint") << ")\n "
1165 << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
1166 << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
1167 if (R1.Sub || R2.Sub)
1168 return false;
1169 if (Overlap)
1170 return false;
1171
1172 // Coalescing could have a negative impact on scheduling, so try to limit
1173 // to some reasonable extent. Only consider coalescing segments, when one
1174 // of them does not cross basic block boundaries.
1175 if (!isIntraBlocks(L1) && !isIntraBlocks(L2))
1176 return false;
1177
1178 MRI->replaceRegWith(R2.Reg, R1.Reg);
1179
1180 // Move all live segments from L2 to L1.
1181 using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>;
1182 ValueInfoMap VM;
1183 for (LiveRange::Segment &I : L2) {
1184 VNInfo *NewVN, *OldVN = I.valno;
1185 ValueInfoMap::iterator F = VM.find(OldVN);
1186 if (F == VM.end()) {
1187 NewVN = L1.getNextValue(I.valno->def, LIS->getVNInfoAllocator());
1188 VM.insert(std::make_pair(OldVN, NewVN));
1189 } else {
1190 NewVN = F->second;
1191 }
1192 L1.addSegment(LiveRange::Segment(I.start, I.end, NewVN));
1193 }
1194 while (!L2.empty())
1195 L2.removeSegment(*L2.begin());
1196 LIS->removeInterval(R2.Reg);
1197
1198 updateKillFlags(R1.Reg);
1199 LLVM_DEBUG(dbgs() << "coalesced: " << L1 << "\n");
1200 assert(L1.verify());
1201
1202 return true;
1203}
1204
1205/// Attempt to coalesce one of the source registers to a MUX instruction with
1206/// the destination register. This could lead to having only one predicated
1207/// instruction in the end instead of two.
1208bool HexagonExpandCondsets::coalesceSegments(
1210 std::set<Register> &UpdRegs) {
1212 for (MachineInstr *MI : Condsets) {
1213 MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
1214 if (!S1.isReg() && !S2.isReg())
1215 continue;
1216 TwoRegs.push_back(MI);
1217 }
1218
1219 bool Changed = false;
1220 for (MachineInstr *CI : TwoRegs) {
1221 RegisterRef RD = CI->getOperand(0);
1222 RegisterRef RP = CI->getOperand(1);
1223 MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
1224 bool Done = false;
1225 // Consider this case:
1226 // %1 = instr1 ...
1227 // %2 = instr2 ...
1228 // %0 = C2_mux ..., %1, %2
1229 // If %0 was coalesced with %1, we could end up with the following
1230 // code:
1231 // %0 = instr1 ...
1232 // %2 = instr2 ...
1233 // %0 = A2_tfrf ..., %2
1234 // which will later become:
1235 // %0 = instr1 ...
1236 // %0 = instr2_cNotPt ...
1237 // i.e. there will be an unconditional definition (instr1) of %0
1238 // followed by a conditional one. The output dependency was there before
1239 // and it unavoidable, but if instr1 is predicable, we will no longer be
1240 // able to predicate it here.
1241 // To avoid this scenario, don't coalesce the destination register with
1242 // a source register that is defined by a predicable instruction.
1243 if (S1.isReg()) {
1244 RegisterRef RS = S1;
1245 MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
1246 if (!RDef || !HII->isPredicable(*RDef)) {
1247 Done = coalesceRegisters(RD, RegisterRef(S1));
1248 if (Done) {
1249 UpdRegs.insert(RD.Reg);
1250 UpdRegs.insert(S1.getReg());
1251 }
1252 }
1253 }
1254 if (!Done && S2.isReg()) {
1255 RegisterRef RS = S2;
1256 MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
1257 if (!RDef || !HII->isPredicable(*RDef)) {
1258 Done = coalesceRegisters(RD, RegisterRef(S2));
1259 if (Done) {
1260 UpdRegs.insert(RD.Reg);
1261 UpdRegs.insert(S2.getReg());
1262 }
1263 }
1264 }
1265 Changed |= Done;
1266 }
1267 return Changed;
1268}
1269
1270bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
1271 if (skipFunction(MF.getFunction()))
1272 return false;
1273
1274 HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
1276 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1277 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
1278 MRI = &MF.getRegInfo();
1279
1280 LLVM_DEBUG(LIS->print(dbgs() << "Before expand-condsets\n"));
1281
1282 bool Changed = false;
1283 std::set<Register> CoalUpd, PredUpd;
1284
1286 for (auto &B : MF) {
1287 for (auto &I : B) {
1288 if (isCondset(I))
1289 Condsets.push_back(&I);
1290 }
1291 }
1292
1293 // Try to coalesce the target of a mux with one of its sources.
1294 // This could eliminate a register copy in some circumstances.
1295 Changed |= coalesceSegments(Condsets, CoalUpd);
1296
1297 // Update kill flags on all source operands. This is done here because
1298 // at this moment (when expand-condsets runs), there are no kill flags
1299 // in the IR (they have been removed by live range analysis).
1300 // Updating them right before we split is the easiest, because splitting
1301 // adds definitions which would interfere with updating kills afterwards.
1302 std::set<Register> KillUpd;
1303 for (MachineInstr *MI : Condsets) {
1304 for (MachineOperand &Op : MI->operands()) {
1305 if (Op.isReg() && Op.isUse()) {
1306 if (!CoalUpd.count(Op.getReg()))
1307 KillUpd.insert(Op.getReg());
1308 }
1309 }
1310 }
1311 updateLiveness(KillUpd, false, true, false);
1312 LLVM_DEBUG(LIS->print(dbgs() << "After coalescing\n"));
1313
1314 // First, simply split all muxes into a pair of conditional transfers
1315 // and update the live intervals to reflect the new arrangement. The
1316 // goal is to update the kill flags, since predication will rely on
1317 // them.
1318 for (MachineInstr *MI : Condsets)
1319 Changed |= split(*MI, PredUpd);
1320 Condsets.clear(); // The contents of Condsets are invalid here anyway.
1321
1322 // Do not update live ranges after splitting. Recalculation of live
1323 // intervals removes kill flags, which were preserved by splitting on
1324 // the source operands of condsets. These kill flags are needed by
1325 // predication, and after splitting they are difficult to recalculate
1326 // (because of predicated defs), so make sure they are left untouched.
1327 // Predication does not use live intervals.
1328 LLVM_DEBUG(LIS->print(dbgs() << "After splitting\n"));
1329
1330 // Traverse all blocks and collapse predicable instructions feeding
1331 // conditional transfers into predicated instructions.
1332 // Walk over all the instructions again, so we may catch pre-existing
1333 // cases that were not created in the previous step.
1334 for (auto &B : MF)
1335 Changed |= predicateInBlock(B, PredUpd);
1336 LLVM_DEBUG(LIS->print(dbgs() << "After predicating\n"));
1337
1338 PredUpd.insert(CoalUpd.begin(), CoalUpd.end());
1339 updateLiveness(PredUpd, true, true, true);
1340
1341 if (Changed)
1342 distributeLiveIntervals(PredUpd);
1343
1344 LLVM_DEBUG({
1345 if (Changed)
1346 LIS->print(dbgs() << "After expand-condsets\n");
1347 });
1348
1349 return Changed;
1350}
1351
1352//===----------------------------------------------------------------------===//
1353// Public Constructor Functions
1354//===----------------------------------------------------------------------===//
1356 return new HexagonExpandCondsets();
1357}
unsigned const MachineRegisterInfo * MRI
static const LLT S1
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21
static cl::opt< unsigned > OptCoaLimit("expand-condsets-coa-limit", cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"))
expand condsets
static cl::opt< unsigned > OptTfrLimit("expand-condsets-tfr-limit", cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"))
expand Hexagon Expand Condsets
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static void updateLiveness(MachineFunction &MF)
Helper function to update the liveness information for the callee-saved registers.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ConnectedVNInfoEqClasses - Helper class that can divide VNInfos in a LiveInterval into equivalence cl...
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
A live range for subregisters.
Definition: LiveInterval.h:694
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
Register reg() const
Definition: LiveInterval.h:718
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
bool verify(const MachineRegisterInfo *MRI=nullptr) const
Walks the interval and assert if any invariants fail to hold.
void computeSubRangeUndefs(SmallVectorImpl< SlotIndex > &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const
For a given lane mask LaneMask, compute indexes at which the lane is marked undefined by subregister ...
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
iterator end()
Definition: LiveInterval.h:216
iterator begin()
Definition: LiveInterval.h:215
VNInfo * getNextValue(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator)
getNextValue - Create a new value number and return it.
Definition: LiveInterval.h:331
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:578
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:691
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
void clearKillInfo()
Clears kill flags on all operands.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Register getReg() const
getReg - Returns the register number.
@ MO_Immediate
Immediate operand.
@ MO_ConstantPoolIndex
Address of indexed Constant in Constant Pool.
@ MO_GlobalAddress
Address of a global value.
@ MO_BlockAddress
Address of a basic block.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_JumpTableIndex
Address of indexed Jump Table for switch.
@ MO_TargetIndex
Target-dependent index+offset operand.
@ MO_FPImmediate
Floating-point immediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Register getReg() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool isIntReg(MCRegister Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Done
Definition: Threading.h:61
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
FunctionPass * createHexagonExpandCondsets()
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
void initializeHexagonExpandCondsetsPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
char & HexagonExpandCondsetsID
DWARFExpression::Operation Op
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162