HexagonGenInsert.cpp
1//===- HexagonGenInsert.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BitTracker.h"
10#include "Hexagon.h"
11#include "HexagonBitTracker.h"
12#include "HexagonInstrInfo.h"
13#include "HexagonRegisterInfo.h"
14#include "HexagonSubtarget.h"
15#include "llvm/ADT/BitVector.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringRef.h"
31#include "llvm/IR/DebugLoc.h"
33#include "llvm/Pass.h"
35#include "llvm/Support/Debug.h"
37#include "llvm/Support/Timer.h"
39#include <algorithm>
40#include <cassert>
41#include <cstdint>
42#include <iterator>
43#include <utility>
44#include <vector>
45
46#define DEBUG_TYPE "hexinsert"
47
48using namespace llvm;
49
50static cl::opt<unsigned>
51 VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
52 cl::desc("Vreg# cutoff for insert generation."));
53// The distance cutoff is selected based on the precheckin-perf results:
54// cutoffs 20, 25, 35, and 40 are worse than 30.
55static cl::opt<unsigned>
56 VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
57 cl::desc("Vreg distance cutoff for insert "
58 "generation."));
59
60// Limit the container sizes for extreme cases where we run out of memory.
61static cl::opt<unsigned>
62 MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
63 cl::desc("Maximum size of OrderedRegisterList"));
64static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
65 cl::Hidden,
66 cl::desc("Maximum size of IFMap"));
67
68static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
69 cl::desc("Enable timing of insert generation"));
70static cl::opt<bool>
71 OptTimingDetail("insert-timing-detail", cl::Hidden,
72 cl::desc("Enable detailed timing of insert "
73 "generation"));
74
75static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
76static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
77// Whether to construct constant values via "insert". Could eliminate constant
78// extenders, but often not practical.
79static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
80
81// The preprocessor gets confused when the DEBUG macro is passed larger
82// chunks of code. Use this function to detect debugging.
83inline static bool isDebug() {
84#ifndef NDEBUG
85 return DebugFlag && isCurrentDebugType(DEBUG_TYPE);
86#else
87 return false;
88#endif
89}
90
91namespace {
92
93 // Set of virtual registers, based on BitVector.
94 struct RegisterSet : private BitVector {
95 RegisterSet() = default;
96 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
97 RegisterSet(const RegisterSet &RS) = default;
98 RegisterSet &operator=(const RegisterSet &RS) = default;
99
100 using BitVector::clear;
101
102 unsigned find_first() const {
103 int First = BitVector::find_first();
104 if (First < 0)
105 return 0;
106 return x2v(First);
107 }
108
109 unsigned find_next(unsigned Prev) const {
110 int Next = BitVector::find_next(v2x(Prev));
111 if (Next < 0)
112 return 0;
113 return x2v(Next);
114 }
115
116 RegisterSet &insert(unsigned R) {
117 unsigned Idx = v2x(R);
118 ensure(Idx);
119 return static_cast<RegisterSet&>(BitVector::set(Idx));
120 }
121 RegisterSet &remove(unsigned R) {
122 unsigned Idx = v2x(R);
123 if (Idx >= size())
124 return *this;
125 return static_cast<RegisterSet&>(BitVector::reset(Idx));
126 }
127
128 RegisterSet &insert(const RegisterSet &Rs) {
129 return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
130 }
131 RegisterSet &remove(const RegisterSet &Rs) {
132 return static_cast<RegisterSet&>(BitVector::reset(Rs));
133 }
134
135 reference operator[](unsigned R) {
136 unsigned Idx = v2x(R);
137 ensure(Idx);
138 return BitVector::operator[](Idx);
139 }
140 bool operator[](unsigned R) const {
141 unsigned Idx = v2x(R);
142 assert(Idx < size());
143 return BitVector::operator[](Idx);
144 }
145 bool has(unsigned R) const {
146 unsigned Idx = v2x(R);
147 if (Idx >= size())
148 return false;
149 return BitVector::test(Idx);
150 }
151
152 bool empty() const {
153 return !BitVector::any();
154 }
155 bool includes(const RegisterSet &Rs) const {
156 // A.BitVector::test(B) <=> A-B != {}
157 return !Rs.BitVector::test(*this);
158 }
159 bool intersects(const RegisterSet &Rs) const {
160 return BitVector::anyCommon(Rs);
161 }
162
163 private:
164 void ensure(unsigned Idx) {
165 if (size() <= Idx)
166 resize(std::max(Idx+1, 32U));
167 }
168
169 static inline unsigned v2x(unsigned v) {
170 return Register(v).virtRegIndex();
171 }
172
173 static inline unsigned x2v(unsigned x) {
174 return Register::index2VirtReg(x);
175 }
176 };
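// Note: the set stores virtual registers by their zero-based index
// (v2x uses Register::virtRegIndex), keeping the underlying BitVector
// compact. find_first/find_next translate back via index2VirtReg and
// return 0 as the "no more registers" sentinel, which is unambiguous
// because 0 is never a virtual register number.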
177
178 struct PrintRegSet {
179 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
180 : RS(S), TRI(RI) {}
181
182 friend raw_ostream &operator<< (raw_ostream &OS,
183 const PrintRegSet &P);
184
185 private:
186 const RegisterSet &RS;
187 const TargetRegisterInfo *TRI;
188 };
189
190 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
191 OS << '{';
192 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
193 OS << ' ' << printReg(R, P.TRI);
194 OS << " }";
195 return OS;
196 }
197
198 // A convenience class to associate unsigned numbers (such as virtual
199 // registers) with unsigned numbers.
200 struct UnsignedMap : public DenseMap<unsigned,unsigned> {
201 UnsignedMap() = default;
202
203 private:
204 using BaseType = DenseMap<unsigned, unsigned>;
205 };
206
207 // A utility to establish an ordering between virtual registers:
208 // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
209 // This is meant as a cache for the ordering of virtual registers defined
210 // by a potentially expensive comparison function, or obtained by a proce-
211 // dure that should not be repeated each time two registers are compared.
212 struct RegisterOrdering : public UnsignedMap {
213 RegisterOrdering() = default;
214
215 unsigned operator[](unsigned VR) const {
216 const_iterator F = find(VR);
217 assert(F != end());
218 return F->second;
219 }
220
221 // Add operator(), so that objects of this class can be used as
222 // comparators in std::sort et al.
223 bool operator() (unsigned VR1, unsigned VR2) const {
224 return operator[](VR1) < operator[](VR2);
225 }
226 };
227
228 // Ordering of bit values. This class does not have operator[], but
229 // it supplies a comparison operator() for use in std:: algorithms.
230 // The order is as follows:
231 // - 0 < 1 < ref
232 // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
233 // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
234 struct BitValueOrdering {
235 BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
236
237 bool operator() (const BitTracker::BitValue &V1,
238 const BitTracker::BitValue &V2) const;
239
240 const RegisterOrdering &BaseOrd;
241 };
242
243} // end anonymous namespace
244
245bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
246 const BitTracker::BitValue &V2) const {
247 if (V1 == V2)
248 return false;
249 // V1==0 => true, V2==0 => false
250 if (V1.is(0) || V2.is(0))
251 return V1.is(0);
252 // Neither of V1,V2 is 0, and V1!=V2.
253 // V2==1 => false, V1==1 => true
254 if (V2.is(1) || V1.is(1))
255 return !V2.is(1);
256 // Both V1,V2 are refs.
257 unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
258 if (Ind1 != Ind2)
259 return Ind1 < Ind2;
260 // If V1.Pos==V2.Pos, then V1==V2, which was handled above.
261 assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
262 return V1.RefI.Pos < V2.RefI.Pos;
263}
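// Illustrative example of this ordering (with assumed base indices
// BaseOrd[%10]=0 and BaseOrd[%11]=1):
//   0 < 1 < %10[2] < %10[5] < %11[0]
// i.e. constant bits come first, references are ordered by the base
// register ordering, and ties are broken by the bit position.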
264
265namespace {
266
267 // Cache for the BitTracker's cell map. Map lookup has a logarithmic
268 // complexity, so this class memoizes the lookup results to reduce
269 // the access time for repeated lookups of the same cell.
270 struct CellMapShadow {
271 CellMapShadow(const BitTracker &T) : BT(T) {}
272
273 const BitTracker::RegisterCell &lookup(unsigned VR) {
274 unsigned RInd = Register(VR).virtRegIndex();
275 // Grow the vector to at least 32 elements.
276 if (RInd >= CVect.size())
277 CVect.resize(std::max(RInd+16, 32U), nullptr);
278 const BitTracker::RegisterCell *CP = CVect[RInd];
279 if (CP == nullptr)
280 CP = CVect[RInd] = &BT.lookup(VR);
281 return *CP;
282 }
283
284 const BitTracker &BT;
285
286 private:
287 using CellVectType = std::vector<const BitTracker::RegisterCell *>;
288
289 CellVectType CVect;
290 };
291
292 // Comparator class for lexicographic ordering of virtual registers
293 // according to the corresponding BitTracker::RegisterCell objects.
294 struct RegisterCellLexCompare {
295 RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
296 : BitOrd(BO), CM(M) {}
297
298 bool operator() (unsigned VR1, unsigned VR2) const;
299
300 private:
301 const BitValueOrdering &BitOrd;
302 CellMapShadow &CM;
303 };
304
305 // Comparator class for lexicographic ordering of virtual registers
306 // according to the specified bits of the corresponding BitTracker::
307 // RegisterCell objects.
308 // Specifically, this class will be used to compare bit B of a register
309 // cell for a selected virtual register R with bit N of any register
310 // other than R.
311 struct RegisterCellBitCompareSel {
312 RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
313 const BitValueOrdering &BO, CellMapShadow &M)
314 : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
315
316 bool operator() (unsigned VR1, unsigned VR2) const;
317
318 private:
319 const unsigned SelR, SelB;
320 const unsigned BitN;
321 const BitValueOrdering &BitOrd;
322 CellMapShadow &CM;
323 };
324
325} // end anonymous namespace
326
327bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
328 // Ordering of registers, made up from two given orderings:
329 // - the ordering of the register numbers, and
330 // - the ordering of register cells.
331 // Def. R1 < R2 if:
332 // - cell(R1) < cell(R2), or
333 // - cell(R1) == cell(R2), and index(R1) < index(R2).
334 //
335 // For register cells, the ordering is lexicographic, with index 0 being
336 // the most significant.
337 if (VR1 == VR2)
338 return false;
339
340 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
341 uint16_t W1 = RC1.width(), W2 = RC2.width();
342 for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
343 const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
344 if (V1 != V2)
345 return BitOrd(V1, V2);
346 }
347 // Cells are equal up until the common length.
348 if (W1 != W2)
349 return W1 < W2;
350
351 return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
352}
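// Illustrative example (cells listed from index 0): if cell(%10) = {0,1}
// and cell(%11) = {0,1,ref}, the common prefix is equal, so the narrower
// cell makes %10 compare as smaller. Identical cells fall back to the
// base ordering of the register numbers.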
353
354bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
355 if (VR1 == VR2)
356 return false;
357 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
358 const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
359 uint16_t W1 = RC1.width(), W2 = RC2.width();
360 uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
361 uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
362 // If Bit1 exceeds the width of VR1, then:
363 // - return false, if at the same time Bit2 exceeds VR2, or
364 // - return true, otherwise.
365 // (I.e. "a bit value that does not exist is less than any bit value
366 // that does exist".)
367 if (W1 <= Bit1)
368 return Bit2 < W2;
369 // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
370 if (W2 <= Bit2)
371 return false;
372
373 const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
374 if (V1 != V2)
375 return BitOrd(V1, V2);
376 return false;
377}
378
379namespace {
380
381 class OrderedRegisterList {
382 using ListType = std::vector<unsigned>;
383 const unsigned MaxSize;
384
385 public:
386 OrderedRegisterList(const RegisterOrdering &RO)
387 : MaxSize(MaxORLSize), Ord(RO) {}
388
389 void insert(unsigned VR);
390 void remove(unsigned VR);
391
392 unsigned operator[](unsigned Idx) const {
393 assert(Idx < Seq.size());
394 return Seq[Idx];
395 }
396
397 unsigned size() const {
398 return Seq.size();
399 }
400
401 using iterator = ListType::iterator;
402 using const_iterator = ListType::const_iterator;
403
404 iterator begin() { return Seq.begin(); }
405 iterator end() { return Seq.end(); }
406 const_iterator begin() const { return Seq.begin(); }
407 const_iterator end() const { return Seq.end(); }
408
409 // Convenience function to convert an iterator to the corresponding index.
410 unsigned idx(iterator It) const { return It-begin(); }
411
412 private:
413 ListType Seq;
414 const RegisterOrdering &Ord;
415 };
416
417 struct PrintORL {
418 PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
419 : RL(L), TRI(RI) {}
420
421 friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
422
423 private:
424 const OrderedRegisterList &RL;
425 const TargetRegisterInfo *TRI;
426 };
427
428 raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
429 OS << '(';
430 OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
431 for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
432 if (I != B)
433 OS << ", ";
434 OS << printReg(*I, P.TRI);
435 }
436 OS << ')';
437 return OS;
438 }
439
440} // end anonymous namespace
441
442void OrderedRegisterList::insert(unsigned VR) {
443 iterator L = llvm::lower_bound(Seq, VR, Ord);
444 if (L == Seq.end())
445 Seq.push_back(VR);
446 else
447 Seq.insert(L, VR);
448
449 unsigned S = Seq.size();
450 if (S > MaxSize)
451 Seq.resize(MaxSize);
452 assert(Seq.size() <= MaxSize);
453}
454
455void OrderedRegisterList::remove(unsigned VR) {
456 iterator L = llvm::lower_bound(Seq, VR, Ord);
457 if (L != Seq.end())
458 Seq.erase(L);
459}
460
461namespace {
462
463 // A record of the insert form. The fields correspond to the operands
464 // of the "insert" instruction:
465 // ... = insert(SrcR, InsR, #Wdh, #Off)
466 struct IFRecord {
467 IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
468 : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
469
470 unsigned SrcR, InsR;
471 uint16_t Wdh, Off;
472 };
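// Illustrative example of the recorded form: the new value is SrcR with
// Wdh bits starting at bit Off replaced by the low Wdh bits of InsR.
// E.g. with SrcR = 0xFFFFFFFF, InsR = 0, Wdh = 8, Off = 4 the result is
// 0xFFFFF00F (bits 4..11 overwritten with zeros).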
473
474 struct PrintIFR {
475 PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
476 : IFR(R), TRI(RI) {}
477
478 private:
479 friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
480
481 const IFRecord &IFR;
482 const TargetRegisterInfo *TRI;
483 };
484
485 raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
486 unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
487 OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
488 << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
489 return OS;
490 }
491
492 using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
493
494} // end anonymous namespace
495
496namespace {
497
498 class HexagonGenInsert : public MachineFunctionPass {
499 public:
500 static char ID;
501
502 HexagonGenInsert() : MachineFunctionPass(ID) {}
503
504 StringRef getPassName() const override {
505 return "Hexagon generate \"insert\" instructions";
506 }
507
508 void getAnalysisUsage(AnalysisUsage &AU) const override {
509 AU.addRequired<MachineDominatorTreeWrapperPass>();
510 AU.addPreserved<MachineDominatorTreeWrapperPass>();
511 MachineFunctionPass::getAnalysisUsage(AU);
512 }
513
514 bool runOnMachineFunction(MachineFunction &MF) override;
515
516 private:
517 using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
518
519 void buildOrderingMF(RegisterOrdering &RO) const;
520 void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
521 bool isIntClass(const TargetRegisterClass *RC) const;
522 bool isConstant(unsigned VR) const;
523 bool isSmallConstant(unsigned VR) const;
524 bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
525 uint16_t L, uint16_t S) const;
526 bool findSelfReference(unsigned VR) const;
527 bool findNonSelfReference(unsigned VR) const;
528 void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
529 void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
530 unsigned distance(const MachineBasicBlock *FromB,
531 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
532 PairMapType &M) const;
533 unsigned distance(MachineBasicBlock::const_iterator FromI,
534 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
535 PairMapType &M) const;
536 bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
537 void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
538 void findRemovableRegisters(unsigned VR, IFRecord IF,
539 RegisterSet &RMs) const;
540 void computeRemovableRegisters();
541
542 void pruneEmptyLists();
543 void pruneCoveredSets(unsigned VR);
544 void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
545 void pruneRegCopies(unsigned VR);
546 void pruneCandidates();
547 void selectCandidates();
548 bool generateInserts();
549
550 bool removeDeadCode(MachineDomTreeNode *N);
551
552 // IFRecord coupled with a set of potentially removable registers:
553 using IFListType = std::vector<IFRecordWithRegSet>;
554 using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
555
556 void dump_map() const;
557
558 const HexagonInstrInfo *HII = nullptr;
559 const HexagonRegisterInfo *HRI = nullptr;
560
561 MachineFunction *MFN;
562 MachineRegisterInfo *MRI;
563 MachineDominatorTree *MDT;
564 CellMapShadow *CMS;
565
566 RegisterOrdering BaseOrd;
567 RegisterOrdering CellOrd;
568 IFMapType IFMap;
569 };
570
571} // end anonymous namespace
572
573char HexagonGenInsert::ID = 0;
574
575void HexagonGenInsert::dump_map() const {
576 for (const auto &I : IFMap) {
577 dbgs() << " " << printReg(I.first, HRI) << ":\n";
578 const IFListType &LL = I.second;
579 for (const auto &J : LL)
580 dbgs() << " " << PrintIFR(J.first, HRI) << ", "
581 << PrintRegSet(J.second, HRI) << '\n';
582 }
583}
584
585void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
586 unsigned Index = 0;
587
588 for (const MachineBasicBlock &B : *MFN) {
589 if (!CMS->BT.reached(&B))
590 continue;
591
592 for (const MachineInstr &MI : B) {
593 for (const MachineOperand &MO : MI.operands()) {
594 if (MO.isReg() && MO.isDef()) {
595 Register R = MO.getReg();
596 assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
597 if (R.isVirtual())
598 RO.insert(std::make_pair(R, Index++));
599 }
600 }
601 }
602 }
603 // Since some virtual registers may have had their def and uses eliminated,
604 // they are no longer referenced in the code, and so they will not appear
605 // in the map.
606}
607
608void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
609 RegisterOrdering &RO) const {
610 // Create a vector of all virtual registers (collect them from the base
611 // ordering RB), and then sort it using the RegisterCell comparator.
612 BitValueOrdering BVO(RB);
613 RegisterCellLexCompare LexCmp(BVO, *CMS);
614
615 using SortableVectorType = std::vector<unsigned>;
616
617 SortableVectorType VRs;
618 for (auto &I : RB)
619 VRs.push_back(I.first);
620 llvm::sort(VRs, LexCmp);
621 // Transfer the results to the outgoing register ordering.
622 for (unsigned i = 0, n = VRs.size(); i < n; ++i)
623 RO.insert(std::make_pair(VRs[i], i));
624}
625
626inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
627 return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
628}
629
630bool HexagonGenInsert::isConstant(unsigned VR) const {
631 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
632 uint16_t W = RC.width();
633 for (uint16_t i = 0; i < W; ++i) {
634 const BitTracker::BitValue &BV = RC[i];
635 if (BV.is(0) || BV.is(1))
636 continue;
637 return false;
638 }
639 return true;
640}
641
642bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
643 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
644 uint16_t W = RC.width();
645 if (W > 64)
646 return false;
647 uint64_t V = 0, B = 1;
648 for (uint16_t i = 0; i < W; ++i) {
649 const BitTracker::BitValue &BV = RC[i];
650 if (BV.is(1))
651 V |= B;
652 else if (!BV.is(0))
653 return false;
654 B <<= 1;
655 }
656
657 // For 32-bit registers, consider: Rd = #s16.
658 if (W == 32)
659 return isInt<16>(V);
660
661 // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
662 return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
663}
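// Illustrative example: a fully-constant 32-bit cell with value 0x7FFF
// fits "Rd = #s16" and counts as small, while 0x12345 does not; a 64-bit
// cell such as 0x0000007F0000007F is small because both 32-bit halves
// fit into #s8.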
664
665bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
666 unsigned InsR, uint16_t L, uint16_t S) const {
667 const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
668 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
669 const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
670 // Only integer (32-/64-bit) register classes.
671 if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
672 return false;
673 // The "source" register must be of the same class as DstR.
674 if (DstRC != SrcRC)
675 return false;
676 if (DstRC == InsRC)
677 return true;
678 // A 64-bit register can only be generated from other 64-bit registers.
679 if (DstRC == &Hexagon::DoubleRegsRegClass)
680 return false;
681 // Otherwise, the L bits inserted at offset S cannot span a 32-bit word boundary.
682 if (S < 32 && S+L > 32)
683 return false;
684 return true;
685}
686
687bool HexagonGenInsert::findSelfReference(unsigned VR) const {
688 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
689 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
690 const BitTracker::BitValue &V = RC[i];
691 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
692 return true;
693 }
694 return false;
695}
696
697bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
698 BitTracker::RegisterCell RC = CMS->lookup(VR);
699 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
700 const BitTracker::BitValue &V = RC[i];
701 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
702 return true;
703 }
704 return false;
705}
706
707void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
708 RegisterSet &Defs) const {
709 for (const MachineOperand &MO : MI->operands()) {
710 if (!MO.isReg() || !MO.isDef())
711 continue;
712 Register R = MO.getReg();
713 if (!R.isVirtual())
714 continue;
715 Defs.insert(R);
716 }
717}
718
719void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
720 RegisterSet &Uses) const {
721 for (const MachineOperand &MO : MI->operands()) {
722 if (!MO.isReg() || !MO.isUse())
723 continue;
724 Register R = MO.getReg();
725 if (!R.isVirtual())
726 continue;
727 Uses.insert(R);
728 }
729}
730
731unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
732 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
733 PairMapType &M) const {
734 // Forward distance from the end of a block to the beginning of it does
735 // not make sense. This function should not be called with FromB == ToB.
736 assert(FromB != ToB);
737
738 unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
739 // If we have already computed it, return the cached result.
740 PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
741 if (F != M.end())
742 return F->second;
743 unsigned ToRPO = RPO.lookup(ToN);
744
745 unsigned MaxD = 0;
746
747 for (const MachineBasicBlock *PB : ToB->predecessors()) {
748 // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
749 // along that path will be 0, and we don't need to do any calculations
750 // on it.
751 if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
752 continue;
753 unsigned D = PB->size() + distance(FromB, PB, RPO, M);
754 if (D > MaxD)
755 MaxD = D;
756 }
757
758 // Memoize the result for later lookup.
759 M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
760 return MaxD;
761}
762
763unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
764 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
765 PairMapType &M) const {
766 const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
767 if (FB == TB)
768 return std::distance(FromI, ToI);
769 unsigned D1 = std::distance(TB->begin(), ToI);
770 unsigned D2 = distance(FB, TB, RPO, M);
771 unsigned D3 = std::distance(FromI, FB->end());
772 return D1+D2+D3;
773}
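// Illustrative example of the block-level distance: if ToB has two
// non-back-edge predecessors P1 (5 instructions) and P2 (9 instructions),
// neither being FromB itself, then
//   distance(FromB,ToB) = max(5 + distance(FromB,P1), 9 + distance(FromB,P2)),
// a conservative longest-path instruction count, memoized per block pair.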
774
775bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
776 OrderedRegisterList &AVs) {
777 if (isDebug()) {
778 dbgs() << __func__ << ": " << printReg(VR, HRI)
779 << " AVs: " << PrintORL(AVs, HRI) << "\n";
780 }
781 if (AVs.size() == 0)
782 return false;
783
784 using iterator = OrderedRegisterList::iterator;
785
786 BitValueOrdering BVO(BaseOrd);
787 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
788 uint16_t W = RC.width();
789
790 using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
791 using RSListType = std::vector<RSRecord>;
792 // Have a map, with key being the matching prefix length, and the value
793 // being the list of pairs (R,S), where R's prefix matches VR at S.
794 // (DenseMap<uint16_t,RSListType> fails to instantiate.)
795 using LRSMapType = DenseMap<unsigned, RSListType>;
796 LRSMapType LM;
797
798 // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
799 // and find matching prefixes from AVs with the rotated RC. Such a prefix
800 // would match a string of bits (of length L) in RC starting at S.
801 for (uint16_t S = 0; S < W; ++S) {
802 iterator B = AVs.begin(), E = AVs.end();
803 // The registers in AVs are ordered according to the lexical order of
804 // the corresponding register cells. This means that the range of regis-
805 // ters in AVs that match a prefix of length L+1 will be contained in
806 // the range that matches a prefix of length L. This means that we can
807 // keep narrowing the search space as the prefix length goes up. This
808 // helps reduce the overall complexity of the search.
809 uint16_t L;
810 for (L = 0; L < W-S; ++L) {
811 // Compare against VR's bits starting at S, which emulates rotation
812 // of VR by S.
813 RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
814 iterator NewB = std::lower_bound(B, E, VR, RCB);
815 iterator NewE = std::upper_bound(NewB, E, VR, RCB);
816 // For the registers that are eliminated from the next range, L is
817 // the longest prefix matching VR at position S (their prefixes
818 // differ from VR at S+L). If L>0, record this information for later
819 // use.
820 if (L > 0) {
821 for (iterator I = B; I != NewB; ++I)
822 LM[L].push_back(std::make_pair(*I, S));
823 for (iterator I = NewE; I != E; ++I)
824 LM[L].push_back(std::make_pair(*I, S));
825 }
826 B = NewB, E = NewE;
827 if (B == E)
828 break;
829 }
830 // Record the final register range. If this range is non-empty, then
831 // L=W-S.
832 assert(B == E || L == W-S);
833 if (B != E) {
834 for (iterator I = B; I != E; ++I)
835 LM[L].push_back(std::make_pair(*I, S));
836 // If B!=E, then we found a range of registers whose prefixes cover the
837 // rest of VR from position S. There is no need to further advance S.
838 break;
839 }
840 }
841
842 if (isDebug()) {
843 dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
844 for (const auto &I : LM) {
845 dbgs() << " L=" << I.first << ':';
846 const RSListType &LL = I.second;
847 for (const auto &J : LL)
848 dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
849 dbgs() << '\n';
850 }
851 }
852
853 bool Recorded = false;
854
855 for (unsigned SrcR : AVs) {
856 int FDi = -1, LDi = -1; // First/last different bit.
857 const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
858 uint16_t AW = AC.width();
859 for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
860 if (RC[i] == AC[i])
861 continue;
862 if (FDi == -1)
863 FDi = i;
864 LDi = i;
865 }
866 if (FDi == -1)
867 continue; // TODO (future): Record identical registers.
868 // Look for a register whose prefix could patch the range [FD..LD]
869 // where VR and SrcR differ.
870 uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
871 uint16_t MinL = LD-FD+1;
872 for (uint16_t L = MinL; L < W; ++L) {
873 LRSMapType::iterator F = LM.find(L);
874 if (F == LM.end())
875 continue;
876 RSListType &LL = F->second;
877 for (const auto &I : LL) {
878 uint16_t S = I.second;
879 // MinL is the minimum length of the prefix. Any length above MinL
880 // allows some flexibility as to where the prefix can start:
881 // given the extra length EL=L-MinL, the prefix must start between
882 // max(0,FD-EL) and FD.
883 if (S > FD) // Starts too late.
884 continue;
885 uint16_t EL = L-MinL;
886 uint16_t LowS = (EL < FD) ? FD-EL : 0;
887 if (S < LowS) // Starts too early.
888 continue;
889 unsigned InsR = I.first;
890 if (!isValidInsertForm(VR, SrcR, InsR, L, S))
891 continue;
892 if (isDebug()) {
893 dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
894 << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
895 << S << ")\n";
896 }
897 IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
898 IFMap[VR].push_back(RR);
899 Recorded = true;
900 }
901 }
902 }
903
904 return Recorded;
905}
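// Illustrative example of the search above (4-bit cells listed from
// index 0, hypothetical registers): let cell(%10) = {a,b,c,d} and let an
// available %20 have cell(%20) = {c,d,x,y}. The length-2 prefix of %20
// matches %10 at position S=2, so LM[2] records (%20,@2). If another
// available %30 matches %10 everywhere except bits 2..3 (FD=2, LD=3,
// MinL=2), the loop above yields the candidate
//   %10 = insert(%30, %20, #2, #2)
// provided isValidInsertForm accepts the register classes involved.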
906
907void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
908 OrderedRegisterList &AVs) {
909 if (isDebug())
910 dbgs() << "visiting block " << printMBBReference(*B) << "\n";
911
912 // First, check if this block is reachable at all. If not, the bit tracker
913 // will not have any information about registers in it.
914 if (!CMS->BT.reached(B))
915 return;
916
917 bool DoConst = OptConst;
918 // Keep a separate set of registers defined in this block, so that we
919 // can remove them from the list of available registers once all DT
920 // successors have been processed.
921 RegisterSet BlockDefs, InsDefs;
922 for (MachineInstr &MI : *B) {
923 InsDefs.clear();
924 getInstrDefs(&MI, InsDefs);
925 // Leave those alone. They are more transparent than "insert".
926 bool Skip = MI.isCopy() || MI.isRegSequence();
927
928 if (!Skip) {
929 // Visit all defined registers, and attempt to find the corresponding
930 // "insert" representations.
931 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
932 // Do not collect registers that are known to be compile-time cons-
933 // tants, unless requested.
934 if (!DoConst && isConstant(VR))
935 continue;
936 // If VR's cell contains a reference to VR, then VR cannot be defined
937 // via "insert". If VR is a constant that can be generated in a single
938 // instruction (without constant extenders), generating it via insert
939 // makes no sense.
940 if (findSelfReference(VR) || isSmallConstant(VR))
941 continue;
942
943 findRecordInsertForms(VR, AVs);
944 // Stop if the map size is too large.
945 if (IFMap.size() > MaxIFMSize)
946 return;
947 }
948 }
949
950 // Insert the defined registers into the list of available registers
951 // after they have been processed.
952 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
953 AVs.insert(VR);
954 BlockDefs.insert(InsDefs);
955 }
956
957 for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
958 MachineBasicBlock *SB = DTN->getBlock();
959 collectInBlock(SB, AVs);
960 }
961
962 for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
963 AVs.remove(VR);
964}
965
966void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
967 RegisterSet &RMs) const {
968 // For a given register VR and an insert form, find the registers that are
969 // used by the current definition of VR, and which would no longer be
970 // needed for it after the definition of VR is replaced with the insert
971 // form. These are the registers that could potentially become dead.
972 RegisterSet Regs[2];
973
974 unsigned S = 0; // Register set selector.
975 Regs[S].insert(VR);
976
977 while (!Regs[S].empty()) {
978 // Breadth-first search.
979 unsigned OtherS = 1-S;
980 Regs[OtherS].clear();
981 for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
982 Regs[S].remove(R);
983 if (R == IF.SrcR || R == IF.InsR)
984 continue;
985 // Check if a given register has bits that are references to any other
986 // registers. This is to detect situations where the instruction that
987 // defines register R takes register Q as an operand, but R itself does
988 // not contain any bits from Q. Loads are examples of how this could
989 // happen:
990 // R = load Q
991 // In this case (assuming we do not have any knowledge about the loaded
992 // value), we must not treat R as a "conveyance" of the bits from Q.
993 // (The information in BT about R's bits would have them as constants,
994 // in case of zero-extending loads, or refs to R.)
995 if (!findNonSelfReference(R))
996 continue;
997 RMs.insert(R);
998 const MachineInstr *DefI = MRI->getVRegDef(R);
999 assert(DefI);
1000 // Do not iterate past PHI nodes to avoid infinite loops. This can
1001 // make the final set a bit less accurate, but the removable register
1002 // sets are an approximation anyway.
1003 if (DefI->isPHI())
1004 continue;
1005 getInstrUses(DefI, Regs[OtherS]);
1006 }
1007 S = OtherS;
1008 }
1009 // The register VR is added to the list as a side-effect of the algorithm,
1010 // but it is not "potentially removable". A potentially removable register
1011 // is one that may become unused (dead) after conversion to the insert form
1012 // IF, and obviously VR (or its replacement) will not become dead by apply-
1013 // ing IF.
1014 RMs.remove(VR);
1015}
1016
1017void HexagonGenInsert::computeRemovableRegisters() {
1018 for (auto &I : IFMap) {
1019 IFListType &LL = I.second;
1020 for (auto &J : LL)
1021 findRemovableRegisters(I.first, J.first, J.second);
1022 }
1023}
1024
1025void HexagonGenInsert::pruneEmptyLists() {
1026 // Remove all entries from the map, where the register has no insert forms
1027 // associated with it.
1028 using IterListType = SmallVector<IFMapType::iterator, 16>;
1029 IterListType Prune;
1030 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1031 if (I->second.empty())
1032 Prune.push_back(I);
1033 }
1034 for (const auto &It : Prune)
1035 IFMap.erase(It);
1036}
1037
1038void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1039 IFMapType::iterator F = IFMap.find(VR);
1040 assert(F != IFMap.end());
1041 IFListType &LL = F->second;
1042
1043 // First, examine the IF candidates for register VR whose removable-regis-
1044 // ter sets are empty. This means that a given candidate will not help eli-
1045 // minate any registers, but since "insert" is not a constant-extendable
1046 // instruction, using such a candidate may reduce code size if the defini-
1047 // tion of VR is constant-extended.
1048 // If there exists a candidate with a non-empty set, the ones with empty
1049 // sets will not be used and can be removed.
1050 MachineInstr *DefVR = MRI->getVRegDef(VR);
1051 bool DefEx = HII->isConstExtended(*DefVR);
1052 bool HasNE = false;
1053 for (const auto &I : LL) {
1054 if (I.second.empty())
1055 continue;
1056 HasNE = true;
1057 break;
1058 }
1059 if (!DefEx || HasNE) {
1060 // The definition of VR is not constant-extended, or there is a candidate
1061 // with a non-empty set. Remove all candidates with empty sets.
1062 auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1063 return IR.second.empty();
1064 };
1065 llvm::erase_if(LL, IsEmpty);
1066 } else {
1067 // The definition of VR is constant-extended, and all candidates have
1068 // empty removable-register sets. Pick the maximum candidate, and remove
1069 // all others. The "maximum" does not have any special meaning here, it
1070 // is only so that the candidate that will remain on the list is selec-
1071 // ted deterministically.
1072 IFRecord MaxIF = LL[0].first;
1073 for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1074 // If LL[MaxI] < LL[i], then MaxI = i.
1075 const IFRecord &IF = LL[i].first;
1076 unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1077 unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1078 if (M0 > R0)
1079 continue;
1080 if (M0 == R0) {
1081 if (M1 > R1)
1082 continue;
1083 if (M1 == R1) {
1084 if (MaxIF.Wdh > IF.Wdh)
1085 continue;
1086 if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1087 continue;
1088 }
1089 }
1090 // MaxIF < IF.
1091 MaxIF = IF;
1092 }
1093 // Remove everything except the maximum candidate. All register sets
1094 // are empty, so no need to preserve anything.
1095 LL.clear();
1096 LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1097 }
1098
1099 // Now, remove those whose sets of potentially removable registers are
1100 // contained in another IF candidate for VR. For example, given these
1101 // candidates for %45,
1102 // %45:
1103 // (%44,%41,#9,#8), { %42 }
1104 // (%43,%41,#9,#8), { %42 %44 }
1105 // remove the first one, since it is contained in the second one.
1106 for (unsigned i = 0, n = LL.size(); i < n; ) {
1107 const RegisterSet &RMi = LL[i].second;
1108 unsigned j = 0;
1109 while (j < n) {
1110 if (j != i && LL[j].second.includes(RMi))
1111 break;
1112 j++;
1113 }
1114 if (j == n) { // RMi not contained in anything else.
1115 i++;
1116 continue;
1117 }
1118 LL.erase(LL.begin()+i);
1119 n = LL.size();
1120 }
1121}
1122
1123void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1124 PairMapType &M) {
1125 IFMapType::iterator F = IFMap.find(VR);
1126 assert(F != IFMap.end());
1127 IFListType &LL = F->second;
1128 unsigned Cutoff = VRegDistCutoff;
1129 const MachineInstr *DefV = MRI->getVRegDef(VR);
1130
1131 for (unsigned i = LL.size(); i > 0; --i) {
1132 unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1133 const MachineInstr *DefS = MRI->getVRegDef(SR);
1134 const MachineInstr *DefI = MRI->getVRegDef(IR);
1135 unsigned DSV = distance(DefS, DefV, RPO, M);
1136 if (DSV < Cutoff) {
1137 unsigned DIV = distance(DefI, DefV, RPO, M);
1138 if (DIV < Cutoff)
1139 continue;
1140 }
1141 LL.erase(LL.begin()+(i-1));
1142 }
1143}
1144
1145void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1146 IFMapType::iterator F = IFMap.find(VR);
1147 assert(F != IFMap.end());
1148 IFListType &LL = F->second;
1149
1150 auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1151 return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1152 };
1153 llvm::erase_if(LL, IsCopy);
1154}
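// Note: a candidate with Wdh == 32 and Off of 0 or 32 would overwrite an
// entire 32-bit word of the destination, which is no better than a plain
// register (or subregister) copy, hence such candidates are discarded.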
1155
1156void HexagonGenInsert::pruneCandidates() {
1157 // Remove candidates that are not beneficial, regardless of the final
1158 // selection method.
1159 // First, remove candidates whose potentially removable set is a subset
1160 // of another candidate's set.
1161 for (const auto &I : IFMap)
1162 pruneCoveredSets(I.first);
1163
1164 UnsignedMap RPO;
1165
1166 using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
1167
1168 RPOTType RPOT(MFN);
1169 unsigned RPON = 0;
1170 for (const auto &I : RPOT)
1171 RPO[I->getNumber()] = RPON++;
1172
1173 PairMapType Memo; // Memoization map for distance calculation.
1174 // Remove candidates that would use registers defined too far away.
1175 for (const auto &I : IFMap)
1176 pruneUsesTooFar(I.first, RPO, Memo);
1177
1178 pruneEmptyLists();
1179
1180 for (const auto &I : IFMap)
1181 pruneRegCopies(I.first);
1182}
1183
1184namespace {
1185
1186 // Class for comparing IF candidates for registers that have multiple of
1187 // them. The smaller the candidate, according to this ordering, the better.
1188 // First, compare the number of zeros in the associated potentially remova-
1189 // ble register sets. "Zero" indicates that the register is very likely to
1190 // become dead after this transformation.
1191 // Second, compare "averages", i.e. use-count per size. The lower wins.
1192 // After that, it does not really matter which one is smaller. Resolve
1193 // the tie in some deterministic way.
1194 struct IFOrdering {
1195 IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1196 : UseC(UC), BaseOrd(BO) {}
1197
1198 bool operator() (const IFRecordWithRegSet &A,
1199 const IFRecordWithRegSet &B) const;
1200
1201 private:
1202 void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1203 unsigned &Sum) const;
1204
1205 const UnsignedMap &UseC;
1206 const RegisterOrdering &BaseOrd;
1207 };
1208
1209} // end anonymous namespace
1210
1211bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1212 const IFRecordWithRegSet &B) const {
1213 unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1214 unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1215 stats(A.second, SizeA, ZeroA, SumA);
1216 stats(B.second, SizeB, ZeroB, SumB);
1217
1218 // We will pick the minimum element. The more zeros, the better.
1219 if (ZeroA != ZeroB)
1220 return ZeroA > ZeroB;
1221 // Compare SumA/SizeA with SumB/SizeB, lower is better.
1222 uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1223 if (AvgA != AvgB)
1224 return AvgA < AvgB;
1225
1226 // The sets compare identical so far. Resort to comparing the IF records.
1227 // The actual values don't matter, this is only for determinism.
1228 unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1229 if (OSA != OSB)
1230 return OSA < OSB;
1231 unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1232 if (OIA != OIB)
1233 return OIA < OIB;
1234 if (A.first.Wdh != B.first.Wdh)
1235 return A.first.Wdh < B.first.Wdh;
1236 return A.first.Off < B.first.Off;
1237}
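// Note: the "averages" SumA/SizeA and SumB/SizeB are compared via
// cross-multiplication (SumA*SizeB vs SumB*SizeA) to stay in integer
// arithmetic; e.g. 3/2 vs 5/4 becomes 12 vs 10, so the second candidate
// has the lower average and wins.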
1238
1239void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1240 unsigned &Sum) const {
1241 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1242 UnsignedMap::const_iterator F = UseC.find(R);
1243 assert(F != UseC.end());
1244 unsigned UC = F->second;
1245 if (UC == 0)
1246 Zero++;
1247 Sum += UC;
1248 Size++;
1249 }
1250}
1251
1252void HexagonGenInsert::selectCandidates() {
1253 // Some registers may have multiple valid candidates. Pick the best one
1254 // (or decide not to use any).
1255
1256 // Compute the "removability" measure of R:
1257 // For each potentially removable register R, record the number of regis-
1258 // ters with IF candidates, where R appears in at least one set.
1259 RegisterSet AllRMs;
1260 UnsignedMap UseC, RemC;
1261 IFMapType::iterator End = IFMap.end();
1262
1263 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1264 const IFListType &LL = I->second;
1265 RegisterSet TT;
1266 for (const auto &J : LL)
1267 TT.insert(J.second);
1268 for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1269 RemC[R]++;
1270 AllRMs.insert(TT);
1271 }
1272
1273 for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1274 using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1275 using InstrSet = SmallPtrSet<const MachineInstr *, 16>;
1276
1277 InstrSet UIs;
1278 // Count as the number of instructions in which R is used, not the
1279 // number of operands.
1280 use_iterator E = MRI->use_nodbg_end();
1281 for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1282 UIs.insert(I->getParent());
1283 unsigned C = UIs.size();
1284 // Calculate a measure, which is the number of instructions using R,
1285 // minus the "removability" count computed earlier.
1286 unsigned D = RemC[R];
1287 UseC[R] = (C > D) ? C-D : 0; // doz (difference or zero)
1288 }
1289
1290 bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1291 if (!SelectAll0 && !SelectHas0)
1292 SelectAll0 = true;
1293
1294 // The smaller the number UseC for a given register R, the "less used"
1295 // R is aside from the opportunities for removal offered by generating
1296 // "insert" instructions.
1297 // Iterate over the IF map, and for those registers that have multiple
1298 // candidates, pick the minimum one according to IFOrdering.
1299 IFOrdering IFO(UseC, BaseOrd);
1300 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1301 IFListType &LL = I->second;
1302 if (LL.empty())
1303 continue;
1304 // Get the minimum element, remember it and clear the list. If the
1305 // element found is adequate, we will put it back on the list, other-
1306 // wise the list will remain empty, and the entry for this register
1307 // will be removed (i.e. this register will not be replaced by insert).
1308 IFListType::iterator MinI = llvm::min_element(LL, IFO);
1309 assert(MinI != LL.end());
1310 IFRecordWithRegSet M = *MinI;
1311 LL.clear();
1312
1313 // We want to make sure that this replacement will have a chance to be
1314 // beneficial, and that means that we want to have indication that some
1315 // register will be removed. The most likely registers to be eliminated
1316 // are the use operands in the definition of I->first. Accept/reject a
1317 // candidate based on how many of its uses it can potentially eliminate.
1318
1319 RegisterSet Us;
1320 const MachineInstr *DefI = MRI->getVRegDef(I->first);
1321 getInstrUses(DefI, Us);
1322 bool Accept = false;
1323
1324 if (SelectAll0) {
1325 bool All0 = true;
1326 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1327 if (UseC[R] == 0)
1328 continue;
1329 All0 = false;
1330 break;
1331 }
1332 Accept = All0;
1333 } else if (SelectHas0) {
1334 bool Has0 = false;
1335 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1336 if (UseC[R] != 0)
1337 continue;
1338 Has0 = true;
1339 break;
1340 }
1341 Accept = Has0;
1342 }
1343 if (Accept)
1344 LL.push_back(M);
1345 }
1346
1347 // Remove candidates that add uses of removable registers, unless the
1348 // removable registers are among replacement candidates.
1349 // Recompute the removable registers, since some candidates may have
1350 // been eliminated.
1351 AllRMs.clear();
1352 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1353 const IFListType &LL = I->second;
1354 if (!LL.empty())
1355 AllRMs.insert(LL[0].second);
1356 }
1357 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1358 IFListType &LL = I->second;
1359 if (LL.empty())
1360 continue;
1361 unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1362 if (AllRMs[SR] || AllRMs[IR])
1363 LL.clear();
1364 }
1365
1366 pruneEmptyLists();
1367}
1368
1369bool HexagonGenInsert::generateInserts() {
1370 // Create a new register for each one from IFMap, and store them in the
1371 // map.
1372 UnsignedMap RegMap;
1373 for (auto &I : IFMap) {
1374 unsigned VR = I.first;
1375 const TargetRegisterClass *RC = MRI->getRegClass(VR);
1376 Register NewVR = MRI->createVirtualRegister(RC);
1377 RegMap[VR] = NewVR;
1378 }
1379
1380 // We can generate the "insert" instructions using potentially stale re-
1381 // gisters: SrcR and InsR for a given VR may be among other registers that
1382 // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1383 for (auto &I : IFMap) {
1384 MachineInstr *MI = MRI->getVRegDef(I.first);
1385 MachineBasicBlock &B = *MI->getParent();
1386 DebugLoc DL = MI->getDebugLoc();
1387 unsigned NewR = RegMap[I.first];
1388 bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1389 const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1390 : HII->get(Hexagon::S2_insertp);
1391 IFRecord IF = I.second[0].first;
1392 unsigned Wdh = IF.Wdh, Off = IF.Off;
1393 unsigned InsS = 0;
1394 if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1395 InsS = Hexagon::isub_lo;
1396 if (Off >= 32) {
1397 InsS = Hexagon::isub_hi;
1398 Off -= 32;
1399 }
1400 }
1401 // Advance to the proper location for inserting instructions. This could
1402 // be B.end().
1403 MachineBasicBlock::iterator At = MI;
1404 if (MI->isPHI())
1405 At = B.getFirstNonPHI();
1406
1407 BuildMI(B, At, DL, D, NewR)
1408 .addReg(IF.SrcR)
1409 .addReg(IF.InsR, 0, InsS)
1410 .addImm(Wdh)
1411 .addImm(Off);
1412
1413 MRI->clearKillFlags(IF.SrcR);
1414 MRI->clearKillFlags(IF.InsR);
1415 }
1416
1417 for (const auto &I : IFMap) {
1418 MachineInstr *DefI = MRI->getVRegDef(I.first);
1419 MRI->replaceRegWith(I.first, RegMap[I.first]);
1420 DefI->eraseFromParent();
1421 }
1422
1423 return true;
1424}
1425
1426bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1427 bool Changed = false;
1428
1429 for (auto *DTN : children<MachineDomTreeNode*>(N))
1430 Changed |= removeDeadCode(DTN);
1431
1432 MachineBasicBlock *B = N->getBlock();
1433 std::vector<MachineInstr*> Instrs;
1434 for (MachineInstr &MI : llvm::reverse(*B))
1435 Instrs.push_back(&MI);
1436
1437 for (MachineInstr *MI : Instrs) {
1438 unsigned Opc = MI->getOpcode();
1439 // Do not touch lifetime markers. This is why the target-independent DCE
1440 // cannot be used.
1441 if (Opc == TargetOpcode::LIFETIME_START ||
1442 Opc == TargetOpcode::LIFETIME_END)
1443 continue;
1444 bool Store = false;
1445 if (MI->isInlineAsm() || !MI->isSafeToMove(Store))
1446 continue;
1447
1448 bool AllDead = true;
1449 SmallVector<unsigned,2> Regs;
1450 for (const MachineOperand &MO : MI->operands()) {
1451 if (!MO.isReg() || !MO.isDef())
1452 continue;
1453 Register R = MO.getReg();
1454 if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1455 AllDead = false;
1456 break;
1457 }
1458 Regs.push_back(R);
1459 }
1460 if (!AllDead)
1461 continue;
1462
1463 B->erase(MI);
1464 for (unsigned Reg : Regs)
1465 MRI->markUsesInDebugValueAsUndef(Reg);
1466 Changed = true;
1467 }
1468
1469 return Changed;
1470}
1471
1472bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1473 if (skipFunction(MF.getFunction()))
1474 return false;
1475
1476 bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1477 bool Changed = false;
1478
1479 // Verify: one, but not both.
1480 assert(!OptSelectAll0 || !OptSelectHas0);
1481
1482 IFMap.clear();
1483 BaseOrd.clear();
1484 CellOrd.clear();
1485
1486 const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1487 HII = ST.getInstrInfo();
1488 HRI = ST.getRegisterInfo();
1489 MFN = &MF;
1490 MRI = &MF.getRegInfo();
1491 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1492
1493 // Clean up before any further processing, so that dead code does not
1494 // get used in a newly generated "insert" instruction. Have a custom
1495 // version of DCE that preserves lifetime markers. Without it, merging
1496 // of stack objects can fail to recognize and merge disjoint objects
1497 // leading to unnecessary stack growth.
1498 Changed = removeDeadCode(MDT->getRootNode());
1499
1500 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1501 BitTracker BTLoc(HE, MF);
1502 BTLoc.trace(isDebug());
1503 BTLoc.run();
1504 CellMapShadow MS(BTLoc);
1505 CMS = &MS;
1506
1507 buildOrderingMF(BaseOrd);
1508 buildOrderingBT(BaseOrd, CellOrd);
1509
1510 if (isDebug()) {
1511 dbgs() << "Cell ordering:\n";
1512 for (const auto &I : CellOrd) {
1513 unsigned VR = I.first, Pos = I.second;
1514 dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1515 }
1516 }
1517
1518 // Collect candidates for conversion into the insert forms.
1519 MachineBasicBlock *RootB = MDT->getRoot();
1520 OrderedRegisterList AvailR(CellOrd);
1521
1522 const char *const TGName = "hexinsert";
1523 const char *const TGDesc = "Generate Insert Instructions";
1524
1525 {
1526 NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1527 TimingDetail);
1528 collectInBlock(RootB, AvailR);
1529 // Complete the information gathered in IFMap.
1530 computeRemovableRegisters();
1531 }
1532
1533 if (isDebug()) {
1534 dbgs() << "Candidates after collection:\n";
1535 dump_map();
1536 }
1537
1538 if (IFMap.empty())
1539 return Changed;
1540
1541 {
1542 NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1543 pruneCandidates();
1544 }
1545
1546 if (isDebug()) {
1547 dbgs() << "Candidates after pruning:\n";
1548 dump_map();
1549 }
1550
1551 if (IFMap.empty())
1552 return Changed;
1553
1554 {
1555 NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1556 selectCandidates();
1557 }
1558
1559 if (isDebug()) {
1560 dbgs() << "Candidates after selection:\n";
1561 dump_map();
1562 }
1563
1564 // Filter out vregs beyond the cutoff.
1565 if (VRegIndexCutoff.getPosition()) {
1566 unsigned Cutoff = VRegIndexCutoff;
1567
1568 using IterListType = SmallVector<IFMapType::iterator, 16>;
1569
1570 IterListType Out;
1571 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1572 unsigned Idx = Register(I->first).virtRegIndex();
1573 if (Idx >= Cutoff)
1574 Out.push_back(I);
1575 }
1576 for (const auto &It : Out)
1577 IFMap.erase(It);
1578 }
1579 if (IFMap.empty())
1580 return Changed;
1581
1582 {
1583 NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1584 TimingDetail);
1585 generateInserts();
1586 }
1587
1588 return true;
1589}
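// Overall flow of the pass, as implemented above: run the lifetime-marker
// preserving DCE, compute bit values with the BitTracker, build the base
// and cell orderings, collect insert candidates along the dominator tree,
// prune and select them, and finally rewrite the chosen definitions as
// S2_insert/S2_insertp instructions.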
1590
1591FunctionPass *llvm::createHexagonGenInsert() {
1592 return new HexagonGenInsert();
1593}
1594
1595//===----------------------------------------------------------------------===//
1596// Public Constructor Functions
1597//===----------------------------------------------------------------------===//
1598
1599INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1600 "Hexagon generate \"insert\" instructions", false, false)
1601INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
1602INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1603 "Hexagon generate \"insert\" instructions", false, false)