Line data Source code
1 : //==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file Execution Dependency Fix pass.
11 : ///
12 : /// Some X86 SSE instructions like mov, and, or, xor are available in different
13 : /// variants for different operand types. These variant instructions are
14 : /// equivalent, but on Nehalem and newer cpus there is extra latency
15 : /// transferring data between integer and floating point domains. ARM cores
16 : /// have similar issues when they are configured with both VFP and NEON
17 : /// pipelines.
18 : ///
19 : /// This pass changes the variant instructions to minimize domain crossings.
20 : //
21 : //===----------------------------------------------------------------------===//
22 :
23 : #ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H
24 : #define LLVM_CODEGEN_EXECUTIONDEPSFIX_H
25 :
26 : #include "llvm/ADT/DenseMap.h"
27 : #include "llvm/ADT/iterator_range.h"
28 : #include "llvm/ADT/SmallVector.h"
29 : #include "llvm/CodeGen/LivePhysRegs.h"
30 : #include "llvm/CodeGen/MachineFunction.h"
31 : #include "llvm/CodeGen/MachineFunctionPass.h"
32 : #include "llvm/CodeGen/RegisterClassInfo.h"
33 : #include "llvm/Pass.h"
34 : #include "llvm/Support/Allocator.h"
35 : #include "llvm/Support/MathExtras.h"
36 : #include <cassert>
37 : #include <limits>
38 : #include <utility>
39 : #include <vector>
40 :
41 : namespace llvm {
42 :
43 : class MachineBasicBlock;
44 : class MachineInstr;
45 : class TargetInstrInfo;
46 :
47 : /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
48 : /// of execution domains.
49 : ///
50 : /// An open DomainValue represents a set of instructions that can still switch
51 : /// execution domain. Multiple registers may refer to the same open
52 : /// DomainValue - they will eventually be collapsed to the same execution
53 : /// domain.
54 : ///
55 : /// A collapsed DomainValue represents a single register that has been forced
56 : /// into one of more execution domains. There is a separate collapsed
57 : /// DomainValue for each register, but it may contain multiple execution
58 : /// domains. A register value is initially created in a single execution
59 : /// domain, but if we were forced to pay the penalty of a domain crossing, we
60 : /// keep track of the fact that the register is now available in multiple
61 : /// domains.
62 344908 : struct DomainValue {
63 : // Basic reference counting.
64 : unsigned Refs = 0;
65 :
66 : // Bitmask of available domains. For an open DomainValue, it is the still
67 : // possible domains for collapsing. For a collapsed DomainValue it is the
68 : // domains where the register is available for free.
69 : unsigned AvailableDomains;
70 :
71 : // Pointer to the next DomainValue in a chain. When two DomainValues are
72 : // merged, Victim.Next is set to point to Victor, so old DomainValue
73 : // references can be updated by following the chain.
74 : DomainValue *Next;
75 :
76 : // Twiddleable instructions using or defining these registers.
77 : SmallVector<MachineInstr*, 8> Instrs;
78 :
79 517362 : DomainValue() { clear(); }
80 :
81 : // A collapsed DomainValue has no instructions to twiddle - it simply keeps
82 : // track of the domains where the registers are already available.
83 1394406 : bool isCollapsed() const { return Instrs.empty(); }
84 :
85 : // Is domain available?
86 : bool hasDomain(unsigned domain) const {
87 : assert(domain <
88 : static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
89 : "undefined behavior");
90 80513 : return AvailableDomains & (1u << domain);
91 : }
92 :
93 : // Mark domain as available.
94 : void addDomain(unsigned domain) {
95 653090 : AvailableDomains |= 1u << domain;
96 : }
97 :
98 : // Restrict to a single domain available.
99 : void setSingleDomain(unsigned domain) {
100 93022 : AvailableDomains = 1u << domain;
101 : }
102 :
103 : // Return bitmask of domains that are available and in mask.
104 : unsigned getCommonDomains(unsigned mask) const {
105 240579 : return AvailableDomains & mask;
106 : }
107 :
108 : // First domain available.
109 : unsigned getFirstDomain() const {
110 437666 : return countTrailingZeros(AvailableDomains);
111 : }
112 :
113 : // Clear this DomainValue and point to next which has all its data.
114 : void clear() {
115 588124 : AvailableDomains = 0;
116 588124 : Next = nullptr;
117 1176248 : Instrs.clear();
118 : }
119 : };
120 :
121 : /// Information about a live register.
122 : struct LiveReg {
123 : /// Value currently in this register, or NULL when no value is being tracked.
124 : /// This counts as a DomainValue reference.
125 : DomainValue *Value;
126 :
127 : /// Instruction that defined this register, relative to the beginning of the
128 : /// current basic block. When a LiveReg is used to represent a live-out
129 : /// register, this value is relative to the end of the basic block, so it
130 : /// will be a negative number.
131 : int Def;
132 : };
133 :
134 50735 : class ExecutionDepsFix : public MachineFunctionPass {
135 : SpecificBumpPtrAllocator<DomainValue> Allocator;
136 : SmallVector<DomainValue*,16> Avail;
137 :
138 : const TargetRegisterClass *const RC;
139 : MachineFunction *MF;
140 : const TargetInstrInfo *TII;
141 : const TargetRegisterInfo *TRI;
142 : RegisterClassInfo RegClassInfo;
143 : std::vector<SmallVector<int, 1>> AliasMap;
144 : const unsigned NumRegs;
145 : LiveReg *LiveRegs;
146 : struct MBBInfo {
147 : // Keeps clearance and domain information for all registers. Note that this
148 : // is different from the usual definition notion of liveness. The CPU
149 : // doesn't care whether or not we consider a register killed.
150 : LiveReg *OutRegs = nullptr;
151 :
152 : // Whether we have gotten to this block in primary processing yet.
153 : bool PrimaryCompleted = false;
154 :
155 : // The number of predecessors for which primary processing has completed
156 : unsigned IncomingProcessed = 0;
157 :
158 : // The value of `IncomingProcessed` at the start of primary processing
159 : unsigned PrimaryIncoming = 0;
160 :
161 : // The number of predecessors for which all processing steps are done.
162 : unsigned IncomingCompleted = 0;
163 :
164 : MBBInfo() = default;
165 : };
166 : using MBBInfoMap = DenseMap<MachineBasicBlock *, MBBInfo>;
167 : MBBInfoMap MBBInfos;
168 :
169 : /// List of undefined register reads in this block in forward order.
170 : std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
171 :
172 : /// Storage for register unit liveness.
173 : LivePhysRegs LiveRegSet;
174 :
175 : /// Current instruction number.
176 : /// The first instruction in each basic block is 0.
177 : int CurInstr;
178 :
179 : public:
180 8504 : ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC)
181 68032 : : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {}
182 :
183 8478 : void getAnalysisUsage(AnalysisUsage &AU) const override {
184 8478 : AU.setPreservesAll();
185 8478 : MachineFunctionPass::getAnalysisUsage(AU);
186 8478 : }
187 :
188 : bool runOnMachineFunction(MachineFunction &MF) override;
189 :
190 8481 : MachineFunctionProperties getRequiredProperties() const override {
191 25443 : return MachineFunctionProperties().set(
192 25443 : MachineFunctionProperties::Property::NoVRegs);
193 : }
194 :
195 : private:
196 : iterator_range<SmallVectorImpl<int>::const_iterator>
197 : regIndices(unsigned Reg) const;
198 : // DomainValue allocation.
199 : DomainValue *alloc(int domain = -1);
200 : DomainValue *retain(DomainValue *DV) {
201 921366 : if (DV) ++DV->Refs;
202 : return DV;
203 : }
204 : void release(DomainValue*);
205 : DomainValue *resolve(DomainValue*&);
206 :
207 : // LiveRegs manipulations.
208 : void setLiveReg(int rx, DomainValue *DV);
209 : void kill(int rx);
210 : void force(int rx, unsigned domain);
211 : void collapse(DomainValue *dv, unsigned domain);
212 : bool merge(DomainValue *A, DomainValue *B);
213 :
214 : void enterBasicBlock(MachineBasicBlock*);
215 : void leaveBasicBlock(MachineBasicBlock*);
216 : bool isBlockDone(MachineBasicBlock *);
217 : void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass);
218 : bool visitInstr(MachineInstr *);
219 : void processDefs(MachineInstr *, bool breakDependency, bool Kill);
220 : void visitSoftInstr(MachineInstr*, unsigned mask);
221 : void visitHardInstr(MachineInstr*, unsigned domain);
222 : bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
223 : unsigned Pref);
224 : bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
225 : void processUndefReads(MachineBasicBlock*);
226 : };
227 :
228 : } // end namepsace llvm
229 :
230 : #endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H
|