File: | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp |
Warning: | line 610, column 3 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains the PowerPC implementation of the TargetInstrInfo class. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "PPCInstrInfo.h" | |||
14 | #include "MCTargetDesc/PPCPredicates.h" | |||
15 | #include "PPC.h" | |||
16 | #include "PPCHazardRecognizers.h" | |||
17 | #include "PPCInstrBuilder.h" | |||
18 | #include "PPCMachineFunctionInfo.h" | |||
19 | #include "PPCTargetMachine.h" | |||
20 | #include "llvm/ADT/STLExtras.h" | |||
21 | #include "llvm/ADT/Statistic.h" | |||
22 | #include "llvm/Analysis/AliasAnalysis.h" | |||
23 | #include "llvm/CodeGen/LiveIntervals.h" | |||
24 | #include "llvm/CodeGen/MachineConstantPool.h" | |||
25 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
26 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
28 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
29 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
30 | #include "llvm/CodeGen/PseudoSourceValue.h" | |||
31 | #include "llvm/CodeGen/RegisterClassInfo.h" | |||
32 | #include "llvm/CodeGen/RegisterPressure.h" | |||
33 | #include "llvm/CodeGen/ScheduleDAG.h" | |||
34 | #include "llvm/CodeGen/SlotIndexes.h" | |||
35 | #include "llvm/CodeGen/StackMaps.h" | |||
36 | #include "llvm/MC/MCAsmInfo.h" | |||
37 | #include "llvm/MC/MCInst.h" | |||
38 | #include "llvm/Support/CommandLine.h" | |||
39 | #include "llvm/Support/Debug.h" | |||
40 | #include "llvm/Support/ErrorHandling.h" | |||
41 | #include "llvm/Support/TargetRegistry.h" | |||
42 | #include "llvm/Support/raw_ostream.h" | |||
43 | ||||
44 | using namespace llvm; | |||
45 | ||||
46 | #define DEBUG_TYPE"ppc-instr-info" "ppc-instr-info" | |||
47 | ||||
48 | #define GET_INSTRMAP_INFO | |||
49 | #define GET_INSTRINFO_CTOR_DTOR | |||
50 | #include "PPCGenInstrInfo.inc" | |||
51 | ||||
52 | STATISTIC(NumStoreSPILLVSRRCAsVec,static llvm::Statistic NumStoreSPILLVSRRCAsVec = {"ppc-instr-info" , "NumStoreSPILLVSRRCAsVec", "Number of spillvsrrc spilled to stack as vec" } | |||
53 | "Number of spillvsrrc spilled to stack as vec")static llvm::Statistic NumStoreSPILLVSRRCAsVec = {"ppc-instr-info" , "NumStoreSPILLVSRRCAsVec", "Number of spillvsrrc spilled to stack as vec" }; | |||
54 | STATISTIC(NumStoreSPILLVSRRCAsGpr,static llvm::Statistic NumStoreSPILLVSRRCAsGpr = {"ppc-instr-info" , "NumStoreSPILLVSRRCAsGpr", "Number of spillvsrrc spilled to stack as gpr" } | |||
55 | "Number of spillvsrrc spilled to stack as gpr")static llvm::Statistic NumStoreSPILLVSRRCAsGpr = {"ppc-instr-info" , "NumStoreSPILLVSRRCAsGpr", "Number of spillvsrrc spilled to stack as gpr" }; | |||
56 | STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc")static llvm::Statistic NumGPRtoVSRSpill = {"ppc-instr-info", "NumGPRtoVSRSpill" , "Number of gpr spills to spillvsrrc"}; | |||
57 | STATISTIC(CmpIselsConverted,static llvm::Statistic CmpIselsConverted = {"ppc-instr-info", "CmpIselsConverted", "Number of ISELs that depend on comparison of constants converted" } | |||
58 | "Number of ISELs that depend on comparison of constants converted")static llvm::Statistic CmpIselsConverted = {"ppc-instr-info", "CmpIselsConverted", "Number of ISELs that depend on comparison of constants converted" }; | |||
59 | STATISTIC(MissedConvertibleImmediateInstrs,static llvm::Statistic MissedConvertibleImmediateInstrs = {"ppc-instr-info" , "MissedConvertibleImmediateInstrs", "Number of compare-immediate instructions fed by constants" } | |||
60 | "Number of compare-immediate instructions fed by constants")static llvm::Statistic MissedConvertibleImmediateInstrs = {"ppc-instr-info" , "MissedConvertibleImmediateInstrs", "Number of compare-immediate instructions fed by constants" }; | |||
61 | STATISTIC(NumRcRotatesConvertedToRcAnd,static llvm::Statistic NumRcRotatesConvertedToRcAnd = {"ppc-instr-info" , "NumRcRotatesConvertedToRcAnd", "Number of record-form rotates converted to record-form andi" } | |||
62 | "Number of record-form rotates converted to record-form andi")static llvm::Statistic NumRcRotatesConvertedToRcAnd = {"ppc-instr-info" , "NumRcRotatesConvertedToRcAnd", "Number of record-form rotates converted to record-form andi" }; | |||
63 | ||||
64 | static cl:: | |||
65 | opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, | |||
66 | cl::desc("Disable analysis for CTR loops")); | |||
67 | ||||
68 | static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt", | |||
69 | cl::desc("Disable compare instruction optimization"), cl::Hidden); | |||
70 | ||||
71 | static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", | |||
72 | cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), | |||
73 | cl::Hidden); | |||
74 | ||||
75 | static cl::opt<bool> | |||
76 | UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, | |||
77 | cl::desc("Use the old (incorrect) instruction latency calculation")); | |||
78 | ||||
79 | static cl::opt<float> | |||
80 | FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), | |||
81 | cl::desc("register pressure factor for the transformations.")); | |||
82 | ||||
83 | static cl::opt<bool> EnableFMARegPressureReduction( | |||
84 | "ppc-fma-rp-reduction", cl::Hidden, cl::init(true), | |||
85 | cl::desc("enable register pressure reduce in machine combiner pass.")); | |||
86 | ||||
87 | // Pin the vtable to this file. | |||
88 | void PPCInstrInfo::anchor() {} | |||
89 | ||||
90 | PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) | |||
91 | : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, | |||
92 | /* CatchRetOpcode */ -1, | |||
93 | STI.isPPC64() ? PPC::BLR8 : PPC::BLR), | |||
94 | Subtarget(STI), RI(STI.getTargetMachine()) {} | |||
95 | ||||
96 | /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for | |||
97 | /// this target when scheduling the DAG. | |||
98 | ScheduleHazardRecognizer * | |||
99 | PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, | |||
100 | const ScheduleDAG *DAG) const { | |||
101 | unsigned Directive = | |||
102 | static_cast<const PPCSubtarget *>(STI)->getCPUDirective(); | |||
103 | if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || | |||
104 | Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { | |||
105 | const InstrItineraryData *II = | |||
106 | static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData(); | |||
107 | return new ScoreboardHazardRecognizer(II, DAG); | |||
108 | } | |||
109 | ||||
110 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); | |||
111 | } | |||
112 | ||||
113 | /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer | |||
114 | /// to use for this target when scheduling the DAG. | |||
115 | ScheduleHazardRecognizer * | |||
116 | PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, | |||
117 | const ScheduleDAG *DAG) const { | |||
118 | unsigned Directive = | |||
119 | DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); | |||
120 | ||||
121 | // FIXME: Leaving this as-is until we have POWER9 scheduling info | |||
122 | if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8) | |||
123 | return new PPCDispatchGroupSBHazardRecognizer(II, DAG); | |||
124 | ||||
125 | // Most subtargets use a PPC970 recognizer. | |||
126 | if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && | |||
127 | Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { | |||
128 | assert(DAG->TII && "No InstrInfo?")(static_cast<void> (0)); | |||
129 | ||||
130 | return new PPCHazardRecognizer970(*DAG); | |||
131 | } | |||
132 | ||||
133 | return new ScoreboardHazardRecognizer(II, DAG); | |||
134 | } | |||
135 | ||||
136 | unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, | |||
137 | const MachineInstr &MI, | |||
138 | unsigned *PredCost) const { | |||
139 | if (!ItinData || UseOldLatencyCalc) | |||
140 | return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); | |||
141 | ||||
142 | // The default implementation of getInstrLatency calls getStageLatency, but | |||
143 | // getStageLatency does not do the right thing for us. While we have | |||
144 | // itinerary, most cores are fully pipelined, and so the itineraries only | |||
145 | // express the first part of the pipeline, not every stage. Instead, we need | |||
146 | // to use the listed output operand cycle number (using operand 0 here, which | |||
147 | // is an output). | |||
148 | ||||
149 | unsigned Latency = 1; | |||
150 | unsigned DefClass = MI.getDesc().getSchedClass(); | |||
151 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |||
152 | const MachineOperand &MO = MI.getOperand(i); | |||
153 | if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) | |||
154 | continue; | |||
155 | ||||
156 | int Cycle = ItinData->getOperandCycle(DefClass, i); | |||
157 | if (Cycle < 0) | |||
158 | continue; | |||
159 | ||||
160 | Latency = std::max(Latency, (unsigned) Cycle); | |||
161 | } | |||
162 | ||||
163 | return Latency; | |||
164 | } | |||
165 | ||||
166 | int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, | |||
167 | const MachineInstr &DefMI, unsigned DefIdx, | |||
168 | const MachineInstr &UseMI, | |||
169 | unsigned UseIdx) const { | |||
170 | int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, | |||
171 | UseMI, UseIdx); | |||
172 | ||||
173 | if (!DefMI.getParent()) | |||
174 | return Latency; | |||
175 | ||||
176 | const MachineOperand &DefMO = DefMI.getOperand(DefIdx); | |||
177 | Register Reg = DefMO.getReg(); | |||
178 | ||||
179 | bool IsRegCR; | |||
180 | if (Register::isVirtualRegister(Reg)) { | |||
181 | const MachineRegisterInfo *MRI = | |||
182 | &DefMI.getParent()->getParent()->getRegInfo(); | |||
183 | IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || | |||
184 | MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); | |||
185 | } else { | |||
186 | IsRegCR = PPC::CRRCRegClass.contains(Reg) || | |||
187 | PPC::CRBITRCRegClass.contains(Reg); | |||
188 | } | |||
189 | ||||
190 | if (UseMI.isBranch() && IsRegCR) { | |||
191 | if (Latency < 0) | |||
192 | Latency = getInstrLatency(ItinData, DefMI); | |||
193 | ||||
194 | // On some cores, there is an additional delay between writing to a condition | |||
195 | // register, and using it from a branch. | |||
196 | unsigned Directive = Subtarget.getCPUDirective(); | |||
197 | switch (Directive) { | |||
198 | default: break; | |||
199 | case PPC::DIR_7400: | |||
200 | case PPC::DIR_750: | |||
201 | case PPC::DIR_970: | |||
202 | case PPC::DIR_E5500: | |||
203 | case PPC::DIR_PWR4: | |||
204 | case PPC::DIR_PWR5: | |||
205 | case PPC::DIR_PWR5X: | |||
206 | case PPC::DIR_PWR6: | |||
207 | case PPC::DIR_PWR6X: | |||
208 | case PPC::DIR_PWR7: | |||
209 | case PPC::DIR_PWR8: | |||
210 | // FIXME: Is this needed for POWER9? | |||
211 | Latency += 2; | |||
212 | break; | |||
213 | } | |||
214 | } | |||
215 | ||||
216 | return Latency; | |||
217 | } | |||
218 | ||||
219 | /// This is an architecture-specific helper function of reassociateOps. | |||
220 | /// Set special operand attributes for new instructions after reassociation. | |||
221 | void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, | |||
222 | MachineInstr &OldMI2, | |||
223 | MachineInstr &NewMI1, | |||
224 | MachineInstr &NewMI2) const { | |||
225 | // Propagate FP flags from the original instructions. | |||
226 | // But clear poison-generating flags because those may not be valid now. | |||
227 | uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); | |||
228 | NewMI1.setFlags(IntersectedFlags); | |||
229 | NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); | |||
230 | NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); | |||
231 | NewMI1.clearFlag(MachineInstr::MIFlag::IsExact); | |||
232 | ||||
233 | NewMI2.setFlags(IntersectedFlags); | |||
234 | NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap); | |||
235 | NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap); | |||
236 | NewMI2.clearFlag(MachineInstr::MIFlag::IsExact); | |||
237 | } | |||
238 | ||||
239 | void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI, | |||
240 | uint16_t Flags) const { | |||
241 | MI.setFlags(Flags); | |||
242 | MI.clearFlag(MachineInstr::MIFlag::NoSWrap); | |||
243 | MI.clearFlag(MachineInstr::MIFlag::NoUWrap); | |||
244 | MI.clearFlag(MachineInstr::MIFlag::IsExact); | |||
245 | } | |||
246 | ||||
247 | // This function does not list all associative and commutative operations, but | |||
248 | // only those worth feeding through the machine combiner in an attempt to | |||
249 | // reduce the critical path. Mostly, this means floating-point operations, | |||
250 | // because they have high latencies(>=5) (compared to other operations, such as | |||
251 | // and/or, which are also associative and commutative, but have low latencies). | |||
252 | bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { | |||
253 | switch (Inst.getOpcode()) { | |||
254 | // Floating point: | |||
255 | // FP Add: | |||
256 | case PPC::FADD: | |||
257 | case PPC::FADDS: | |||
258 | // FP Multiply: | |||
259 | case PPC::FMUL: | |||
260 | case PPC::FMULS: | |||
261 | // Altivec Add: | |||
262 | case PPC::VADDFP: | |||
263 | // VSX Add: | |||
264 | case PPC::XSADDDP: | |||
265 | case PPC::XVADDDP: | |||
266 | case PPC::XVADDSP: | |||
267 | case PPC::XSADDSP: | |||
268 | // VSX Multiply: | |||
269 | case PPC::XSMULDP: | |||
270 | case PPC::XVMULDP: | |||
271 | case PPC::XVMULSP: | |||
272 | case PPC::XSMULSP: | |||
273 | return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && | |||
274 | Inst.getFlag(MachineInstr::MIFlag::FmNsz); | |||
275 | // Fixed point: | |||
276 | // Multiply: | |||
277 | case PPC::MULHD: | |||
278 | case PPC::MULLD: | |||
279 | case PPC::MULHW: | |||
280 | case PPC::MULLW: | |||
281 | return true; | |||
282 | default: | |||
283 | return false; | |||
284 | } | |||
285 | } | |||
286 | ||||
287 | #define InfoArrayIdxFMAInst0 0 | |||
288 | #define InfoArrayIdxFAddInst1 1 | |||
289 | #define InfoArrayIdxFMULInst2 2 | |||
290 | #define InfoArrayIdxAddOpIdx3 3 | |||
291 | #define InfoArrayIdxMULOpIdx4 4 | |||
292 | #define InfoArrayIdxFSubInst5 5 | |||
293 | // Array keeps info for FMA instructions: | |||
294 | // Index 0(InfoArrayIdxFMAInst): FMA instruction; | |||
295 | // Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA; | |||
296 | // Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA; | |||
297 | // Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands; | |||
298 | // Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands; | |||
299 | // second MUL operand index is plus 1; | |||
300 | // Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA. | |||
301 | static const uint16_t FMAOpIdxInfo[][6] = { | |||
302 | // FIXME: Add more FMA instructions like XSNMADDADP and so on. | |||
303 | {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP}, | |||
304 | {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP}, | |||
305 | {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP}, | |||
306 | {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP}, | |||
307 | {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB}, | |||
308 | {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}}; | |||
309 | ||||
310 | // Check if an opcode is a FMA instruction. If it is, return the index in array | |||
311 | // FMAOpIdxInfo. Otherwise, return -1. | |||
312 | int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { | |||
313 | for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++) | |||
314 | if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst0] == Opcode) | |||
315 | return I; | |||
316 | return -1; | |||
317 | } | |||
318 | ||||
319 | // On PowerPC target, we have two kinds of patterns related to FMA: | |||
320 | // 1: Improve ILP. | |||
321 | // Try to reassociate FMA chains like below: | |||
322 | // | |||
323 | // Pattern 1: | |||
324 | // A = FADD X, Y (Leaf) | |||
325 | // B = FMA A, M21, M22 (Prev) | |||
326 | // C = FMA B, M31, M32 (Root) | |||
327 | // --> | |||
328 | // A = FMA X, M21, M22 | |||
329 | // B = FMA Y, M31, M32 | |||
330 | // C = FADD A, B | |||
331 | // | |||
332 | // Pattern 2: | |||
333 | // A = FMA X, M11, M12 (Leaf) | |||
334 | // B = FMA A, M21, M22 (Prev) | |||
335 | // C = FMA B, M31, M32 (Root) | |||
336 | // --> | |||
337 | // A = FMUL M11, M12 | |||
338 | // B = FMA X, M21, M22 | |||
339 | // D = FMA A, M31, M32 | |||
340 | // C = FADD B, D | |||
341 | // | |||
342 | // breaking the dependency between A and B, allowing FMA to be executed in | |||
343 | // parallel (or back-to-back in a pipeline) instead of depending on each other. | |||
344 | // | |||
345 | // 2: Reduce register pressure. | |||
346 | // Try to reassociate FMA with FSUB and a constant like below: | |||
347 | // C is a floating point const. | |||
348 | // | |||
349 | // Pattern 1: | |||
350 | // A = FSUB X, Y (Leaf) | |||
351 | // D = FMA B, C, A (Root) | |||
352 | // --> | |||
353 | // A = FMA B, Y, -C | |||
354 | // D = FMA A, X, C | |||
355 | // | |||
356 | // Pattern 2: | |||
357 | // A = FSUB X, Y (Leaf) | |||
358 | // D = FMA B, A, C (Root) | |||
359 | // --> | |||
360 | // A = FMA B, Y, -C | |||
361 | // D = FMA A, X, C | |||
362 | // | |||
363 | // Before the transformation, A must be assigned with different hardware | |||
364 | // register with D. After the transformation, A and D must be assigned with | |||
365 | // same hardware register due to TIE attribute of FMA instructions. | |||
366 | // | |||
367 | bool PPCInstrInfo::getFMAPatterns( | |||
368 | MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, | |||
369 | bool DoRegPressureReduce) const { | |||
370 | MachineBasicBlock *MBB = Root.getParent(); | |||
371 | const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); | |||
372 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
373 | ||||
374 | auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) { | |||
375 | for (const auto &MO : Instr.explicit_operands()) | |||
376 | if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) | |||
377 | return false; | |||
378 | return true; | |||
379 | }; | |||
380 | ||||
381 | auto IsReassociableAddOrSub = [&](const MachineInstr &Instr, | |||
382 | unsigned OpType) { | |||
383 | if (Instr.getOpcode() != | |||
384 | FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType]) | |||
385 | return false; | |||
386 | ||||
387 | // Instruction can be reassociated. | |||
388 | // fast math flags may prohibit reassociation. | |||
389 | if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) && | |||
390 | Instr.getFlag(MachineInstr::MIFlag::FmNsz))) | |||
391 | return false; | |||
392 | ||||
393 | // Instruction operands are virtual registers for reassociation. | |||
394 | if (!IsAllOpsVirtualReg(Instr)) | |||
395 | return false; | |||
396 | ||||
397 | // For register pressure reassociation, the FSub must have only one use as | |||
398 | // we want to delete the sub to save its def. | |||
399 | if (OpType == InfoArrayIdxFSubInst5 && | |||
400 | !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg())) | |||
401 | return false; | |||
402 | ||||
403 | return true; | |||
404 | }; | |||
405 | ||||
406 | auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx, | |||
407 | int16_t &MulOpIdx, bool IsLeaf) { | |||
408 | int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode()); | |||
409 | if (Idx < 0) | |||
410 | return false; | |||
411 | ||||
412 | // Instruction can be reassociated. | |||
413 | // fast math flags may prohibit reassociation. | |||
414 | if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) && | |||
415 | Instr.getFlag(MachineInstr::MIFlag::FmNsz))) | |||
416 | return false; | |||
417 | ||||
418 | // Instruction operands are virtual registers for reassociation. | |||
419 | if (!IsAllOpsVirtualReg(Instr)) | |||
420 | return false; | |||
421 | ||||
422 | MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx4]; | |||
423 | if (IsLeaf) | |||
424 | return true; | |||
425 | ||||
426 | AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx3]; | |||
427 | ||||
428 | const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx); | |||
429 | MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg()); | |||
430 | // If 'add' operand's def is not in current block, don't do ILP related opt. | |||
431 | if (!MIAdd || MIAdd->getParent() != MBB) | |||
432 | return false; | |||
433 | ||||
434 | // If this is not Leaf FMA Instr, its 'add' operand should only have one use | |||
435 | // as this fma will be changed later. | |||
436 | return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg()); | |||
437 | }; | |||
438 | ||||
439 | int16_t AddOpIdx = -1; | |||
440 | int16_t MulOpIdx = -1; | |||
441 | ||||
442 | bool IsUsedOnceL = false; | |||
443 | bool IsUsedOnceR = false; | |||
444 | MachineInstr *MULInstrL = nullptr; | |||
445 | MachineInstr *MULInstrR = nullptr; | |||
446 | ||||
447 | auto IsRPReductionCandidate = [&]() { | |||
448 | // Currently, we only support float and double. | |||
449 | // FIXME: add support for other types. | |||
450 | unsigned Opcode = Root.getOpcode(); | |||
451 | if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP) | |||
452 | return false; | |||
453 | ||||
454 | // Root must be a valid FMA like instruction. | |||
455 | // Treat it as leaf as we don't care its add operand. | |||
456 | if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) { | |||
457 | assert((MulOpIdx >= 0) && "mul operand index not right!")(static_cast<void> (0)); | |||
458 | Register MULRegL = TRI->lookThruSingleUseCopyChain( | |||
459 | Root.getOperand(MulOpIdx).getReg(), MRI); | |||
460 | Register MULRegR = TRI->lookThruSingleUseCopyChain( | |||
461 | Root.getOperand(MulOpIdx + 1).getReg(), MRI); | |||
462 | if (!MULRegL && !MULRegR) | |||
463 | return false; | |||
464 | ||||
465 | if (MULRegL && !MULRegR) { | |||
466 | MULRegR = | |||
467 | TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI); | |||
468 | IsUsedOnceL = true; | |||
469 | } else if (!MULRegL && MULRegR) { | |||
470 | MULRegL = | |||
471 | TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI); | |||
472 | IsUsedOnceR = true; | |||
473 | } else { | |||
474 | IsUsedOnceL = true; | |||
475 | IsUsedOnceR = true; | |||
476 | } | |||
477 | ||||
478 | if (!Register::isVirtualRegister(MULRegL) || | |||
479 | !Register::isVirtualRegister(MULRegR)) | |||
480 | return false; | |||
481 | ||||
482 | MULInstrL = MRI->getVRegDef(MULRegL); | |||
483 | MULInstrR = MRI->getVRegDef(MULRegR); | |||
484 | return true; | |||
485 | } | |||
486 | return false; | |||
487 | }; | |||
488 | ||||
489 | // Register pressure fma reassociation patterns. | |||
490 | if (DoRegPressureReduce && IsRPReductionCandidate()) { | |||
491 | assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!")(static_cast<void> (0)); | |||
492 | // Register pressure pattern 1 | |||
493 | if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR && | |||
494 | IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst5)) { | |||
495 | LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n")do { } while (false); | |||
496 | Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA); | |||
497 | return true; | |||
498 | } | |||
499 | ||||
500 | // Register pressure pattern 2 | |||
501 | if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL && | |||
502 | IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst5))) { | |||
503 | LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n")do { } while (false); | |||
504 | Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC); | |||
505 | return true; | |||
506 | } | |||
507 | } | |||
508 | ||||
509 | // ILP fma reassociation patterns. | |||
510 | // Root must be a valid FMA like instruction. | |||
511 | AddOpIdx = -1; | |||
512 | if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false)) | |||
513 | return false; | |||
514 | ||||
515 | assert((AddOpIdx >= 0) && "add operand index not right!")(static_cast<void> (0)); | |||
516 | ||||
517 | Register RegB = Root.getOperand(AddOpIdx).getReg(); | |||
518 | MachineInstr *Prev = MRI->getUniqueVRegDef(RegB); | |||
519 | ||||
520 | // Prev must be a valid FMA like instruction. | |||
521 | AddOpIdx = -1; | |||
522 | if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false)) | |||
523 | return false; | |||
524 | ||||
525 | assert((AddOpIdx >= 0) && "add operand index not right!")(static_cast<void> (0)); | |||
526 | ||||
527 | Register RegA = Prev->getOperand(AddOpIdx).getReg(); | |||
528 | MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA); | |||
529 | AddOpIdx = -1; | |||
530 | if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) { | |||
531 | Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM); | |||
532 | LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n")do { } while (false); | |||
533 | return true; | |||
534 | } | |||
535 | if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst1)) { | |||
536 | Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM); | |||
537 | LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n")do { } while (false); | |||
538 | return true; | |||
539 | } | |||
540 | return false; | |||
541 | } | |||
542 | ||||
543 | void PPCInstrInfo::finalizeInsInstrs( | |||
544 | MachineInstr &Root, MachineCombinerPattern &P, | |||
545 | SmallVectorImpl<MachineInstr *> &InsInstrs) const { | |||
546 | assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!")(static_cast<void> (0)); | |||
547 | ||||
548 | MachineFunction *MF = Root.getMF(); | |||
549 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
550 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
551 | MachineConstantPool *MCP = MF->getConstantPool(); | |||
552 | ||||
553 | int16_t Idx = getFMAOpIdxInfo(Root.getOpcode()); | |||
554 | if (Idx < 0) | |||
| ||||
555 | return; | |||
556 | ||||
557 | uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx4]; | |||
558 | ||||
559 | // For now we only need to fix up placeholder for register pressure reduce | |||
560 | // patterns. | |||
561 | Register ConstReg = 0; | |||
562 | switch (P) { | |||
563 | case MachineCombinerPattern::REASSOC_XY_BCA: | |||
564 | ConstReg = | |||
565 | TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI); | |||
566 | break; | |||
567 | case MachineCombinerPattern::REASSOC_XY_BAC: | |||
568 | ConstReg = | |||
569 | TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI); | |||
570 | break; | |||
571 | default: | |||
572 | // Not register pressure reduce patterns. | |||
573 | return; | |||
574 | } | |||
575 | ||||
576 | MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg); | |||
577 | // Get const value from const pool. | |||
578 | const Constant *C = getConstantFromConstantPool(ConstDefInstr); | |||
579 | assert(isa<llvm::ConstantFP>(C) && "not a valid constant!")(static_cast<void> (0)); | |||
580 | ||||
581 | // Get negative fp const. | |||
582 | APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF()); | |||
583 | F1.changeSign(); | |||
584 | Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1); | |||
585 | Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType()); | |||
586 | ||||
587 | // Put negative fp const into constant pool. | |||
588 | unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment); | |||
589 | ||||
590 | MachineOperand *Placeholder = nullptr; | |||
591 | // Record the placeholder PPC::ZERO8 we add in reassociateFMA. | |||
592 | for (auto *Inst : InsInstrs) { | |||
593 | for (MachineOperand &Operand : Inst->explicit_operands()) { | |||
594 | assert(Operand.isReg() && "Invalid instruction in InsInstrs!")(static_cast<void> (0)); | |||
595 | if (Operand.getReg() == PPC::ZERO8) { | |||
596 | Placeholder = &Operand; | |||
597 | break; | |||
598 | } | |||
599 | } | |||
600 | } | |||
601 | ||||
602 | assert(Placeholder && "Placeholder does not exist!")(static_cast<void> (0)); | |||
603 | ||||
604 | // Generate instructions to load the const fp from constant pool. | |||
605 | // We only support PPC64 and medium code model. | |||
606 | Register LoadNewConst = | |||
607 | generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs); | |||
608 | ||||
609 | // Fill the placeholder with the new load from constant pool. | |||
610 | Placeholder->setReg(LoadNewConst); | |||
| ||||
611 | } | |||
612 | ||||
613 | bool PPCInstrInfo::shouldReduceRegisterPressure( | |||
614 | MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const { | |||
615 | ||||
616 | if (!EnableFMARegPressureReduction) | |||
617 | return false; | |||
618 | ||||
619 | // Currently, we only enable register pressure reducing in machine combiner | |||
620 | // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector | |||
621 | // support. | |||
622 | // | |||
623 | // So we need following instructions to access a TOC entry: | |||
624 | // | |||
625 | // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0 | |||
626 | // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0, | |||
627 | // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool) | |||
628 | // | |||
629 | // FIXME: add more supported targets, like Small and Large code model, PPC32, | |||
630 | // AIX. | |||
631 | if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() && | |||
632 | Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium)) | |||
633 | return false; | |||
634 | ||||
635 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
636 | MachineFunction *MF = MBB->getParent(); | |||
637 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
638 | ||||
639 | auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector<unsigned> { | |||
640 | RegionPressure Pressure; | |||
641 | RegPressureTracker RPTracker(Pressure); | |||
642 | ||||
643 | // Initialize the register pressure tracker. | |||
644 | RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), | |||
645 | /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); | |||
646 | ||||
647 | for (MachineBasicBlock::iterator MII = MBB->instr_end(), | |||
648 | MIE = MBB->instr_begin(); | |||
649 | MII != MIE; --MII) { | |||
650 | MachineInstr &MI = *std::prev(MII); | |||
651 | if (MI.isDebugValue() || MI.isDebugLabel()) | |||
652 | continue; | |||
653 | RegisterOperands RegOpers; | |||
654 | RegOpers.collect(MI, *TRI, *MRI, false, false); | |||
655 | RPTracker.recedeSkipDebugValues(); | |||
656 | assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!")(static_cast<void> (0)); | |||
657 | RPTracker.recede(RegOpers); | |||
658 | } | |||
659 | ||||
660 | // Close the RPTracker to finalize live ins. | |||
661 | RPTracker.closeRegion(); | |||
662 | ||||
663 | return RPTracker.getPressure().MaxSetPressure; | |||
664 | }; | |||
665 | ||||
666 | // For now we only care about float and double type fma. | |||
667 | unsigned VSSRCLimit = TRI->getRegPressureSetLimit( | |||
668 | *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); | |||
669 | ||||
670 | // Only reduce register pressure when pressure is high. | |||
671 | return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > | |||
672 | (float)VSSRCLimit * FMARPFactor; | |||
673 | } | |||
674 | ||||
675 | bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const { | |||
676 | // I has only one memory operand which is load from constant pool. | |||
677 | if (!I->hasOneMemOperand()) | |||
678 | return false; | |||
679 | ||||
680 | MachineMemOperand *Op = I->memoperands()[0]; | |||
681 | return Op->isLoad() && Op->getPseudoValue() && | |||
682 | Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool; | |||
683 | } | |||
684 | ||||
685 | Register PPCInstrInfo::generateLoadForNewConst( | |||
686 | unsigned Idx, MachineInstr *MI, Type *Ty, | |||
687 | SmallVectorImpl<MachineInstr *> &InsInstrs) const { | |||
688 | // Now we only support PPC64, Medium code model and P9 with vector. | |||
689 | // We have immutable pattern to access const pool. See function | |||
690 | // shouldReduceRegisterPressure. | |||
691 | assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&(static_cast<void> (0)) | |||
692 | Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&(static_cast<void> (0)) | |||
693 | "Target not supported!\n")(static_cast<void> (0)); | |||
694 | ||||
695 | MachineFunction *MF = MI->getMF(); | |||
696 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
697 | ||||
698 | // Generate ADDIStocHA8 | |||
699 | Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); | |||
700 | MachineInstrBuilder TOCOffset = | |||
701 | BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1) | |||
702 | .addReg(PPC::X2) | |||
703 | .addConstantPoolIndex(Idx); | |||
704 | ||||
705 | assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&(static_cast<void> (0)) | |||
706 | "Only float and double are supported!")(static_cast<void> (0)); | |||
707 | ||||
708 | unsigned LoadOpcode; | |||
709 | // Should be float type or double type. | |||
710 | if (Ty->isFloatTy()) | |||
711 | LoadOpcode = PPC::DFLOADf32; | |||
712 | else | |||
713 | LoadOpcode = PPC::DFLOADf64; | |||
714 | ||||
715 | const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg()); | |||
716 | Register VReg2 = MRI->createVirtualRegister(RC); | |||
717 | MachineMemOperand *MMO = MF->getMachineMemOperand( | |||
718 | MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, | |||
719 | Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty)); | |||
720 | ||||
721 | // Generate Load from constant pool. | |||
722 | MachineInstrBuilder Load = | |||
723 | BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2) | |||
724 | .addConstantPoolIndex(Idx) | |||
725 | .addReg(VReg1, getKillRegState(true)) | |||
726 | .addMemOperand(MMO); | |||
727 | ||||
728 | Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO); | |||
729 | ||||
730 | // Insert the toc load instructions into InsInstrs. | |||
731 | InsInstrs.insert(InsInstrs.begin(), Load); | |||
732 | InsInstrs.insert(InsInstrs.begin(), TOCOffset); | |||
733 | return VReg2; | |||
734 | } | |||
735 | ||||
736 | // This function returns the const value in constant pool if the \p I is a load | |||
737 | // from constant pool. | |||
738 | const Constant * | |||
739 | PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const { | |||
740 | MachineFunction *MF = I->getMF(); | |||
741 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
742 | MachineConstantPool *MCP = MF->getConstantPool(); | |||
743 | assert(I->mayLoad() && "Should be a load instruction.\n")(static_cast<void> (0)); | |||
744 | for (auto MO : I->uses()) { | |||
745 | if (!MO.isReg()) | |||
746 | continue; | |||
747 | Register Reg = MO.getReg(); | |||
748 | if (Reg == 0 || !Register::isVirtualRegister(Reg)) | |||
749 | continue; | |||
750 | // Find the toc address. | |||
751 | MachineInstr *DefMI = MRI->getVRegDef(Reg); | |||
752 | for (auto MO2 : DefMI->uses()) | |||
753 | if (MO2.isCPI()) | |||
754 | return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal; | |||
755 | } | |||
756 | return nullptr; | |||
757 | } | |||
758 | ||||
759 | bool PPCInstrInfo::getMachineCombinerPatterns( | |||
760 | MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, | |||
761 | bool DoRegPressureReduce) const { | |||
762 | // Using the machine combiner in this way is potentially expensive, so | |||
763 | // restrict to when aggressive optimizations are desired. | |||
764 | if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) | |||
765 | return false; | |||
766 | ||||
767 | if (getFMAPatterns(Root, Patterns, DoRegPressureReduce)) | |||
768 | return true; | |||
769 | ||||
770 | return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, | |||
771 | DoRegPressureReduce); | |||
772 | } | |||
773 | ||||
774 | void PPCInstrInfo::genAlternativeCodeSequence( | |||
775 | MachineInstr &Root, MachineCombinerPattern Pattern, | |||
776 | SmallVectorImpl<MachineInstr *> &InsInstrs, | |||
777 | SmallVectorImpl<MachineInstr *> &DelInstrs, | |||
778 | DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { | |||
779 | switch (Pattern) { | |||
780 | case MachineCombinerPattern::REASSOC_XY_AMM_BMM: | |||
781 | case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: | |||
782 | case MachineCombinerPattern::REASSOC_XY_BCA: | |||
783 | case MachineCombinerPattern::REASSOC_XY_BAC: | |||
784 | reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); | |||
785 | break; | |||
786 | default: | |||
787 | // Reassociate default patterns. | |||
788 | TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, | |||
789 | DelInstrs, InstrIdxForVirtReg); | |||
790 | break; | |||
791 | } | |||
792 | } | |||
793 | ||||
794 | void PPCInstrInfo::reassociateFMA( | |||
795 | MachineInstr &Root, MachineCombinerPattern Pattern, | |||
796 | SmallVectorImpl<MachineInstr *> &InsInstrs, | |||
797 | SmallVectorImpl<MachineInstr *> &DelInstrs, | |||
798 | DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { | |||
799 | MachineFunction *MF = Root.getMF(); | |||
800 | MachineRegisterInfo &MRI = MF->getRegInfo(); | |||
801 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
802 | MachineOperand &OpC = Root.getOperand(0); | |||
803 | Register RegC = OpC.getReg(); | |||
804 | const TargetRegisterClass *RC = MRI.getRegClass(RegC); | |||
805 | MRI.constrainRegClass(RegC, RC); | |||
806 | ||||
807 | unsigned FmaOp = Root.getOpcode(); | |||
808 | int16_t Idx = getFMAOpIdxInfo(FmaOp); | |||
809 | assert(Idx >= 0 && "Root must be a FMA instruction")(static_cast<void> (0)); | |||
810 | ||||
811 | bool IsILPReassociate = | |||
812 | (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) || | |||
813 | (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM); | |||
814 | ||||
815 | uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx3]; | |||
816 | uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx4]; | |||
817 | ||||
818 | MachineInstr *Prev = nullptr; | |||
819 | MachineInstr *Leaf = nullptr; | |||
820 | switch (Pattern) { | |||
821 | default: | |||
822 | llvm_unreachable("not recognized pattern!")__builtin_unreachable(); | |||
823 | case MachineCombinerPattern::REASSOC_XY_AMM_BMM: | |||
824 | case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: | |||
825 | Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); | |||
826 | Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); | |||
827 | break; | |||
828 | case MachineCombinerPattern::REASSOC_XY_BAC: { | |||
829 | Register MULReg = | |||
830 | TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI); | |||
831 | Leaf = MRI.getVRegDef(MULReg); | |||
832 | break; | |||
833 | } | |||
834 | case MachineCombinerPattern::REASSOC_XY_BCA: { | |||
835 | Register MULReg = TRI->lookThruCopyLike( | |||
836 | Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI); | |||
837 | Leaf = MRI.getVRegDef(MULReg); | |||
838 | break; | |||
839 | } | |||
840 | } | |||
841 | ||||
842 | uint16_t IntersectedFlags = 0; | |||
843 | if (IsILPReassociate) | |||
844 | IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); | |||
845 | else | |||
846 | IntersectedFlags = Root.getFlags() & Leaf->getFlags(); | |||
847 | ||||
848 | auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg, | |||
849 | bool &KillFlag) { | |||
850 | Reg = Operand.getReg(); | |||
851 | MRI.constrainRegClass(Reg, RC); | |||
852 | KillFlag = Operand.isKill(); | |||
853 | }; | |||
854 | ||||
855 | auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1, | |||
856 | Register &MulOp2, Register &AddOp, | |||
857 | bool &MulOp1KillFlag, bool &MulOp2KillFlag, | |||
858 | bool &AddOpKillFlag) { | |||
859 | GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag); | |||
860 | GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag); | |||
861 | GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag); | |||
862 | }; | |||
863 | ||||
864 | Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11, | |||
865 | RegA21, RegB; | |||
866 | bool KillX = false, KillY = false, KillM11 = false, KillM12 = false, | |||
867 | KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false, | |||
868 | KillA11 = false, KillA21 = false, KillB = false; | |||
869 | ||||
870 | GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB); | |||
871 | ||||
872 | if (IsILPReassociate) | |||
873 | GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21); | |||
874 | ||||
875 | if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { | |||
876 | GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11); | |||
877 | GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX); | |||
878 | } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { | |||
879 | GetOperandInfo(Leaf->getOperand(1), RegX, KillX); | |||
880 | GetOperandInfo(Leaf->getOperand(2), RegY, KillY); | |||
881 | } else { | |||
882 | // Get FSUB instruction info. | |||
883 | GetOperandInfo(Leaf->getOperand(1), RegX, KillX); | |||
884 | GetOperandInfo(Leaf->getOperand(2), RegY, KillY); | |||
885 | } | |||
886 | ||||
887 | // Create new virtual registers for the new results instead of | |||
888 | // recycling legacy ones because the MachineCombiner's computation of the | |||
889 | // critical path requires a new register definition rather than an existing | |||
890 | // one. | |||
891 | // For register pressure reassociation, we only need create one virtual | |||
892 | // register for the new fma. | |||
893 | Register NewVRA = MRI.createVirtualRegister(RC); | |||
894 | InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0)); | |||
895 | ||||
896 | Register NewVRB = 0; | |||
897 | if (IsILPReassociate) { | |||
898 | NewVRB = MRI.createVirtualRegister(RC); | |||
899 | InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); | |||
900 | } | |||
901 | ||||
902 | Register NewVRD = 0; | |||
903 | if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { | |||
904 | NewVRD = MRI.createVirtualRegister(RC); | |||
905 | InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2)); | |||
906 | } | |||
907 | ||||
908 | auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd, | |||
909 | Register RegMul1, bool KillRegMul1, | |||
910 | Register RegMul2, bool KillRegMul2) { | |||
911 | MI->getOperand(AddOpIdx).setReg(RegAdd); | |||
912 | MI->getOperand(AddOpIdx).setIsKill(KillAdd); | |||
913 | MI->getOperand(FirstMulOpIdx).setReg(RegMul1); | |||
914 | MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1); | |||
915 | MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2); | |||
916 | MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2); | |||
917 | }; | |||
918 | ||||
919 | MachineInstrBuilder NewARegPressure, NewCRegPressure; | |||
920 | switch (Pattern) { | |||
921 | default: | |||
922 | llvm_unreachable("not recognized pattern!")__builtin_unreachable(); | |||
923 | case MachineCombinerPattern::REASSOC_XY_AMM_BMM: { | |||
924 | // Create new instructions for insertion. | |||
925 | MachineInstrBuilder MINewB = | |||
926 | BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) | |||
927 | .addReg(RegX, getKillRegState(KillX)) | |||
928 | .addReg(RegM21, getKillRegState(KillM21)) | |||
929 | .addReg(RegM22, getKillRegState(KillM22)); | |||
930 | MachineInstrBuilder MINewA = | |||
931 | BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) | |||
932 | .addReg(RegY, getKillRegState(KillY)) | |||
933 | .addReg(RegM31, getKillRegState(KillM31)) | |||
934 | .addReg(RegM32, getKillRegState(KillM32)); | |||
935 | // If AddOpIdx is not 1, adjust the order. | |||
936 | if (AddOpIdx != 1) { | |||
937 | AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22); | |||
938 | AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32); | |||
939 | } | |||
940 | ||||
941 | MachineInstrBuilder MINewC = | |||
942 | BuildMI(*MF, Root.getDebugLoc(), | |||
943 | get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst1]), RegC) | |||
944 | .addReg(NewVRB, getKillRegState(true)) | |||
945 | .addReg(NewVRA, getKillRegState(true)); | |||
946 | ||||
947 | // Update flags for newly created instructions. | |||
948 | setSpecialOperandAttr(*MINewA, IntersectedFlags); | |||
949 | setSpecialOperandAttr(*MINewB, IntersectedFlags); | |||
950 | setSpecialOperandAttr(*MINewC, IntersectedFlags); | |||
951 | ||||
952 | // Record new instructions for insertion. | |||
953 | InsInstrs.push_back(MINewA); | |||
954 | InsInstrs.push_back(MINewB); | |||
955 | InsInstrs.push_back(MINewC); | |||
956 | break; | |||
957 | } | |||
958 | case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: { | |||
959 | assert(NewVRD && "new FMA register not created!")(static_cast<void> (0)); | |||
960 | // Create new instructions for insertion. | |||
961 | MachineInstrBuilder MINewA = | |||
962 | BuildMI(*MF, Leaf->getDebugLoc(), | |||
963 | get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst2]), NewVRA) | |||
964 | .addReg(RegM11, getKillRegState(KillM11)) | |||
965 | .addReg(RegM12, getKillRegState(KillM12)); | |||
966 | MachineInstrBuilder MINewB = | |||
967 | BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) | |||
968 | .addReg(RegX, getKillRegState(KillX)) | |||
969 | .addReg(RegM21, getKillRegState(KillM21)) | |||
970 | .addReg(RegM22, getKillRegState(KillM22)); | |||
971 | MachineInstrBuilder MINewD = | |||
972 | BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD) | |||
973 | .addReg(NewVRA, getKillRegState(true)) | |||
974 | .addReg(RegM31, getKillRegState(KillM31)) | |||
975 | .addReg(RegM32, getKillRegState(KillM32)); | |||
976 | // If AddOpIdx is not 1, adjust the order. | |||
977 | if (AddOpIdx != 1) { | |||
978 | AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22); | |||
979 | AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32, | |||
980 | KillM32); | |||
981 | } | |||
982 | ||||
983 | MachineInstrBuilder MINewC = | |||
984 | BuildMI(*MF, Root.getDebugLoc(), | |||
985 | get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst1]), RegC) | |||
986 | .addReg(NewVRB, getKillRegState(true)) | |||
987 | .addReg(NewVRD, getKillRegState(true)); | |||
988 | ||||
989 | // Update flags for newly created instructions. | |||
990 | setSpecialOperandAttr(*MINewA, IntersectedFlags); | |||
991 | setSpecialOperandAttr(*MINewB, IntersectedFlags); | |||
992 | setSpecialOperandAttr(*MINewD, IntersectedFlags); | |||
993 | setSpecialOperandAttr(*MINewC, IntersectedFlags); | |||
994 | ||||
995 | // Record new instructions for insertion. | |||
996 | InsInstrs.push_back(MINewA); | |||
997 | InsInstrs.push_back(MINewB); | |||
998 | InsInstrs.push_back(MINewD); | |||
999 | InsInstrs.push_back(MINewC); | |||
1000 | break; | |||
1001 | } | |||
1002 | case MachineCombinerPattern::REASSOC_XY_BAC: | |||
1003 | case MachineCombinerPattern::REASSOC_XY_BCA: { | |||
1004 | Register VarReg; | |||
1005 | bool KillVarReg = false; | |||
1006 | if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) { | |||
1007 | VarReg = RegM31; | |||
1008 | KillVarReg = KillM31; | |||
1009 | } else { | |||
1010 | VarReg = RegM32; | |||
1011 | KillVarReg = KillM32; | |||
1012 | } | |||
1013 | // We don't want to get negative const from memory pool too early, as the | |||
1014 | // created entry will not be deleted even if it has no users. Since all | |||
1015 | // operand of Leaf and Root are virtual register, we use zero register | |||
1016 | // here as a placeholder. When the InsInstrs is selected in | |||
1017 | // MachineCombiner, we call finalizeInsInstrs to replace the zero register | |||
1018 | // with a virtual register which is a load from constant pool. | |||
1019 | NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) | |||
1020 | .addReg(RegB, getKillRegState(RegB)) | |||
1021 | .addReg(RegY, getKillRegState(KillY)) | |||
1022 | .addReg(PPC::ZERO8); | |||
1023 | NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC) | |||
1024 | .addReg(NewVRA, getKillRegState(true)) | |||
1025 | .addReg(RegX, getKillRegState(KillX)) | |||
1026 | .addReg(VarReg, getKillRegState(KillVarReg)); | |||
1027 | // For now, we only support xsmaddadp/xsmaddasp, their add operand are | |||
1028 | // both at index 1, no need to adjust. | |||
1029 | // FIXME: when add more fma instructions support, like fma/fmas, adjust | |||
1030 | // the operand index here. | |||
1031 | break; | |||
1032 | } | |||
1033 | } | |||
1034 | ||||
1035 | if (!IsILPReassociate) { | |||
1036 | setSpecialOperandAttr(*NewARegPressure, IntersectedFlags); | |||
1037 | setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags); | |||
1038 | ||||
1039 | InsInstrs.push_back(NewARegPressure); | |||
1040 | InsInstrs.push_back(NewCRegPressure); | |||
1041 | } | |||
1042 | ||||
1043 | assert(!InsInstrs.empty() &&(static_cast<void> (0)) | |||
1044 | "Insertion instructions set should not be empty!")(static_cast<void> (0)); | |||
1045 | ||||
1046 | // Record old instructions for deletion. | |||
1047 | DelInstrs.push_back(Leaf); | |||
1048 | if (IsILPReassociate) | |||
1049 | DelInstrs.push_back(Prev); | |||
1050 | DelInstrs.push_back(&Root); | |||
1051 | } | |||
1052 | ||||
1053 | // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. | |||
1054 | bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, | |||
1055 | Register &SrcReg, Register &DstReg, | |||
1056 | unsigned &SubIdx) const { | |||
1057 | switch (MI.getOpcode()) { | |||
1058 | default: return false; | |||
1059 | case PPC::EXTSW: | |||
1060 | case PPC::EXTSW_32: | |||
1061 | case PPC::EXTSW_32_64: | |||
1062 | SrcReg = MI.getOperand(1).getReg(); | |||
1063 | DstReg = MI.getOperand(0).getReg(); | |||
1064 | SubIdx = PPC::sub_32; | |||
1065 | return true; | |||
1066 | } | |||
1067 | } | |||
1068 | ||||
1069 | unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, | |||
1070 | int &FrameIndex) const { | |||
1071 | unsigned Opcode = MI.getOpcode(); | |||
1072 | const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray(); | |||
1073 | const unsigned *End = OpcodesForSpill + SOK_LastOpcodeSpill; | |||
1074 | ||||
1075 | if (End != std::find(OpcodesForSpill, End, Opcode)) { | |||
1076 | // Check for the operands added by addFrameReference (the immediate is the | |||
1077 | // offset which defaults to 0). | |||
1078 | if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && | |||
1079 | MI.getOperand(2).isFI()) { | |||
1080 | FrameIndex = MI.getOperand(2).getIndex(); | |||
1081 | return MI.getOperand(0).getReg(); | |||
1082 | } | |||
1083 | } | |||
1084 | return 0; | |||
1085 | } | |||
1086 | ||||
1087 | // For opcodes with the ReMaterializable flag set, this function is called to | |||
1088 | // verify the instruction is really rematable. | |||
1089 | bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, | |||
1090 | AliasAnalysis *AA) const { | |||
1091 | switch (MI.getOpcode()) { | |||
1092 | default: | |||
1093 | // This function should only be called for opcodes with the ReMaterializable | |||
1094 | // flag set. | |||
1095 | llvm_unreachable("Unknown rematerializable operation!")__builtin_unreachable(); | |||
1096 | break; | |||
1097 | case PPC::LI: | |||
1098 | case PPC::LI8: | |||
1099 | case PPC::PLI: | |||
1100 | case PPC::PLI8: | |||
1101 | case PPC::LIS: | |||
1102 | case PPC::LIS8: | |||
1103 | case PPC::ADDIStocHA: | |||
1104 | case PPC::ADDIStocHA8: | |||
1105 | case PPC::ADDItocL: | |||
1106 | case PPC::LOAD_STACK_GUARD: | |||
1107 | case PPC::XXLXORz: | |||
1108 | case PPC::XXLXORspz: | |||
1109 | case PPC::XXLXORdpz: | |||
1110 | case PPC::XXLEQVOnes: | |||
1111 | case PPC::XXSPLTI32DX: | |||
1112 | case PPC::V_SET0B: | |||
1113 | case PPC::V_SET0H: | |||
1114 | case PPC::V_SET0: | |||
1115 | case PPC::V_SETALLONESB: | |||
1116 | case PPC::V_SETALLONESH: | |||
1117 | case PPC::V_SETALLONES: | |||
1118 | case PPC::CRSET: | |||
1119 | case PPC::CRUNSET: | |||
1120 | case PPC::XXSETACCZ: | |||
1121 | return true; | |||
1122 | } | |||
1123 | return false; | |||
1124 | } | |||
1125 | ||||
1126 | unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, | |||
1127 | int &FrameIndex) const { | |||
1128 | unsigned Opcode = MI.getOpcode(); | |||
1129 | const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray(); | |||
1130 | const unsigned *End = OpcodesForSpill + SOK_LastOpcodeSpill; | |||
1131 | ||||
1132 | if (End != std::find(OpcodesForSpill, End, Opcode)) { | |||
1133 | if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() && | |||
1134 | MI.getOperand(2).isFI()) { | |||
1135 | FrameIndex = MI.getOperand(2).getIndex(); | |||
1136 | return MI.getOperand(0).getReg(); | |||
1137 | } | |||
1138 | } | |||
1139 | return 0; | |||
1140 | } | |||
1141 | ||||
1142 | MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, | |||
1143 | unsigned OpIdx1, | |||
1144 | unsigned OpIdx2) const { | |||
1145 | MachineFunction &MF = *MI.getParent()->getParent(); | |||
1146 | ||||
1147 | // Normal instructions can be commuted the obvious way. | |||
1148 | if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec) | |||
1149 | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); | |||
1150 | // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a | |||
1151 | // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because | |||
1152 | // changing the relative order of the mask operands might change what happens | |||
1153 | // to the high-bits of the mask (and, thus, the result). | |||
1154 | ||||
1155 | // Cannot commute if it has a non-zero rotate count. | |||
1156 | if (MI.getOperand(3).getImm() != 0) | |||
1157 | return nullptr; | |||
1158 | ||||
1159 | // If we have a zero rotate count, we have: | |||
1160 | // M = mask(MB,ME) | |||
1161 | // Op0 = (Op1 & ~M) | (Op2 & M) | |||
1162 | // Change this to: | |||
1163 | // M = mask((ME+1)&31, (MB-1)&31) | |||
1164 | // Op0 = (Op2 & ~M) | (Op1 & M) | |||
1165 | ||||
1166 | // Swap op1/op2 | |||
1167 | assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&(static_cast<void> (0)) | |||
1168 | "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.")(static_cast<void> (0)); | |||
1169 | Register Reg0 = MI.getOperand(0).getReg(); | |||
1170 | Register Reg1 = MI.getOperand(1).getReg(); | |||
1171 | Register Reg2 = MI.getOperand(2).getReg(); | |||
1172 | unsigned SubReg1 = MI.getOperand(1).getSubReg(); | |||
1173 | unsigned SubReg2 = MI.getOperand(2).getSubReg(); | |||
1174 | bool Reg1IsKill = MI.getOperand(1).isKill(); | |||
1175 | bool Reg2IsKill = MI.getOperand(2).isKill(); | |||
1176 | bool ChangeReg0 = false; | |||
1177 | // If machine instrs are no longer in two-address forms, update | |||
1178 | // destination register as well. | |||
1179 | if (Reg0 == Reg1) { | |||
1180 | // Must be two address instruction! | |||
1181 | assert(MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&(static_cast<void> (0)) | |||
1182 | "Expecting a two-address instruction!")(static_cast<void> (0)); | |||
1183 | assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch")(static_cast<void> (0)); | |||
1184 | Reg2IsKill = false; | |||
1185 | ChangeReg0 = true; | |||
1186 | } | |||
1187 | ||||
1188 | // Masks. | |||
1189 | unsigned MB = MI.getOperand(4).getImm(); | |||
1190 | unsigned ME = MI.getOperand(5).getImm(); | |||
1191 | ||||
1192 | // We can't commute a trivial mask (there is no way to represent an all-zero | |||
1193 | // mask). | |||
1194 | if (MB == 0 && ME == 31) | |||
1195 | return nullptr; | |||
1196 | ||||
1197 | if (NewMI) { | |||
1198 | // Create a new instruction. | |||
1199 | Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg(); | |||
1200 | bool Reg0IsDead = MI.getOperand(0).isDead(); | |||
1201 | return BuildMI(MF, MI.getDebugLoc(), MI.getDesc()) | |||
1202 | .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) | |||
1203 | .addReg(Reg2, getKillRegState(Reg2IsKill)) | |||
1204 | .addReg(Reg1, getKillRegState(Reg1IsKill)) | |||
1205 | .addImm((ME + 1) & 31) | |||
1206 | .addImm((MB - 1) & 31); | |||
1207 | } | |||
1208 | ||||
1209 | if (ChangeReg0) { | |||
1210 | MI.getOperand(0).setReg(Reg2); | |||
1211 | MI.getOperand(0).setSubReg(SubReg2); | |||
1212 | } | |||
1213 | MI.getOperand(2).setReg(Reg1); | |||
1214 | MI.getOperand(1).setReg(Reg2); | |||
1215 | MI.getOperand(2).setSubReg(SubReg1); | |||
1216 | MI.getOperand(1).setSubReg(SubReg2); | |||
1217 | MI.getOperand(2).setIsKill(Reg1IsKill); | |||
1218 | MI.getOperand(1).setIsKill(Reg2IsKill); | |||
1219 | ||||
1220 | // Swap the mask around. | |||
1221 | MI.getOperand(4).setImm((ME + 1) & 31); | |||
1222 | MI.getOperand(5).setImm((MB - 1) & 31); | |||
1223 | return &MI; | |||
1224 | } | |||
1225 | ||||
1226 | bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI, | |||
1227 | unsigned &SrcOpIdx1, | |||
1228 | unsigned &SrcOpIdx2) const { | |||
1229 | // For VSX A-Type FMA instructions, it is the first two operands that can be | |||
1230 | // commuted, however, because the non-encoded tied input operand is listed | |||
1231 | // first, the operands to swap are actually the second and third. | |||
1232 | ||||
1233 | int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode()); | |||
1234 | if (AltOpc == -1) | |||
1235 | return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); | |||
1236 | ||||
1237 | // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1 | |||
1238 | // and SrcOpIdx2. | |||
1239 | return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); | |||
1240 | } | |||
1241 | ||||
1242 | void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, | |||
1243 | MachineBasicBlock::iterator MI) const { | |||
1244 | // This function is used for scheduling, and the nop wanted here is the type | |||
1245 | // that terminates dispatch groups on the POWER cores. | |||
1246 | unsigned Directive = Subtarget.getCPUDirective(); | |||
1247 | unsigned Opcode; | |||
1248 | switch (Directive) { | |||
1249 | default: Opcode = PPC::NOP; break; | |||
1250 | case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; | |||
1251 | case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; | |||
1252 | case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ | |||
1253 | // FIXME: Update when POWER9 scheduling model is ready. | |||
1254 | case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break; | |||
1255 | } | |||
1256 | ||||
1257 | DebugLoc DL; | |||
1258 | BuildMI(MBB, MI, DL, get(Opcode)); | |||
1259 | } | |||
1260 | ||||
1261 | /// Return the noop instruction to use for a noop. | |||
1262 | MCInst PPCInstrInfo::getNop() const { | |||
1263 | MCInst Nop; | |||
1264 | Nop.setOpcode(PPC::NOP); | |||
1265 | return Nop; | |||
1266 | } | |||
1267 | ||||
1268 | // Branch analysis. | |||
1269 | // Note: If the condition register is set to CTR or CTR8 then this is a | |||
1270 | // BDNZ (imm == 1) or BDZ (imm == 0) branch. | |||
1271 | bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, | |||
1272 | MachineBasicBlock *&TBB, | |||
1273 | MachineBasicBlock *&FBB, | |||
1274 | SmallVectorImpl<MachineOperand> &Cond, | |||
1275 | bool AllowModify) const { | |||
1276 | bool isPPC64 = Subtarget.isPPC64(); | |||
1277 | ||||
1278 | // If the block has no terminators, it just falls into the block after it. | |||
1279 | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); | |||
1280 | if (I == MBB.end()) | |||
1281 | return false; | |||
1282 | ||||
1283 | if (!isUnpredicatedTerminator(*I)) | |||
1284 | return false; | |||
1285 | ||||
1286 | if (AllowModify) { | |||
1287 | // If the BB ends with an unconditional branch to the fallthrough BB, | |||
1288 | // we eliminate the branch instruction. | |||
1289 | if (I->getOpcode() == PPC::B && | |||
1290 | MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { | |||
1291 | I->eraseFromParent(); | |||
1292 | ||||
1293 | // We update iterator after deleting the last branch. | |||
1294 | I = MBB.getLastNonDebugInstr(); | |||
1295 | if (I == MBB.end() || !isUnpredicatedTerminator(*I)) | |||
1296 | return false; | |||
1297 | } | |||
1298 | } | |||
1299 | ||||
1300 | // Get the last instruction in the block. | |||
1301 | MachineInstr &LastInst = *I; | |||
1302 | ||||
1303 | // If there is only one terminator instruction, process it. | |||
1304 | if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { | |||
1305 | if (LastInst.getOpcode() == PPC::B) { | |||
1306 | if (!LastInst.getOperand(0).isMBB()) | |||
1307 | return true; | |||
1308 | TBB = LastInst.getOperand(0).getMBB(); | |||
1309 | return false; | |||
1310 | } else if (LastInst.getOpcode() == PPC::BCC) { | |||
1311 | if (!LastInst.getOperand(2).isMBB()) | |||
1312 | return true; | |||
1313 | // Block ends with fall-through condbranch. | |||
1314 | TBB = LastInst.getOperand(2).getMBB(); | |||
1315 | Cond.push_back(LastInst.getOperand(0)); | |||
1316 | Cond.push_back(LastInst.getOperand(1)); | |||
1317 | return false; | |||
1318 | } else if (LastInst.getOpcode() == PPC::BC) { | |||
1319 | if (!LastInst.getOperand(1).isMBB()) | |||
1320 | return true; | |||
1321 | // Block ends with fall-through condbranch. | |||
1322 | TBB = LastInst.getOperand(1).getMBB(); | |||
1323 | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); | |||
1324 | Cond.push_back(LastInst.getOperand(0)); | |||
1325 | return false; | |||
1326 | } else if (LastInst.getOpcode() == PPC::BCn) { | |||
1327 | if (!LastInst.getOperand(1).isMBB()) | |||
1328 | return true; | |||
1329 | // Block ends with fall-through condbranch. | |||
1330 | TBB = LastInst.getOperand(1).getMBB(); | |||
1331 | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); | |||
1332 | Cond.push_back(LastInst.getOperand(0)); | |||
1333 | return false; | |||
1334 | } else if (LastInst.getOpcode() == PPC::BDNZ8 || | |||
1335 | LastInst.getOpcode() == PPC::BDNZ) { | |||
1336 | if (!LastInst.getOperand(0).isMBB()) | |||
1337 | return true; | |||
1338 | if (DisableCTRLoopAnal) | |||
1339 | return true; | |||
1340 | TBB = LastInst.getOperand(0).getMBB(); | |||
1341 | Cond.push_back(MachineOperand::CreateImm(1)); | |||
1342 | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, | |||
1343 | true)); | |||
1344 | return false; | |||
1345 | } else if (LastInst.getOpcode() == PPC::BDZ8 || | |||
1346 | LastInst.getOpcode() == PPC::BDZ) { | |||
1347 | if (!LastInst.getOperand(0).isMBB()) | |||
1348 | return true; | |||
1349 | if (DisableCTRLoopAnal) | |||
1350 | return true; | |||
1351 | TBB = LastInst.getOperand(0).getMBB(); | |||
1352 | Cond.push_back(MachineOperand::CreateImm(0)); | |||
1353 | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, | |||
1354 | true)); | |||
1355 | return false; | |||
1356 | } | |||
1357 | ||||
1358 | // Otherwise, don't know what this is. | |||
1359 | return true; | |||
1360 | } | |||
1361 | ||||
1362 | // Get the instruction before it if it's a terminator. | |||
1363 | MachineInstr &SecondLastInst = *I; | |||
1364 | ||||
1365 | // If there are three terminators, we don't know what sort of block this is. | |||
1366 | if (I != MBB.begin() && isUnpredicatedTerminator(*--I)) | |||
1367 | return true; | |||
1368 | ||||
1369 | // If the block ends with PPC::B and PPC:BCC, handle it. | |||
1370 | if (SecondLastInst.getOpcode() == PPC::BCC && | |||
1371 | LastInst.getOpcode() == PPC::B) { | |||
1372 | if (!SecondLastInst.getOperand(2).isMBB() || | |||
1373 | !LastInst.getOperand(0).isMBB()) | |||
1374 | return true; | |||
1375 | TBB = SecondLastInst.getOperand(2).getMBB(); | |||
1376 | Cond.push_back(SecondLastInst.getOperand(0)); | |||
1377 | Cond.push_back(SecondLastInst.getOperand(1)); | |||
1378 | FBB = LastInst.getOperand(0).getMBB(); | |||
1379 | return false; | |||
1380 | } else if (SecondLastInst.getOpcode() == PPC::BC && | |||
1381 | LastInst.getOpcode() == PPC::B) { | |||
1382 | if (!SecondLastInst.getOperand(1).isMBB() || | |||
1383 | !LastInst.getOperand(0).isMBB()) | |||
1384 | return true; | |||
1385 | TBB = SecondLastInst.getOperand(1).getMBB(); | |||
1386 | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); | |||
1387 | Cond.push_back(SecondLastInst.getOperand(0)); | |||
1388 | FBB = LastInst.getOperand(0).getMBB(); | |||
1389 | return false; | |||
1390 | } else if (SecondLastInst.getOpcode() == PPC::BCn && | |||
1391 | LastInst.getOpcode() == PPC::B) { | |||
1392 | if (!SecondLastInst.getOperand(1).isMBB() || | |||
1393 | !LastInst.getOperand(0).isMBB()) | |||
1394 | return true; | |||
1395 | TBB = SecondLastInst.getOperand(1).getMBB(); | |||
1396 | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); | |||
1397 | Cond.push_back(SecondLastInst.getOperand(0)); | |||
1398 | FBB = LastInst.getOperand(0).getMBB(); | |||
1399 | return false; | |||
1400 | } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 || | |||
1401 | SecondLastInst.getOpcode() == PPC::BDNZ) && | |||
1402 | LastInst.getOpcode() == PPC::B) { | |||
1403 | if (!SecondLastInst.getOperand(0).isMBB() || | |||
1404 | !LastInst.getOperand(0).isMBB()) | |||
1405 | return true; | |||
1406 | if (DisableCTRLoopAnal) | |||
1407 | return true; | |||
1408 | TBB = SecondLastInst.getOperand(0).getMBB(); | |||
1409 | Cond.push_back(MachineOperand::CreateImm(1)); | |||
1410 | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, | |||
1411 | true)); | |||
1412 | FBB = LastInst.getOperand(0).getMBB(); | |||
1413 | return false; | |||
1414 | } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 || | |||
1415 | SecondLastInst.getOpcode() == PPC::BDZ) && | |||
1416 | LastInst.getOpcode() == PPC::B) { | |||
1417 | if (!SecondLastInst.getOperand(0).isMBB() || | |||
1418 | !LastInst.getOperand(0).isMBB()) | |||
1419 | return true; | |||
1420 | if (DisableCTRLoopAnal) | |||
1421 | return true; | |||
1422 | TBB = SecondLastInst.getOperand(0).getMBB(); | |||
1423 | Cond.push_back(MachineOperand::CreateImm(0)); | |||
1424 | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, | |||
1425 | true)); | |||
1426 | FBB = LastInst.getOperand(0).getMBB(); | |||
1427 | return false; | |||
1428 | } | |||
1429 | ||||
1430 | // If the block ends with two PPC:Bs, handle it. The second one is not | |||
1431 | // executed, so remove it. | |||
1432 | if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) { | |||
1433 | if (!SecondLastInst.getOperand(0).isMBB()) | |||
1434 | return true; | |||
1435 | TBB = SecondLastInst.getOperand(0).getMBB(); | |||
1436 | I = LastInst; | |||
1437 | if (AllowModify) | |||
1438 | I->eraseFromParent(); | |||
1439 | return false; | |||
1440 | } | |||
1441 | ||||
1442 | // Otherwise, can't handle this. | |||
1443 | return true; | |||
1444 | } | |||
1445 | ||||
1446 | unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, | |||
1447 | int *BytesRemoved) const { | |||
1448 | assert(!BytesRemoved && "code size not handled")(static_cast<void> (0)); | |||
1449 | ||||
1450 | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); | |||
1451 | if (I == MBB.end()) | |||
1452 | return 0; | |||
1453 | ||||
1454 | if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && | |||
1455 | I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && | |||
1456 | I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && | |||
1457 | I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) | |||
1458 | return 0; | |||
1459 | ||||
1460 | // Remove the branch. | |||
1461 | I->eraseFromParent(); | |||
1462 | ||||
1463 | I = MBB.end(); | |||
1464 | ||||
1465 | if (I == MBB.begin()) return 1; | |||
1466 | --I; | |||
1467 | if (I->getOpcode() != PPC::BCC && | |||
1468 | I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && | |||
1469 | I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && | |||
1470 | I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) | |||
1471 | return 1; | |||
1472 | ||||
1473 | // Remove the branch. | |||
1474 | I->eraseFromParent(); | |||
1475 | return 2; | |||
1476 | } | |||
1477 | ||||
1478 | unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, | |||
1479 | MachineBasicBlock *TBB, | |||
1480 | MachineBasicBlock *FBB, | |||
1481 | ArrayRef<MachineOperand> Cond, | |||
1482 | const DebugLoc &DL, | |||
1483 | int *BytesAdded) const { | |||
1484 | // Shouldn't be a fall through. | |||
1485 | assert(TBB && "insertBranch must not be told to insert a fallthrough")(static_cast<void> (0)); | |||
1486 | assert((Cond.size() == 2 || Cond.size() == 0) &&(static_cast<void> (0)) | |||
1487 | "PPC branch conditions have two components!")(static_cast<void> (0)); | |||
1488 | assert(!BytesAdded && "code size not handled")(static_cast<void> (0)); | |||
1489 | ||||
1490 | bool isPPC64 = Subtarget.isPPC64(); | |||
1491 | ||||
1492 | // One-way branch. | |||
1493 | if (!FBB) { | |||
1494 | if (Cond.empty()) // Unconditional branch | |||
1495 | BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); | |||
1496 | else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) | |||
1497 | BuildMI(&MBB, DL, get(Cond[0].getImm() ? | |||
1498 | (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : | |||
1499 | (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); | |||
1500 | else if (Cond[0].getImm() == PPC::PRED_BIT_SET) | |||
1501 | BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); | |||
1502 | else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) | |||
1503 | BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); | |||
1504 | else // Conditional branch | |||
1505 | BuildMI(&MBB, DL, get(PPC::BCC)) | |||
1506 | .addImm(Cond[0].getImm()) | |||
1507 | .add(Cond[1]) | |||
1508 | .addMBB(TBB); | |||
1509 | return 1; | |||
1510 | } | |||
1511 | ||||
1512 | // Two-way Conditional Branch. | |||
1513 | if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) | |||
1514 | BuildMI(&MBB, DL, get(Cond[0].getImm() ? | |||
1515 | (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : | |||
1516 | (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); | |||
1517 | else if (Cond[0].getImm() == PPC::PRED_BIT_SET) | |||
1518 | BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); | |||
1519 | else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) | |||
1520 | BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); | |||
1521 | else | |||
1522 | BuildMI(&MBB, DL, get(PPC::BCC)) | |||
1523 | .addImm(Cond[0].getImm()) | |||
1524 | .add(Cond[1]) | |||
1525 | .addMBB(TBB); | |||
1526 | BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); | |||
1527 | return 2; | |||
1528 | } | |||
1529 | ||||
1530 | // Select analysis. | |||
1531 | bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, | |||
1532 | ArrayRef<MachineOperand> Cond, | |||
1533 | Register DstReg, Register TrueReg, | |||
1534 | Register FalseReg, int &CondCycles, | |||
1535 | int &TrueCycles, int &FalseCycles) const { | |||
1536 | if (Cond.size() != 2) | |||
1537 | return false; | |||
1538 | ||||
1539 | // If this is really a bdnz-like condition, then it cannot be turned into a | |||
1540 | // select. | |||
1541 | if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) | |||
1542 | return false; | |||
1543 | ||||
1544 | // Check register classes. | |||
1545 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |||
1546 | const TargetRegisterClass *RC = | |||
1547 | RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); | |||
1548 | if (!RC) | |||
1549 | return false; | |||
1550 | ||||
1551 | // isel is for regular integer GPRs only. | |||
1552 | if (!PPC::GPRCRegClass.hasSubClassEq(RC) && | |||
1553 | !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) && | |||
1554 | !PPC::G8RCRegClass.hasSubClassEq(RC) && | |||
1555 | !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) | |||
1556 | return false; | |||
1557 | ||||
1558 | // FIXME: These numbers are for the A2, how well they work for other cores is | |||
1559 | // an open question. On the A2, the isel instruction has a 2-cycle latency | |||
1560 | // but single-cycle throughput. These numbers are used in combination with | |||
1561 | // the MispredictPenalty setting from the active SchedMachineModel. | |||
1562 | CondCycles = 1; | |||
1563 | TrueCycles = 1; | |||
1564 | FalseCycles = 1; | |||
1565 | ||||
1566 | return true; | |||
1567 | } | |||
1568 | ||||
1569 | void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, | |||
1570 | MachineBasicBlock::iterator MI, | |||
1571 | const DebugLoc &dl, Register DestReg, | |||
1572 | ArrayRef<MachineOperand> Cond, Register TrueReg, | |||
1573 | Register FalseReg) const { | |||
1574 | assert(Cond.size() == 2 &&(static_cast<void> (0)) | |||
1575 | "PPC branch conditions have two components!")(static_cast<void> (0)); | |||
1576 | ||||
1577 | // Get the register classes. | |||
1578 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |||
1579 | const TargetRegisterClass *RC = | |||
1580 | RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); | |||
1581 | assert(RC && "TrueReg and FalseReg must have overlapping register classes")(static_cast<void> (0)); | |||
1582 | ||||
1583 | bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) || | |||
1584 | PPC::G8RC_NOX0RegClass.hasSubClassEq(RC); | |||
1585 | assert((Is64Bit ||(static_cast<void> (0)) | |||
1586 | PPC::GPRCRegClass.hasSubClassEq(RC) ||(static_cast<void> (0)) | |||
1587 | PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&(static_cast<void> (0)) | |||
1588 | "isel is for regular integer GPRs only")(static_cast<void> (0)); | |||
1589 | ||||
1590 | unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL; | |||
1591 | auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm()); | |||
1592 | ||||
1593 | unsigned SubIdx = 0; | |||
1594 | bool SwapOps = false; | |||
1595 | switch (SelectPred) { | |||
1596 | case PPC::PRED_EQ: | |||
1597 | case PPC::PRED_EQ_MINUS: | |||
1598 | case PPC::PRED_EQ_PLUS: | |||
1599 | SubIdx = PPC::sub_eq; SwapOps = false; break; | |||
1600 | case PPC::PRED_NE: | |||
1601 | case PPC::PRED_NE_MINUS: | |||
1602 | case PPC::PRED_NE_PLUS: | |||
1603 | SubIdx = PPC::sub_eq; SwapOps = true; break; | |||
1604 | case PPC::PRED_LT: | |||
1605 | case PPC::PRED_LT_MINUS: | |||
1606 | case PPC::PRED_LT_PLUS: | |||
1607 | SubIdx = PPC::sub_lt; SwapOps = false; break; | |||
1608 | case PPC::PRED_GE: | |||
1609 | case PPC::PRED_GE_MINUS: | |||
1610 | case PPC::PRED_GE_PLUS: | |||
1611 | SubIdx = PPC::sub_lt; SwapOps = true; break; | |||
1612 | case PPC::PRED_GT: | |||
1613 | case PPC::PRED_GT_MINUS: | |||
1614 | case PPC::PRED_GT_PLUS: | |||
1615 | SubIdx = PPC::sub_gt; SwapOps = false; break; | |||
1616 | case PPC::PRED_LE: | |||
1617 | case PPC::PRED_LE_MINUS: | |||
1618 | case PPC::PRED_LE_PLUS: | |||
1619 | SubIdx = PPC::sub_gt; SwapOps = true; break; | |||
1620 | case PPC::PRED_UN: | |||
1621 | case PPC::PRED_UN_MINUS: | |||
1622 | case PPC::PRED_UN_PLUS: | |||
1623 | SubIdx = PPC::sub_un; SwapOps = false; break; | |||
1624 | case PPC::PRED_NU: | |||
1625 | case PPC::PRED_NU_MINUS: | |||
1626 | case PPC::PRED_NU_PLUS: | |||
1627 | SubIdx = PPC::sub_un; SwapOps = true; break; | |||
1628 | case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; | |||
1629 | case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; | |||
1630 | } | |||
1631 | ||||
1632 | Register FirstReg = SwapOps ? FalseReg : TrueReg, | |||
1633 | SecondReg = SwapOps ? TrueReg : FalseReg; | |||
1634 | ||||
1635 | // The first input register of isel cannot be r0. If it is a member | |||
1636 | // of a register class that can be r0, then copy it first (the | |||
1637 | // register allocator should eliminate the copy). | |||
1638 | if (MRI.getRegClass(FirstReg)->contains(PPC::R0) || | |||
1639 | MRI.getRegClass(FirstReg)->contains(PPC::X0)) { | |||
1640 | const TargetRegisterClass *FirstRC = | |||
1641 | MRI.getRegClass(FirstReg)->contains(PPC::X0) ? | |||
1642 | &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass; | |||
1643 | Register OldFirstReg = FirstReg; | |||
1644 | FirstReg = MRI.createVirtualRegister(FirstRC); | |||
1645 | BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg) | |||
1646 | .addReg(OldFirstReg); | |||
1647 | } | |||
1648 | ||||
1649 | BuildMI(MBB, MI, dl, get(OpCode), DestReg) | |||
1650 | .addReg(FirstReg).addReg(SecondReg) | |||
1651 | .addReg(Cond[1].getReg(), 0, SubIdx); | |||
1652 | } | |||
1653 | ||||
1654 | static unsigned getCRBitValue(unsigned CRBit) { | |||
1655 | unsigned Ret = 4; | |||
1656 | if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT || | |||
1657 | CRBit == PPC::CR2LT || CRBit == PPC::CR3LT || | |||
1658 | CRBit == PPC::CR4LT || CRBit == PPC::CR5LT || | |||
1659 | CRBit == PPC::CR6LT || CRBit == PPC::CR7LT) | |||
1660 | Ret = 3; | |||
1661 | if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT || | |||
1662 | CRBit == PPC::CR2GT || CRBit == PPC::CR3GT || | |||
1663 | CRBit == PPC::CR4GT || CRBit == PPC::CR5GT || | |||
1664 | CRBit == PPC::CR6GT || CRBit == PPC::CR7GT) | |||
1665 | Ret = 2; | |||
1666 | if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ || | |||
1667 | CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ || | |||
1668 | CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ || | |||
1669 | CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ) | |||
1670 | Ret = 1; | |||
1671 | if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN || | |||
1672 | CRBit == PPC::CR2UN || CRBit == PPC::CR3UN || | |||
1673 | CRBit == PPC::CR4UN || CRBit == PPC::CR5UN || | |||
1674 | CRBit == PPC::CR6UN || CRBit == PPC::CR7UN) | |||
1675 | Ret = 0; | |||
1676 | ||||
1677 | assert(Ret != 4 && "Invalid CR bit register")(static_cast<void> (0)); | |||
1678 | return Ret; | |||
1679 | } | |||
1680 | ||||
1681 | void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, | |||
1682 | MachineBasicBlock::iterator I, | |||
1683 | const DebugLoc &DL, MCRegister DestReg, | |||
1684 | MCRegister SrcReg, bool KillSrc) const { | |||
1685 | // We can end up with self copies and similar things as a result of VSX copy | |||
1686 | // legalization. Promote them here. | |||
1687 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
1688 | if (PPC::F8RCRegClass.contains(DestReg) && | |||
1689 | PPC::VSRCRegClass.contains(SrcReg)) { | |||
1690 | MCRegister SuperReg = | |||
1691 | TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass); | |||
1692 | ||||
1693 | if (VSXSelfCopyCrash && SrcReg == SuperReg) | |||
1694 | llvm_unreachable("nop VSX copy")__builtin_unreachable(); | |||
1695 | ||||
1696 | DestReg = SuperReg; | |||
1697 | } else if (PPC::F8RCRegClass.contains(SrcReg) && | |||
1698 | PPC::VSRCRegClass.contains(DestReg)) { | |||
1699 | MCRegister SuperReg = | |||
1700 | TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass); | |||
1701 | ||||
1702 | if (VSXSelfCopyCrash && DestReg == SuperReg) | |||
1703 | llvm_unreachable("nop VSX copy")__builtin_unreachable(); | |||
1704 | ||||
1705 | SrcReg = SuperReg; | |||
1706 | } | |||
1707 | ||||
1708 | // Different class register copy | |||
1709 | if (PPC::CRBITRCRegClass.contains(SrcReg) && | |||
1710 | PPC::GPRCRegClass.contains(DestReg)) { | |||
1711 | MCRegister CRReg = getCRFromCRBit(SrcReg); | |||
1712 | BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg); | |||
1713 | getKillRegState(KillSrc); | |||
1714 | // Rotate the CR bit in the CR fields to be the least significant bit and | |||
1715 | // then mask with 0x1 (MB = ME = 31). | |||
1716 | BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) | |||
1717 | .addReg(DestReg, RegState::Kill) | |||
1718 | .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg))) | |||
1719 | .addImm(31) | |||
1720 | .addImm(31); | |||
1721 | return; | |||
1722 | } else if (PPC::CRRCRegClass.contains(SrcReg) && | |||
1723 | (PPC::G8RCRegClass.contains(DestReg) || | |||
1724 | PPC::GPRCRegClass.contains(DestReg))) { | |||
1725 | bool Is64Bit = PPC::G8RCRegClass.contains(DestReg); | |||
1726 | unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF; | |||
1727 | unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM; | |||
1728 | unsigned CRNum = TRI->getEncodingValue(SrcReg); | |||
1729 | BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg); | |||
1730 | getKillRegState(KillSrc); | |||
1731 | if (CRNum == 7) | |||
1732 | return; | |||
1733 | // Shift the CR bits to make the CR field in the lowest 4 bits of GRC. | |||
1734 | BuildMI(MBB, I, DL, get(ShCode), DestReg) | |||
1735 | .addReg(DestReg, RegState::Kill) | |||
1736 | .addImm(CRNum * 4 + 4) | |||
1737 | .addImm(28) | |||
1738 | .addImm(31); | |||
1739 | return; | |||
1740 | } else if (PPC::G8RCRegClass.contains(SrcReg) && | |||
1741 | PPC::VSFRCRegClass.contains(DestReg)) { | |||
1742 | assert(Subtarget.hasDirectMove() &&(static_cast<void> (0)) | |||
1743 | "Subtarget doesn't support directmove, don't know how to copy.")(static_cast<void> (0)); | |||
1744 | BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg); | |||
1745 | NumGPRtoVSRSpill++; | |||
1746 | getKillRegState(KillSrc); | |||
1747 | return; | |||
1748 | } else if (PPC::VSFRCRegClass.contains(SrcReg) && | |||
1749 | PPC::G8RCRegClass.contains(DestReg)) { | |||
1750 | assert(Subtarget.hasDirectMove() &&(static_cast<void> (0)) | |||
1751 | "Subtarget doesn't support directmove, don't know how to copy.")(static_cast<void> (0)); | |||
1752 | BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg); | |||
1753 | getKillRegState(KillSrc); | |||
1754 | return; | |||
1755 | } else if (PPC::SPERCRegClass.contains(SrcReg) && | |||
1756 | PPC::GPRCRegClass.contains(DestReg)) { | |||
1757 | BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg); | |||
1758 | getKillRegState(KillSrc); | |||
1759 | return; | |||
1760 | } else if (PPC::GPRCRegClass.contains(SrcReg) && | |||
1761 | PPC::SPERCRegClass.contains(DestReg)) { | |||
1762 | BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg); | |||
1763 | getKillRegState(KillSrc); | |||
1764 | return; | |||
1765 | } | |||
1766 | ||||
1767 | unsigned Opc; | |||
1768 | if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) | |||
1769 | Opc = PPC::OR; | |||
1770 | else if (PPC::G8RCRegClass.contains(DestReg, SrcReg)) | |||
1771 | Opc = PPC::OR8; | |||
1772 | else if (PPC::F4RCRegClass.contains(DestReg, SrcReg)) | |||
1773 | Opc = PPC::FMR; | |||
1774 | else if (PPC::CRRCRegClass.contains(DestReg, SrcReg)) | |||
1775 | Opc = PPC::MCRF; | |||
1776 | else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) | |||
1777 | Opc = PPC::VOR; | |||
1778 | else if (PPC::VSRCRegClass.contains(DestReg, SrcReg)) | |||
1779 | // There are two different ways this can be done: | |||
1780 | // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only | |||
1781 | // issue in VSU pipeline 0. | |||
1782 | // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but | |||
1783 | // can go to either pipeline. | |||
1784 | // We'll always use xxlor here, because in practically all cases where | |||
1785 | // copies are generated, they are close enough to some use that the | |||
1786 | // lower-latency form is preferable. | |||
1787 | Opc = PPC::XXLOR; | |||
1788 | else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) || | |||
1789 | PPC::VSSRCRegClass.contains(DestReg, SrcReg)) | |||
1790 | Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf; | |||
1791 | else if (Subtarget.pairedVectorMemops() && | |||
1792 | PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) { | |||
1793 | if (SrcReg > PPC::VSRp15) | |||
1794 | SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2; | |||
1795 | else | |||
1796 | SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2; | |||
1797 | if (DestReg > PPC::VSRp15) | |||
1798 | DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2; | |||
1799 | else | |||
1800 | DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2; | |||
1801 | BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg). | |||
1802 | addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); | |||
1803 | BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1). | |||
1804 | addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc)); | |||
1805 | return; | |||
1806 | } | |||
1807 | else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) | |||
1808 | Opc = PPC::CROR; | |||
1809 | else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) | |||
1810 | Opc = PPC::EVOR; | |||
1811 | else if ((PPC::ACCRCRegClass.contains(DestReg) || | |||
1812 | PPC::UACCRCRegClass.contains(DestReg)) && | |||
1813 | (PPC::ACCRCRegClass.contains(SrcReg) || | |||
1814 | PPC::UACCRCRegClass.contains(SrcReg))) { | |||
1815 | // If primed, de-prime the source register, copy the individual registers | |||
1816 | // and prime the destination if needed. The vector subregisters are | |||
1817 | // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the | |||
1818 | // source is primed, we need to re-prime it after the copy as well. | |||
1819 | PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg); | |||
1820 | bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg); | |||
1821 | bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg); | |||
1822 | MCRegister VSLSrcReg = | |||
1823 | PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4; | |||
1824 | MCRegister VSLDestReg = | |||
1825 | PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4; | |||
1826 | if (SrcPrimed) | |||
1827 | BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg); | |||
1828 | for (unsigned Idx = 0; Idx < 4; Idx++) | |||
1829 | BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx) | |||
1830 | .addReg(VSLSrcReg + Idx) | |||
1831 | .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc)); | |||
1832 | if (DestPrimed) | |||
1833 | BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg); | |||
1834 | if (SrcPrimed && !KillSrc) | |||
1835 | BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg); | |||
1836 | return; | |||
1837 | } else if (PPC::G8pRCRegClass.contains(DestReg) && | |||
1838 | PPC::G8pRCRegClass.contains(SrcReg)) { | |||
1839 | // TODO: Handle G8RC to G8pRC (and vice versa) copy. | |||
1840 | unsigned DestRegIdx = DestReg - PPC::G8p0; | |||
1841 | MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx; | |||
1842 | MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1; | |||
1843 | unsigned SrcRegIdx = SrcReg - PPC::G8p0; | |||
1844 | MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx; | |||
1845 | MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1; | |||
1846 | BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0) | |||
1847 | .addReg(SrcRegSub0) | |||
1848 | .addReg(SrcRegSub0, getKillRegState(KillSrc)); | |||
1849 | BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1) | |||
1850 | .addReg(SrcRegSub1) | |||
1851 | .addReg(SrcRegSub1, getKillRegState(KillSrc)); | |||
1852 | return; | |||
1853 | } else | |||
1854 | llvm_unreachable("Impossible reg-to-reg copy")__builtin_unreachable(); | |||
1855 | ||||
1856 | const MCInstrDesc &MCID = get(Opc); | |||
1857 | if (MCID.getNumOperands() == 3) | |||
1858 | BuildMI(MBB, I, DL, MCID, DestReg) | |||
1859 | .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); | |||
1860 | else | |||
1861 | BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); | |||
1862 | } | |||
1863 | ||||
1864 | unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const { | |||
1865 | int OpcodeIndex = 0; | |||
1866 | ||||
1867 | if (PPC::GPRCRegClass.hasSubClassEq(RC) || | |||
1868 | PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { | |||
1869 | OpcodeIndex = SOK_Int4Spill; | |||
1870 | } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || | |||
1871 | PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { | |||
1872 | OpcodeIndex = SOK_Int8Spill; | |||
1873 | } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { | |||
1874 | OpcodeIndex = SOK_Float8Spill; | |||
1875 | } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { | |||
1876 | OpcodeIndex = SOK_Float4Spill; | |||
1877 | } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { | |||
1878 | OpcodeIndex = SOK_SPESpill; | |||
1879 | } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { | |||
1880 | OpcodeIndex = SOK_CRSpill; | |||
1881 | } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { | |||
1882 | OpcodeIndex = SOK_CRBitSpill; | |||
1883 | } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { | |||
1884 | OpcodeIndex = SOK_VRVectorSpill; | |||
1885 | } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { | |||
1886 | OpcodeIndex = SOK_VSXVectorSpill; | |||
1887 | } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { | |||
1888 | OpcodeIndex = SOK_VectorFloat8Spill; | |||
1889 | } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { | |||
1890 | OpcodeIndex = SOK_VectorFloat4Spill; | |||
1891 | } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { | |||
1892 | OpcodeIndex = SOK_SpillToVSR; | |||
1893 | } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) { | |||
1894 | assert(Subtarget.pairedVectorMemops() &&(static_cast<void> (0)) | |||
1895 | "Register unexpected when paired memops are disabled.")(static_cast<void> (0)); | |||
1896 | OpcodeIndex = SOK_AccumulatorSpill; | |||
1897 | } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) { | |||
1898 | assert(Subtarget.pairedVectorMemops() &&(static_cast<void> (0)) | |||
1899 | "Register unexpected when paired memops are disabled.")(static_cast<void> (0)); | |||
1900 | OpcodeIndex = SOK_UAccumulatorSpill; | |||
1901 | } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) { | |||
1902 | assert(Subtarget.pairedVectorMemops() &&(static_cast<void> (0)) | |||
1903 | "Register unexpected when paired memops are disabled.")(static_cast<void> (0)); | |||
1904 | OpcodeIndex = SOK_PairedVecSpill; | |||
1905 | } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) { | |||
1906 | OpcodeIndex = SOK_PairedG8Spill; | |||
1907 | } else { | |||
1908 | llvm_unreachable("Unknown regclass!")__builtin_unreachable(); | |||
1909 | } | |||
1910 | return OpcodeIndex; | |||
1911 | } | |||
1912 | ||||
1913 | unsigned | |||
1914 | PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const { | |||
1915 | const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray(); | |||
1916 | return OpcodesForSpill[getSpillIndex(RC)]; | |||
1917 | } | |||
1918 | ||||
1919 | unsigned | |||
1920 | PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const { | |||
1921 | const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray(); | |||
1922 | return OpcodesForSpill[getSpillIndex(RC)]; | |||
1923 | } | |||
1924 | ||||
1925 | void PPCInstrInfo::StoreRegToStackSlot( | |||
1926 | MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, | |||
1927 | const TargetRegisterClass *RC, | |||
1928 | SmallVectorImpl<MachineInstr *> &NewMIs) const { | |||
1929 | unsigned Opcode = getStoreOpcodeForSpill(RC); | |||
1930 | DebugLoc DL; | |||
1931 | ||||
1932 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); | |||
1933 | FuncInfo->setHasSpills(); | |||
1934 | ||||
1935 | NewMIs.push_back(addFrameReference( | |||
1936 | BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)), | |||
1937 | FrameIdx)); | |||
1938 | ||||
1939 | if (PPC::CRRCRegClass.hasSubClassEq(RC) || | |||
1940 | PPC::CRBITRCRegClass.hasSubClassEq(RC)) | |||
1941 | FuncInfo->setSpillsCR(); | |||
1942 | ||||
1943 | if (isXFormMemOp(Opcode)) | |||
1944 | FuncInfo->setHasNonRISpills(); | |||
1945 | } | |||
1946 | ||||
1947 | void PPCInstrInfo::storeRegToStackSlotNoUpd( | |||
1948 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, | |||
1949 | bool isKill, int FrameIdx, const TargetRegisterClass *RC, | |||
1950 | const TargetRegisterInfo *TRI) const { | |||
1951 | MachineFunction &MF = *MBB.getParent(); | |||
1952 | SmallVector<MachineInstr *, 4> NewMIs; | |||
1953 | ||||
1954 | StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs); | |||
1955 | ||||
1956 | for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) | |||
1957 | MBB.insert(MI, NewMIs[i]); | |||
1958 | ||||
1959 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
1960 | MachineMemOperand *MMO = MF.getMachineMemOperand( | |||
1961 | MachinePointerInfo::getFixedStack(MF, FrameIdx), | |||
1962 | MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), | |||
1963 | MFI.getObjectAlign(FrameIdx)); | |||
1964 | NewMIs.back()->addMemOperand(MF, MMO); | |||
1965 | } | |||
1966 | ||||
1967 | void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, | |||
1968 | MachineBasicBlock::iterator MI, | |||
1969 | Register SrcReg, bool isKill, | |||
1970 | int FrameIdx, | |||
1971 | const TargetRegisterClass *RC, | |||
1972 | const TargetRegisterInfo *TRI) const { | |||
1973 | // We need to avoid a situation in which the value from a VRRC register is | |||
1974 | // spilled using an Altivec instruction and reloaded into a VSRC register | |||
1975 | // using a VSX instruction. The issue with this is that the VSX | |||
1976 | // load/store instructions swap the doublewords in the vector and the Altivec | |||
1977 | // ones don't. The register classes on the spill/reload may be different if | |||
1978 | // the register is defined using an Altivec instruction and is then used by a | |||
1979 | // VSX instruction. | |||
1980 | RC = updatedRC(RC); | |||
1981 | storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI); | |||
1982 | } | |||
1983 | ||||
1984 | void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, | |||
1985 | unsigned DestReg, int FrameIdx, | |||
1986 | const TargetRegisterClass *RC, | |||
1987 | SmallVectorImpl<MachineInstr *> &NewMIs) | |||
1988 | const { | |||
1989 | unsigned Opcode = getLoadOpcodeForSpill(RC); | |||
1990 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg), | |||
1991 | FrameIdx)); | |||
1992 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); | |||
1993 | ||||
1994 | if (PPC::CRRCRegClass.hasSubClassEq(RC) || | |||
1995 | PPC::CRBITRCRegClass.hasSubClassEq(RC)) | |||
1996 | FuncInfo->setSpillsCR(); | |||
1997 | ||||
1998 | if (isXFormMemOp(Opcode)) | |||
1999 | FuncInfo->setHasNonRISpills(); | |||
2000 | } | |||
2001 | ||||
2002 | void PPCInstrInfo::loadRegFromStackSlotNoUpd( | |||
2003 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, | |||
2004 | int FrameIdx, const TargetRegisterClass *RC, | |||
2005 | const TargetRegisterInfo *TRI) const { | |||
2006 | MachineFunction &MF = *MBB.getParent(); | |||
2007 | SmallVector<MachineInstr*, 4> NewMIs; | |||
2008 | DebugLoc DL; | |||
2009 | if (MI != MBB.end()) DL = MI->getDebugLoc(); | |||
2010 | ||||
2011 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); | |||
2012 | FuncInfo->setHasSpills(); | |||
2013 | ||||
2014 | LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); | |||
2015 | ||||
2016 | for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) | |||
2017 | MBB.insert(MI, NewMIs[i]); | |||
2018 | ||||
2019 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
2020 | MachineMemOperand *MMO = MF.getMachineMemOperand( | |||
2021 | MachinePointerInfo::getFixedStack(MF, FrameIdx), | |||
2022 | MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), | |||
2023 | MFI.getObjectAlign(FrameIdx)); | |||
2024 | NewMIs.back()->addMemOperand(MF, MMO); | |||
2025 | } | |||
2026 | ||||
2027 | void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, | |||
2028 | MachineBasicBlock::iterator MI, | |||
2029 | Register DestReg, int FrameIdx, | |||
2030 | const TargetRegisterClass *RC, | |||
2031 | const TargetRegisterInfo *TRI) const { | |||
2032 | // We need to avoid a situation in which the value from a VRRC register is | |||
2033 | // spilled using an Altivec instruction and reloaded into a VSRC register | |||
2034 | // using a VSX instruction. The issue with this is that the VSX | |||
2035 | // load/store instructions swap the doublewords in the vector and the Altivec | |||
2036 | // ones don't. The register classes on the spill/reload may be different if | |||
2037 | // the register is defined using an Altivec instruction and is then used by a | |||
2038 | // VSX instruction. | |||
2039 | RC = updatedRC(RC); | |||
2040 | ||||
2041 | loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI); | |||
2042 | } | |||
2043 | ||||
2044 | bool PPCInstrInfo:: | |||
2045 | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { | |||
2046 | assert(Cond.size() == 2 && "Invalid PPC branch opcode!")(static_cast<void> (0)); | |||
2047 | if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR) | |||
2048 | Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0); | |||
2049 | else | |||
2050 | // Leave the CR# the same, but invert the condition. | |||
2051 | Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); | |||
2052 | return false; | |||
2053 | } | |||
2054 | ||||
2055 | // For some instructions, it is legal to fold ZERO into the RA register field. | |||
2056 | // This function performs that fold by replacing the operand with PPC::ZERO, | |||
2057 | // it does not consider whether the load immediate zero is no longer in use. | |||
2058 | bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, | |||
2059 | Register Reg) const { | |||
2060 | // A zero immediate should always be loaded with a single li. | |||
2061 | unsigned DefOpc = DefMI.getOpcode(); | |||
2062 | if (DefOpc != PPC::LI && DefOpc != PPC::LI8) | |||
2063 | return false; | |||
2064 | if (!DefMI.getOperand(1).isImm()) | |||
2065 | return false; | |||
2066 | if (DefMI.getOperand(1).getImm() != 0) | |||
2067 | return false; | |||
2068 | ||||
2069 | // Note that we cannot here invert the arguments of an isel in order to fold | |||
2070 | // a ZERO into what is presented as the second argument. All we have here | |||
2071 | // is the condition bit, and that might come from a CR-logical bit operation. | |||
2072 | ||||
2073 | const MCInstrDesc &UseMCID = UseMI.getDesc(); | |||
2074 | ||||
2075 | // Only fold into real machine instructions. | |||
2076 | if (UseMCID.isPseudo()) | |||
2077 | return false; | |||
2078 | ||||
2079 | // We need to find which of the User's operands is to be folded, that will be | |||
2080 | // the operand that matches the given register ID. | |||
2081 | unsigned UseIdx; | |||
2082 | for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx) | |||
2083 | if (UseMI.getOperand(UseIdx).isReg() && | |||
2084 | UseMI.getOperand(UseIdx).getReg() == Reg) | |||
2085 | break; | |||
2086 | ||||
2087 | assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI")(static_cast<void> (0)); | |||
2088 | assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg")(static_cast<void> (0)); | |||
2089 | ||||
2090 | const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx]; | |||
2091 | ||||
2092 | // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0 | |||
2093 | // register (which might also be specified as a pointer class kind). | |||
2094 | if (UseInfo->isLookupPtrRegClass()) { | |||
2095 | if (UseInfo->RegClass /* Kind */ != 1) | |||
2096 | return false; | |||
2097 | } else { | |||
2098 | if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID && | |||
2099 | UseInfo->RegClass != PPC::G8RC_NOX0RegClassID) | |||
2100 | return false; | |||
2101 | } | |||
2102 | ||||
2103 | // Make sure this is not tied to an output register (or otherwise | |||
2104 | // constrained). This is true for ST?UX registers, for example, which | |||
2105 | // are tied to their output registers. | |||
2106 | if (UseInfo->Constraints != 0) | |||
2107 | return false; | |||
2108 | ||||
2109 | MCRegister ZeroReg; | |||
2110 | if (UseInfo->isLookupPtrRegClass()) { | |||
2111 | bool isPPC64 = Subtarget.isPPC64(); | |||
2112 | ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; | |||
2113 | } else { | |||
2114 | ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? | |||
2115 | PPC::ZERO8 : PPC::ZERO; | |||
2116 | } | |||
2117 | ||||
2118 | UseMI.getOperand(UseIdx).setReg(ZeroReg); | |||
2119 | return true; | |||
2120 | } | |||
2121 | ||||
2122 | // Folds zero into instructions which have a load immediate zero as an operand | |||
2123 | // but also recognize zero as immediate zero. If the definition of the load | |||
2124 | // has no more users it is deleted. | |||
2125 | bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, | |||
2126 | Register Reg, MachineRegisterInfo *MRI) const { | |||
2127 | bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg); | |||
2128 | if (MRI->use_nodbg_empty(Reg)) | |||
2129 | DefMI.eraseFromParent(); | |||
2130 | return Changed; | |||
2131 | } | |||
2132 | ||||
2133 | static bool MBBDefinesCTR(MachineBasicBlock &MBB) { | |||
2134 | for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); | |||
2135 | I != IE; ++I) | |||
2136 | if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8)) | |||
2137 | return true; | |||
2138 | return false; | |||
2139 | } | |||
2140 | ||||
2141 | // We should make sure that, if we're going to predicate both sides of a | |||
2142 | // condition (a diamond), that both sides don't define the counter register. We | |||
2143 | // can predicate counter-decrement-based branches, but while that predicates | |||
2144 | // the branching, it does not predicate the counter decrement. If we tried to | |||
2145 | // merge the triangle into one predicated block, we'd decrement the counter | |||
2146 | // twice. | |||
2147 | bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, | |||
2148 | unsigned NumT, unsigned ExtraT, | |||
2149 | MachineBasicBlock &FMBB, | |||
2150 | unsigned NumF, unsigned ExtraF, | |||
2151 | BranchProbability Probability) const { | |||
2152 | return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); | |||
2153 | } | |||
2154 | ||||
2155 | ||||
2156 | bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { | |||
2157 | // The predicated branches are identified by their type, not really by the | |||
2158 | // explicit presence of a predicate. Furthermore, some of them can be | |||
2159 | // predicated more than once. Because if conversion won't try to predicate | |||
2160 | // any instruction which already claims to be predicated (by returning true | |||
2161 | // here), always return false. In doing so, we let isPredicable() be the | |||
2162 | // final word on whether not the instruction can be (further) predicated. | |||
2163 | ||||
2164 | return false; | |||
2165 | } | |||
2166 | ||||
2167 | bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI, | |||
2168 | const MachineBasicBlock *MBB, | |||
2169 | const MachineFunction &MF) const { | |||
2170 | // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion | |||
2171 | // across them, since some FP operations may change content of FPSCR. | |||
2172 | // TODO: Model FPSCR in PPC instruction definitions and remove the workaround | |||
2173 | if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF) | |||
2174 | return true; | |||
2175 | return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); | |||
2176 | } | |||
2177 | ||||
2178 | bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, | |||
2179 | ArrayRef<MachineOperand> Pred) const { | |||
2180 | unsigned OpC = MI.getOpcode(); | |||
2181 | if (OpC == PPC::BLR || OpC == PPC::BLR8) { | |||
2182 | if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { | |||
2183 | bool isPPC64 = Subtarget.isPPC64(); | |||
2184 | MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) | |||
2185 | : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); | |||
2186 | // Need add Def and Use for CTR implicit operand. | |||
2187 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2188 | .addReg(Pred[1].getReg(), RegState::Implicit) | |||
2189 | .addReg(Pred[1].getReg(), RegState::ImplicitDefine); | |||
2190 | } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { | |||
2191 | MI.setDesc(get(PPC::BCLR)); | |||
2192 | MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); | |||
2193 | } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { | |||
2194 | MI.setDesc(get(PPC::BCLRn)); | |||
2195 | MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); | |||
2196 | } else { | |||
2197 | MI.setDesc(get(PPC::BCCLR)); | |||
2198 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2199 | .addImm(Pred[0].getImm()) | |||
2200 | .add(Pred[1]); | |||
2201 | } | |||
2202 | ||||
2203 | return true; | |||
2204 | } else if (OpC == PPC::B) { | |||
2205 | if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { | |||
2206 | bool isPPC64 = Subtarget.isPPC64(); | |||
2207 | MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) | |||
2208 | : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); | |||
2209 | // Need add Def and Use for CTR implicit operand. | |||
2210 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2211 | .addReg(Pred[1].getReg(), RegState::Implicit) | |||
2212 | .addReg(Pred[1].getReg(), RegState::ImplicitDefine); | |||
2213 | } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { | |||
2214 | MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); | |||
2215 | MI.RemoveOperand(0); | |||
2216 | ||||
2217 | MI.setDesc(get(PPC::BC)); | |||
2218 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2219 | .add(Pred[1]) | |||
2220 | .addMBB(MBB); | |||
2221 | } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { | |||
2222 | MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); | |||
2223 | MI.RemoveOperand(0); | |||
2224 | ||||
2225 | MI.setDesc(get(PPC::BCn)); | |||
2226 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2227 | .add(Pred[1]) | |||
2228 | .addMBB(MBB); | |||
2229 | } else { | |||
2230 | MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); | |||
2231 | MI.RemoveOperand(0); | |||
2232 | ||||
2233 | MI.setDesc(get(PPC::BCC)); | |||
2234 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2235 | .addImm(Pred[0].getImm()) | |||
2236 | .add(Pred[1]) | |||
2237 | .addMBB(MBB); | |||
2238 | } | |||
2239 | ||||
2240 | return true; | |||
2241 | } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL || | |||
2242 | OpC == PPC::BCTRL8) { | |||
2243 | if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) | |||
2244 | llvm_unreachable("Cannot predicate bctr[l] on the ctr register")__builtin_unreachable(); | |||
2245 | ||||
2246 | bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; | |||
2247 | bool isPPC64 = Subtarget.isPPC64(); | |||
2248 | ||||
2249 | if (Pred[0].getImm() == PPC::PRED_BIT_SET) { | |||
2250 | MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) | |||
2251 | : (setLR ? PPC::BCCTRL : PPC::BCCTR))); | |||
2252 | MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); | |||
2253 | } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { | |||
2254 | MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) | |||
2255 | : (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); | |||
2256 | MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]); | |||
2257 | } else { | |||
2258 | MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) | |||
2259 | : (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); | |||
2260 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2261 | .addImm(Pred[0].getImm()) | |||
2262 | .add(Pred[1]); | |||
2263 | } | |||
2264 | ||||
2265 | // Need add Def and Use for LR implicit operand. | |||
2266 | if (setLR) | |||
2267 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
2268 | .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit) | |||
2269 | .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine); | |||
2270 | ||||
2271 | return true; | |||
2272 | } | |||
2273 | ||||
2274 | return false; | |||
2275 | } | |||
2276 | ||||
2277 | bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, | |||
2278 | ArrayRef<MachineOperand> Pred2) const { | |||
2279 | assert(Pred1.size() == 2 && "Invalid PPC first predicate")(static_cast<void> (0)); | |||
2280 | assert(Pred2.size() == 2 && "Invalid PPC second predicate")(static_cast<void> (0)); | |||
2281 | ||||
2282 | if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR) | |||
2283 | return false; | |||
2284 | if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR) | |||
2285 | return false; | |||
2286 | ||||
2287 | // P1 can only subsume P2 if they test the same condition register. | |||
2288 | if (Pred1[1].getReg() != Pred2[1].getReg()) | |||
2289 | return false; | |||
2290 | ||||
2291 | PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); | |||
2292 | PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); | |||
2293 | ||||
2294 | if (P1 == P2) | |||
2295 | return true; | |||
2296 | ||||
2297 | // Does P1 subsume P2, e.g. GE subsumes GT. | |||
2298 | if (P1 == PPC::PRED_LE && | |||
2299 | (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ)) | |||
2300 | return true; | |||
2301 | if (P1 == PPC::PRED_GE && | |||
2302 | (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ)) | |||
2303 | return true; | |||
2304 | ||||
2305 | return false; | |||
2306 | } | |||
2307 | ||||
2308 | bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, | |||
2309 | std::vector<MachineOperand> &Pred, | |||
2310 | bool SkipDead) const { | |||
2311 | // Note: At the present time, the contents of Pred from this function is | |||
2312 | // unused by IfConversion. This implementation follows ARM by pushing the | |||
2313 | // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of | |||
2314 | // predicate, instructions defining CTR or CTR8 are also included as | |||
2315 | // predicate-defining instructions. | |||
2316 | ||||
2317 | const TargetRegisterClass *RCs[] = | |||
2318 | { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass, | |||
2319 | &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; | |||
2320 | ||||
2321 | bool Found = false; | |||
2322 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { | |||
2323 | const MachineOperand &MO = MI.getOperand(i); | |||
2324 | for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) { | |||
2325 | const TargetRegisterClass *RC = RCs[c]; | |||
2326 | if (MO.isReg()) { | |||
2327 | if (MO.isDef() && RC->contains(MO.getReg())) { | |||
2328 | Pred.push_back(MO); | |||
2329 | Found = true; | |||
2330 | } | |||
2331 | } else if (MO.isRegMask()) { | |||
2332 | for (TargetRegisterClass::iterator I = RC->begin(), | |||
2333 | IE = RC->end(); I != IE; ++I) | |||
2334 | if (MO.clobbersPhysReg(*I)) { | |||
2335 | Pred.push_back(MO); | |||
2336 | Found = true; | |||
2337 | } | |||
2338 | } | |||
2339 | } | |||
2340 | } | |||
2341 | ||||
2342 | return Found; | |||
2343 | } | |||
2344 | ||||
2345 | bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, | |||
2346 | Register &SrcReg2, int64_t &Mask, | |||
2347 | int64_t &Value) const { | |||
2348 | unsigned Opc = MI.getOpcode(); | |||
2349 | ||||
2350 | switch (Opc) { | |||
2351 | default: return false; | |||
2352 | case PPC::CMPWI: | |||
2353 | case PPC::CMPLWI: | |||
2354 | case PPC::CMPDI: | |||
2355 | case PPC::CMPLDI: | |||
2356 | SrcReg = MI.getOperand(1).getReg(); | |||
2357 | SrcReg2 = 0; | |||
2358 | Value = MI.getOperand(2).getImm(); | |||
2359 | Mask = 0xFFFF; | |||
2360 | return true; | |||
2361 | case PPC::CMPW: | |||
2362 | case PPC::CMPLW: | |||
2363 | case PPC::CMPD: | |||
2364 | case PPC::CMPLD: | |||
2365 | case PPC::FCMPUS: | |||
2366 | case PPC::FCMPUD: | |||
2367 | SrcReg = MI.getOperand(1).getReg(); | |||
2368 | SrcReg2 = MI.getOperand(2).getReg(); | |||
2369 | Value = 0; | |||
2370 | Mask = 0; | |||
2371 | return true; | |||
2372 | } | |||
2373 | } | |||
2374 | ||||
2375 | bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, | |||
2376 | Register SrcReg2, int64_t Mask, | |||
2377 | int64_t Value, | |||
2378 | const MachineRegisterInfo *MRI) const { | |||
2379 | if (DisableCmpOpt) | |||
2380 | return false; | |||
2381 | ||||
2382 | int OpC = CmpInstr.getOpcode(); | |||
2383 | Register CRReg = CmpInstr.getOperand(0).getReg(); | |||
2384 | ||||
2385 | // FP record forms set CR1 based on the exception status bits, not a | |||
2386 | // comparison with zero. | |||
2387 | if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD) | |||
2388 | return false; | |||
2389 | ||||
2390 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
2391 | // The record forms set the condition register based on a signed comparison | |||
2392 | // with zero (so says the ISA manual). This is not as straightforward as it | |||
2393 | // seems, however, because this is always a 64-bit comparison on PPC64, even | |||
2394 | // for instructions that are 32-bit in nature (like slw for example). | |||
2395 | // So, on PPC32, for unsigned comparisons, we can use the record forms only | |||
2396 | // for equality checks (as those don't depend on the sign). On PPC64, | |||
2397 | // we are restricted to equality for unsigned 64-bit comparisons and for | |||
2398 | // signed 32-bit comparisons the applicability is more restricted. | |||
2399 | bool isPPC64 = Subtarget.isPPC64(); | |||
2400 | bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; | |||
2401 | bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; | |||
2402 | bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; | |||
2403 | ||||
2404 | // Look through copies unless that gets us to a physical register. | |||
2405 | Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); | |||
2406 | if (ActualSrc.isVirtual()) | |||
2407 | SrcReg = ActualSrc; | |||
2408 | ||||
2409 | // Get the unique definition of SrcReg. | |||
2410 | MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); | |||
2411 | if (!MI) return false; | |||
2412 | ||||
2413 | bool equalityOnly = false; | |||
2414 | bool noSub = false; | |||
2415 | if (isPPC64) { | |||
2416 | if (is32BitSignedCompare) { | |||
2417 | // We can perform this optimization only if MI is sign-extending. | |||
2418 | if (isSignExtended(*MI)) | |||
2419 | noSub = true; | |||
2420 | else | |||
2421 | return false; | |||
2422 | } else if (is32BitUnsignedCompare) { | |||
2423 | // We can perform this optimization, equality only, if MI is | |||
2424 | // zero-extending. | |||
2425 | if (isZeroExtended(*MI)) { | |||
2426 | noSub = true; | |||
2427 | equalityOnly = true; | |||
2428 | } else | |||
2429 | return false; | |||
2430 | } else | |||
2431 | equalityOnly = is64BitUnsignedCompare; | |||
2432 | } else | |||
2433 | equalityOnly = is32BitUnsignedCompare; | |||
2434 | ||||
2435 | if (equalityOnly) { | |||
2436 | // We need to check the uses of the condition register in order to reject | |||
2437 | // non-equality comparisons. | |||
2438 | for (MachineRegisterInfo::use_instr_iterator | |||
2439 | I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); | |||
2440 | I != IE; ++I) { | |||
2441 | MachineInstr *UseMI = &*I; | |||
2442 | if (UseMI->getOpcode() == PPC::BCC) { | |||
2443 | PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); | |||
2444 | unsigned PredCond = PPC::getPredicateCondition(Pred); | |||
2445 | // We ignore hint bits when checking for non-equality comparisons. | |||
2446 | if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) | |||
2447 | return false; | |||
2448 | } else if (UseMI->getOpcode() == PPC::ISEL || | |||
2449 | UseMI->getOpcode() == PPC::ISEL8) { | |||
2450 | unsigned SubIdx = UseMI->getOperand(3).getSubReg(); | |||
2451 | if (SubIdx != PPC::sub_eq) | |||
2452 | return false; | |||
2453 | } else | |||
2454 | return false; | |||
2455 | } | |||
2456 | } | |||
2457 | ||||
2458 | MachineBasicBlock::iterator I = CmpInstr; | |||
2459 | ||||
2460 | // Scan forward to find the first use of the compare. | |||
2461 | for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL; | |||
2462 | ++I) { | |||
2463 | bool FoundUse = false; | |||
2464 | for (MachineRegisterInfo::use_instr_iterator | |||
2465 | J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end(); | |||
2466 | J != JE; ++J) | |||
2467 | if (&*J == &*I) { | |||
2468 | FoundUse = true; | |||
2469 | break; | |||
2470 | } | |||
2471 | ||||
2472 | if (FoundUse) | |||
2473 | break; | |||
2474 | } | |||
2475 | ||||
2476 | SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate; | |||
2477 | SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate; | |||
2478 | ||||
2479 | // There are two possible candidates which can be changed to set CR[01]. | |||
2480 | // One is MI, the other is a SUB instruction. | |||
2481 | // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). | |||
2482 | MachineInstr *Sub = nullptr; | |||
2483 | if (SrcReg2 != 0) | |||
2484 | // MI is not a candidate for CMPrr. | |||
2485 | MI = nullptr; | |||
2486 | // FIXME: Conservatively refuse to convert an instruction which isn't in the | |||
2487 | // same BB as the comparison. This is to allow the check below to avoid calls | |||
2488 | // (and other explicit clobbers); instead we should really check for these | |||
2489 | // more explicitly (in at least a few predecessors). | |||
2490 | else if (MI->getParent() != CmpInstr.getParent()) | |||
2491 | return false; | |||
2492 | else if (Value != 0) { | |||
2493 | // The record-form instructions set CR bit based on signed comparison | |||
2494 | // against 0. We try to convert a compare against 1 or -1 into a compare | |||
2495 | // against 0 to exploit record-form instructions. For example, we change | |||
2496 | // the condition "greater than -1" into "greater than or equal to 0" | |||
2497 | // and "less than 1" into "less than or equal to 0". | |||
2498 | ||||
2499 | // Since we optimize comparison based on a specific branch condition, | |||
2500 | // we don't optimize if condition code is used by more than once. | |||
2501 | if (equalityOnly || !MRI->hasOneUse(CRReg)) | |||
2502 | return false; | |||
2503 | ||||
2504 | MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg); | |||
2505 | if (UseMI->getOpcode() != PPC::BCC) | |||
2506 | return false; | |||
2507 | ||||
2508 | PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); | |||
2509 | unsigned PredCond = PPC::getPredicateCondition(Pred); | |||
2510 | unsigned PredHint = PPC::getPredicateHint(Pred); | |||
2511 | int16_t Immed = (int16_t)Value; | |||
2512 | ||||
2513 | // When modifying the condition in the predicate, we propagate hint bits | |||
2514 | // from the original predicate to the new one. | |||
2515 | if (Immed == -1 && PredCond == PPC::PRED_GT) | |||
2516 | // We convert "greater than -1" into "greater than or equal to 0", | |||
2517 | // since we are assuming signed comparison by !equalityOnly | |||
2518 | Pred = PPC::getPredicate(PPC::PRED_GE, PredHint); | |||
2519 | else if (Immed == -1 && PredCond == PPC::PRED_LE) | |||
2520 | // We convert "less than or equal to -1" into "less than 0". | |||
2521 | Pred = PPC::getPredicate(PPC::PRED_LT, PredHint); | |||
2522 | else if (Immed == 1 && PredCond == PPC::PRED_LT) | |||
2523 | // We convert "less than 1" into "less than or equal to 0". | |||
2524 | Pred = PPC::getPredicate(PPC::PRED_LE, PredHint); | |||
2525 | else if (Immed == 1 && PredCond == PPC::PRED_GE) | |||
2526 | // We convert "greater than or equal to 1" into "greater than 0". | |||
2527 | Pred = PPC::getPredicate(PPC::PRED_GT, PredHint); | |||
2528 | else | |||
2529 | return false; | |||
2530 | ||||
2531 | PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred)); | |||
2532 | } | |||
2533 | ||||
2534 | // Search for Sub. | |||
2535 | --I; | |||
2536 | ||||
2537 | // Get ready to iterate backward from CmpInstr. | |||
2538 | MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin(); | |||
2539 | ||||
2540 | for (; I != E && !noSub; --I) { | |||
2541 | const MachineInstr &Instr = *I; | |||
2542 | unsigned IOpC = Instr.getOpcode(); | |||
2543 | ||||
2544 | if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) || | |||
2545 | Instr.readsRegister(PPC::CR0, TRI))) | |||
2546 | // This instruction modifies or uses the record condition register after | |||
2547 | // the one we want to change. While we could do this transformation, it | |||
2548 | // would likely not be profitable. This transformation removes one | |||
2549 | // instruction, and so even forcing RA to generate one move probably | |||
2550 | // makes it unprofitable. | |||
2551 | return false; | |||
2552 | ||||
2553 | // Check whether CmpInstr can be made redundant by the current instruction. | |||
2554 | if ((OpC == PPC::CMPW || OpC == PPC::CMPLW || | |||
2555 | OpC == PPC::CMPD || OpC == PPC::CMPLD) && | |||
2556 | (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) && | |||
2557 | ((Instr.getOperand(1).getReg() == SrcReg && | |||
2558 | Instr.getOperand(2).getReg() == SrcReg2) || | |||
2559 | (Instr.getOperand(1).getReg() == SrcReg2 && | |||
2560 | Instr.getOperand(2).getReg() == SrcReg))) { | |||
2561 | Sub = &*I; | |||
2562 | break; | |||
2563 | } | |||
2564 | ||||
2565 | if (I == B) | |||
2566 | // The 'and' is below the comparison instruction. | |||
2567 | return false; | |||
2568 | } | |||
2569 | ||||
2570 | // Return false if no candidates exist. | |||
2571 | if (!MI && !Sub) | |||
2572 | return false; | |||
2573 | ||||
2574 | // The single candidate is called MI. | |||
2575 | if (!MI) MI = Sub; | |||
2576 | ||||
2577 | int NewOpC = -1; | |||
2578 | int MIOpC = MI->getOpcode(); | |||
2579 | if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec || | |||
2580 | MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec) | |||
2581 | NewOpC = MIOpC; | |||
2582 | else { | |||
2583 | NewOpC = PPC::getRecordFormOpcode(MIOpC); | |||
2584 | if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1) | |||
2585 | NewOpC = MIOpC; | |||
2586 | } | |||
2587 | ||||
2588 | // FIXME: On the non-embedded POWER architectures, only some of the record | |||
2589 | // forms are fast, and we should use only the fast ones. | |||
2590 | ||||
2591 | // The defining instruction has a record form (or is already a record | |||
2592 | // form). It is possible, however, that we'll need to reverse the condition | |||
2593 | // code of the users. | |||
2594 | if (NewOpC == -1) | |||
2595 | return false; | |||
2596 | ||||
2597 | // This transformation should not be performed if `nsw` is missing and is not | |||
2598 | // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in | |||
2599 | // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in | |||
2600 | // CRReg can reflect if compared values are equal, this optz is still valid. | |||
2601 | if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) && | |||
2602 | Sub && !Sub->getFlag(MachineInstr::NoSWrap)) | |||
2603 | return false; | |||
2604 | ||||
2605 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP | |||
2606 | // needs to be updated to be based on SUB. Push the condition code | |||
2607 | // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the | |||
2608 | // condition code of these operands will be modified. | |||
2609 | // Here, Value == 0 means we haven't converted comparison against 1 or -1 to | |||
2610 | // comparison against 0, which may modify predicate. | |||
2611 | bool ShouldSwap = false; | |||
2612 | if (Sub && Value == 0) { | |||
2613 | ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && | |||
2614 | Sub->getOperand(2).getReg() == SrcReg; | |||
2615 | ||||
2616 | // The operands to subf are the opposite of sub, so only in the fixed-point | |||
2617 | // case, invert the order. | |||
2618 | ShouldSwap = !ShouldSwap; | |||
2619 | } | |||
2620 | ||||
2621 | if (ShouldSwap) | |||
2622 | for (MachineRegisterInfo::use_instr_iterator | |||
2623 | I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); | |||
2624 | I != IE; ++I) { | |||
2625 | MachineInstr *UseMI = &*I; | |||
2626 | if (UseMI->getOpcode() == PPC::BCC) { | |||
2627 | PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm(); | |||
2628 | unsigned PredCond = PPC::getPredicateCondition(Pred); | |||
2629 | assert((!equalityOnly ||(static_cast<void> (0)) | |||
2630 | PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&(static_cast<void> (0)) | |||
2631 | "Invalid predicate for equality-only optimization")(static_cast<void> (0)); | |||
2632 | (void)PredCond; // To suppress warning in release build. | |||
2633 | PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), | |||
2634 | PPC::getSwappedPredicate(Pred))); | |||
2635 | } else if (UseMI->getOpcode() == PPC::ISEL || | |||
2636 | UseMI->getOpcode() == PPC::ISEL8) { | |||
2637 | unsigned NewSubReg = UseMI->getOperand(3).getSubReg(); | |||
2638 | assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&(static_cast<void> (0)) | |||
2639 | "Invalid CR bit for equality-only optimization")(static_cast<void> (0)); | |||
2640 | ||||
2641 | if (NewSubReg == PPC::sub_lt) | |||
2642 | NewSubReg = PPC::sub_gt; | |||
2643 | else if (NewSubReg == PPC::sub_gt) | |||
2644 | NewSubReg = PPC::sub_lt; | |||
2645 | ||||
2646 | SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)), | |||
2647 | NewSubReg)); | |||
2648 | } else // We need to abort on a user we don't understand. | |||
2649 | return false; | |||
2650 | } | |||
2651 | assert(!(Value != 0 && ShouldSwap) &&(static_cast<void> (0)) | |||
2652 | "Non-zero immediate support and ShouldSwap"(static_cast<void> (0)) | |||
2653 | "may conflict in updating predicate")(static_cast<void> (0)); | |||
2654 | ||||
2655 | // Create a new virtual register to hold the value of the CR set by the | |||
2656 | // record-form instruction. If the instruction was not previously in | |||
2657 | // record form, then set the kill flag on the CR. | |||
2658 | CmpInstr.eraseFromParent(); | |||
2659 | ||||
2660 | MachineBasicBlock::iterator MII = MI; | |||
2661 | BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(), | |||
2662 | get(TargetOpcode::COPY), CRReg) | |||
2663 | .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); | |||
2664 | ||||
2665 | // Even if CR0 register were dead before, it is alive now since the | |||
2666 | // instruction we just built uses it. | |||
2667 | MI->clearRegisterDeads(PPC::CR0); | |||
2668 | ||||
2669 | if (MIOpC != NewOpC) { | |||
2670 | // We need to be careful here: we're replacing one instruction with | |||
2671 | // another, and we need to make sure that we get all of the right | |||
2672 | // implicit uses and defs. On the other hand, the caller may be holding | |||
2673 | // an iterator to this instruction, and so we can't delete it (this is | |||
2674 | // specifically the case if this is the instruction directly after the | |||
2675 | // compare). | |||
2676 | ||||
2677 | // Rotates are expensive instructions. If we're emitting a record-form | |||
2678 | // rotate that can just be an andi/andis, we should just emit that. | |||
2679 | if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) { | |||
2680 | Register GPRRes = MI->getOperand(0).getReg(); | |||
2681 | int64_t SH = MI->getOperand(2).getImm(); | |||
2682 | int64_t MB = MI->getOperand(3).getImm(); | |||
2683 | int64_t ME = MI->getOperand(4).getImm(); | |||
2684 | // We can only do this if both the start and end of the mask are in the | |||
2685 | // same halfword. | |||
2686 | bool MBInLoHWord = MB >= 16; | |||
2687 | bool MEInLoHWord = ME >= 16; | |||
2688 | uint64_t Mask = ~0LLU; | |||
2689 | ||||
2690 | if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) { | |||
2691 | Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); | |||
2692 | // The mask value needs to shift right 16 if we're emitting andis. | |||
2693 | Mask >>= MBInLoHWord ? 0 : 16; | |||
2694 | NewOpC = MIOpC == PPC::RLWINM | |||
2695 | ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec) | |||
2696 | : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec); | |||
2697 | } else if (MRI->use_empty(GPRRes) && (ME == 31) && | |||
2698 | (ME - MB + 1 == SH) && (MB >= 16)) { | |||
2699 | // If we are rotating by the exact number of bits as are in the mask | |||
2700 | // and the mask is in the least significant bits of the register, | |||
2701 | // that's just an andis. (as long as the GPR result has no uses). | |||
2702 | Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1); | |||
2703 | Mask >>= 16; | |||
2704 | NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec; | |||
2705 | } | |||
2706 | // If we've set the mask, we can transform. | |||
2707 | if (Mask != ~0LLU) { | |||
2708 | MI->RemoveOperand(4); | |||
2709 | MI->RemoveOperand(3); | |||
2710 | MI->getOperand(2).setImm(Mask); | |||
2711 | NumRcRotatesConvertedToRcAnd++; | |||
2712 | } | |||
2713 | } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) { | |||
2714 | int64_t MB = MI->getOperand(3).getImm(); | |||
2715 | if (MB >= 48) { | |||
2716 | uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; | |||
2717 | NewOpC = PPC::ANDI8_rec; | |||
2718 | MI->RemoveOperand(3); | |||
2719 | MI->getOperand(2).setImm(Mask); | |||
2720 | NumRcRotatesConvertedToRcAnd++; | |||
2721 | } | |||
2722 | } | |||
2723 | ||||
2724 | const MCInstrDesc &NewDesc = get(NewOpC); | |||
2725 | MI->setDesc(NewDesc); | |||
2726 | ||||
2727 | if (NewDesc.ImplicitDefs) | |||
2728 | for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); | |||
2729 | *ImpDefs; ++ImpDefs) | |||
2730 | if (!MI->definesRegister(*ImpDefs)) | |||
2731 | MI->addOperand(*MI->getParent()->getParent(), | |||
2732 | MachineOperand::CreateReg(*ImpDefs, true, true)); | |||
2733 | if (NewDesc.ImplicitUses) | |||
2734 | for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); | |||
2735 | *ImpUses; ++ImpUses) | |||
2736 | if (!MI->readsRegister(*ImpUses)) | |||
2737 | MI->addOperand(*MI->getParent()->getParent(), | |||
2738 | MachineOperand::CreateReg(*ImpUses, false, true)); | |||
2739 | } | |||
2740 | assert(MI->definesRegister(PPC::CR0) &&(static_cast<void> (0)) | |||
2741 | "Record-form instruction does not define cr0?")(static_cast<void> (0)); | |||
2742 | ||||
2743 | // Modify the condition code of operands in OperandsToUpdate. | |||
2744 | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to | |||
2745 | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. | |||
2746 | for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++) | |||
2747 | PredsToUpdate[i].first->setImm(PredsToUpdate[i].second); | |||
2748 | ||||
2749 | for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++) | |||
2750 | SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second); | |||
2751 | ||||
2752 | return true; | |||
2753 | } | |||
2754 | ||||
2755 | bool PPCInstrInfo::getMemOperandsWithOffsetWidth( | |||
2756 | const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, | |||
2757 | int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, | |||
2758 | const TargetRegisterInfo *TRI) const { | |||
2759 | const MachineOperand *BaseOp; | |||
2760 | OffsetIsScalable = false; | |||
2761 | if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) | |||
2762 | return false; | |||
2763 | BaseOps.push_back(BaseOp); | |||
2764 | return true; | |||
2765 | } | |||
2766 | ||||
2767 | static bool isLdStSafeToCluster(const MachineInstr &LdSt, | |||
2768 | const TargetRegisterInfo *TRI) { | |||
2769 | // If this is a volatile load/store, don't mess with it. | |||
2770 | if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3) | |||
2771 | return false; | |||
2772 | ||||
2773 | if (LdSt.getOperand(2).isFI()) | |||
2774 | return true; | |||
2775 | ||||
2776 | assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.")(static_cast<void> (0)); | |||
2777 | // Can't cluster if the instruction modifies the base register | |||
2778 | // or it is update form. e.g. ld r2,3(r2) | |||
2779 | if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI)) | |||
2780 | return false; | |||
2781 | ||||
2782 | return true; | |||
2783 | } | |||
2784 | ||||
2785 | // Only cluster instruction pair that have the same opcode, and they are | |||
2786 | // clusterable according to PowerPC specification. | |||
2787 | static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, | |||
2788 | const PPCSubtarget &Subtarget) { | |||
2789 | switch (FirstOpc) { | |||
2790 | default: | |||
2791 | return false; | |||
2792 | case PPC::STD: | |||
2793 | case PPC::STFD: | |||
2794 | case PPC::STXSD: | |||
2795 | case PPC::DFSTOREf64: | |||
2796 | return FirstOpc == SecondOpc; | |||
2797 | // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with | |||
2798 | // 32bit and 64bit instruction selection. They are clusterable pair though | |||
2799 | // they are different opcode. | |||
2800 | case PPC::STW: | |||
2801 | case PPC::STW8: | |||
2802 | return SecondOpc == PPC::STW || SecondOpc == PPC::STW8; | |||
2803 | } | |||
2804 | } | |||
2805 | ||||
2806 | bool PPCInstrInfo::shouldClusterMemOps( | |||
2807 | ArrayRef<const MachineOperand *> BaseOps1, | |||
2808 | ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads, | |||
2809 | unsigned NumBytes) const { | |||
2810 | ||||
2811 | assert(BaseOps1.size() == 1 && BaseOps2.size() == 1)(static_cast<void> (0)); | |||
2812 | const MachineOperand &BaseOp1 = *BaseOps1.front(); | |||
2813 | const MachineOperand &BaseOp2 = *BaseOps2.front(); | |||
2814 | assert((BaseOp1.isReg() || BaseOp1.isFI()) &&(static_cast<void> (0)) | |||
2815 | "Only base registers and frame indices are supported.")(static_cast<void> (0)); | |||
2816 | ||||
2817 | // The NumLoads means the number of loads that has been clustered. | |||
2818 | // Don't cluster memory op if there are already two ops clustered at least. | |||
2819 | if (NumLoads > 2) | |||
2820 | return false; | |||
2821 | ||||
2822 | // Cluster the load/store only when they have the same base | |||
2823 | // register or FI. | |||
2824 | if ((BaseOp1.isReg() != BaseOp2.isReg()) || | |||
2825 | (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) || | |||
2826 | (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())) | |||
2827 | return false; | |||
2828 | ||||
2829 | // Check if the load/store are clusterable according to the PowerPC | |||
2830 | // specification. | |||
2831 | const MachineInstr &FirstLdSt = *BaseOp1.getParent(); | |||
2832 | const MachineInstr &SecondLdSt = *BaseOp2.getParent(); | |||
2833 | unsigned FirstOpc = FirstLdSt.getOpcode(); | |||
2834 | unsigned SecondOpc = SecondLdSt.getOpcode(); | |||
2835 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
2836 | // Cluster the load/store only when they have the same opcode, and they are | |||
2837 | // clusterable opcode according to PowerPC specification. | |||
2838 | if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget)) | |||
2839 | return false; | |||
2840 | ||||
2841 | // Can't cluster load/store that have ordered or volatile memory reference. | |||
2842 | if (!isLdStSafeToCluster(FirstLdSt, TRI) || | |||
2843 | !isLdStSafeToCluster(SecondLdSt, TRI)) | |||
2844 | return false; | |||
2845 | ||||
2846 | int64_t Offset1 = 0, Offset2 = 0; | |||
2847 | unsigned Width1 = 0, Width2 = 0; | |||
2848 | const MachineOperand *Base1 = nullptr, *Base2 = nullptr; | |||
2849 | if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || | |||
2850 | !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || | |||
2851 | Width1 != Width2) | |||
2852 | return false; | |||
2853 | ||||
2854 | assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&(static_cast<void> (0)) | |||
2855 | "getMemOperandWithOffsetWidth return incorrect base op")(static_cast<void> (0)); | |||
2856 | // The caller should already have ordered FirstMemOp/SecondMemOp by offset. | |||
2857 | assert(Offset1 <= Offset2 && "Caller should have ordered offsets.")(static_cast<void> (0)); | |||
2858 | return Offset1 + Width1 == Offset2; | |||
2859 | } | |||
2860 | ||||
2861 | /// GetInstSize - Return the number of bytes of code the specified | |||
2862 | /// instruction may be. This returns the maximum number of bytes. | |||
2863 | /// | |||
2864 | unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { | |||
2865 | unsigned Opcode = MI.getOpcode(); | |||
2866 | ||||
2867 | if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) { | |||
2868 | const MachineFunction *MF = MI.getParent()->getParent(); | |||
2869 | const char *AsmStr = MI.getOperand(0).getSymbolName(); | |||
2870 | return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); | |||
2871 | } else if (Opcode == TargetOpcode::STACKMAP) { | |||
2872 | StackMapOpers Opers(&MI); | |||
2873 | return Opers.getNumPatchBytes(); | |||
2874 | } else if (Opcode == TargetOpcode::PATCHPOINT) { | |||
2875 | PatchPointOpers Opers(&MI); | |||
2876 | return Opers.getNumPatchBytes(); | |||
2877 | } else { | |||
2878 | return get(Opcode).getSize(); | |||
2879 | } | |||
2880 | } | |||
2881 | ||||
2882 | std::pair<unsigned, unsigned> | |||
2883 | PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { | |||
2884 | const unsigned Mask = PPCII::MO_ACCESS_MASK; | |||
2885 | return std::make_pair(TF & Mask, TF & ~Mask); | |||
2886 | } | |||
2887 | ||||
2888 | ArrayRef<std::pair<unsigned, const char *>> | |||
2889 | PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { | |||
2890 | using namespace PPCII; | |||
2891 | static const std::pair<unsigned, const char *> TargetFlags[] = { | |||
2892 | {MO_LO, "ppc-lo"}, | |||
2893 | {MO_HA, "ppc-ha"}, | |||
2894 | {MO_TPREL_LO, "ppc-tprel-lo"}, | |||
2895 | {MO_TPREL_HA, "ppc-tprel-ha"}, | |||
2896 | {MO_DTPREL_LO, "ppc-dtprel-lo"}, | |||
2897 | {MO_TLSLD_LO, "ppc-tlsld-lo"}, | |||
2898 | {MO_TOC_LO, "ppc-toc-lo"}, | |||
2899 | {MO_TLS, "ppc-tls"}}; | |||
2900 | return makeArrayRef(TargetFlags); | |||
2901 | } | |||
2902 | ||||
2903 | ArrayRef<std::pair<unsigned, const char *>> | |||
2904 | PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { | |||
2905 | using namespace PPCII; | |||
2906 | static const std::pair<unsigned, const char *> TargetFlags[] = { | |||
2907 | {MO_PLT, "ppc-plt"}, | |||
2908 | {MO_PIC_FLAG, "ppc-pic"}, | |||
2909 | {MO_PCREL_FLAG, "ppc-pcrel"}, | |||
2910 | {MO_GOT_FLAG, "ppc-got"}, | |||
2911 | {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}, | |||
2912 | {MO_TLSGD_FLAG, "ppc-tlsgd"}, | |||
2913 | {MO_TLSLD_FLAG, "ppc-tlsld"}, | |||
2914 | {MO_TPREL_FLAG, "ppc-tprel"}, | |||
2915 | {MO_TLSGDM_FLAG, "ppc-tlsgdm"}, | |||
2916 | {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"}, | |||
2917 | {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"}, | |||
2918 | {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}}; | |||
2919 | return makeArrayRef(TargetFlags); | |||
2920 | } | |||
2921 | ||||
2922 | // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. | |||
2923 | // The VSX versions have the advantage of a full 64-register target whereas | |||
2924 | // the FP ones have the advantage of lower latency and higher throughput. So | |||
2925 | // what we are after is using the faster instructions in low register pressure | |||
2926 | // situations and using the larger register file in high register pressure | |||
2927 | // situations. | |||
2928 | bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { | |||
2929 | unsigned UpperOpcode, LowerOpcode; | |||
2930 | switch (MI.getOpcode()) { | |||
2931 | case PPC::DFLOADf32: | |||
2932 | UpperOpcode = PPC::LXSSP; | |||
2933 | LowerOpcode = PPC::LFS; | |||
2934 | break; | |||
2935 | case PPC::DFLOADf64: | |||
2936 | UpperOpcode = PPC::LXSD; | |||
2937 | LowerOpcode = PPC::LFD; | |||
2938 | break; | |||
2939 | case PPC::DFSTOREf32: | |||
2940 | UpperOpcode = PPC::STXSSP; | |||
2941 | LowerOpcode = PPC::STFS; | |||
2942 | break; | |||
2943 | case PPC::DFSTOREf64: | |||
2944 | UpperOpcode = PPC::STXSD; | |||
2945 | LowerOpcode = PPC::STFD; | |||
2946 | break; | |||
2947 | case PPC::XFLOADf32: | |||
2948 | UpperOpcode = PPC::LXSSPX; | |||
2949 | LowerOpcode = PPC::LFSX; | |||
2950 | break; | |||
2951 | case PPC::XFLOADf64: | |||
2952 | UpperOpcode = PPC::LXSDX; | |||
2953 | LowerOpcode = PPC::LFDX; | |||
2954 | break; | |||
2955 | case PPC::XFSTOREf32: | |||
2956 | UpperOpcode = PPC::STXSSPX; | |||
2957 | LowerOpcode = PPC::STFSX; | |||
2958 | break; | |||
2959 | case PPC::XFSTOREf64: | |||
2960 | UpperOpcode = PPC::STXSDX; | |||
2961 | LowerOpcode = PPC::STFDX; | |||
2962 | break; | |||
2963 | case PPC::LIWAX: | |||
2964 | UpperOpcode = PPC::LXSIWAX; | |||
2965 | LowerOpcode = PPC::LFIWAX; | |||
2966 | break; | |||
2967 | case PPC::LIWZX: | |||
2968 | UpperOpcode = PPC::LXSIWZX; | |||
2969 | LowerOpcode = PPC::LFIWZX; | |||
2970 | break; | |||
2971 | case PPC::STIWX: | |||
2972 | UpperOpcode = PPC::STXSIWX; | |||
2973 | LowerOpcode = PPC::STFIWX; | |||
2974 | break; | |||
2975 | default: | |||
2976 | llvm_unreachable("Unknown Operation!")__builtin_unreachable(); | |||
2977 | } | |||
2978 | ||||
2979 | Register TargetReg = MI.getOperand(0).getReg(); | |||
2980 | unsigned Opcode; | |||
2981 | if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || | |||
2982 | (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31)) | |||
2983 | Opcode = LowerOpcode; | |||
2984 | else | |||
2985 | Opcode = UpperOpcode; | |||
2986 | MI.setDesc(get(Opcode)); | |||
2987 | return true; | |||
2988 | } | |||
2989 | ||||
2990 | static bool isAnImmediateOperand(const MachineOperand &MO) { | |||
2991 | return MO.isCPI() || MO.isGlobal() || MO.isImm(); | |||
2992 | } | |||
2993 | ||||
2994 | bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { | |||
2995 | auto &MBB = *MI.getParent(); | |||
2996 | auto DL = MI.getDebugLoc(); | |||
2997 | ||||
2998 | switch (MI.getOpcode()) { | |||
2999 | case PPC::BUILD_UACC: { | |||
3000 | MCRegister ACC = MI.getOperand(0).getReg(); | |||
3001 | MCRegister UACC = MI.getOperand(1).getReg(); | |||
3002 | if (ACC - PPC::ACC0 != UACC - PPC::UACC0) { | |||
3003 | MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4; | |||
3004 | MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4; | |||
3005 | // FIXME: This can easily be improved to look up to the top of the MBB | |||
3006 | // to see if the inputs are XXLOR's. If they are and SrcReg is killed, | |||
3007 | // we can just re-target any such XXLOR's to DstVSR + offset. | |||
3008 | for (int VecNo = 0; VecNo < 4; VecNo++) | |||
3009 | BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo) | |||
3010 | .addReg(SrcVSR + VecNo) | |||
3011 | .addReg(SrcVSR + VecNo); | |||
3012 | } | |||
3013 | // BUILD_UACC is expanded to 4 copies of the underlying vsx registers. | |||
3014 | // So after building the 4 copies, we can replace the BUILD_UACC instruction | |||
3015 | // with a NOP. | |||
3016 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
3017 | } | |||
3018 | case PPC::KILL_PAIR: { | |||
3019 | MI.setDesc(get(PPC::UNENCODED_NOP)); | |||
3020 | MI.RemoveOperand(1); | |||
3021 | MI.RemoveOperand(0); | |||
3022 | return true; | |||
3023 | } | |||
3024 | case TargetOpcode::LOAD_STACK_GUARD: { | |||
3025 | assert(Subtarget.isTargetLinux() &&(static_cast<void> (0)) | |||
3026 | "Only Linux target is expected to contain LOAD_STACK_GUARD")(static_cast<void> (0)); | |||
3027 | const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; | |||
3028 | const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; | |||
3029 | MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ)); | |||
3030 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
3031 | .addImm(Offset) | |||
3032 | .addReg(Reg); | |||
3033 | return true; | |||
3034 | } | |||
3035 | case PPC::DFLOADf32: | |||
3036 | case PPC::DFLOADf64: | |||
3037 | case PPC::DFSTOREf32: | |||
3038 | case PPC::DFSTOREf64: { | |||
3039 | assert(Subtarget.hasP9Vector() &&(static_cast<void> (0)) | |||
3040 | "Invalid D-Form Pseudo-ops on Pre-P9 target.")(static_cast<void> (0)); | |||
3041 | assert(MI.getOperand(2).isReg() &&(static_cast<void> (0)) | |||
3042 | isAnImmediateOperand(MI.getOperand(1)) &&(static_cast<void> (0)) | |||
3043 | "D-form op must have register and immediate operands")(static_cast<void> (0)); | |||
3044 | return expandVSXMemPseudo(MI); | |||
3045 | } | |||
3046 | case PPC::XFLOADf32: | |||
3047 | case PPC::XFSTOREf32: | |||
3048 | case PPC::LIWAX: | |||
3049 | case PPC::LIWZX: | |||
3050 | case PPC::STIWX: { | |||
3051 | assert(Subtarget.hasP8Vector() &&(static_cast<void> (0)) | |||
3052 | "Invalid X-Form Pseudo-ops on Pre-P8 target.")(static_cast<void> (0)); | |||
3053 | assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&(static_cast<void> (0)) | |||
3054 | "X-form op must have register and register operands")(static_cast<void> (0)); | |||
3055 | return expandVSXMemPseudo(MI); | |||
3056 | } | |||
3057 | case PPC::XFLOADf64: | |||
3058 | case PPC::XFSTOREf64: { | |||
3059 | assert(Subtarget.hasVSX() &&(static_cast<void> (0)) | |||
3060 | "Invalid X-Form Pseudo-ops on target that has no VSX.")(static_cast<void> (0)); | |||
3061 | assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&(static_cast<void> (0)) | |||
3062 | "X-form op must have register and register operands")(static_cast<void> (0)); | |||
3063 | return expandVSXMemPseudo(MI); | |||
3064 | } | |||
3065 | case PPC::SPILLTOVSR_LD: { | |||
3066 | Register TargetReg = MI.getOperand(0).getReg(); | |||
3067 | if (PPC::VSFRCRegClass.contains(TargetReg)) { | |||
3068 | MI.setDesc(get(PPC::DFLOADf64)); | |||
3069 | return expandPostRAPseudo(MI); | |||
3070 | } | |||
3071 | else | |||
3072 | MI.setDesc(get(PPC::LD)); | |||
3073 | return true; | |||
3074 | } | |||
3075 | case PPC::SPILLTOVSR_ST: { | |||
3076 | Register SrcReg = MI.getOperand(0).getReg(); | |||
3077 | if (PPC::VSFRCRegClass.contains(SrcReg)) { | |||
3078 | NumStoreSPILLVSRRCAsVec++; | |||
3079 | MI.setDesc(get(PPC::DFSTOREf64)); | |||
3080 | return expandPostRAPseudo(MI); | |||
3081 | } else { | |||
3082 | NumStoreSPILLVSRRCAsGpr++; | |||
3083 | MI.setDesc(get(PPC::STD)); | |||
3084 | } | |||
3085 | return true; | |||
3086 | } | |||
3087 | case PPC::SPILLTOVSR_LDX: { | |||
3088 | Register TargetReg = MI.getOperand(0).getReg(); | |||
3089 | if (PPC::VSFRCRegClass.contains(TargetReg)) | |||
3090 | MI.setDesc(get(PPC::LXSDX)); | |||
3091 | else | |||
3092 | MI.setDesc(get(PPC::LDX)); | |||
3093 | return true; | |||
3094 | } | |||
3095 | case PPC::SPILLTOVSR_STX: { | |||
3096 | Register SrcReg = MI.getOperand(0).getReg(); | |||
3097 | if (PPC::VSFRCRegClass.contains(SrcReg)) { | |||
3098 | NumStoreSPILLVSRRCAsVec++; | |||
3099 | MI.setDesc(get(PPC::STXSDX)); | |||
3100 | } else { | |||
3101 | NumStoreSPILLVSRRCAsGpr++; | |||
3102 | MI.setDesc(get(PPC::STDX)); | |||
3103 | } | |||
3104 | return true; | |||
3105 | } | |||
3106 | ||||
3107 | // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass. | |||
3108 | case PPC::CFENCE8: { | |||
3109 | auto Val = MI.getOperand(0).getReg(); | |||
3110 | BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); | |||
3111 | BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) | |||
3112 | .addImm(PPC::PRED_NE_MINUS) | |||
3113 | .addReg(PPC::CR7) | |||
3114 | .addImm(1); | |||
3115 | MI.setDesc(get(PPC::ISYNC)); | |||
3116 | MI.RemoveOperand(0); | |||
3117 | return true; | |||
3118 | } | |||
3119 | } | |||
3120 | return false; | |||
3121 | } | |||
3122 | ||||
3123 | // Essentially a compile-time implementation of a compare->isel sequence. | |||
3124 | // It takes two constants to compare, along with the true/false registers | |||
3125 | // and the comparison type (as a subreg to a CR field) and returns one | |||
3126 | // of the true/false registers, depending on the comparison results. | |||
3127 | static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, | |||
3128 | unsigned TrueReg, unsigned FalseReg, | |||
3129 | unsigned CRSubReg) { | |||
3130 | // Signed comparisons. The immediates are assumed to be sign-extended. | |||
3131 | if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) { | |||
3132 | switch (CRSubReg) { | |||
3133 | default: llvm_unreachable("Unknown integer comparison type.")__builtin_unreachable(); | |||
3134 | case PPC::sub_lt: | |||
3135 | return Imm1 < Imm2 ? TrueReg : FalseReg; | |||
3136 | case PPC::sub_gt: | |||
3137 | return Imm1 > Imm2 ? TrueReg : FalseReg; | |||
3138 | case PPC::sub_eq: | |||
3139 | return Imm1 == Imm2 ? TrueReg : FalseReg; | |||
3140 | } | |||
3141 | } | |||
3142 | // Unsigned comparisons. | |||
3143 | else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) { | |||
3144 | switch (CRSubReg) { | |||
3145 | default: llvm_unreachable("Unknown integer comparison type.")__builtin_unreachable(); | |||
3146 | case PPC::sub_lt: | |||
3147 | return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg; | |||
3148 | case PPC::sub_gt: | |||
3149 | return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg; | |||
3150 | case PPC::sub_eq: | |||
3151 | return Imm1 == Imm2 ? TrueReg : FalseReg; | |||
3152 | } | |||
3153 | } | |||
3154 | return PPC::NoRegister; | |||
3155 | } | |||
3156 | ||||
3157 | void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, | |||
3158 | unsigned OpNo, | |||
3159 | int64_t Imm) const { | |||
3160 | assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG")(static_cast<void> (0)); | |||
3161 | // Replace the REG with the Immediate. | |||
3162 | Register InUseReg = MI.getOperand(OpNo).getReg(); | |||
3163 | MI.getOperand(OpNo).ChangeToImmediate(Imm); | |||
3164 | ||||
3165 | // We need to make sure that the MI didn't have any implicit use | |||
3166 | // of this REG any more. We don't call MI.implicit_operands().empty() to | |||
3167 | // return early, since MI's MCID might be changed in calling context, as a | |||
3168 | // result its number of explicit operands may be changed, thus the begin of | |||
3169 | // implicit operand is changed. | |||
3170 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
3171 | int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI); | |||
3172 | if (UseOpIdx >= 0) { | |||
3173 | MachineOperand &MO = MI.getOperand(UseOpIdx); | |||
3174 | if (MO.isImplicit()) | |||
3175 | // The operands must always be in the following order: | |||
3176 | // - explicit reg defs, | |||
3177 | // - other explicit operands (reg uses, immediates, etc.), | |||
3178 | // - implicit reg defs | |||
3179 | // - implicit reg uses | |||
3180 | // Therefore, removing the implicit operand won't change the explicit | |||
3181 | // operands layout. | |||
3182 | MI.RemoveOperand(UseOpIdx); | |||
3183 | } | |||
3184 | } | |||
3185 | ||||
3186 | // Replace an instruction with one that materializes a constant (and sets | |||
3187 | // CR0 if the original instruction was a record-form instruction). | |||
3188 | void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, | |||
3189 | const LoadImmediateInfo &LII) const { | |||
3190 | // Remove existing operands. | |||
3191 | int OperandToKeep = LII.SetCR ? 1 : 0; | |||
3192 | for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--) | |||
3193 | MI.RemoveOperand(i); | |||
3194 | ||||
3195 | // Replace the instruction. | |||
3196 | if (LII.SetCR) { | |||
3197 | MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); | |||
3198 | // Set the immediate. | |||
3199 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
3200 | .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine); | |||
3201 | return; | |||
3202 | } | |||
3203 | else | |||
3204 | MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI)); | |||
3205 | ||||
3206 | // Set the immediate. | |||
3207 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) | |||
3208 | .addImm(LII.Imm); | |||
3209 | } | |||
3210 | ||||
3211 | MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI, | |||
3212 | bool &SeenIntermediateUse) const { | |||
3213 | assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&(static_cast<void> (0)) | |||
3214 | "Should be called after register allocation.")(static_cast<void> (0)); | |||
3215 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
3216 | MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; | |||
3217 | It++; | |||
3218 | SeenIntermediateUse = false; | |||
3219 | for (; It != E; ++It) { | |||
3220 | if (It->modifiesRegister(Reg, TRI)) | |||
3221 | return &*It; | |||
3222 | if (It->readsRegister(Reg, TRI)) | |||
3223 | SeenIntermediateUse = true; | |||
3224 | } | |||
3225 | return nullptr; | |||
3226 | } | |||
3227 | ||||
3228 | MachineInstr *PPCInstrInfo::getForwardingDefMI( | |||
3229 | MachineInstr &MI, | |||
3230 | unsigned &OpNoForForwarding, | |||
3231 | bool &SeenIntermediateUse) const { | |||
3232 | OpNoForForwarding = ~0U; | |||
3233 | MachineInstr *DefMI = nullptr; | |||
3234 | MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); | |||
3235 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
3236 | // If we're in SSA, get the defs through the MRI. Otherwise, only look | |||
3237 | // within the basic block to see if the register is defined using an | |||
3238 | // LI/LI8/ADDI/ADDI8. | |||
3239 | if (MRI->isSSA()) { | |||
3240 | for (int i = 1, e = MI.getNumOperands(); i < e; i++) { | |||
3241 | if (!MI.getOperand(i).isReg()) | |||
3242 | continue; | |||
3243 | Register Reg = MI.getOperand(i).getReg(); | |||
3244 | if (!Register::isVirtualRegister(Reg)) | |||
3245 | continue; | |||
3246 | unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI); | |||
3247 | if (Register::isVirtualRegister(TrueReg)) { | |||
3248 | DefMI = MRI->getVRegDef(TrueReg); | |||
3249 | if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8 || | |||
3250 | DefMI->getOpcode() == PPC::ADDI || | |||
3251 | DefMI->getOpcode() == PPC::ADDI8) { | |||
3252 | OpNoForForwarding = i; | |||
3253 | // The ADDI and LI operand maybe exist in one instruction at same | |||
3254 | // time. we prefer to fold LI operand as LI only has one Imm operand | |||
3255 | // and is more possible to be converted. So if current DefMI is | |||
3256 | // ADDI/ADDI8, we continue to find possible LI/LI8. | |||
3257 | if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) | |||
3258 | break; | |||
3259 | } | |||
3260 | } | |||
3261 | } | |||
3262 | } else { | |||
3263 | // Looking back through the definition for each operand could be expensive, | |||
3264 | // so exit early if this isn't an instruction that either has an immediate | |||
3265 | // form or is already an immediate form that we can handle. | |||
3266 | ImmInstrInfo III; | |||
3267 | unsigned Opc = MI.getOpcode(); | |||
3268 | bool ConvertibleImmForm = | |||
3269 | Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI || | |||
3270 | Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 || | |||
3271 | Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI || | |||
3272 | Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec || | |||
3273 | Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || | |||
3274 | Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 || | |||
3275 | Opc == PPC::RLWINM8_rec; | |||
3276 | bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg()) | |||
3277 | ? isVFRegister(MI.getOperand(0).getReg()) | |||
3278 | : false; | |||
3279 | if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true)) | |||
3280 | return nullptr; | |||
3281 | ||||
3282 | // Don't convert or %X, %Y, %Y since that's just a register move. | |||
3283 | if ((Opc == PPC::OR || Opc == PPC::OR8) && | |||
3284 | MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) | |||
3285 | return nullptr; | |||
3286 | for (int i = 1, e = MI.getNumOperands(); i < e; i++) { | |||
3287 | MachineOperand &MO = MI.getOperand(i); | |||
3288 | SeenIntermediateUse = false; | |||
3289 | if (MO.isReg() && MO.isUse() && !MO.isImplicit()) { | |||
3290 | Register Reg = MI.getOperand(i).getReg(); | |||
3291 | // If we see another use of this reg between the def and the MI, | |||
3292 | // we want to flat it so the def isn't deleted. | |||
3293 | MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse); | |||
3294 | if (DefMI) { | |||
3295 | // Is this register defined by some form of add-immediate (including | |||
3296 | // load-immediate) within this basic block? | |||
3297 | switch (DefMI->getOpcode()) { | |||
3298 | default: | |||
3299 | break; | |||
3300 | case PPC::LI: | |||
3301 | case PPC::LI8: | |||
3302 | case PPC::ADDItocL: | |||
3303 | case PPC::ADDI: | |||
3304 | case PPC::ADDI8: | |||
3305 | OpNoForForwarding = i; | |||
3306 | return DefMI; | |||
3307 | } | |||
3308 | } | |||
3309 | } | |||
3310 | } | |||
3311 | } | |||
3312 | return OpNoForForwarding == ~0U ? nullptr : DefMI; | |||
3313 | } | |||
3314 | ||||
3315 | unsigned PPCInstrInfo::getSpillTarget() const { | |||
3316 | // With P10, we may need to spill paired vector registers or accumulator | |||
3317 | // registers. MMA implies paired vectors, so we can just check that. | |||
3318 | bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops(); | |||
3319 | return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0; | |||
3320 | } | |||
3321 | ||||
3322 | const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { | |||
3323 | return StoreSpillOpcodesArray[getSpillTarget()]; | |||
3324 | } | |||
3325 | ||||
3326 | const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { | |||
3327 | return LoadSpillOpcodesArray[getSpillTarget()]; | |||
3328 | } | |||
3329 | ||||
3330 | void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI, | |||
3331 | unsigned RegNo) const { | |||
3332 | // Conservatively clear kill flag for the register if the instructions are in | |||
3333 | // different basic blocks and in SSA form, because the kill flag may no longer | |||
3334 | // be right. There is no need to bother with dead flags since defs with no | |||
3335 | // uses will be handled by DCE. | |||
3336 | MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo(); | |||
3337 | if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) { | |||
3338 | MRI.clearKillFlags(RegNo); | |||
3339 | return; | |||
3340 | } | |||
3341 | ||||
3342 | // Instructions between [StartMI, EndMI] should be in same basic block. | |||
3343 | assert((StartMI->getParent() == EndMI->getParent()) &&(static_cast<void> (0)) | |||
3344 | "Instructions are not in same basic block")(static_cast<void> (0)); | |||
3345 | ||||
3346 | // If before RA, StartMI may be def through COPY, we need to adjust it to the | |||
3347 | // real def. See function getForwardingDefMI. | |||
3348 | if (MRI.isSSA()) { | |||
3349 | bool Reads, Writes; | |||
3350 | std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo); | |||
3351 | if (!Reads && !Writes) { | |||
3352 | assert(Register::isVirtualRegister(RegNo) &&(static_cast<void> (0)) | |||
3353 | "Must be a virtual register")(static_cast<void> (0)); | |||
3354 | // Get real def and ignore copies. | |||
3355 | StartMI = MRI.getVRegDef(RegNo); | |||
3356 | } | |||
3357 | } | |||
3358 | ||||
3359 | bool IsKillSet = false; | |||
3360 | ||||
3361 | auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) { | |||
3362 | MachineOperand &MO = MI.getOperand(Index); | |||
3363 | if (MO.isReg() && MO.isUse() && MO.isKill() && | |||
3364 | getRegisterInfo().regsOverlap(MO.getReg(), RegNo)) | |||
3365 | MO.setIsKill(false); | |||
3366 | }; | |||
3367 | ||||
3368 | // Set killed flag for EndMI. | |||
3369 | // No need to do anything if EndMI defines RegNo. | |||
3370 | int UseIndex = | |||
3371 | EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo()); | |||
3372 | if (UseIndex != -1) { | |||
3373 | EndMI->getOperand(UseIndex).setIsKill(true); | |||
3374 | IsKillSet = true; | |||
3375 | // Clear killed flag for other EndMI operands related to RegNo. In some | |||
3376 | // upexpected cases, killed may be set multiple times for same register | |||
3377 | // operand in same MI. | |||
3378 | for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i) | |||
3379 | if (i != UseIndex) | |||
3380 | clearOperandKillInfo(*EndMI, i); | |||
3381 | } | |||
3382 | ||||
3383 | // Walking the inst in reverse order (EndMI -> StartMI]. | |||
3384 | MachineBasicBlock::reverse_iterator It = *EndMI; | |||
3385 | MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend(); | |||
3386 | // EndMI has been handled above, skip it here. | |||
3387 | It++; | |||
3388 | MachineOperand *MO = nullptr; | |||
3389 | for (; It != E; ++It) { | |||
3390 | // Skip insturctions which could not be a def/use of RegNo. | |||
3391 | if (It->isDebugInstr() || It->isPosition()) | |||
3392 | continue; | |||
3393 | ||||
3394 | // Clear killed flag for all It operands related to RegNo. In some | |||
3395 | // upexpected cases, killed may be set multiple times for same register | |||
3396 | // operand in same MI. | |||
3397 | for (int i = 0, e = It->getNumOperands(); i != e; ++i) | |||
3398 | clearOperandKillInfo(*It, i); | |||
3399 | ||||
3400 | // If killed is not set, set killed for its last use or set dead for its def | |||
3401 | // if no use found. | |||
3402 | if (!IsKillSet) { | |||
3403 | if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) { | |||
3404 | // Use found, set it killed. | |||
3405 | IsKillSet = true; | |||
3406 | MO->setIsKill(true); | |||
3407 | continue; | |||
3408 | } else if ((MO = It->findRegisterDefOperand(RegNo, false, true, | |||
3409 | &getRegisterInfo()))) { | |||
3410 | // No use found, set dead for its def. | |||
3411 | assert(&*It == StartMI && "No new def between StartMI and EndMI.")(static_cast<void> (0)); | |||
3412 | MO->setIsDead(true); | |||
3413 | break; | |||
3414 | } | |||
3415 | } | |||
3416 | ||||
3417 | if ((&*It) == StartMI) | |||
3418 | break; | |||
3419 | } | |||
3420 | // Ensure RegMo liveness is killed after EndMI. | |||
3421 | assert((IsKillSet || (MO && MO->isDead())) &&(static_cast<void> (0)) | |||
3422 | "RegNo should be killed or dead")(static_cast<void> (0)); | |||
3423 | } | |||
3424 | ||||
3425 | // This opt tries to convert the following imm form to an index form to save an | |||
3426 | // add for stack variables. | |||
3427 | // Return false if no such pattern found. | |||
3428 | // | |||
3429 | // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi | |||
3430 | // ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg | |||
3431 | // Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed) | |||
3432 | // | |||
3433 | // can be converted to: | |||
3434 | // | |||
3435 | // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm) | |||
3436 | // Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed) | |||
3437 | // | |||
3438 | // In order to eliminate ADD instr, make sure that: | |||
3439 | // 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in | |||
3440 | // new ADDI instr and ADDI can only take int16 Imm. | |||
3441 | // 2: ToBeChangedReg must be killed in ADD instr and there is no other use | |||
3442 | // between ADDI and ADD instr since its original def in ADDI will be changed | |||
3443 | // in new ADDI instr. And also there should be no new def for it between | |||
3444 | // ADD and Imm instr as ToBeChangedReg will be used in Index instr. | |||
3445 | // 3: ToBeDeletedReg must be killed in Imm instr and there is no other use | |||
3446 | // between ADD and Imm instr since ADD instr will be eliminated. | |||
3447 | // 4: ScaleReg must not be redefined between ADD and Imm instr since it will be | |||
3448 | // moved to Index instr. | |||
3449 | bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const { | |||
3450 | MachineFunction *MF = MI.getParent()->getParent(); | |||
3451 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
3452 | bool PostRA = !MRI->isSSA(); | |||
3453 | // Do this opt after PEI which is after RA. The reason is stack slot expansion | |||
3454 | // in PEI may expose such opportunities since in PEI, stack slot offsets to | |||
3455 | // frame base(OffsetAddi) are determined. | |||
3456 | if (!PostRA) | |||
3457 | return false; | |||
3458 | unsigned ToBeDeletedReg = 0; | |||
3459 | int64_t OffsetImm = 0; | |||
3460 | unsigned XFormOpcode = 0; | |||
3461 | ImmInstrInfo III; | |||
3462 | ||||
3463 | // Check if Imm instr meets requirement. | |||
3464 | if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm, | |||
3465 | III)) | |||
3466 | return false; | |||
3467 | ||||
3468 | bool OtherIntermediateUse = false; | |||
3469 | MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse); | |||
3470 | ||||
3471 | // Exit if there is other use between ADD and Imm instr or no def found. | |||
3472 | if (OtherIntermediateUse || !ADDMI) | |||
3473 | return false; | |||
3474 | ||||
3475 | // Check if ADD instr meets requirement. | |||
3476 | if (!isADDInstrEligibleForFolding(*ADDMI)) | |||
3477 | return false; | |||
3478 | ||||
3479 | unsigned ScaleRegIdx = 0; | |||
3480 | int64_t OffsetAddi = 0; | |||
3481 | MachineInstr *ADDIMI = nullptr; | |||
3482 | ||||
3483 | // Check if there is a valid ToBeChangedReg in ADDMI. | |||
3484 | // 1: It must be killed. | |||
3485 | // 2: Its definition must be a valid ADDIMI. | |||
3486 | // 3: It must satify int16 offset requirement. | |||
3487 | if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm)) | |||
3488 | ScaleRegIdx = 2; | |||
3489 | else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm)) | |||
3490 | ScaleRegIdx = 1; | |||
3491 | else | |||
3492 | return false; | |||
3493 | ||||
3494 | assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.")(static_cast<void> (0)); | |||
3495 | unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg(); | |||
3496 | unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg(); | |||
3497 | auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start, | |||
3498 | MachineBasicBlock::iterator End) { | |||
3499 | for (auto It = ++Start; It != End; It++) | |||
3500 | if (It->modifiesRegister(Reg, &getRegisterInfo())) | |||
3501 | return true; | |||
3502 | return false; | |||
3503 | }; | |||
3504 | ||||
3505 | // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is | |||
3506 | // treated as special zero when ScaleReg is R0/X0 register. | |||
3507 | if (III.ZeroIsSpecialOrig == III.ImmOpNo && | |||
3508 | (ScaleReg == PPC::R0 || ScaleReg == PPC::X0)) | |||
3509 | return false; | |||
3510 | ||||
3511 | // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr | |||
3512 | // and Imm Instr. | |||
3513 | if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI)) | |||
3514 | return false; | |||
3515 | ||||
3516 | // Now start to do the transformation. | |||
3517 | LLVM_DEBUG(dbgs() << "Replace instruction: "do { } while (false) | |||
3518 | << "\n")do { } while (false); | |||
3519 | LLVM_DEBUG(ADDIMI->dump())do { } while (false); | |||
3520 | LLVM_DEBUG(ADDMI->dump())do { } while (false); | |||
3521 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
3522 | LLVM_DEBUG(dbgs() << "with: "do { } while (false) | |||
3523 | << "\n")do { } while (false); | |||
3524 | ||||
3525 | // Update ADDI instr. | |||
3526 | ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm); | |||
3527 | ||||
3528 | // Update Imm instr. | |||
3529 | MI.setDesc(get(XFormOpcode)); | |||
3530 | MI.getOperand(III.ImmOpNo) | |||
3531 | .ChangeToRegister(ScaleReg, false, false, | |||
3532 | ADDMI->getOperand(ScaleRegIdx).isKill()); | |||
3533 | ||||
3534 | MI.getOperand(III.OpNoForForwarding) | |||
3535 | .ChangeToRegister(ToBeChangedReg, false, false, true); | |||
3536 | ||||
3537 | // Eliminate ADD instr. | |||
3538 | ADDMI->eraseFromParent(); | |||
3539 | ||||
3540 | LLVM_DEBUG(ADDIMI->dump())do { } while (false); | |||
3541 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
3542 | ||||
3543 | return true; | |||
3544 | } | |||
3545 | ||||
3546 | bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, | |||
3547 | int64_t &Imm) const { | |||
3548 | unsigned Opc = ADDIMI.getOpcode(); | |||
3549 | ||||
3550 | // Exit if the instruction is not ADDI. | |||
3551 | if (Opc != PPC::ADDI && Opc != PPC::ADDI8) | |||
3552 | return false; | |||
3553 | ||||
3554 | // The operand may not necessarily be an immediate - it could be a relocation. | |||
3555 | if (!ADDIMI.getOperand(2).isImm()) | |||
3556 | return false; | |||
3557 | ||||
3558 | Imm = ADDIMI.getOperand(2).getImm(); | |||
3559 | ||||
3560 | return true; | |||
3561 | } | |||
3562 | ||||
3563 | bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const { | |||
3564 | unsigned Opc = ADDMI.getOpcode(); | |||
3565 | ||||
3566 | // Exit if the instruction is not ADD. | |||
3567 | return Opc == PPC::ADD4 || Opc == PPC::ADD8; | |||
3568 | } | |||
3569 | ||||
3570 | bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI, | |||
3571 | unsigned &ToBeDeletedReg, | |||
3572 | unsigned &XFormOpcode, | |||
3573 | int64_t &OffsetImm, | |||
3574 | ImmInstrInfo &III) const { | |||
3575 | // Only handle load/store. | |||
3576 | if (!MI.mayLoadOrStore()) | |||
3577 | return false; | |||
3578 | ||||
3579 | unsigned Opc = MI.getOpcode(); | |||
3580 | ||||
3581 | XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc); | |||
3582 | ||||
3583 | // Exit if instruction has no index form. | |||
3584 | if (XFormOpcode == PPC::INSTRUCTION_LIST_END) | |||
3585 | return false; | |||
3586 | ||||
3587 | // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap. | |||
3588 | if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()), | |||
3589 | III, true)) | |||
3590 | return false; | |||
3591 | ||||
3592 | if (!III.IsSummingOperands) | |||
3593 | return false; | |||
3594 | ||||
3595 | MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo); | |||
3596 | MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding); | |||
3597 | // Only support imm operands, not relocation slots or others. | |||
3598 | if (!ImmOperand.isImm()) | |||
3599 | return false; | |||
3600 | ||||
3601 | assert(RegOperand.isReg() && "Instruction format is not right")(static_cast<void> (0)); | |||
3602 | ||||
3603 | // There are other use for ToBeDeletedReg after Imm instr, can not delete it. | |||
3604 | if (!RegOperand.isKill()) | |||
3605 | return false; | |||
3606 | ||||
3607 | ToBeDeletedReg = RegOperand.getReg(); | |||
3608 | OffsetImm = ImmOperand.getImm(); | |||
3609 | ||||
3610 | return true; | |||
3611 | } | |||
3612 | ||||
3613 | bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, | |||
3614 | MachineInstr *&ADDIMI, | |||
3615 | int64_t &OffsetAddi, | |||
3616 | int64_t OffsetImm) const { | |||
3617 | assert((Index == 1 || Index == 2) && "Invalid operand index for add.")(static_cast<void> (0)); | |||
3618 | MachineOperand &MO = ADDMI->getOperand(Index); | |||
3619 | ||||
3620 | if (!MO.isKill()) | |||
3621 | return false; | |||
3622 | ||||
3623 | bool OtherIntermediateUse = false; | |||
3624 | ||||
3625 | ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse); | |||
3626 | // Currently handle only one "add + Imminstr" pair case, exit if other | |||
3627 | // intermediate use for ToBeChangedReg found. | |||
3628 | // TODO: handle the cases where there are other "add + Imminstr" pairs | |||
3629 | // with same offset in Imminstr which is like: | |||
3630 | // | |||
3631 | // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi | |||
3632 | // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1 | |||
3633 | // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed) | |||
3634 | // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2 | |||
3635 | // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed) | |||
3636 | // | |||
3637 | // can be converted to: | |||
3638 | // | |||
3639 | // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, | |||
3640 | // (OffsetAddi + OffsetImm) | |||
3641 | // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg | |||
3642 | // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed) | |||
3643 | ||||
3644 | if (OtherIntermediateUse || !ADDIMI) | |||
3645 | return false; | |||
3646 | // Check if ADDI instr meets requirement. | |||
3647 | if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi)) | |||
3648 | return false; | |||
3649 | ||||
3650 | if (isInt<16>(OffsetAddi + OffsetImm)) | |||
3651 | return true; | |||
3652 | return false; | |||
3653 | } | |||
3654 | ||||
3655 | // If this instruction has an immediate form and one of its operands is a | |||
3656 | // result of a load-immediate or an add-immediate, convert it to | |||
3657 | // the immediate form if the constant is in range. | |||
3658 | bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, | |||
3659 | MachineInstr **KilledDef) const { | |||
3660 | MachineFunction *MF = MI.getParent()->getParent(); | |||
3661 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
3662 | bool PostRA = !MRI->isSSA(); | |||
3663 | bool SeenIntermediateUse = true; | |||
3664 | unsigned ForwardingOperand = ~0U; | |||
3665 | MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand, | |||
3666 | SeenIntermediateUse); | |||
3667 | if (!DefMI) | |||
3668 | return false; | |||
3669 | assert(ForwardingOperand < MI.getNumOperands() &&(static_cast<void> (0)) | |||
3670 | "The forwarding operand needs to be valid at this point")(static_cast<void> (0)); | |||
3671 | bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); | |||
3672 | bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; | |||
3673 | if (KilledDef && KillFwdDefMI) | |||
3674 | *KilledDef = DefMI; | |||
3675 | ||||
3676 | // If this is a imm instruction and its register operands is produced by ADDI, | |||
3677 | // put the imm into imm inst directly. | |||
3678 | if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) != | |||
3679 | PPC::INSTRUCTION_LIST_END && | |||
3680 | transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand)) | |||
3681 | return true; | |||
3682 | ||||
3683 | ImmInstrInfo III; | |||
3684 | bool IsVFReg = MI.getOperand(0).isReg() | |||
3685 | ? isVFRegister(MI.getOperand(0).getReg()) | |||
3686 | : false; | |||
3687 | bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA); | |||
3688 | // If this is a reg+reg instruction that has a reg+imm form, | |||
3689 | // and one of the operands is produced by an add-immediate, | |||
3690 | // try to convert it. | |||
3691 | if (HasImmForm && | |||
3692 | transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI, | |||
3693 | KillFwdDefMI)) | |||
3694 | return true; | |||
3695 | ||||
3696 | // If this is a reg+reg instruction that has a reg+imm form, | |||
3697 | // and one of the operands is produced by LI, convert it now. | |||
3698 | if (HasImmForm && | |||
3699 | transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI)) | |||
3700 | return true; | |||
3701 | ||||
3702 | // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI | |||
3703 | // can be simpified to LI. | |||
3704 | if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef)) | |||
3705 | return true; | |||
3706 | ||||
3707 | return false; | |||
3708 | } | |||
3709 | ||||
3710 | bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, | |||
3711 | MachineInstr **ToErase) const { | |||
3712 | MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); | |||
3713 | unsigned FoldingReg = MI.getOperand(1).getReg(); | |||
3714 | if (!Register::isVirtualRegister(FoldingReg)) | |||
3715 | return false; | |||
3716 | MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); | |||
3717 | if (SrcMI->getOpcode() != PPC::RLWINM && | |||
3718 | SrcMI->getOpcode() != PPC::RLWINM_rec && | |||
3719 | SrcMI->getOpcode() != PPC::RLWINM8 && | |||
3720 | SrcMI->getOpcode() != PPC::RLWINM8_rec) | |||
3721 | return false; | |||
3722 | assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&(static_cast<void> (0)) | |||
3723 | MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&(static_cast<void> (0)) | |||
3724 | SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&(static_cast<void> (0)) | |||
3725 | "Invalid PPC::RLWINM Instruction!")(static_cast<void> (0)); | |||
3726 | uint64_t SHSrc = SrcMI->getOperand(2).getImm(); | |||
3727 | uint64_t SHMI = MI.getOperand(2).getImm(); | |||
3728 | uint64_t MBSrc = SrcMI->getOperand(3).getImm(); | |||
3729 | uint64_t MBMI = MI.getOperand(3).getImm(); | |||
3730 | uint64_t MESrc = SrcMI->getOperand(4).getImm(); | |||
3731 | uint64_t MEMI = MI.getOperand(4).getImm(); | |||
3732 | ||||
3733 | assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&(static_cast<void> (0)) | |||
3734 | "Invalid PPC::RLWINM Instruction!")(static_cast<void> (0)); | |||
3735 | // If MBMI is bigger than MEMI, we always can not get run of ones. | |||
3736 | // RotatedSrcMask non-wrap: | |||
3737 | // 0........31|32........63 | |||
3738 | // RotatedSrcMask: B---E B---E | |||
3739 | // MaskMI: -----------|--E B------ | |||
3740 | // Result: ----- --- (Bad candidate) | |||
3741 | // | |||
3742 | // RotatedSrcMask wrap: | |||
3743 | // 0........31|32........63 | |||
3744 | // RotatedSrcMask: --E B----|--E B---- | |||
3745 | // MaskMI: -----------|--E B------ | |||
3746 | // Result: --- -----|--- ----- (Bad candidate) | |||
3747 | // | |||
3748 | // One special case is RotatedSrcMask is a full set mask. | |||
3749 | // RotatedSrcMask full: | |||
3750 | // 0........31|32........63 | |||
3751 | // RotatedSrcMask: ------EB---|-------EB--- | |||
3752 | // MaskMI: -----------|--E B------ | |||
3753 | // Result: -----------|--- ------- (Good candidate) | |||
3754 | ||||
3755 | // Mark special case. | |||
3756 | bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31); | |||
3757 | ||||
3758 | // For other MBMI > MEMI cases, just return. | |||
3759 | if ((MBMI > MEMI) && !SrcMaskFull) | |||
3760 | return false; | |||
3761 | ||||
3762 | // Handle MBMI <= MEMI cases. | |||
3763 | APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI); | |||
3764 | // In MI, we only need low 32 bits of SrcMI, just consider about low 32 | |||
3765 | // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0, | |||
3766 | // while in PowerPC ISA, lowerest bit is at index 63. | |||
3767 | APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc); | |||
3768 | ||||
3769 | APInt RotatedSrcMask = MaskSrc.rotl(SHMI); | |||
3770 | APInt FinalMask = RotatedSrcMask & MaskMI; | |||
3771 | uint32_t NewMB, NewME; | |||
3772 | bool Simplified = false; | |||
3773 | ||||
3774 | // If final mask is 0, MI result should be 0 too. | |||
3775 | if (FinalMask.isNullValue()) { | |||
3776 | bool Is64Bit = | |||
3777 | (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec); | |||
3778 | Simplified = true; | |||
3779 | LLVM_DEBUG(dbgs() << "Replace Instr: ")do { } while (false); | |||
3780 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
3781 | ||||
3782 | if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { | |||
3783 | // Replace MI with "LI 0" | |||
3784 | MI.RemoveOperand(4); | |||
3785 | MI.RemoveOperand(3); | |||
3786 | MI.RemoveOperand(2); | |||
3787 | MI.getOperand(1).ChangeToImmediate(0); | |||
3788 | MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI)); | |||
3789 | } else { | |||
3790 | // Replace MI with "ANDI_rec reg, 0" | |||
3791 | MI.RemoveOperand(4); | |||
3792 | MI.RemoveOperand(3); | |||
3793 | MI.getOperand(2).setImm(0); | |||
3794 | MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); | |||
3795 | MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); | |||
3796 | if (SrcMI->getOperand(1).isKill()) { | |||
3797 | MI.getOperand(1).setIsKill(true); | |||
3798 | SrcMI->getOperand(1).setIsKill(false); | |||
3799 | } else | |||
3800 | // About to replace MI.getOperand(1), clear its kill flag. | |||
3801 | MI.getOperand(1).setIsKill(false); | |||
3802 | } | |||
3803 | ||||
3804 | LLVM_DEBUG(dbgs() << "With: ")do { } while (false); | |||
3805 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
3806 | ||||
3807 | } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) && | |||
3808 | NewMB <= NewME) || | |||
3809 | SrcMaskFull) { | |||
3810 | // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger | |||
3811 | // than NewME. Otherwise we get a 64 bit value after folding, but MI | |||
3812 | // return a 32 bit value. | |||
3813 | Simplified = true; | |||
3814 | LLVM_DEBUG(dbgs() << "Converting Instr: ")do { } while (false); | |||
3815 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
3816 | ||||
3817 | uint16_t NewSH = (SHSrc + SHMI) % 32; | |||
3818 | MI.getOperand(2).setImm(NewSH); | |||
3819 | // If SrcMI mask is full, no need to update MBMI and MEMI. | |||
3820 | if (!SrcMaskFull) { | |||
3821 | MI.getOperand(3).setImm(NewMB); | |||
3822 | MI.getOperand(4).setImm(NewME); | |||
3823 | } | |||
3824 | MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); | |||
3825 | if (SrcMI->getOperand(1).isKill()) { | |||
3826 | MI.getOperand(1).setIsKill(true); | |||
3827 | SrcMI->getOperand(1).setIsKill(false); | |||
3828 | } else | |||
3829 | // About to replace MI.getOperand(1), clear its kill flag. | |||
3830 | MI.getOperand(1).setIsKill(false); | |||
3831 | ||||
3832 | LLVM_DEBUG(dbgs() << "To: ")do { } while (false); | |||
3833 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
3834 | } | |||
3835 | if (Simplified & MRI->use_nodbg_empty(FoldingReg) && | |||
3836 | !SrcMI->hasImplicitDef()) { | |||
3837 | // If FoldingReg has no non-debug use and it has no implicit def (it | |||
3838 | // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. | |||
3839 | // Otherwise keep it. | |||
3840 | *ToErase = SrcMI; | |||
3841 | LLVM_DEBUG(dbgs() << "Delete dead instruction: ")do { } while (false); | |||
3842 | LLVM_DEBUG(SrcMI->dump())do { } while (false); | |||
3843 | } | |||
3844 | return Simplified; | |||
3845 | } | |||
3846 | ||||
3847 | bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, | |||
3848 | ImmInstrInfo &III, bool PostRA) const { | |||
3849 | // The vast majority of the instructions would need their operand 2 replaced | |||
3850 | // with an immediate when switching to the reg+imm form. A marked exception | |||
3851 | // are the update form loads/stores for which a constant operand 2 would need | |||
3852 | // to turn into a displacement and move operand 1 to the operand 2 position. | |||
3853 | III.ImmOpNo = 2; | |||
3854 | III.OpNoForForwarding = 2; | |||
3855 | III.ImmWidth = 16; | |||
3856 | III.ImmMustBeMultipleOf = 1; | |||
3857 | III.TruncateImmTo = 0; | |||
3858 | III.IsSummingOperands = false; | |||
3859 | switch (Opc) { | |||
3860 | default: return false; | |||
3861 | case PPC::ADD4: | |||
3862 | case PPC::ADD8: | |||
3863 | III.SignedImm = true; | |||
3864 | III.ZeroIsSpecialOrig = 0; | |||
3865 | III.ZeroIsSpecialNew = 1; | |||
3866 | III.IsCommutative = true; | |||
3867 | III.IsSummingOperands = true; | |||
3868 | III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8; | |||
3869 | break; | |||
3870 | case PPC::ADDC: | |||
3871 | case PPC::ADDC8: | |||
3872 | III.SignedImm = true; | |||
3873 | III.ZeroIsSpecialOrig = 0; | |||
3874 | III.ZeroIsSpecialNew = 0; | |||
3875 | III.IsCommutative = true; | |||
3876 | III.IsSummingOperands = true; | |||
3877 | III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8; | |||
3878 | break; | |||
3879 | case PPC::ADDC_rec: | |||
3880 | III.SignedImm = true; | |||
3881 | III.ZeroIsSpecialOrig = 0; | |||
3882 | III.ZeroIsSpecialNew = 0; | |||
3883 | III.IsCommutative = true; | |||
3884 | III.IsSummingOperands = true; | |||
3885 | III.ImmOpcode = PPC::ADDIC_rec; | |||
3886 | break; | |||
3887 | case PPC::SUBFC: | |||
3888 | case PPC::SUBFC8: | |||
3889 | III.SignedImm = true; | |||
3890 | III.ZeroIsSpecialOrig = 0; | |||
3891 | III.ZeroIsSpecialNew = 0; | |||
3892 | III.IsCommutative = false; | |||
3893 | III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8; | |||
3894 | break; | |||
3895 | case PPC::CMPW: | |||
3896 | case PPC::CMPD: | |||
3897 | III.SignedImm = true; | |||
3898 | III.ZeroIsSpecialOrig = 0; | |||
3899 | III.ZeroIsSpecialNew = 0; | |||
3900 | III.IsCommutative = false; | |||
3901 | III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI; | |||
3902 | break; | |||
3903 | case PPC::CMPLW: | |||
3904 | case PPC::CMPLD: | |||
3905 | III.SignedImm = false; | |||
3906 | III.ZeroIsSpecialOrig = 0; | |||
3907 | III.ZeroIsSpecialNew = 0; | |||
3908 | III.IsCommutative = false; | |||
3909 | III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI; | |||
3910 | break; | |||
3911 | case PPC::AND_rec: | |||
3912 | case PPC::AND8_rec: | |||
3913 | case PPC::OR: | |||
3914 | case PPC::OR8: | |||
3915 | case PPC::XOR: | |||
3916 | case PPC::XOR8: | |||
3917 | III.SignedImm = false; | |||
3918 | III.ZeroIsSpecialOrig = 0; | |||
3919 | III.ZeroIsSpecialNew = 0; | |||
3920 | III.IsCommutative = true; | |||
3921 | switch(Opc) { | |||
3922 | default: llvm_unreachable("Unknown opcode")__builtin_unreachable(); | |||
3923 | case PPC::AND_rec: | |||
3924 | III.ImmOpcode = PPC::ANDI_rec; | |||
3925 | break; | |||
3926 | case PPC::AND8_rec: | |||
3927 | III.ImmOpcode = PPC::ANDI8_rec; | |||
3928 | break; | |||
3929 | case PPC::OR: III.ImmOpcode = PPC::ORI; break; | |||
3930 | case PPC::OR8: III.ImmOpcode = PPC::ORI8; break; | |||
3931 | case PPC::XOR: III.ImmOpcode = PPC::XORI; break; | |||
3932 | case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break; | |||
3933 | } | |||
3934 | break; | |||
3935 | case PPC::RLWNM: | |||
3936 | case PPC::RLWNM8: | |||
3937 | case PPC::RLWNM_rec: | |||
3938 | case PPC::RLWNM8_rec: | |||
3939 | case PPC::SLW: | |||
3940 | case PPC::SLW8: | |||
3941 | case PPC::SLW_rec: | |||
3942 | case PPC::SLW8_rec: | |||
3943 | case PPC::SRW: | |||
3944 | case PPC::SRW8: | |||
3945 | case PPC::SRW_rec: | |||
3946 | case PPC::SRW8_rec: | |||
3947 | case PPC::SRAW: | |||
3948 | case PPC::SRAW_rec: | |||
3949 | III.SignedImm = false; | |||
3950 | III.ZeroIsSpecialOrig = 0; | |||
3951 | III.ZeroIsSpecialNew = 0; | |||
3952 | III.IsCommutative = false; | |||
3953 | // This isn't actually true, but the instructions ignore any of the | |||
3954 | // upper bits, so any immediate loaded with an LI is acceptable. | |||
3955 | // This does not apply to shift right algebraic because a value | |||
3956 | // out of range will produce a -1/0. | |||
3957 | III.ImmWidth = 16; | |||
3958 | if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec || | |||
3959 | Opc == PPC::RLWNM8_rec) | |||
3960 | III.TruncateImmTo = 5; | |||
3961 | else | |||
3962 | III.TruncateImmTo = 6; | |||
3963 | switch(Opc) { | |||
3964 | default: llvm_unreachable("Unknown opcode")__builtin_unreachable(); | |||
3965 | case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break; | |||
3966 | case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break; | |||
3967 | case PPC::RLWNM_rec: | |||
3968 | III.ImmOpcode = PPC::RLWINM_rec; | |||
3969 | break; | |||
3970 | case PPC::RLWNM8_rec: | |||
3971 | III.ImmOpcode = PPC::RLWINM8_rec; | |||
3972 | break; | |||
3973 | case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break; | |||
3974 | case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break; | |||
3975 | case PPC::SLW_rec: | |||
3976 | III.ImmOpcode = PPC::RLWINM_rec; | |||
3977 | break; | |||
3978 | case PPC::SLW8_rec: | |||
3979 | III.ImmOpcode = PPC::RLWINM8_rec; | |||
3980 | break; | |||
3981 | case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break; | |||
3982 | case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break; | |||
3983 | case PPC::SRW_rec: | |||
3984 | III.ImmOpcode = PPC::RLWINM_rec; | |||
3985 | break; | |||
3986 | case PPC::SRW8_rec: | |||
3987 | III.ImmOpcode = PPC::RLWINM8_rec; | |||
3988 | break; | |||
3989 | case PPC::SRAW: | |||
3990 | III.ImmWidth = 5; | |||
3991 | III.TruncateImmTo = 0; | |||
3992 | III.ImmOpcode = PPC::SRAWI; | |||
3993 | break; | |||
3994 | case PPC::SRAW_rec: | |||
3995 | III.ImmWidth = 5; | |||
3996 | III.TruncateImmTo = 0; | |||
3997 | III.ImmOpcode = PPC::SRAWI_rec; | |||
3998 | break; | |||
3999 | } | |||
4000 | break; | |||
4001 | case PPC::RLDCL: | |||
4002 | case PPC::RLDCL_rec: | |||
4003 | case PPC::RLDCR: | |||
4004 | case PPC::RLDCR_rec: | |||
4005 | case PPC::SLD: | |||
4006 | case PPC::SLD_rec: | |||
4007 | case PPC::SRD: | |||
4008 | case PPC::SRD_rec: | |||
4009 | case PPC::SRAD: | |||
4010 | case PPC::SRAD_rec: | |||
4011 | III.SignedImm = false; | |||
4012 | III.ZeroIsSpecialOrig = 0; | |||
4013 | III.ZeroIsSpecialNew = 0; | |||
4014 | III.IsCommutative = false; | |||
4015 | // This isn't actually true, but the instructions ignore any of the | |||
4016 | // upper bits, so any immediate loaded with an LI is acceptable. | |||
4017 | // This does not apply to shift right algebraic because a value | |||
4018 | // out of range will produce a -1/0. | |||
4019 | III.ImmWidth = 16; | |||
4020 | if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR || | |||
4021 | Opc == PPC::RLDCR_rec) | |||
4022 | III.TruncateImmTo = 6; | |||
4023 | else | |||
4024 | III.TruncateImmTo = 7; | |||
4025 | switch(Opc) { | |||
4026 | default: llvm_unreachable("Unknown opcode")__builtin_unreachable(); | |||
4027 | case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break; | |||
4028 | case PPC::RLDCL_rec: | |||
4029 | III.ImmOpcode = PPC::RLDICL_rec; | |||
4030 | break; | |||
4031 | case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break; | |||
4032 | case PPC::RLDCR_rec: | |||
4033 | III.ImmOpcode = PPC::RLDICR_rec; | |||
4034 | break; | |||
4035 | case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break; | |||
4036 | case PPC::SLD_rec: | |||
4037 | III.ImmOpcode = PPC::RLDICR_rec; | |||
4038 | break; | |||
4039 | case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break; | |||
4040 | case PPC::SRD_rec: | |||
4041 | III.ImmOpcode = PPC::RLDICL_rec; | |||
4042 | break; | |||
4043 | case PPC::SRAD: | |||
4044 | III.ImmWidth = 6; | |||
4045 | III.TruncateImmTo = 0; | |||
4046 | III.ImmOpcode = PPC::SRADI; | |||
4047 | break; | |||
4048 | case PPC::SRAD_rec: | |||
4049 | III.ImmWidth = 6; | |||
4050 | III.TruncateImmTo = 0; | |||
4051 | III.ImmOpcode = PPC::SRADI_rec; | |||
4052 | break; | |||
4053 | } | |||
4054 | break; | |||
4055 | // Loads and stores: | |||
4056 | case PPC::LBZX: | |||
4057 | case PPC::LBZX8: | |||
4058 | case PPC::LHZX: | |||
4059 | case PPC::LHZX8: | |||
4060 | case PPC::LHAX: | |||
4061 | case PPC::LHAX8: | |||
4062 | case PPC::LWZX: | |||
4063 | case PPC::LWZX8: | |||
4064 | case PPC::LWAX: | |||
4065 | case PPC::LDX: | |||
4066 | case PPC::LFSX: | |||
4067 | case PPC::LFDX: | |||
4068 | case PPC::STBX: | |||
4069 | case PPC::STBX8: | |||
4070 | case PPC::STHX: | |||
4071 | case PPC::STHX8: | |||
4072 | case PPC::STWX: | |||
4073 | case PPC::STWX8: | |||
4074 | case PPC::STDX: | |||
4075 | case PPC::STFSX: | |||
4076 | case PPC::STFDX: | |||
4077 | III.SignedImm = true; | |||
4078 | III.ZeroIsSpecialOrig = 1; | |||
4079 | III.ZeroIsSpecialNew = 2; | |||
4080 | III.IsCommutative = true; | |||
4081 | III.IsSummingOperands = true; | |||
4082 | III.ImmOpNo = 1; | |||
4083 | III.OpNoForForwarding = 2; | |||
4084 | switch(Opc) { | |||
4085 | default: llvm_unreachable("Unknown opcode")__builtin_unreachable(); | |||
4086 | case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break; | |||
4087 | case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break; | |||
4088 | case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break; | |||
4089 | case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break; | |||
4090 | case PPC::LHAX: III.ImmOpcode = PPC::LHA; break; | |||
4091 | case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break; | |||
4092 | case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break; | |||
4093 | case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break; | |||
4094 | case PPC::LWAX: | |||
4095 | III.ImmOpcode = PPC::LWA; | |||
4096 | III.ImmMustBeMultipleOf = 4; | |||
4097 | break; | |||
4098 | case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break; | |||
4099 | case PPC::LFSX: III.ImmOpcode = PPC::LFS; break; | |||
4100 | case PPC::LFDX: III.ImmOpcode = PPC::LFD; break; | |||
4101 | case PPC::STBX: III.ImmOpcode = PPC::STB; break; | |||
4102 | case PPC::STBX8: III.ImmOpcode = PPC::STB8; break; | |||
4103 | case PPC::STHX: III.ImmOpcode = PPC::STH; break; | |||
4104 | case PPC::STHX8: III.ImmOpcode = PPC::STH8; break; | |||
4105 | case PPC::STWX: III.ImmOpcode = PPC::STW; break; | |||
4106 | case PPC::STWX8: III.ImmOpcode = PPC::STW8; break; | |||
4107 | case PPC::STDX: | |||
4108 | III.ImmOpcode = PPC::STD; | |||
4109 | III.ImmMustBeMultipleOf = 4; | |||
4110 | break; | |||
4111 | case PPC::STFSX: III.ImmOpcode = PPC::STFS; break; | |||
4112 | case PPC::STFDX: III.ImmOpcode = PPC::STFD; break; | |||
4113 | } | |||
4114 | break; | |||
4115 | case PPC::LBZUX: | |||
4116 | case PPC::LBZUX8: | |||
4117 | case PPC::LHZUX: | |||
4118 | case PPC::LHZUX8: | |||
4119 | case PPC::LHAUX: | |||
4120 | case PPC::LHAUX8: | |||
4121 | case PPC::LWZUX: | |||
4122 | case PPC::LWZUX8: | |||
4123 | case PPC::LDUX: | |||
4124 | case PPC::LFSUX: | |||
4125 | case PPC::LFDUX: | |||
4126 | case PPC::STBUX: | |||
4127 | case PPC::STBUX8: | |||
4128 | case PPC::STHUX: | |||
4129 | case PPC::STHUX8: | |||
4130 | case PPC::STWUX: | |||
4131 | case PPC::STWUX8: | |||
4132 | case PPC::STDUX: | |||
4133 | case PPC::STFSUX: | |||
4134 | case PPC::STFDUX: | |||
4135 | III.SignedImm = true; | |||
4136 | III.ZeroIsSpecialOrig = 2; | |||
4137 | III.ZeroIsSpecialNew = 3; | |||
4138 | III.IsCommutative = false; | |||
4139 | III.IsSummingOperands = true; | |||
4140 | III.ImmOpNo = 2; | |||
4141 | III.OpNoForForwarding = 3; | |||
4142 | switch(Opc) { | |||
4143 | default: llvm_unreachable("Unknown opcode")__builtin_unreachable(); | |||
4144 | case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break; | |||
4145 | case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break; | |||
4146 | case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break; | |||
4147 | case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break; | |||
4148 | case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break; | |||
4149 | case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break; | |||
4150 | case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break; | |||
4151 | case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break; | |||
4152 | case PPC::LDUX: | |||
4153 | III.ImmOpcode = PPC::LDU; | |||
4154 | III.ImmMustBeMultipleOf = 4; | |||
4155 | break; | |||
4156 | case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break; | |||
4157 | case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break; | |||
4158 | case PPC::STBUX: III.ImmOpcode = PPC::STBU; break; | |||
4159 | case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break; | |||
4160 | case PPC::STHUX: III.ImmOpcode = PPC::STHU; break; | |||
4161 | case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break; | |||
4162 | case PPC::STWUX: III.ImmOpcode = PPC::STWU; break; | |||
4163 | case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break; | |||
4164 | case PPC::STDUX: | |||
4165 | III.ImmOpcode = PPC::STDU; | |||
4166 | III.ImmMustBeMultipleOf = 4; | |||
4167 | break; | |||
4168 | case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break; | |||
4169 | case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break; | |||
4170 | } | |||
4171 | break; | |||
4172 | // Power9 and up only. For some of these, the X-Form version has access to all | |||
4173 | // 64 VSR's whereas the D-Form only has access to the VR's. We replace those | |||
4174 | // with pseudo-ops pre-ra and for post-ra, we check that the register loaded | |||
4175 | // into or stored from is one of the VR registers. | |||
4176 | case PPC::LXVX: | |||
4177 | case PPC::LXSSPX: | |||
4178 | case PPC::LXSDX: | |||
4179 | case PPC::STXVX: | |||
4180 | case PPC::STXSSPX: | |||
4181 | case PPC::STXSDX: | |||
4182 | case PPC::XFLOADf32: | |||
4183 | case PPC::XFLOADf64: | |||
4184 | case PPC::XFSTOREf32: | |||
4185 | case PPC::XFSTOREf64: | |||
4186 | if (!Subtarget.hasP9Vector()) | |||
4187 | return false; | |||
4188 | III.SignedImm = true; | |||
4189 | III.ZeroIsSpecialOrig = 1; | |||
4190 | III.ZeroIsSpecialNew = 2; | |||
4191 | III.IsCommutative = true; | |||
4192 | III.IsSummingOperands = true; | |||
4193 | III.ImmOpNo = 1; | |||
4194 | III.OpNoForForwarding = 2; | |||
4195 | III.ImmMustBeMultipleOf = 4; | |||
4196 | switch(Opc) { | |||
4197 | default: llvm_unreachable("Unknown opcode")__builtin_unreachable(); | |||
4198 | case PPC::LXVX: | |||
4199 | III.ImmOpcode = PPC::LXV; | |||
4200 | III.ImmMustBeMultipleOf = 16; | |||
4201 | break; | |||
4202 | case PPC::LXSSPX: | |||
4203 | if (PostRA) { | |||
4204 | if (IsVFReg) | |||
4205 | III.ImmOpcode = PPC::LXSSP; | |||
4206 | else { | |||
4207 | III.ImmOpcode = PPC::LFS; | |||
4208 | III.ImmMustBeMultipleOf = 1; | |||
4209 | } | |||
4210 | break; | |||
4211 | } | |||
4212 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
4213 | case PPC::XFLOADf32: | |||
4214 | III.ImmOpcode = PPC::DFLOADf32; | |||
4215 | break; | |||
4216 | case PPC::LXSDX: | |||
4217 | if (PostRA) { | |||
4218 | if (IsVFReg) | |||
4219 | III.ImmOpcode = PPC::LXSD; | |||
4220 | else { | |||
4221 | III.ImmOpcode = PPC::LFD; | |||
4222 | III.ImmMustBeMultipleOf = 1; | |||
4223 | } | |||
4224 | break; | |||
4225 | } | |||
4226 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
4227 | case PPC::XFLOADf64: | |||
4228 | III.ImmOpcode = PPC::DFLOADf64; | |||
4229 | break; | |||
4230 | case PPC::STXVX: | |||
4231 | III.ImmOpcode = PPC::STXV; | |||
4232 | III.ImmMustBeMultipleOf = 16; | |||
4233 | break; | |||
4234 | case PPC::STXSSPX: | |||
4235 | if (PostRA) { | |||
4236 | if (IsVFReg) | |||
4237 | III.ImmOpcode = PPC::STXSSP; | |||
4238 | else { | |||
4239 | III.ImmOpcode = PPC::STFS; | |||
4240 | III.ImmMustBeMultipleOf = 1; | |||
4241 | } | |||
4242 | break; | |||
4243 | } | |||
4244 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
4245 | case PPC::XFSTOREf32: | |||
4246 | III.ImmOpcode = PPC::DFSTOREf32; | |||
4247 | break; | |||
4248 | case PPC::STXSDX: | |||
4249 | if (PostRA) { | |||
4250 | if (IsVFReg) | |||
4251 | III.ImmOpcode = PPC::STXSD; | |||
4252 | else { | |||
4253 | III.ImmOpcode = PPC::STFD; | |||
4254 | III.ImmMustBeMultipleOf = 1; | |||
4255 | } | |||
4256 | break; | |||
4257 | } | |||
4258 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | |||
4259 | case PPC::XFSTOREf64: | |||
4260 | III.ImmOpcode = PPC::DFSTOREf64; | |||
4261 | break; | |||
4262 | } | |||
4263 | break; | |||
4264 | } | |||
4265 | return true; | |||
4266 | } | |||
4267 | ||||
4268 | // Utility function for swaping two arbitrary operands of an instruction. | |||
4269 | static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { | |||
4270 | assert(Op1 != Op2 && "Cannot swap operand with itself.")(static_cast<void> (0)); | |||
4271 | ||||
4272 | unsigned MaxOp = std::max(Op1, Op2); | |||
4273 | unsigned MinOp = std::min(Op1, Op2); | |||
4274 | MachineOperand MOp1 = MI.getOperand(MinOp); | |||
4275 | MachineOperand MOp2 = MI.getOperand(MaxOp); | |||
4276 | MI.RemoveOperand(std::max(Op1, Op2)); | |||
4277 | MI.RemoveOperand(std::min(Op1, Op2)); | |||
4278 | ||||
4279 | // If the operands we are swapping are the two at the end (the common case) | |||
4280 | // we can just remove both and add them in the opposite order. | |||
4281 | if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) { | |||
4282 | MI.addOperand(MOp2); | |||
4283 | MI.addOperand(MOp1); | |||
4284 | } else { | |||
4285 | // Store all operands in a temporary vector, remove them and re-add in the | |||
4286 | // right order. | |||
4287 | SmallVector<MachineOperand, 2> MOps; | |||
4288 | unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops. | |||
4289 | for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) { | |||
4290 | MOps.push_back(MI.getOperand(i)); | |||
4291 | MI.RemoveOperand(i); | |||
4292 | } | |||
4293 | // MOp2 needs to be added next. | |||
4294 | MI.addOperand(MOp2); | |||
4295 | // Now add the rest. | |||
4296 | for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) { | |||
4297 | if (i == MaxOp) | |||
4298 | MI.addOperand(MOp1); | |||
4299 | else { | |||
4300 | MI.addOperand(MOps.back()); | |||
4301 | MOps.pop_back(); | |||
4302 | } | |||
4303 | } | |||
4304 | } | |||
4305 | } | |||
4306 | ||||
4307 | // Check if the 'MI' that has the index OpNoForForwarding | |||
4308 | // meets the requirement described in the ImmInstrInfo. | |||
4309 | bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI, | |||
4310 | const ImmInstrInfo &III, | |||
4311 | unsigned OpNoForForwarding | |||
4312 | ) const { | |||
4313 | // As the algorithm of checking for PPC::ZERO/PPC::ZERO8 | |||
4314 | // would not work pre-RA, we can only do the check post RA. | |||
4315 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |||
4316 | if (MRI.isSSA()) | |||
4317 | return false; | |||
4318 | ||||
4319 | // Cannot do the transform if MI isn't summing the operands. | |||
4320 | if (!III.IsSummingOperands) | |||
4321 | return false; | |||
4322 | ||||
4323 | // The instruction we are trying to replace must have the ZeroIsSpecialOrig set. | |||
4324 | if (!III.ZeroIsSpecialOrig) | |||
4325 | return false; | |||
4326 | ||||
4327 | // We cannot do the transform if the operand we are trying to replace | |||
4328 | // isn't the same as the operand the instruction allows. | |||
4329 | if (OpNoForForwarding != III.OpNoForForwarding) | |||
4330 | return false; | |||
4331 | ||||
4332 | // Check if the instruction we are trying to transform really has | |||
4333 | // the special zero register as its operand. | |||
4334 | if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO && | |||
4335 | MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8) | |||
4336 | return false; | |||
4337 | ||||
4338 | // This machine instruction is convertible if it is, | |||
4339 | // 1. summing the operands. | |||
4340 | // 2. one of the operands is special zero register. | |||
4341 | // 3. the operand we are trying to replace is allowed by the MI. | |||
4342 | return true; | |||
4343 | } | |||
4344 | ||||
4345 | // Check if the DefMI is the add inst and set the ImmMO and RegMO | |||
4346 | // accordingly. | |||
4347 | bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, | |||
4348 | const ImmInstrInfo &III, | |||
4349 | MachineOperand *&ImmMO, | |||
4350 | MachineOperand *&RegMO) const { | |||
4351 | unsigned Opc = DefMI.getOpcode(); | |||
4352 | if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8) | |||
4353 | return false; | |||
4354 | ||||
4355 | assert(DefMI.getNumOperands() >= 3 &&(static_cast<void> (0)) | |||
4356 | "Add inst must have at least three operands")(static_cast<void> (0)); | |||
4357 | RegMO = &DefMI.getOperand(1); | |||
4358 | ImmMO = &DefMI.getOperand(2); | |||
4359 | ||||
4360 | // Before RA, ADDI first operand could be a frame index. | |||
4361 | if (!RegMO->isReg()) | |||
4362 | return false; | |||
4363 | ||||
4364 | // This DefMI is elgible for forwarding if it is: | |||
4365 | // 1. add inst | |||
4366 | // 2. one of the operands is Imm/CPI/Global. | |||
4367 | return isAnImmediateOperand(*ImmMO); | |||
4368 | } | |||
4369 | ||||
4370 | bool PPCInstrInfo::isRegElgibleForForwarding( | |||
4371 | const MachineOperand &RegMO, const MachineInstr &DefMI, | |||
4372 | const MachineInstr &MI, bool KillDefMI, | |||
4373 | bool &IsFwdFeederRegKilled) const { | |||
4374 | // x = addi y, imm | |||
4375 | // ... | |||
4376 | // z = lfdx 0, x -> z = lfd imm(y) | |||
4377 | // The Reg "y" can be forwarded to the MI(z) only when there is no DEF | |||
4378 | // of "y" between the DEF of "x" and "z". | |||
4379 | // The query is only valid post RA. | |||
4380 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |||
4381 | if (MRI.isSSA()) | |||
4382 | return false; | |||
4383 | ||||
4384 | Register Reg = RegMO.getReg(); | |||
4385 | ||||
4386 | // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. | |||
4387 | MachineBasicBlock::const_reverse_iterator It = MI; | |||
4388 | MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend(); | |||
4389 | It++; | |||
4390 | for (; It != E; ++It) { | |||
4391 | if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) | |||
4392 | return false; | |||
4393 | else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) | |||
4394 | IsFwdFeederRegKilled = true; | |||
4395 | // Made it to DefMI without encountering a clobber. | |||
4396 | if ((&*It) == &DefMI) | |||
4397 | break; | |||
4398 | } | |||
4399 | assert((&*It) == &DefMI && "DefMI is missing")(static_cast<void> (0)); | |||
4400 | ||||
4401 | // If DefMI also defines the register to be forwarded, we can only forward it | |||
4402 | // if DefMI is being erased. | |||
4403 | if (DefMI.modifiesRegister(Reg, &getRegisterInfo())) | |||
4404 | return KillDefMI; | |||
4405 | ||||
4406 | return true; | |||
4407 | } | |||
4408 | ||||
4409 | bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, | |||
4410 | const MachineInstr &DefMI, | |||
4411 | const ImmInstrInfo &III, | |||
4412 | int64_t &Imm, | |||
4413 | int64_t BaseImm) const { | |||
4414 | assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate")(static_cast<void> (0)); | |||
4415 | if (DefMI.getOpcode() == PPC::ADDItocL) { | |||
4416 | // The operand for ADDItocL is CPI, which isn't imm at compiling time, | |||
4417 | // However, we know that, it is 16-bit width, and has the alignment of 4. | |||
4418 | // Check if the instruction met the requirement. | |||
4419 | if (III.ImmMustBeMultipleOf > 4 || | |||
4420 | III.TruncateImmTo || III.ImmWidth != 16) | |||
4421 | return false; | |||
4422 | ||||
4423 | // Going from XForm to DForm loads means that the displacement needs to be | |||
4424 | // not just an immediate but also a multiple of 4, or 16 depending on the | |||
4425 | // load. A DForm load cannot be represented if it is a multiple of say 2. | |||
4426 | // XForm loads do not have this restriction. | |||
4427 | if (ImmMO.isGlobal()) { | |||
4428 | const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout(); | |||
4429 | if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf) | |||
4430 | return false; | |||
4431 | } | |||
4432 | ||||
4433 | return true; | |||
4434 | } | |||
4435 | ||||
4436 | if (ImmMO.isImm()) { | |||
4437 | // It is Imm, we need to check if the Imm fit the range. | |||
4438 | // Sign-extend to 64-bits. | |||
4439 | // DefMI may be folded with another imm form instruction, the result Imm is | |||
4440 | // the sum of Imm of DefMI and BaseImm which is from imm form instruction. | |||
4441 | APInt ActualValue(64, ImmMO.getImm() + BaseImm, true); | |||
4442 | if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth)) | |||
4443 | return false; | |||
4444 | if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth)) | |||
4445 | return false; | |||
4446 | Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm); | |||
4447 | ||||
4448 | if (Imm % III.ImmMustBeMultipleOf) | |||
4449 | return false; | |||
4450 | if (III.TruncateImmTo) | |||
4451 | Imm &= ((1 << III.TruncateImmTo) - 1); | |||
4452 | } | |||
4453 | else | |||
4454 | return false; | |||
4455 | ||||
4456 | // This ImmMO is forwarded if it meets the requriement describle | |||
4457 | // in ImmInstrInfo | |||
4458 | return true; | |||
4459 | } | |||
4460 | ||||
// Try to simplify MI given that its operand OpNoForForwarding is produced by
// DefMI, a load-immediate (LI/LI8) with an immediate operand.
//
// Two kinds of rewrite are attempted, chosen by MI's opcode:
//  - CMPWI/CMPLWI/CMPDI/CMPLDI (only while still in SSA form): ISEL/ISEL8
//    users of the compare result are rewritten in place, either to a COPY of
//    the register chosen by selectReg, or to LI/LI8 0 when that register is
//    PPC::ZERO/PPC::ZERO8.
//  - ADDI, SUBFIC, RLDICL, RLWINM, ORI, XORI (and the listed variants): the
//    result is computed here and, if it passes the per-opcode range check, MI
//    is handed to replaceInstrWithLI with the computed immediate.
//
// KilledDef, when non-null, is reset to nullptr if the replacement sets CR,
// because DefMI must then be kept.
//
// Returns true when MI was replaced via replaceInstrWithLI or at least one
// ISEL was converted to a COPY; false otherwise.
4461 | bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, | |||
4462 | unsigned OpNoForForwarding, | |||
4463 | MachineInstr **KilledDef) const { | |||
// Only an LI/LI8 whose operand 1 is a plain immediate can be folded.
4464 | if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) || | |||
4465 | !DefMI.getOperand(1).isImm()) | |||
4466 | return false; | |||
4467 | ||||
4468 | MachineFunction *MF = MI.getParent()->getParent(); | |||
4469 | MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
// "Post-RA" is taken to mean "no longer in SSA form".
4470 | bool PostRA = !MRI->isSSA(); | |||
4471 | ||||
4472 | int64_t Immediate = DefMI.getOperand(1).getImm(); | |||
4473 | // Sign-extend to 64-bits. | |||
4474 | int64_t SExtImm = SignExtend64<16>(Immediate); | |||
4475 | ||||
// Capture the kill state and register of the forwarded operand now, before MI
// is rewritten below.
4476 | bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill(); | |||
4477 | Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg(); | |||
4478 | ||||
// Filled in by the opcode-specific cases below and consumed after the switch.
4479 | bool ReplaceWithLI = false; | |||
4480 | bool Is64BitLI = false; | |||
4481 | int64_t NewImm = 0; | |||
4482 | bool SetCR = false; | |||
4483 | unsigned Opc = MI.getOpcode(); | |||
4484 | switch (Opc) { | |||
4485 | default: | |||
4486 | return false; | |||
4487 | ||||
4488 | // FIXME: Any branches conditional on such a comparison can be made | |||
4489 | // unconditional. At this time, this happens too infrequently to be worth | |||
4490 | // the implementation effort, but if that ever changes, we could convert | |||
4491 | // such a pattern here. | |||
4492 | case PPC::CMPWI: | |||
4493 | case PPC::CMPLWI: | |||
4494 | case PPC::CMPDI: | |||
4495 | case PPC::CMPLDI: { | |||
4496 | // Doing this post-RA would require dataflow analysis to reliably find uses | |||
4497 | // of the CR register set by the compare. | |||
4498 | // No need to fixup killed/dead flag since this transformation is only valid | |||
4499 | // before RA. | |||
4500 | if (PostRA) | |||
4501 | return false; | |||
4502 | // If a compare-immediate is fed by an immediate and is itself an input of | |||
4503 | // an ISEL (the most common case) into a COPY of the correct register. | |||
4504 | bool Changed = false; | |||
4505 | Register DefReg = MI.getOperand(0).getReg(); | |||
4506 | int64_t Comparand = MI.getOperand(2).getImm(); | |||
// If any bit above bit 14 is set, force the upper 48 bits of the comparand to
// one; otherwise use it unchanged.
4507 | int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 | |||
4508 | ? (Comparand | 0xFFFFFFFFFFFF0000) | |||
4509 | : Comparand; | |||
4510 | ||||
4511 | for (auto &CompareUseMI : MRI->use_instructions(DefReg)) { | |||
4512 | unsigned UseOpc = CompareUseMI.getOpcode(); | |||
4513 | if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) | |||
4514 | continue; | |||
4515 | unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); | |||
4516 | Register TrueReg = CompareUseMI.getOperand(1).getReg(); | |||
4517 | Register FalseReg = CompareUseMI.getOperand(2).getReg(); | |||
4518 | unsigned RegToCopy = | |||
4519 | selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg); | |||
4520 | if (RegToCopy == PPC::NoRegister) | |||
4521 | continue; | |||
4522 | // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0. | |||
// NOTE(review): this path rewrites the ISEL but neither sets Changed nor
// bumps CmpIselsConverted before continuing - confirm that is intended.
4523 | if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { | |||
4524 | CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI)); | |||
4525 | replaceInstrOperandWithImm(CompareUseMI, 1, 0); | |||
4526 | CompareUseMI.RemoveOperand(3); | |||
4527 | CompareUseMI.RemoveOperand(2); | |||
4528 | continue; | |||
4529 | } | |||
4530 | LLVM_DEBUG(do { } while (false) | |||
4531 | dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n")do { } while (false); | |||
4532 | LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump())do { } while (false); | |||
4533 | LLVM_DEBUG(dbgs() << "Is converted to:\n")do { } while (false); | |||
4534 | // Convert to copy and remove unneeded operands. | |||
4535 | CompareUseMI.setDesc(get(PPC::COPY)); | |||
// Drop the CR operand, then whichever of the true/false operands was not
// selected, leaving only the destination and the register to copy.
4536 | CompareUseMI.RemoveOperand(3); | |||
4537 | CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1); | |||
4538 | CmpIselsConverted++; | |||
4539 | Changed = true; | |||
4540 | LLVM_DEBUG(CompareUseMI.dump())do { } while (false); | |||
4541 | } | |||
4542 | if (Changed) | |||
4543 | return true; | |||
4544 | // This may end up incremented multiple times since this function is called | |||
4545 | // during a fixed-point transformation, but it is only meant to indicate the | |||
4546 | // presence of this opportunity. | |||
4547 | MissedConvertibleImmediateInstrs++; | |||
4548 | return false; | |||
4549 | } | |||
4550 | ||||
4551 | // Immediate forms - may simply be convertable to an LI. | |||
4552 | case PPC::ADDI: | |||
4553 | case PPC::ADDI8: { | |||
4554 | // Does the sum fit in a 16-bit signed field? | |||
4555 | int64_t Addend = MI.getOperand(2).getImm(); | |||
4556 | if (isInt<16>(Addend + SExtImm)) { | |||
4557 | ReplaceWithLI = true; | |||
4558 | Is64BitLI = Opc == PPC::ADDI8; | |||
4559 | NewImm = Addend + SExtImm; | |||
4560 | break; | |||
4561 | } | |||
4562 | return false; | |||
4563 | } | |||
4564 | case PPC::SUBFIC: | |||
4565 | case PPC::SUBFIC8: { | |||
4566 | // Only transform this if the CARRY implicit operand is dead. | |||
4567 | if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead()) | |||
4568 | return false; | |||
// The folded value is (immediate of MI) - (immediate loaded by DefMI).
4569 | int64_t Minuend = MI.getOperand(2).getImm(); | |||
4570 | if (isInt<16>(Minuend - SExtImm)) { | |||
4571 | ReplaceWithLI = true; | |||
4572 | Is64BitLI = Opc == PPC::SUBFIC8; | |||
4573 | NewImm = Minuend - SExtImm; | |||
4574 | break; | |||
4575 | } | |||
4576 | return false; | |||
4577 | } | |||
4578 | case PPC::RLDICL: | |||
4579 | case PPC::RLDICL_rec: | |||
4580 | case PPC::RLDICL_32: | |||
4581 | case PPC::RLDICL_32_64: { | |||
4582 | // Use APInt's rotate function. | |||
4583 | int64_t SH = MI.getOperand(2).getImm(); | |||
4584 | int64_t MB = MI.getOperand(3).getImm(); | |||
// The value is modelled as 64 bits wide for RLDICL/RLDICL_rec and 32 bits wide
// for the _32 variants.
4585 | APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32, | |||
4586 | SExtImm, true); | |||
4587 | InVal = InVal.rotl(SH); | |||
// MB == 0 keeps every bit; otherwise only the low (64 - MB) bits are kept.
4588 | uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1; | |||
4589 | InVal &= Mask; | |||
4590 | // Can't replace negative values with an LI as that will sign-extend | |||
4591 | // and not clear the left bits. If we're setting the CR bit, we will use | |||
4592 | // ANDI_rec which won't sign extend, so that's safe. | |||
4593 | if (isUInt<15>(InVal.getSExtValue()) || | |||
4594 | (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) { | |||
4595 | ReplaceWithLI = true; | |||
4596 | Is64BitLI = Opc != PPC::RLDICL_32; | |||
4597 | NewImm = InVal.getSExtValue(); | |||
4598 | SetCR = Opc == PPC::RLDICL_rec; | |||
4599 | break; | |||
4600 | } | |||
4601 | return false; | |||
4602 | } | |||
4603 | case PPC::RLWINM: | |||
4604 | case PPC::RLWINM8: | |||
4605 | case PPC::RLWINM_rec: | |||
4606 | case PPC::RLWINM8_rec: { | |||
4607 | int64_t SH = MI.getOperand(2).getImm(); | |||
4608 | int64_t MB = MI.getOperand(3).getImm(); | |||
4609 | int64_t ME = MI.getOperand(4).getImm(); | |||
// Rotate the 32-bit value left by SH, then apply the mask derived from MB/ME
// (built with wrap-around support by getBitsSetWithWrap).
4610 | APInt InVal(32, SExtImm, true); | |||
4611 | InVal = InVal.rotl(SH); | |||
4612 | APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB); | |||
4613 | InVal &= Mask; | |||
4614 | // Can't replace negative values with an LI as that will sign-extend | |||
4615 | // and not clear the left bits. If we're setting the CR bit, we will use | |||
4616 | // ANDI_rec which won't sign extend, so that's safe. | |||
4617 | bool ValueFits = isUInt<15>(InVal.getSExtValue()); | |||
4618 | ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) && | |||
4619 | isUInt<16>(InVal.getSExtValue())); | |||
4620 | if (ValueFits) { | |||
4621 | ReplaceWithLI = true; | |||
4622 | Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec; | |||
4623 | NewImm = InVal.getSExtValue(); | |||
4624 | SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec; | |||
4625 | break; | |||
4626 | } | |||
4627 | return false; | |||
4628 | } | |||
4629 | case PPC::ORI: | |||
4630 | case PPC::ORI8: | |||
4631 | case PPC::XORI: | |||
4632 | case PPC::XORI8: { | |||
// Fold the bitwise OR/XOR of MI's immediate with the loaded immediate; accept
// the result only if it fits a signed 16-bit field.
4633 | int64_t LogicalImm = MI.getOperand(2).getImm(); | |||
4634 | int64_t Result = 0; | |||
4635 | if (Opc == PPC::ORI || Opc == PPC::ORI8) | |||
4636 | Result = LogicalImm | SExtImm; | |||
4637 | else | |||
4638 | Result = LogicalImm ^ SExtImm; | |||
4639 | if (isInt<16>(Result)) { | |||
4640 | ReplaceWithLI = true; | |||
4641 | Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8; | |||
4642 | NewImm = Result; | |||
4643 | break; | |||
4644 | } | |||
4645 | return false; | |||
4646 | } | |||
4647 | } | |||
4648 | ||||
4649 | if (ReplaceWithLI) { | |||
4650 | // We need to be careful with CR-setting instructions we're replacing. | |||
4651 | if (SetCR) { | |||
4652 | // We don't know anything about uses when we're out of SSA, so only | |||
4653 | // replace if the new immediate will be reproduced. | |||
// ImmChanged is true when NewImm has a bit set that SExtImm does not.
4654 | bool ImmChanged = (SExtImm & NewImm) != NewImm; | |||
4655 | if (PostRA && ImmChanged) | |||
4656 | return false; | |||
4657 | ||||
4658 | if (!PostRA) { | |||
4659 | // If the defining load-immediate has no other uses, we can just replace | |||
4660 | // the immediate with the new immediate. | |||
4661 | if (MRI->hasOneUse(DefMI.getOperand(0).getReg())) | |||
4662 | DefMI.getOperand(1).setImm(NewImm); | |||
4663 | ||||
4664 | // If we're not using the GPR result of the CR-setting instruction, we | |||
4665 | // just need to and with zero/non-zero depending on the new immediate. | |||
4666 | else if (MRI->use_empty(MI.getOperand(0).getReg())) { | |||
4667 | if (NewImm) { | |||
4668 | assert(Immediate && "Transformation converted zero to non-zero?")(static_cast<void> (0)); | |||
4669 | NewImm = Immediate; | |||
4670 | } | |||
4671 | } else if (ImmChanged) | |||
4672 | return false; | |||
4673 | } | |||
4674 | } | |||
4675 | ||||
4676 | LLVM_DEBUG(dbgs() << "Replacing instruction:\n")do { } while (false); | |||
4677 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
4678 | LLVM_DEBUG(dbgs() << "Fed by:\n")do { } while (false); | |||
4679 | LLVM_DEBUG(DefMI.dump())do { } while (false); | |||
4680 | LoadImmediateInfo LII; | |||
4681 | LII.Imm = NewImm; | |||
4682 | LII.Is64Bit = Is64BitLI; | |||
4683 | LII.SetCR = SetCR; | |||
4684 | // If we're setting the CR, the original load-immediate must be kept (as an | |||
4685 | // operand to ANDI_rec/ANDI8_rec). | |||
4686 | if (KilledDef && SetCR) | |||
4687 | *KilledDef = nullptr; | |||
4688 | replaceInstrWithLI(MI, LII); | |||
4689 | ||||
4690 | // Fixup killed/dead flag after transformation. | |||
4691 | // Pattern: | |||
4692 | // ForwardingOperandReg = LI imm1 | |||
4693 | // y = op2 imm2, ForwardingOperandReg(killed) | |||
4694 | if (IsForwardingOperandKilled) | |||
4695 | fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg); | |||
4696 | ||||
4697 | LLVM_DEBUG(dbgs() << "With:\n")do { } while (false); | |||
4698 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
4699 | return true; | |||
4700 | } | |||
4701 | return false; | |||
4702 | } | |||
4703 | ||||
4704 | bool PPCInstrInfo::transformToNewImmFormFedByAdd( | |||
4705 | MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const { | |||
4706 | MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); | |||
4707 | bool PostRA = !MRI->isSSA(); | |||
4708 | // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI | |||
4709 | // for post-ra. | |||
4710 | if (PostRA) | |||
4711 | return false; | |||
4712 | ||||
4713 | // Only handle load/store. | |||
4714 | if (!MI.mayLoadOrStore()) | |||
4715 | return false; | |||
4716 | ||||
4717 | unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode()); | |||
4718 | ||||
4719 | assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&(static_cast<void> (0)) | |||
4720 | "MI must have x-form opcode")(static_cast<void> (0)); | |||
4721 | ||||
4722 | // get Imm Form info. | |||
4723 | ImmInstrInfo III; | |||
4724 | bool IsVFReg = MI.getOperand(0).isReg() | |||
4725 | ? isVFRegister(MI.getOperand(0).getReg()) | |||
4726 | : false; | |||
4727 | ||||
4728 | if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA)) | |||
4729 | return false; | |||
4730 | ||||
4731 | if (!III.IsSummingOperands) | |||
4732 | return false; | |||
4733 | ||||
4734 | if (OpNoForForwarding != III.OpNoForForwarding) | |||
4735 | return false; | |||
4736 | ||||
4737 | MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo); | |||
4738 | if (!ImmOperandMI.isImm()) | |||
4739 | return false; | |||
4740 | ||||
4741 | // Check DefMI. | |||
4742 | MachineOperand *ImmMO = nullptr; | |||
4743 | MachineOperand *RegMO = nullptr; | |||
4744 | if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) | |||
4745 | return false; | |||
4746 | assert(ImmMO && RegMO && "Imm and Reg operand must have been set")(static_cast<void> (0)); | |||
4747 | ||||
4748 | // Check Imm. | |||
4749 | // Set ImmBase from imm instruction as base and get new Imm inside | |||
4750 | // isImmElgibleForForwarding. | |||
4751 | int64_t ImmBase = ImmOperandMI.getImm(); | |||
4752 | int64_t Imm = 0; | |||
4753 | if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase)) | |||
4754 | return false; | |||
4755 | ||||
4756 | // Get killed info in case fixup needed after transformation. | |||
4757 | unsigned ForwardKilledOperandReg = ~0U; | |||
4758 | if (MI.getOperand(III.OpNoForForwarding).isKill()) | |||
4759 | ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg(); | |||
4760 | ||||
4761 | // Do the transform | |||
4762 | LLVM_DEBUG(dbgs() << "Replacing instruction:\n")do { } while (false); | |||
4763 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
4764 | LLVM_DEBUG(dbgs() << "Fed by:\n")do { } while (false); | |||
4765 | LLVM_DEBUG(DefMI.dump())do { } while (false); | |||
4766 | ||||
4767 | MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg()); | |||
4768 | if (RegMO->isKill()) { | |||
4769 | MI.getOperand(III.OpNoForForwarding).setIsKill(true); | |||
4770 | // Clear the killed flag in RegMO. Doing this here can handle some cases | |||
4771 | // that DefMI and MI are not in same basic block. | |||
4772 | RegMO->setIsKill(false); | |||
4773 | } | |||
4774 | MI.getOperand(III.ImmOpNo).setImm(Imm); | |||
4775 | ||||
4776 | // FIXME: fix kill/dead flag if MI and DefMI are not in same basic block. | |||
4777 | if (DefMI.getParent() == MI.getParent()) { | |||
4778 | // Check if reg is killed between MI and DefMI. | |||
4779 | auto IsKilledFor = [&](unsigned Reg) { | |||
4780 | MachineBasicBlock::const_reverse_iterator It = MI; | |||
4781 | MachineBasicBlock::const_reverse_iterator E = DefMI; | |||
4782 | It++; | |||
4783 | for (; It != E; ++It) { | |||
4784 | if (It->killsRegister(Reg)) | |||
4785 | return true; | |||
4786 | } | |||
4787 | return false; | |||
4788 | }; | |||
4789 | ||||
4790 | // Update kill flag | |||
4791 | if (RegMO->isKill() || IsKilledFor(RegMO->getReg())) | |||
4792 | fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg()); | |||
4793 | if (ForwardKilledOperandReg != ~0U) | |||
4794 | fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg); | |||
4795 | } | |||
4796 | ||||
4797 | LLVM_DEBUG(dbgs() << "With:\n")do { } while (false); | |||
4798 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
4799 | return true; | |||
4800 | } | |||
4801 | ||||
4802 | // If an X-Form instruction is fed by an add-immediate and one of its operands | |||
4803 | // is the literal zero, attempt to forward the source of the add-immediate to | |||
4804 | // the corresponding D-Form instruction with the displacement coming from | |||
4805 | // the immediate being added. | |||
4806 | bool PPCInstrInfo::transformToImmFormFedByAdd( | |||
4807 | MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding, | |||
4808 | MachineInstr &DefMI, bool KillDefMI) const { | |||
4809 | // RegMO ImmMO | |||
4810 | // | | | |||
4811 | // x = addi reg, imm <----- DefMI | |||
4812 | // y = op 0 , x <----- MI | |||
4813 | // | | |||
4814 | // OpNoForForwarding | |||
4815 | // Check if the MI meet the requirement described in the III. | |||
4816 | if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding)) | |||
4817 | return false; | |||
4818 | ||||
4819 | // Check if the DefMI meet the requirement | |||
4820 | // described in the III. If yes, set the ImmMO and RegMO accordingly. | |||
4821 | MachineOperand *ImmMO = nullptr; | |||
4822 | MachineOperand *RegMO = nullptr; | |||
4823 | if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) | |||
4824 | return false; | |||
4825 | assert(ImmMO && RegMO && "Imm and Reg operand must have been set")(static_cast<void> (0)); | |||
4826 | ||||
4827 | // As we get the Imm operand now, we need to check if the ImmMO meet | |||
4828 | // the requirement described in the III. If yes set the Imm. | |||
4829 | int64_t Imm = 0; | |||
4830 | if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm)) | |||
4831 | return false; | |||
4832 | ||||
4833 | bool IsFwdFeederRegKilled = false; | |||
4834 | // Check if the RegMO can be forwarded to MI. | |||
4835 | if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI, | |||
4836 | IsFwdFeederRegKilled)) | |||
4837 | return false; | |||
4838 | ||||
4839 | // Get killed info in case fixup needed after transformation. | |||
4840 | unsigned ForwardKilledOperandReg = ~0U; | |||
4841 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |||
4842 | bool PostRA = !MRI.isSSA(); | |||
4843 | if (PostRA && MI.getOperand(OpNoForForwarding).isKill()) | |||
4844 | ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg(); | |||
4845 | ||||
4846 | // We know that, the MI and DefMI both meet the pattern, and | |||
4847 | // the Imm also meet the requirement with the new Imm-form. | |||
4848 | // It is safe to do the transformation now. | |||
4849 | LLVM_DEBUG(dbgs() << "Replacing instruction:\n")do { } while (false); | |||
4850 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
4851 | LLVM_DEBUG(dbgs() << "Fed by:\n")do { } while (false); | |||
4852 | LLVM_DEBUG(DefMI.dump())do { } while (false); | |||
4853 | ||||
4854 | // Update the base reg first. | |||
4855 | MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(), | |||
4856 | false, false, | |||
4857 | RegMO->isKill()); | |||
4858 | ||||
4859 | // Then, update the imm. | |||
4860 | if (ImmMO->isImm()) { | |||
4861 | // If the ImmMO is Imm, change the operand that has ZERO to that Imm | |||
4862 | // directly. | |||
4863 | replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm); | |||
4864 | } | |||
4865 | else { | |||
4866 | // Otherwise, it is Constant Pool Index(CPI) or Global, | |||
4867 | // which is relocation in fact. We need to replace the special zero | |||
4868 | // register with ImmMO. | |||
4869 | // Before that, we need to fixup the target flags for imm. | |||
4870 | // For some reason, we miss to set the flag for the ImmMO if it is CPI. | |||
4871 | if (DefMI.getOpcode() == PPC::ADDItocL) | |||
4872 | ImmMO->setTargetFlags(PPCII::MO_TOC_LO); | |||
4873 | ||||
4874 | // MI didn't have the interface such as MI.setOperand(i) though | |||
4875 | // it has MI.getOperand(i). To repalce the ZERO MachineOperand with | |||
4876 | // ImmMO, we need to remove ZERO operand and all the operands behind it, | |||
4877 | // and, add the ImmMO, then, move back all the operands behind ZERO. | |||
4878 | SmallVector<MachineOperand, 2> MOps; | |||
4879 | for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) { | |||
4880 | MOps.push_back(MI.getOperand(i)); | |||
4881 | MI.RemoveOperand(i); | |||
4882 | } | |||
4883 | ||||
4884 | // Remove the last MO in the list, which is ZERO operand in fact. | |||
4885 | MOps.pop_back(); | |||
4886 | // Add the imm operand. | |||
4887 | MI.addOperand(*ImmMO); | |||
4888 | // Now add the rest back. | |||
4889 | for (auto &MO : MOps) | |||
4890 | MI.addOperand(MO); | |||
4891 | } | |||
4892 | ||||
4893 | // Update the opcode. | |||
4894 | MI.setDesc(get(III.ImmOpcode)); | |||
4895 | ||||
4896 | // Fix up killed/dead flag after transformation. | |||
4897 | // Pattern 1: | |||
4898 | // x = ADD KilledFwdFeederReg, imm | |||
4899 | // n = opn KilledFwdFeederReg(killed), regn | |||
4900 | // y = XOP 0, x | |||
4901 | // Pattern 2: | |||
4902 | // x = ADD reg(killed), imm | |||
4903 | // y = XOP 0, x | |||
4904 | if (IsFwdFeederRegKilled || RegMO->isKill()) | |||
4905 | fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg()); | |||
4906 | // Pattern 3: | |||
4907 | // ForwardKilledOperandReg = ADD reg, imm | |||
4908 | // y = XOP 0, ForwardKilledOperandReg(killed) | |||
4909 | if (ForwardKilledOperandReg != ~0U) | |||
4910 | fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg); | |||
4911 | ||||
4912 | LLVM_DEBUG(dbgs() << "With:\n")do { } while (false); | |||
4913 | LLVM_DEBUG(MI.dump())do { } while (false); | |||
4914 | ||||
4915 | return true; | |||
4916 | } | |||
4917 | ||||
4918 | bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, | |||
4919 | const ImmInstrInfo &III, | |||
4920 | unsigned ConstantOpNo, | |||
4921 | MachineInstr &DefMI) const { | |||
4922 | // DefMI must be LI or LI8. | |||
4923 | if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) || | |||
4924 | !DefMI.getOperand(1).isImm()) | |||
4925 | return false; | |||
4926 | ||||
4927 | // Get Imm operand and Sign-extend to 64-bits. | |||
4928 | int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm()); | |||
4929 | ||||
4930 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |||
4931 | bool PostRA = !MRI.isSSA(); | |||
4932 | // Exit early if we can't convert this. | |||
4933 | if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative) | |||
4934 | return false; | |||
4935 | if (Imm % III.ImmMustBeMultipleOf) | |||
4936 | return false; | |||
4937 | if (III.TruncateImmTo) | |||
4938 | Imm &= ((1 << III.TruncateImmTo) - 1); | |||
4939 | if (III.SignedImm) { | |||
4940 | APInt ActualValue(64, Imm, true); | |||
4941 | if (!ActualValue.isSignedIntN(III.ImmWidth)) | |||
4942 | return false; | |||
4943 | } else { | |||
4944 | uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; | |||
4945 | if ((uint64_t)Imm > UnsignedMax) | |||
4946 | return false; | |||
4947 | } | |||
4948 | ||||
4949 | // If we're post-RA, the instructions don't agree on whether register zero is | |||
4950 | // special, we can transform this as long as the register operand that will | |||
4951 | // end up in the location where zero is special isn't R0. | |||
4952 | if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { | |||
4953 | unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig : | |||
4954 | III.ZeroIsSpecialNew + 1; | |||
4955 | Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); | |||
4956 | Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); | |||
4957 | // If R0 is in the operand where zero is special for the new instruction, | |||
4958 | // it is unsafe to transform if the constant operand isn't that operand. | |||
4959 | if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) && | |||
4960 | ConstantOpNo != III.ZeroIsSpecialNew) | |||
4961 | return false; | |||
4962 | if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) && | |||
4963 | ConstantOpNo != PosForOrigZero) | |||
4964 | return false; | |||
4965 | } | |||
4966 | ||||
4967 | // Get killed info in case fixup needed after transformation. | |||
4968 | unsigned ForwardKilledOperandReg = ~0U; | |||
4969 | if (PostRA && MI.getOperand(ConstantOpNo).isKill()) | |||
4970 | ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); | |||
4971 | ||||
4972 | unsigned Opc = MI.getOpcode(); | |||
4973 | bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec || | |||
4974 | Opc == PPC::SRW || Opc == PPC::SRW_rec || | |||
4975 | Opc == PPC::SLW8 || Opc == PPC::SLW8_rec || | |||
4976 | Opc == PPC::SRW8 || Opc == PPC::SRW8_rec; | |||
4977 | bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec || | |||
4978 | Opc == PPC::SRD || Opc == PPC::SRD_rec; | |||
4979 | bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec || | |||
4980 | Opc == PPC::SLD_rec || Opc == PPC::SRD_rec; | |||
4981 | bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD || | |||
4982 | Opc == PPC::SRD_rec; | |||
4983 | ||||
4984 | MI.setDesc(get(III.ImmOpcode)); | |||
4985 | if (ConstantOpNo == III.OpNoForForwarding) { | |||
4986 | // Converting shifts to immediate form is a bit tricky since they may do | |||
4987 | // one of three things: | |||
4988 | // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero | |||
4989 | // 2. If the shift amount is zero, the result is unchanged (save for maybe | |||
4990 | // setting CR0) | |||
4991 | // 3. If the shift amount is in [1, OpSize), it's just a shift | |||
4992 | if (SpecialShift32 || SpecialShift64) { | |||
4993 | LoadImmediateInfo LII; | |||
4994 | LII.Imm = 0; | |||
4995 | LII.SetCR = SetCR; | |||
4996 | LII.Is64Bit = SpecialShift64; | |||
4997 | uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F); | |||
4998 | if (Imm & (SpecialShift32 ? 0x20 : 0x40)) | |||
4999 | replaceInstrWithLI(MI, LII); | |||
5000 | // Shifts by zero don't change the value. If we don't need to set CR0, | |||
5001 | // just convert this to a COPY. Can't do this post-RA since we've already | |||
5002 | // cleaned up the copies. | |||
5003 | else if (!SetCR && ShAmt == 0 && !PostRA) { | |||
5004 | MI.RemoveOperand(2); | |||
5005 | MI.setDesc(get(PPC::COPY)); | |||
5006 | } else { | |||
5007 | // The 32 bit and 64 bit instructions are quite different. | |||
5008 | if (SpecialShift32) { | |||
5009 | // Left shifts use (N, 0, 31-N). | |||
5010 | // Right shifts use (32-N, N, 31) if 0 < N < 32. | |||
5011 | // use (0, 0, 31) if N == 0. | |||
5012 | uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt; | |||
5013 | uint64_t MB = RightShift ? ShAmt : 0; | |||
5014 | uint64_t ME = RightShift ? 31 : 31 - ShAmt; | |||
5015 | replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); | |||
5016 | MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) | |||
5017 | .addImm(ME); | |||
5018 | } else { | |||
5019 | // Left shifts use (N, 63-N). | |||
5020 | // Right shifts use (64-N, N) if 0 < N < 64. | |||
5021 | // use (0, 0) if N == 0. | |||
5022 | uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt; | |||
5023 | uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; | |||
5024 | replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); | |||
5025 | MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); | |||
5026 | } | |||
5027 | } | |||
5028 | } else | |||
5029 | replaceInstrOperandWithImm(MI, ConstantOpNo, Imm); | |||
5030 | } | |||
5031 | // Convert commutative instructions (switch the operands and convert the | |||
5032 | // desired one to an immediate. | |||
5033 | else if (III.IsCommutative) { | |||
5034 | replaceInstrOperandWithImm(MI, ConstantOpNo, Imm); | |||
5035 | swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding); | |||
5036 | } else | |||
5037 | llvm_unreachable("Should have exited early!")__builtin_unreachable(); | |||
5038 | ||||
5039 | // For instructions for which the constant register replaces a different | |||
5040 | // operand than where the immediate goes, we need to swap them. | |||
5041 | if (III.OpNoForForwarding != III.ImmOpNo) | |||
5042 | swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo); | |||
5043 | ||||
5044 | // If the special R0/X0 register index are different for original instruction | |||
5045 | // and new instruction, we need to fix up the register class in new | |||
5046 | // instruction. | |||
5047 | if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { | |||
5048 | if (III.ZeroIsSpecialNew) { | |||
5049 | // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no | |||
5050 | // need to fix up register class. | |||
5051 | Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); | |||
5052 | if (Register::isVirtualRegister(RegToModify)) { | |||
5053 | const TargetRegisterClass *NewRC = | |||
5054 | MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? | |||
5055 | &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; | |||
5056 | MRI.setRegClass(RegToModify, NewRC); | |||
5057 | } | |||
5058 | } | |||
5059 | } | |||
5060 | ||||
5061 | // Fix up killed/dead flag after transformation. | |||
5062 | // Pattern: | |||
5063 | // ForwardKilledOperandReg = LI imm | |||
5064 | // y = XOP reg, ForwardKilledOperandReg(killed) | |||
5065 | if (ForwardKilledOperandReg != ~0U) | |||
5066 | fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg); | |||
5067 | return true; | |||
5068 | } | |||
5069 | ||||
5070 | const TargetRegisterClass * | |||
5071 | PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { | |||
5072 | if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) | |||
5073 | return &PPC::VSRCRegClass; | |||
5074 | return RC; | |||
5075 | } | |||
5076 | ||||
5077 | int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { | |||
5078 | return PPC::getRecordFormOpcode(Opcode); | |||
5079 | } | |||
5080 | ||||
5081 | // This function returns true if the machine instruction | |||
5082 | // always outputs a value by sign-extending a 32 bit value, | |||
5083 | // i.e. 0 to 31-th bits are same as 32-th bit. | |||
5084 | static bool isSignExtendingOp(const MachineInstr &MI) { | |||
5085 | int Opcode = MI.getOpcode(); | |||
5086 | if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS || | |||
5087 | Opcode == PPC::LIS8 || Opcode == PPC::SRAW || Opcode == PPC::SRAW_rec || | |||
5088 | Opcode == PPC::SRAWI || Opcode == PPC::SRAWI_rec || Opcode == PPC::LWA || | |||
5089 | Opcode == PPC::LWAX || Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || | |||
5090 | Opcode == PPC::LHA || Opcode == PPC::LHAX || Opcode == PPC::LHA8 || | |||
5091 | Opcode == PPC::LHAX8 || Opcode == PPC::LBZ || Opcode == PPC::LBZX || | |||
5092 | Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || Opcode == PPC::LBZU || | |||
5093 | Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || | |||
5094 | Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZ8 || | |||
5095 | Opcode == PPC::LHZX8 || Opcode == PPC::LHZU || Opcode == PPC::LHZUX || | |||
5096 | Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::EXTSB || | |||
5097 | Opcode == PPC::EXTSB_rec || Opcode == PPC::EXTSH || | |||
5098 | Opcode == PPC::EXTSH_rec || Opcode == PPC::EXTSB8 || | |||
5099 | Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSW || | |||
5100 | Opcode == PPC::EXTSW_rec || Opcode == PPC::SETB || Opcode == PPC::SETB8 || | |||
5101 | Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 || | |||
5102 | Opcode == PPC::EXTSB8_32_64) | |||
5103 | return true; | |||
5104 | ||||
5105 | if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) | |||
5106 | return true; | |||
5107 | ||||
5108 | if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || | |||
5109 | Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) && | |||
5110 | MI.getOperand(3).getImm() > 0 && | |||
5111 | MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) | |||
5112 | return true; | |||
5113 | ||||
5114 | return false; | |||
5115 | } | |||
5116 | ||||
5117 | // This function returns true if the machine instruction | |||
5118 | // always outputs zeros in higher 32 bits. | |||
5119 | static bool isZeroExtendingOp(const MachineInstr &MI) { | |||
5120 | int Opcode = MI.getOpcode(); | |||
5121 | // The 16-bit immediate is sign-extended in li/lis. | |||
5122 | // If the most significant bit is zero, all higher bits are zero. | |||
5123 | if (Opcode == PPC::LI || Opcode == PPC::LI8 || | |||
5124 | Opcode == PPC::LIS || Opcode == PPC::LIS8) { | |||
5125 | int64_t Imm = MI.getOperand(1).getImm(); | |||
5126 | if (((uint64_t)Imm & ~0x7FFFuLL) == 0) | |||
5127 | return true; | |||
5128 | } | |||
5129 | ||||
5130 | // We have some variations of rotate-and-mask instructions | |||
5131 | // that clear higher 32-bits. | |||
5132 | if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec || | |||
5133 | Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec || | |||
5134 | Opcode == PPC::RLDICL_32_64) && | |||
5135 | MI.getOperand(3).getImm() >= 32) | |||
5136 | return true; | |||
5137 | ||||
5138 | if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) && | |||
5139 | MI.getOperand(3).getImm() >= 32 && | |||
5140 | MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) | |||
5141 | return true; | |||
5142 | ||||
5143 | if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || | |||
5144 | Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec || | |||
5145 | Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && | |||
5146 | MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) | |||
5147 | return true; | |||
5148 | ||||
5149 | // There are other instructions that clear higher 32-bits. | |||
5150 | if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec || | |||
5151 | Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec || | |||
5152 | Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 || | |||
5153 | Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec || | |||
5154 | Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec || | |||
5155 | Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || Opcode == PPC::SLW || | |||
5156 | Opcode == PPC::SLW_rec || Opcode == PPC::SRW || Opcode == PPC::SRW_rec || | |||
5157 | Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || Opcode == PPC::SLWI || | |||
5158 | Opcode == PPC::SLWI_rec || Opcode == PPC::SRWI || | |||
5159 | Opcode == PPC::SRWI_rec || Opcode == PPC::LWZ || Opcode == PPC::LWZX || | |||
5160 | Opcode == PPC::LWZU || Opcode == PPC::LWZUX || Opcode == PPC::LWBRX || | |||
5161 | Opcode == PPC::LHBRX || Opcode == PPC::LHZ || Opcode == PPC::LHZX || | |||
5162 | Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LBZ || | |||
5163 | Opcode == PPC::LBZX || Opcode == PPC::LBZU || Opcode == PPC::LBZUX || | |||
5164 | Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || Opcode == PPC::LWZU8 || | |||
5165 | Opcode == PPC::LWZUX8 || Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 || | |||
5166 | Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || Opcode == PPC::LHZU8 || | |||
5167 | Opcode == PPC::LHZUX8 || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || | |||
5168 | Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || | |||
5169 | Opcode == PPC::ANDI_rec || Opcode == PPC::ANDIS_rec || | |||
5170 | Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWI_rec || | |||
5171 | Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWI_rec || | |||
5172 | Opcode == PPC::MFVSRWZ) | |||
5173 | return true; | |||
5174 | ||||
5175 | return false; | |||
5176 | } | |||
5177 | ||||
5178 | // This function returns true if the input MachineInstr is a TOC save | |||
5179 | // instruction. | |||
5180 | bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const { | |||
5181 | if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg()) | |||
5182 | return false; | |||
5183 | unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); | |||
5184 | unsigned StackOffset = MI.getOperand(1).getImm(); | |||
5185 | Register StackReg = MI.getOperand(2).getReg(); | |||
5186 | Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; | |||
5187 | if (StackReg == SPReg && StackOffset == TOCSaveOffset) | |||
5188 | return true; | |||
5189 | ||||
5190 | return false; | |||
5191 | } | |||
5192 | ||||
5193 | // We limit the max depth to track incoming values of PHIs or binary ops | |||
5194 | // (e.g. AND) to avoid excessive cost. | |||
5195 | const unsigned MAX_DEPTH = 1; | |||
5196 | ||||
5197 | bool | |||
5198 | PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, | |||
5199 | const unsigned Depth) const { | |||
5200 | const MachineFunction *MF = MI.getParent()->getParent(); | |||
5201 | const MachineRegisterInfo *MRI = &MF->getRegInfo(); | |||
5202 | ||||
5203 | // If we know this instruction returns sign- or zero-extended result, | |||
5204 | // return true. | |||
5205 | if (SignExt ? isSignExtendingOp(MI): | |||
5206 | isZeroExtendingOp(MI)) | |||
5207 | return true; | |||
5208 | ||||
5209 | switch (MI.getOpcode()) { | |||
5210 | case PPC::COPY: { | |||
5211 | Register SrcReg = MI.getOperand(1).getReg(); | |||
5212 | ||||
5213 | // In both ELFv1 and v2 ABI, method parameters and the return value | |||
5214 | // are sign- or zero-extended. | |||
5215 | if (MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) { | |||
5216 | const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); | |||
5217 | // We check the ZExt/SExt flags for a method parameter. | |||
5218 | if (MI.getParent()->getBasicBlock() == | |||
5219 | &MF->getFunction().getEntryBlock()) { | |||
5220 | Register VReg = MI.getOperand(0).getReg(); | |||
5221 | if (MF->getRegInfo().isLiveIn(VReg)) | |||
5222 | return SignExt ? FuncInfo->isLiveInSExt(VReg) : | |||
5223 | FuncInfo->isLiveInZExt(VReg); | |||
5224 | } | |||
5225 | ||||
5226 | // For a method return value, we check the ZExt/SExt flags in attribute. | |||
5227 | // We assume the following code sequence for method call. | |||
5228 | // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1 | |||
5229 | // BL8_NOP @func,... | |||
5230 | // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1 | |||
5231 | // %5 = COPY %x3; G8RC:%5 | |||
5232 | if (SrcReg == PPC::X3) { | |||
5233 | const MachineBasicBlock *MBB = MI.getParent(); | |||
5234 | MachineBasicBlock::const_instr_iterator II = | |||
5235 | MachineBasicBlock::const_instr_iterator(&MI); | |||
5236 | if (II != MBB->instr_begin() && | |||
5237 | (--II)->getOpcode() == PPC::ADJCALLSTACKUP) { | |||
5238 | const MachineInstr &CallMI = *(--II); | |||
5239 | if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) { | |||
5240 | const Function *CalleeFn = | |||
5241 | dyn_cast<Function>(CallMI.getOperand(0).getGlobal()); | |||
5242 | if (!CalleeFn) | |||
5243 | return false; | |||
5244 | const IntegerType *IntTy = | |||
5245 | dyn_cast<IntegerType>(CalleeFn->getReturnType()); | |||
5246 | const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs(); | |||
5247 | if (IntTy && IntTy->getBitWidth() <= 32) | |||
5248 | return Attrs.hasAttribute(SignExt ? Attribute::SExt : | |||
5249 | Attribute::ZExt); | |||
5250 | } | |||
5251 | } | |||
5252 | } | |||
5253 | } | |||
5254 | ||||
5255 | // If this is a copy from another register, we recursively check source. | |||
5256 | if (!Register::isVirtualRegister(SrcReg)) | |||
5257 | return false; | |||
5258 | const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); | |||
5259 | if (SrcMI != NULL__null) | |||
5260 | return isSignOrZeroExtended(*SrcMI, SignExt, Depth); | |||
5261 | ||||
5262 | return false; | |||
5263 | } | |||
5264 | ||||
5265 | case PPC::ANDI_rec: | |||
5266 | case PPC::ANDIS_rec: | |||
5267 | case PPC::ORI: | |||
5268 | case PPC::ORIS: | |||
5269 | case PPC::XORI: | |||
5270 | case PPC::XORIS: | |||
5271 | case PPC::ANDI8_rec: | |||
5272 | case PPC::ANDIS8_rec: | |||
5273 | case PPC::ORI8: | |||
5274 | case PPC::ORIS8: | |||
5275 | case PPC::XORI8: | |||
5276 | case PPC::XORIS8: { | |||
5277 | // logical operation with 16-bit immediate does not change the upper bits. | |||
5278 | // So, we track the operand register as we do for register copy. | |||
5279 | Register SrcReg = MI.getOperand(1).getReg(); | |||
5280 | if (!Register::isVirtualRegister(SrcReg)) | |||
5281 | return false; | |||
5282 | const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); | |||
5283 | if (SrcMI != NULL__null) | |||
5284 | return isSignOrZeroExtended(*SrcMI, SignExt, Depth); | |||
5285 | ||||
5286 | return false; | |||
5287 | } | |||
5288 | ||||
5289 | // If all incoming values are sign-/zero-extended, | |||
5290 | // the output of OR, ISEL or PHI is also sign-/zero-extended. | |||
5291 | case PPC::OR: | |||
5292 | case PPC::OR8: | |||
5293 | case PPC::ISEL: | |||
5294 | case PPC::PHI: { | |||
5295 | if (Depth >= MAX_DEPTH) | |||
5296 | return false; | |||
5297 | ||||
5298 | // The input registers for PHI are operand 1, 3, ... | |||
5299 | // The input registers for others are operand 1 and 2. | |||
5300 | unsigned E = 3, D = 1; | |||
5301 | if (MI.getOpcode() == PPC::PHI) { | |||
5302 | E = MI.getNumOperands(); | |||
5303 | D = 2; | |||
5304 | } | |||
5305 | ||||
5306 | for (unsigned I = 1; I != E; I += D) { | |||
5307 | if (MI.getOperand(I).isReg()) { | |||
5308 | Register SrcReg = MI.getOperand(I).getReg(); | |||
5309 | if (!Register::isVirtualRegister(SrcReg)) | |||
5310 | return false; | |||
5311 | const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); | |||
5312 | if (SrcMI == NULL__null || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) | |||
5313 | return false; | |||
5314 | } | |||
5315 | else | |||
5316 | return false; | |||
5317 | } | |||
5318 | return true; | |||
5319 | } | |||
5320 | ||||
5321 | // If at least one of the incoming values of an AND is zero extended | |||
5322 | // then the output is also zero-extended. If both of the incoming values | |||
5323 | // are sign-extended then the output is also sign extended. | |||
5324 | case PPC::AND: | |||
5325 | case PPC::AND8: { | |||
5326 | if (Depth >= MAX_DEPTH) | |||
5327 | return false; | |||
5328 | ||||
5329 | assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg())(static_cast<void> (0)); | |||
5330 | ||||
5331 | Register SrcReg1 = MI.getOperand(1).getReg(); | |||
5332 | Register SrcReg2 = MI.getOperand(2).getReg(); | |||
5333 | ||||
5334 | if (!Register::isVirtualRegister(SrcReg1) || | |||
5335 | !Register::isVirtualRegister(SrcReg2)) | |||
5336 | return false; | |||
5337 | ||||
5338 | const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1); | |||
5339 | const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2); | |||
5340 | if (!MISrc1 || !MISrc2) | |||
5341 | return false; | |||
5342 | ||||
5343 | if(SignExt) | |||
5344 | return isSignOrZeroExtended(*MISrc1, SignExt, Depth+1) && | |||
5345 | isSignOrZeroExtended(*MISrc2, SignExt, Depth+1); | |||
5346 | else | |||
5347 | return isSignOrZeroExtended(*MISrc1, SignExt, Depth+1) || | |||
5348 | isSignOrZeroExtended(*MISrc2, SignExt, Depth+1); | |||
5349 | } | |||
5350 | ||||
5351 | default: | |||
5352 | break; | |||
5353 | } | |||
5354 | return false; | |||
5355 | } | |||
5356 | ||||
5357 | bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { | |||
5358 | return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ)); | |||
5359 | } | |||
5360 | ||||
5361 | namespace { | |||
5362 | class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { | |||
5363 | MachineInstr *Loop, *EndLoop, *LoopCount; | |||
5364 | MachineFunction *MF; | |||
5365 | const TargetInstrInfo *TII; | |||
5366 | int64_t TripCount; | |||
5367 | ||||
5368 | public: | |||
5369 | PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop, | |||
5370 | MachineInstr *LoopCount) | |||
5371 | : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount), | |||
5372 | MF(Loop->getParent()->getParent()), | |||
5373 | TII(MF->getSubtarget().getInstrInfo()) { | |||
5374 | // Inspect the Loop instruction up-front, as it may be deleted when we call | |||
5375 | // createTripCountGreaterCondition. | |||
5376 | if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) | |||
5377 | TripCount = LoopCount->getOperand(1).getImm(); | |||
5378 | else | |||
5379 | TripCount = -1; | |||
5380 | } | |||
5381 | ||||
5382 | bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { | |||
5383 | // Only ignore the terminator. | |||
5384 | return MI == EndLoop; | |||
5385 | } | |||
5386 | ||||
5387 | Optional<bool> | |||
5388 | createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, | |||
5389 | SmallVectorImpl<MachineOperand> &Cond) override { | |||
5390 | if (TripCount == -1) { | |||
5391 | // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, | |||
5392 | // so we don't need to generate any thing here. | |||
5393 | Cond.push_back(MachineOperand::CreateImm(0)); | |||
5394 | Cond.push_back(MachineOperand::CreateReg( | |||
5395 | MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR, | |||
5396 | true)); | |||
5397 | return {}; | |||
5398 | } | |||
5399 | ||||
5400 | return TripCount > TC; | |||
5401 | } | |||
5402 | ||||
5403 | void setPreheader(MachineBasicBlock *NewPreheader) override { | |||
5404 | // Do nothing. We want the LOOP setup instruction to stay in the *old* | |||
5405 | // preheader, so we can use BDZ in the prologs to adapt the loop trip count. | |||
5406 | } | |||
5407 | ||||
5408 | void adjustTripCount(int TripCountAdjust) override { | |||
5409 | // If the loop trip count is a compile-time value, then just change the | |||
5410 | // value. | |||
5411 | if (LoopCount->getOpcode() == PPC::LI8 || | |||
5412 | LoopCount->getOpcode() == PPC::LI) { | |||
5413 | int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust; | |||
5414 | LoopCount->getOperand(1).setImm(TripCount); | |||
5415 | return; | |||
5416 | } | |||
5417 | ||||
5418 | // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, | |||
5419 | // so we don't need to generate any thing here. | |||
5420 | } | |||
5421 | ||||
5422 | void disposed() override { | |||
5423 | Loop->eraseFromParent(); | |||
5424 | // Ensure the loop setup instruction is deleted too. | |||
5425 | LoopCount->eraseFromParent(); | |||
5426 | } | |||
5427 | }; | |||
5428 | } // namespace | |||
5429 | ||||
5430 | std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> | |||
5431 | PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { | |||
5432 | // We really "analyze" only hardware loops right now. | |||
5433 | MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); | |||
5434 | MachineBasicBlock *Preheader = *LoopBB->pred_begin(); | |||
5435 | if (Preheader == LoopBB) | |||
5436 | Preheader = *std::next(LoopBB->pred_begin()); | |||
5437 | MachineFunction *MF = Preheader->getParent(); | |||
5438 | ||||
5439 | if (I != LoopBB->end() && isBDNZ(I->getOpcode())) { | |||
5440 | SmallPtrSet<MachineBasicBlock *, 8> Visited; | |||
5441 | if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) { | |||
5442 | Register LoopCountReg = LoopInst->getOperand(0).getReg(); | |||
5443 | MachineRegisterInfo &MRI = MF->getRegInfo(); | |||
5444 | MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); | |||
5445 | return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount); | |||
5446 | } | |||
5447 | } | |||
5448 | return nullptr; | |||
5449 | } | |||
5450 | ||||
5451 | MachineInstr *PPCInstrInfo::findLoopInstr( | |||
5452 | MachineBasicBlock &PreHeader, | |||
5453 | SmallPtrSet<MachineBasicBlock *, 8> &Visited) const { | |||
5454 | ||||
5455 | unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop); | |||
5456 | ||||
5457 | // The loop set-up instruction should be in preheader | |||
5458 | for (auto &I : PreHeader.instrs()) | |||
5459 | if (I.getOpcode() == LOOPi) | |||
5460 | return &I; | |||
5461 | return nullptr; | |||
5462 | } | |||
5463 | ||||
5464 | // Return true if get the base operand, byte offset of an instruction and the | |||
5465 | // memory width. Width is the size of memory that is being loaded/stored. | |||
5466 | bool PPCInstrInfo::getMemOperandWithOffsetWidth( | |||
5467 | const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, | |||
5468 | unsigned &Width, const TargetRegisterInfo *TRI) const { | |||
5469 | if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3) | |||
5470 | return false; | |||
5471 | ||||
5472 | // Handle only loads/stores with base register followed by immediate offset. | |||
5473 | if (!LdSt.getOperand(1).isImm() || | |||
5474 | (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) | |||
5475 | return false; | |||
5476 | if (!LdSt.getOperand(1).isImm() || | |||
5477 | (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) | |||
5478 | return false; | |||
5479 | ||||
5480 | if (!LdSt.hasOneMemOperand()) | |||
5481 | return false; | |||
5482 | ||||
5483 | Width = (*LdSt.memoperands_begin())->getSize(); | |||
5484 | Offset = LdSt.getOperand(1).getImm(); | |||
5485 | BaseReg = &LdSt.getOperand(2); | |||
5486 | return true; | |||
5487 | } | |||
5488 | ||||
5489 | bool PPCInstrInfo::areMemAccessesTriviallyDisjoint( | |||
5490 | const MachineInstr &MIa, const MachineInstr &MIb) const { | |||
5491 | assert(MIa.mayLoadOrStore() && "MIa must be a load or store.")(static_cast<void> (0)); | |||
5492 | assert(MIb.mayLoadOrStore() && "MIb must be a load or store.")(static_cast<void> (0)); | |||
5493 | ||||
5494 | if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || | |||
5495 | MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) | |||
5496 | return false; | |||
5497 | ||||
5498 | // Retrieve the base register, offset from the base register and width. Width | |||
5499 | // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If | |||
5500 | // base registers are identical, and the offset of a lower memory access + | |||
5501 | // the width doesn't overlap the offset of a higher memory access, | |||
5502 | // then the memory accesses are different. | |||
5503 | const TargetRegisterInfo *TRI = &getRegisterInfo(); | |||
5504 | const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; | |||
5505 | int64_t OffsetA = 0, OffsetB = 0; | |||
5506 | unsigned int WidthA = 0, WidthB = 0; | |||
5507 | if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && | |||
5508 | getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { | |||
5509 | if (BaseOpA->isIdenticalTo(*BaseOpB)) { | |||
5510 | int LowOffset = std::min(OffsetA, OffsetB); | |||
5511 | int HighOffset = std::max(OffsetA, OffsetB); | |||
5512 | int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; | |||
5513 | if (LowOffset + LowWidth <= HighOffset) | |||
5514 | return true; | |||
5515 | } | |||
5516 | } | |||
5517 | return false; | |||
5518 | } |