Line data Source code
1 : //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : ///
10 : /// \file
11 : /// This file implements a register stacking pass.
12 : ///
13 : /// This pass reorders instructions to put register uses and defs in an order
14 : /// such that they form single-use expression trees. Registers fitting this form
15 : /// are then marked as "stackified", meaning references to them are replaced by
16 : /// "push" and "pop" from the value stack.
17 : ///
18 : /// This is primarily a code size optimization, since temporary values on the
19 : /// value stack don't need to be named.
20 : ///
21 : //===----------------------------------------------------------------------===//
22 :
23 : #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
24 : #include "WebAssembly.h"
25 : #include "WebAssemblyMachineFunctionInfo.h"
26 : #include "WebAssemblySubtarget.h"
27 : #include "WebAssemblyUtilities.h"
28 : #include "llvm/ADT/SmallPtrSet.h"
29 : #include "llvm/Analysis/AliasAnalysis.h"
30 : #include "llvm/CodeGen/LiveIntervals.h"
31 : #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32 : #include "llvm/CodeGen/MachineDominators.h"
33 : #include "llvm/CodeGen/MachineInstrBuilder.h"
34 : #include "llvm/CodeGen/MachineModuleInfoImpls.h"
35 : #include "llvm/CodeGen/MachineRegisterInfo.h"
36 : #include "llvm/CodeGen/Passes.h"
37 : #include "llvm/Support/Debug.h"
38 : #include "llvm/Support/raw_ostream.h"
39 : using namespace llvm;
40 :
41 : #define DEBUG_TYPE "wasm-reg-stackify"
42 :
43 : namespace {
44 : class WebAssemblyRegStackify final : public MachineFunctionPass {
45 298 : StringRef getPassName() const override {
46 298 : return "WebAssembly Register Stackify";
47 : }
48 :
49 298 : void getAnalysisUsage(AnalysisUsage &AU) const override {
50 298 : AU.setPreservesCFG();
51 : AU.addRequired<AAResultsWrapperPass>();
52 : AU.addRequired<MachineDominatorTree>();
53 : AU.addRequired<LiveIntervals>();
54 : AU.addPreserved<MachineBlockFrequencyInfo>();
55 : AU.addPreserved<SlotIndexes>();
56 : AU.addPreserved<LiveIntervals>();
57 298 : AU.addPreservedID(LiveVariablesID);
58 : AU.addPreserved<MachineDominatorTree>();
59 298 : MachineFunctionPass::getAnalysisUsage(AU);
60 298 : }
61 :
62 : bool runOnMachineFunction(MachineFunction &MF) override;
63 :
64 : public:
65 : static char ID; // Pass identification, replacement for typeid
66 298 : WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
67 : };
68 : } // end anonymous namespace
69 :
70 : char WebAssemblyRegStackify::ID = 0;
71 199030 : INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
72 : "Reorder instructions to use the WebAssembly value stack",
73 : false, false)
74 :
75 298 : FunctionPass *llvm::createWebAssemblyRegStackify() {
76 298 : return new WebAssemblyRegStackify();
77 : }
78 :
79 : // Decorate the given instruction with implicit operands that enforce the
80 : // expression stack ordering constraints for an instruction which is on
81 : // the expression stack.
82 17137 : static void ImposeStackOrdering(MachineInstr *MI) {
83 : // Write the opaque VALUE_STACK register.
84 17137 : if (!MI->definesRegister(WebAssembly::VALUE_STACK))
85 17137 : MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
86 : /*isDef=*/true,
87 : /*isImp=*/true));
88 :
89 : // Also read the opaque VALUE_STACK register.
90 17137 : if (!MI->readsRegister(WebAssembly::VALUE_STACK))
91 17137 : MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
92 : /*isDef=*/false,
93 : /*isImp=*/true));
94 17137 : }
95 :
96 : // Convert an IMPLICIT_DEF instruction into an instruction which defines
97 : // a constant zero value.
98 1 : static void ConvertImplicitDefToConstZero(MachineInstr *MI,
99 : MachineRegisterInfo &MRI,
100 : const TargetInstrInfo *TII,
101 : MachineFunction &MF) {
102 : assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
103 :
104 1 : const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
105 1 : if (RegClass == &WebAssembly::I32RegClass) {
106 1 : MI->setDesc(TII->get(WebAssembly::CONST_I32));
107 1 : MI->addOperand(MachineOperand::CreateImm(0));
108 0 : } else if (RegClass == &WebAssembly::I64RegClass) {
109 0 : MI->setDesc(TII->get(WebAssembly::CONST_I64));
110 0 : MI->addOperand(MachineOperand::CreateImm(0));
111 0 : } else if (RegClass == &WebAssembly::F32RegClass) {
112 0 : MI->setDesc(TII->get(WebAssembly::CONST_F32));
113 0 : ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
114 0 : Type::getFloatTy(MF.getFunction().getContext())));
115 0 : MI->addOperand(MachineOperand::CreateFPImm(Val));
116 0 : } else if (RegClass == &WebAssembly::F64RegClass) {
117 0 : MI->setDesc(TII->get(WebAssembly::CONST_F64));
118 0 : ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
119 0 : Type::getDoubleTy(MF.getFunction().getContext())));
120 0 : MI->addOperand(MachineOperand::CreateFPImm(Val));
121 : } else {
122 0 : llvm_unreachable("Unexpected reg class");
123 : }
124 1 : }
125 :
126 : // Determine whether a call to the callee referenced by
127 : // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
128 : // effects.
129 171 : static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
130 : bool &Write, bool &Effects, bool &StackPointer) {
131 : // All calls can use the stack pointer.
132 171 : StackPointer = true;
133 :
134 171 : const MachineOperand &MO = MI.getOperand(CalleeOpNo);
135 171 : if (MO.isGlobal()) {
136 129 : const Constant *GV = MO.getGlobal();
137 : if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
138 : if (!GA->isInterposable())
139 : GV = GA->getAliasee();
140 :
141 : if (const Function *F = dyn_cast<Function>(GV)) {
142 124 : if (!F->doesNotThrow())
143 120 : Effects = true;
144 124 : if (F->doesNotAccessMemory())
145 : return;
146 122 : if (F->onlyReadsMemory()) {
147 2 : Read = true;
148 2 : return;
149 : }
150 : }
151 : }
152 :
153 : // Assume the worst.
154 167 : Write = true;
155 167 : Read = true;
156 167 : Effects = true;
157 : }
158 :
159 : // Determine whether MI reads memory, writes memory, has side effects,
160 : // and/or uses the stack pointer value.
161 14869 : static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
162 : bool &Write, bool &Effects, bool &StackPointer) {
163 : assert(!MI.isTerminator());
164 :
165 : if (MI.isDebugInstr() || MI.isPosition())
166 : return;
167 :
168 : // Check for loads.
169 14859 : if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
170 1469 : Read = true;
171 :
172 : // Check for stores.
173 14859 : if (MI.mayStore()) {
174 317 : Write = true;
175 :
176 : // Check for stores to __stack_pointer.
177 582 : for (auto MMO : MI.memoperands()) {
178 : const MachinePointerInfo &MPI = MMO->getPointerInfo();
179 265 : if (MPI.V.is<const PseudoSourceValue *>()) {
180 : auto PSV = MPI.V.get<const PseudoSourceValue *>();
181 : if (const ExternalSymbolPseudoSourceValue *EPSV =
182 : dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
183 0 : if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
184 0 : StackPointer = true;
185 : }
186 : }
187 : }
188 14542 : } else if (MI.hasOrderedMemoryRef()) {
189 790 : switch (MI.getOpcode()) {
190 : case WebAssembly::DIV_S_I32:
191 : case WebAssembly::DIV_S_I64:
192 : case WebAssembly::REM_S_I32:
193 : case WebAssembly::REM_S_I64:
194 : case WebAssembly::DIV_U_I32:
195 : case WebAssembly::DIV_U_I64:
196 : case WebAssembly::REM_U_I32:
197 : case WebAssembly::REM_U_I64:
198 : case WebAssembly::I32_TRUNC_S_F32:
199 : case WebAssembly::I64_TRUNC_S_F32:
200 : case WebAssembly::I32_TRUNC_S_F64:
201 : case WebAssembly::I64_TRUNC_S_F64:
202 : case WebAssembly::I32_TRUNC_U_F32:
203 : case WebAssembly::I64_TRUNC_U_F32:
204 : case WebAssembly::I32_TRUNC_U_F64:
205 : case WebAssembly::I64_TRUNC_U_F64:
206 : // These instruction have hasUnmodeledSideEffects() returning true
207 : // because they trap on overflow and invalid so they can't be arbitrarily
208 : // moved, however hasOrderedMemoryRef() interprets this plus their lack
209 : // of memoperands as having a potential unknown memory reference.
210 : break;
211 : default:
212 : // Record volatile accesses, unless it's a call, as calls are handled
213 : // specially below.
214 330 : if (!MI.isCall()) {
215 159 : Write = true;
216 159 : Effects = true;
217 : }
218 : break;
219 : }
220 : }
221 :
222 : // Check for side effects.
223 14859 : if (MI.hasUnmodeledSideEffects()) {
224 240 : switch (MI.getOpcode()) {
225 : case WebAssembly::DIV_S_I32:
226 : case WebAssembly::DIV_S_I64:
227 : case WebAssembly::REM_S_I32:
228 : case WebAssembly::REM_S_I64:
229 : case WebAssembly::DIV_U_I32:
230 : case WebAssembly::DIV_U_I64:
231 : case WebAssembly::REM_U_I32:
232 : case WebAssembly::REM_U_I64:
233 : case WebAssembly::I32_TRUNC_S_F32:
234 : case WebAssembly::I64_TRUNC_S_F32:
235 : case WebAssembly::I32_TRUNC_S_F64:
236 : case WebAssembly::I64_TRUNC_S_F64:
237 : case WebAssembly::I32_TRUNC_U_F32:
238 : case WebAssembly::I64_TRUNC_U_F32:
239 : case WebAssembly::I32_TRUNC_U_F64:
240 : case WebAssembly::I64_TRUNC_U_F64:
241 : // These instructions have hasUnmodeledSideEffects() returning true
242 : // because they trap on overflow and invalid so they can't be arbitrarily
243 : // moved, however in the specific case of register stackifying, it is safe
244 : // to move them because overflow and invalid are Undefined Behavior.
245 : break;
246 55 : default:
247 55 : Effects = true;
248 55 : break;
249 : }
250 : }
251 :
252 : // Analyze calls.
253 14859 : if (MI.isCall()) {
254 171 : unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
255 171 : QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
256 : }
257 : }
258 :
259 : // Test whether Def is safe and profitable to rematerialize.
260 2861 : static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
261 : const WebAssemblyInstrInfo *TII) {
262 2861 : return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
263 : }
264 :
265 : // Identify the definition for this register at this point. This is a
266 : // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
267 : // LiveIntervals to handle complex cases.
268 22872 : static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
269 : const MachineRegisterInfo &MRI,
270 : const LiveIntervals &LIS) {
271 : // Most registers are in SSA form here so we try a quick MRI query first.
272 22872 : if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
273 : return Def;
274 :
275 : // MRI doesn't know what the Def is. Try asking LIS.
276 244 : if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
277 : LIS.getInstructionIndex(*Insert)))
278 : return LIS.getInstructionFromIndex(ValNo->def);
279 :
280 : return nullptr;
281 : }
282 :
283 : // Test whether Reg, as defined at Def, has exactly one use. This is a
284 : // generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
285 : // to handle complex cases.
286 0 : static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
287 : MachineDominatorTree &MDT, LiveIntervals &LIS) {
288 : // Most registers are in SSA form here so we try a quick MRI query first.
289 0 : if (MRI.hasOneUse(Reg))
290 0 : return true;
291 :
292 : bool HasOne = false;
293 0 : const LiveInterval &LI = LIS.getInterval(Reg);
294 : const VNInfo *DefVNI =
295 0 : LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
296 : assert(DefVNI);
297 0 : for (auto &I : MRI.use_nodbg_operands(Reg)) {
298 0 : const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
299 0 : if (Result.valueIn() == DefVNI) {
300 0 : if (!Result.isKill())
301 0 : return false;
302 0 : if (HasOne)
303 0 : return false;
304 : HasOne = true;
305 : }
306 : }
307 : return HasOne;
308 : }
309 :
310 : // Test whether it's safe to move Def to just before Insert.
311 : // TODO: Compute memory dependencies in a way that doesn't require always
312 : // walking the block.
313 : // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
314 : // more precise.
315 13159 : static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
316 : AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
317 : assert(Def->getParent() == Insert->getParent());
318 :
319 : // Check for register dependencies.
320 : SmallVector<unsigned, 4> MutableRegisters;
321 62876 : for (const MachineOperand &MO : Def->operands()) {
322 49717 : if (!MO.isReg() || MO.isUndef())
323 20309 : continue;
324 42806 : unsigned Reg = MO.getReg();
325 :
326 : // If the register is dead here and at Insert, ignore it.
327 66681 : if (MO.isDead() && Insert->definesRegister(Reg) &&
328 11935 : !Insert->readsRegister(Reg))
329 : continue;
330 :
331 61742 : if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
332 : // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
333 : // from moving down, and we've already checked for that.
334 1463 : if (Reg == WebAssembly::ARGUMENTS)
335 : continue;
336 : // If the physical register is never modified, ignore it.
337 242 : if (!MRI.isPhysRegModified(Reg))
338 : continue;
339 : // Otherwise, it's a physical register with unknown liveness.
340 0 : return false;
341 : }
342 :
343 : // If one of the operands isn't in SSA form, it has different values at
344 : // different times, and we need to make sure we don't move our use across
345 : // a different def.
346 29408 : if (!MO.isDef() && !MRI.hasOneDef(Reg))
347 198 : MutableRegisters.push_back(Reg);
348 : }
349 :
350 13159 : bool Read = false, Write = false, Effects = false, StackPointer = false;
351 13159 : Query(*Def, AA, Read, Write, Effects, StackPointer);
352 :
353 : // If the instruction does not access memory and has no side effects, it has
354 : // no additional dependencies.
355 13159 : bool HasMutableRegisters = !MutableRegisters.empty();
356 13159 : if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
357 : return true;
358 :
359 : // Scan through the intervening instructions between Def and Insert.
360 : MachineBasicBlock::const_iterator D(Def), I(Insert);
361 3035 : for (--I; I != D; --I) {
362 1710 : bool InterveningRead = false;
363 1710 : bool InterveningWrite = false;
364 1710 : bool InterveningEffects = false;
365 1710 : bool InterveningStackPointer = false;
366 1710 : Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
367 : InterveningStackPointer);
368 1710 : if (Effects && InterveningEffects)
369 118 : return false;
370 1691 : if (Read && InterveningWrite)
371 : return false;
372 1640 : if (Write && (InterveningRead || InterveningWrite))
373 : return false;
374 1638 : if (StackPointer && InterveningStackPointer)
375 : return false;
376 :
377 1795 : for (unsigned Reg : MutableRegisters)
378 774 : for (const MachineOperand &MO : I->operands())
379 617 : if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
380 : return false;
381 : }
382 :
383 : return true;
384 : }
385 :
386 : /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
387 0 : static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
388 : const MachineBasicBlock &MBB,
389 : const MachineRegisterInfo &MRI,
390 : const MachineDominatorTree &MDT,
391 : LiveIntervals &LIS,
392 : WebAssemblyFunctionInfo &MFI) {
393 0 : const LiveInterval &LI = LIS.getInterval(Reg);
394 :
395 0 : const MachineInstr *OneUseInst = OneUse.getParent();
396 0 : VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
397 :
398 0 : for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
399 0 : if (&Use == &OneUse)
400 0 : continue;
401 :
402 0 : const MachineInstr *UseInst = Use.getParent();
403 0 : VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
404 :
405 0 : if (UseVNI != OneUseVNI)
406 0 : continue;
407 :
408 0 : const MachineInstr *OneUseInst = OneUse.getParent();
409 0 : if (UseInst == OneUseInst) {
410 : // Another use in the same instruction. We need to ensure that the one
411 : // selected use happens "before" it.
412 0 : if (&OneUse > &Use)
413 0 : return false;
414 : } else {
415 : // Test that the use is dominated by the one selected use.
416 0 : while (!MDT.dominates(OneUseInst, UseInst)) {
417 : // Actually, dominating is over-conservative. Test that the use would
418 : // happen after the one selected use in the stack evaluation order.
419 : //
420 : // This is needed as a consequence of using implicit get_locals for
421 : // uses and implicit set_locals for defs.
422 0 : if (UseInst->getDesc().getNumDefs() == 0)
423 0 : return false;
424 0 : const MachineOperand &MO = UseInst->getOperand(0);
425 0 : if (!MO.isReg())
426 0 : return false;
427 0 : unsigned DefReg = MO.getReg();
428 0 : if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
429 : !MFI.isVRegStackified(DefReg))
430 0 : return false;
431 : assert(MRI.hasOneNonDBGUse(DefReg));
432 0 : const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
433 0 : const MachineInstr *NewUseInst = NewUse.getParent();
434 0 : if (NewUseInst == OneUseInst) {
435 0 : if (&OneUse > &NewUse)
436 0 : return false;
437 : break;
438 : }
439 : UseInst = NewUseInst;
440 : }
441 : }
442 : }
443 : return true;
444 : }
445 :
446 : /// Get the appropriate tee opcode for the given register class.
447 : static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
448 243 : if (RC == &WebAssembly::I32RegClass)
449 : return WebAssembly::TEE_I32;
450 37 : if (RC == &WebAssembly::I64RegClass)
451 : return WebAssembly::TEE_I64;
452 6 : if (RC == &WebAssembly::F32RegClass)
453 : return WebAssembly::TEE_F32;
454 6 : if (RC == &WebAssembly::F64RegClass)
455 : return WebAssembly::TEE_F64;
456 0 : if (RC == &WebAssembly::V128RegClass)
457 : return WebAssembly::TEE_V128;
458 0 : llvm_unreachable("Unexpected register class");
459 : }
460 :
461 : // Shrink LI to its uses, cleaning up LI.
462 1550 : static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
463 1550 : if (LIS.shrinkToUses(&LI)) {
464 : SmallVector<LiveInterval *, 4> SplitLIs;
465 0 : LIS.splitSeparateComponents(LI, SplitLIs);
466 : }
467 1550 : }
468 :
469 11039 : static void MoveDebugValues(unsigned Reg, MachineInstr *Insert,
470 : MachineBasicBlock &MBB, MachineRegisterInfo &MRI) {
471 33661 : for (auto &Op : MRI.reg_operands(Reg)) {
472 22622 : MachineInstr *MI = Op.getParent();
473 : assert(MI != nullptr);
474 22622 : if (MI->isDebugValue() && MI->getParent() == &MBB)
475 6 : MBB.splice(Insert, &MBB, MI);
476 : }
477 11039 : }
478 :
479 162 : static void UpdateDebugValuesReg(unsigned Reg, unsigned NewReg,
480 : MachineBasicBlock &MBB,
481 : MachineRegisterInfo &MRI) {
482 192 : for (auto &Op : MRI.reg_operands(Reg)) {
483 30 : MachineInstr *MI = Op.getParent();
484 : assert(MI != nullptr);
485 30 : if (MI->isDebugValue() && MI->getParent() == &MBB)
486 5 : Op.setReg(NewReg);
487 : }
488 162 : }
489 :
490 : /// A single-use def in the same block with no intervening memory or register
491 : /// dependencies; move the def down and nest it with the current instruction.
492 10647 : static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
493 : MachineInstr *Def, MachineBasicBlock &MBB,
494 : MachineInstr *Insert, LiveIntervals &LIS,
495 : WebAssemblyFunctionInfo &MFI,
496 : MachineRegisterInfo &MRI) {
497 : LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
498 :
499 10647 : MBB.splice(Insert, &MBB, Def);
500 10647 : MoveDebugValues(Reg, Insert, MBB, MRI);
501 10647 : LIS.handleMove(*Def);
502 :
503 10647 : if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
504 : // No one else is using this register for anything so we can just stackify
505 : // it in place.
506 10634 : MFI.stackifyVReg(Reg);
507 : } else {
508 : // The register may have unrelated uses or defs; create a new register for
509 : // just our one def and use so that we can stackify it.
510 13 : unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
511 13 : Def->getOperand(0).setReg(NewReg);
512 13 : Op.setReg(NewReg);
513 :
514 : // Tell LiveIntervals about the new register.
515 : LIS.createAndComputeVirtRegInterval(NewReg);
516 :
517 : // Tell LiveIntervals about the changes to the old register.
518 13 : LiveInterval &LI = LIS.getInterval(Reg);
519 13 : LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
520 13 : LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
521 : /*RemoveDeadValNo=*/true);
522 :
523 13 : MFI.stackifyVReg(NewReg);
524 :
525 13 : UpdateDebugValuesReg(Reg, NewReg, MBB, MRI);
526 :
527 : LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
528 : }
529 :
530 10647 : ImposeStackOrdering(Def);
531 10647 : return Def;
532 : }
533 :
534 1793 : static void CloneDebugValues(unsigned Reg, MachineInstr *Insert,
535 : unsigned TargetReg, MachineBasicBlock &MBB,
536 : MachineRegisterInfo &MRI,
537 : const WebAssemblyInstrInfo *TII) {
538 : SmallPtrSet<MachineInstr *, 4> Instrs;
539 17214 : for (auto &Op : MRI.reg_operands(Reg)) {
540 15421 : MachineInstr *MI = Op.getParent();
541 : assert(MI != nullptr);
542 15421 : if (MI->isDebugValue() && MI->getParent() == &MBB &&
543 15421 : Instrs.find(MI) == Instrs.end())
544 2 : Instrs.insert(MI);
545 : }
546 1795 : for (const auto &MI : Instrs) {
547 4 : MachineInstr &Clone = TII->duplicate(MBB, Insert, *MI);
548 10 : for (unsigned i = 0, e = Clone.getNumOperands(); i != e; ++i) {
549 8 : MachineOperand &MO = Clone.getOperand(i);
550 8 : if (MO.isReg() && MO.getReg() == Reg)
551 2 : MO.setReg(TargetReg);
552 : }
553 : LLVM_DEBUG(dbgs() << " - - Cloned DBG_VALUE: "; Clone.dump());
554 : }
555 1793 : }
556 :
557 : /// A trivially cloneable instruction; clone it and nest the new copy with the
558 : /// current instruction.
559 1456 : static MachineInstr *RematerializeCheapDef(
560 : unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
561 : MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
562 : WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
563 : const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
564 : LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
565 : LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
566 :
567 1456 : unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
568 2912 : TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
569 1456 : Op.setReg(NewReg);
570 : MachineInstr *Clone = &*std::prev(Insert);
571 1456 : LIS.InsertMachineInstrInMaps(*Clone);
572 : LIS.createAndComputeVirtRegInterval(NewReg);
573 1456 : MFI.stackifyVReg(NewReg);
574 1456 : ImposeStackOrdering(Clone);
575 :
576 : LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
577 :
578 : // Shrink the interval.
579 : bool IsDead = MRI.use_empty(Reg);
580 1456 : if (!IsDead) {
581 1307 : LiveInterval &LI = LIS.getInterval(Reg);
582 1307 : ShrinkToUses(LI, LIS);
583 1307 : IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
584 : }
585 :
586 : // If that was the last use of the original, delete the original.
587 : // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
588 1456 : if (IsDead) {
589 : LLVM_DEBUG(dbgs() << " - Deleting original\n");
590 149 : SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
591 149 : LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
592 149 : LIS.removeInterval(Reg);
593 149 : LIS.RemoveMachineInstrFromMaps(Def);
594 149 : Def.eraseFromParent();
595 :
596 149 : MoveDebugValues(Reg, &*Insert, MBB, MRI);
597 149 : UpdateDebugValuesReg(Reg, NewReg, MBB, MRI);
598 : } else {
599 1307 : CloneDebugValues(Reg, &*Insert, NewReg, MBB, MRI, TII);
600 : }
601 :
602 1456 : return Clone;
603 : }
604 :
605 : /// A multiple-use def in the same block with no intervening memory or register
606 : /// dependencies; move the def down, nest it with the current instruction, and
607 : /// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
608 : /// this:
609 : ///
610 : /// Reg = INST ... // Def
611 : /// INST ..., Reg, ... // Insert
612 : /// INST ..., Reg, ...
613 : /// INST ..., Reg, ...
614 : ///
615 : /// to this:
616 : ///
617 : /// DefReg = INST ... // Def (to become the new Insert)
618 : /// TeeReg, Reg = TEE_... DefReg
619 : /// INST ..., TeeReg, ... // Insert
620 : /// INST ..., Reg, ...
621 : /// INST ..., Reg, ...
622 : ///
623 : /// with DefReg and TeeReg stackified. This eliminates a get_local from the
624 : /// resulting code.
625 243 : static MachineInstr *MoveAndTeeForMultiUse(
626 : unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
627 : MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
628 : MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
629 : LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
630 :
631 : // Move Def into place.
632 243 : MBB.splice(Insert, &MBB, Def);
633 243 : LIS.handleMove(*Def);
634 :
635 : // Create the Tee and attach the registers.
636 : const auto *RegClass = MRI.getRegClass(Reg);
637 243 : unsigned TeeReg = MRI.createVirtualRegister(RegClass);
638 243 : unsigned DefReg = MRI.createVirtualRegister(RegClass);
639 243 : MachineOperand &DefMO = Def->getOperand(0);
640 243 : MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
641 243 : TII->get(GetTeeOpcode(RegClass)), TeeReg)
642 243 : .addReg(Reg, RegState::Define)
643 243 : .addReg(DefReg, getUndefRegState(DefMO.isDead()));
644 243 : Op.setReg(TeeReg);
645 243 : DefMO.setReg(DefReg);
646 243 : SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
647 243 : SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
648 :
649 243 : MoveDebugValues(Reg, Insert, MBB, MRI);
650 :
651 : // Tell LiveIntervals we moved the original vreg def from Def to Tee.
652 243 : LiveInterval &LI = LIS.getInterval(Reg);
653 243 : LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
654 243 : VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
655 243 : I->start = TeeIdx;
656 243 : ValNo->def = TeeIdx;
657 243 : ShrinkToUses(LI, LIS);
658 :
659 : // Finish stackifying the new regs.
660 : LIS.createAndComputeVirtRegInterval(TeeReg);
661 : LIS.createAndComputeVirtRegInterval(DefReg);
662 243 : MFI.stackifyVReg(DefReg);
663 243 : MFI.stackifyVReg(TeeReg);
664 243 : ImposeStackOrdering(Def);
665 243 : ImposeStackOrdering(Tee);
666 :
667 243 : CloneDebugValues(Reg, Tee, DefReg, MBB, MRI, TII);
668 243 : CloneDebugValues(Reg, Insert, TeeReg, MBB, MRI, TII);
669 :
670 : LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
671 : LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
672 243 : return Def;
673 : }
674 :
675 : namespace {
676 : /// A stack for walking the tree of instructions being built, visiting the
677 : /// MachineOperands in DFS order.
678 : class TreeWalkerState {
679 : typedef MachineInstr::mop_iterator mop_iterator;
680 : typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
681 : typedef iterator_range<mop_reverse_iterator> RangeTy;
682 : SmallVector<RangeTy, 4> Worklist;
683 :
684 : public:
685 13955 : explicit TreeWalkerState(MachineInstr *Insert) {
686 13955 : const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
687 13955 : if (Range.begin() != Range.end())
688 25530 : Worklist.push_back(reverse(Range));
689 13955 : }
690 :
691 57376 : bool Done() const { return Worklist.empty(); }
692 :
693 : MachineOperand &Pop() {
694 : RangeTy &Range = Worklist.back();
695 : MachineOperand &Op = *Range.begin();
696 43421 : Range = drop_begin(Range, 1);
697 43421 : if (Range.begin() == Range.end())
698 : Worklist.pop_back();
699 : assert((Worklist.empty() ||
700 : Worklist.back().begin() != Worklist.back().end()) &&
701 : "Empty ranges shouldn't remain in the worklist");
702 : return Op;
703 : }
704 :
705 : /// Push Instr's operands onto the stack to be visited.
706 12346 : void PushOperands(MachineInstr *Instr) {
707 12346 : const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
708 12346 : if (Range.begin() != Range.end())
709 24692 : Worklist.push_back(reverse(Range));
710 12346 : }
711 :
712 : /// Some of Instr's operands are on the top of the stack; remove them and
713 : /// re-insert them starting from the beginning (because we've commuted them).
714 15 : void ResetTopOperands(MachineInstr *Instr) {
715 : assert(HasRemainingOperands(Instr) &&
716 : "Reseting operands should only be done when the instruction has "
717 : "an operand still on the stack");
718 15 : Worklist.back() = reverse(Instr->explicit_uses());
719 15 : }
720 :
721 : /// Test whether Instr has operands remaining to be visited at the top of
722 : /// the stack.
723 : bool HasRemainingOperands(const MachineInstr *Instr) const {
724 118 : if (Worklist.empty())
725 : return false;
726 : const RangeTy &Range = Worklist.back();
727 92 : return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
728 : }
729 :
730 : /// Test whether the given register is present on the stack, indicating an
731 : /// operand in the tree that we haven't visited yet. Moving a definition of
732 : /// Reg to a point in the tree after that would change its value.
733 : ///
734 : /// This is needed as a consequence of using implicit get_locals for
735 : /// uses and implicit set_locals for defs.
736 : bool IsOnStack(unsigned Reg) const {
737 31752 : for (const RangeTy &Range : Worklist)
738 54439 : for (const MachineOperand &MO : Range)
739 35728 : if (MO.isReg() && MO.getReg() == Reg)
740 : return true;
741 : return false;
742 : }
743 : };
744 :
745 : /// State to keep track of whether commuting is in flight or whether it's been
746 : /// tried for the current instruction and didn't work.
747 : class CommutingState {
748 : /// There are effectively three states: the initial state where we haven't
749 : /// started commuting anything and we don't know anything yet, the tenative
750 : /// state where we've commuted the operands of the current instruction and are
751 : /// revisting it, and the declined state where we've reverted the operands
752 : /// back to their original order and will no longer commute it further.
753 : bool TentativelyCommuting;
754 : bool Declined;
755 :
756 : /// During the tentative state, these hold the operand indices of the commuted
757 : /// operands.
758 : unsigned Operand0, Operand1;
759 :
760 : public:
761 13955 : CommutingState() : TentativelyCommuting(false), Declined(false) {}
762 :
763 : /// Stackification for an operand was not successful due to ordering
764 : /// constraints. If possible, and if we haven't already tried it and declined
765 : /// it, commute Insert's operands and prepare to revisit it.
766 135 : void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
767 : const WebAssemblyInstrInfo *TII) {
768 135 : if (TentativelyCommuting) {
769 : assert(!Declined &&
770 : "Don't decline commuting until you've finished trying it");
771 : // Commuting didn't help. Revert it.
772 9 : TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
773 9 : TentativelyCommuting = false;
774 9 : Declined = true;
775 126 : } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
776 72 : Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
777 72 : Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
778 72 : if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
779 : // Tentatively commute the operands and try again.
780 15 : TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
781 15 : TreeWalker.ResetTopOperands(Insert);
782 15 : TentativelyCommuting = true;
783 15 : Declined = false;
784 : }
785 : }
786 135 : }
787 :
788 : /// Stackification for some operand was successful. Reset to the default
789 : /// state.
790 0 : void Reset() {
791 12346 : TentativelyCommuting = false;
792 12346 : Declined = false;
793 0 : }
794 : };
795 : } // end anonymous namespace
796 :
797 2891 : bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
798 : LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
799 : "********** Function: "
800 : << MF.getName() << '\n');
801 :
802 : bool Changed = false;
803 2891 : MachineRegisterInfo &MRI = MF.getRegInfo();
804 : WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
805 2891 : const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
806 : const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
807 2891 : AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
808 2891 : MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
809 2891 : LiveIntervals &LIS = getAnalysis<LiveIntervals>();
810 :
811 : // Walk the instructions from the bottom up. Currently we don't look past
812 : // block boundaries, and the blocks aren't ordered so the block visitation
813 : // order isn't significant, but we may want to change this in the future.
814 6317 : for (MachineBasicBlock &MBB : MF) {
815 : // Don't use a range-based for loop, because we modify the list as we're
816 : // iterating over it and the end iterator may change.
817 17412 : for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
818 : MachineInstr *Insert = &*MII;
819 : // Don't nest anything inside an inline asm, because we don't have
820 : // constraints for $push inputs.
821 27972 : if (Insert->getOpcode() == TargetOpcode::INLINEASM)
822 31 : continue;
823 :
824 : // Ignore debugging intrinsics.
825 13976 : if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
826 : continue;
827 :
828 : // Iterate through the inputs in reverse order, since we'll be pulling
829 : // operands off the stack in LIFO order.
830 : CommutingState Commuting;
831 13955 : TreeWalkerState TreeWalker(Insert);
832 57376 : while (!TreeWalker.Done()) {
833 : MachineOperand &Op = TreeWalker.Pop();
834 :
835 : // We're only interested in explicit virtual register operands.
836 43421 : if (!Op.isReg())
837 : continue;
838 :
839 22872 : unsigned Reg = Op.getReg();
840 : assert(Op.isUse() && "explicit_uses() should only iterate over uses");
841 : assert(!Op.isImplicit() &&
842 : "explicit_uses() should only iterate over explicit operands");
843 22872 : if (TargetRegisterInfo::isPhysicalRegister(Reg))
844 : continue;
845 :
846 : // Identify the definition for this register at this point.
847 22872 : MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
848 22872 : if (!Def)
849 : continue;
850 :
851 : // Don't nest an INLINE_ASM def into anything, because we don't have
852 : // constraints for $pop outputs.
853 45382 : if (Def->getOpcode() == TargetOpcode::INLINEASM)
854 : continue;
855 :
856 : // Argument instructions represent live-in registers and not real
857 : // instructions.
858 22684 : if (WebAssembly::isArgument(*Def))
859 : continue;
860 :
861 : // Decide which strategy to take. Prefer to move a single-use value
862 : // over cloning it, and prefer cloning over introducing a tee.
863 : // For moving, we require the def to be in the same block as the use;
864 : // this makes things simpler (LiveIntervals' handleMove function only
865 : // supports intra-block moves) and it's MachineSink's job to catch all
866 : // the sinking opportunities anyway.
867 13508 : bool SameBlock = Def->getParent() == &MBB;
868 26549 : bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) &&
869 : !TreeWalker.IsOnStack(Reg);
870 12986 : if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
871 10647 : Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
872 2861 : } else if (ShouldRematerialize(*Def, AA, TII)) {
873 : Insert =
874 1456 : RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
875 : LIS, MFI, MRI, TII, TRI);
876 2534 : } else if (CanMove &&
877 1129 : OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
878 243 : Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
879 : MRI, TII);
880 : } else {
881 : // We failed to stackify the operand. If the problem was ordering
882 : // constraints, Commuting may be able to help.
883 1162 : if (!CanMove && SameBlock)
884 135 : Commuting.MaybeCommute(Insert, TreeWalker, TII);
885 : // Proceed to the next operand.
886 1162 : continue;
887 : }
888 :
889 : // If the instruction we just stackified is an IMPLICIT_DEF, convert it
890 : // to a constant 0 so that the def is explicit, and the push/pop
891 : // correspondence is maintained.
892 24692 : if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
893 1 : ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);
894 :
895 : // We stackified an operand. Add the defining instruction's operands to
896 : // the worklist stack now to continue to build an ever deeper tree.
897 : Commuting.Reset();
898 12346 : TreeWalker.PushOperands(Insert);
899 : }
900 :
901 : // If we stackified any operands, skip over the tree to start looking for
902 : // the next instruction we can build a tree on.
903 13955 : if (Insert != &*MII) {
904 4548 : ImposeStackOrdering(&*MII);
905 : MII = MachineBasicBlock::iterator(Insert).getReverse();
906 : Changed = true;
907 : }
908 : }
909 : }
910 :
911 : // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
912 : // that it never looks like a use-before-def.
913 2891 : if (Changed) {
914 2501 : MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
915 5531 : for (MachineBasicBlock &MBB : MF)
916 : MBB.addLiveIn(WebAssembly::VALUE_STACK);
917 : }
918 :
919 : #ifndef NDEBUG
920 : // Verify that pushes and pops are performed in LIFO order.
921 : SmallVector<unsigned, 0> Stack;
922 : for (MachineBasicBlock &MBB : MF) {
923 : for (MachineInstr &MI : MBB) {
924 : if (MI.isDebugInstr())
925 : continue;
926 : for (MachineOperand &MO : reverse(MI.explicit_operands())) {
927 : if (!MO.isReg())
928 : continue;
929 : unsigned Reg = MO.getReg();
930 :
931 : if (MFI.isVRegStackified(Reg)) {
932 : if (MO.isDef())
933 : Stack.push_back(Reg);
934 : else
935 : assert(Stack.pop_back_val() == Reg &&
936 : "Register stack pop should be paired with a push");
937 : }
938 : }
939 : }
940 : // TODO: Generalize this code to support keeping values on the stack across
941 : // basic block boundaries.
942 : assert(Stack.empty() &&
943 : "Register stack pushes and pops should be balanced");
944 : }
945 : #endif
946 :
947 2891 : return Changed;
948 : }
|