diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1233,17 +1233,35 @@ return false; } - /// Get the base operand and byte offset of an instruction that reads/writes - /// memory. + /// Get the base address register and byte offset of an instruction that + /// reads/writes memory. /// It returns false if MI does not read/write memory. /// It returns false if no base operand and offset was found. /// It is not guaranteed to always recognize base operand and offsets in all /// cases. - virtual bool getMemOperandWithOffset(const MachineInstr &MI, - const MachineOperand *&BaseOp, + /// This default implementation calls getMemBaseOpWithOffset to see if it + /// returns a base operand that is a register (not a frame index). + virtual bool getMemBaseRegWithOffset(const MachineInstr &MI, + Register &BaseReg, int64_t &Offset, - const TargetRegisterInfo *TRI) const { - return false; + const TargetRegisterInfo *TRI) const; + + /// Get the base address operand and byte offset of an instruction that + /// reads/writes memory. This default implementation calls + /// getMemAddressOperands to see if it returns a single base register operand + /// followed by zero or more immediate offset operands, assumed to be in + /// bytes. + virtual bool getMemBaseOpWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const; + + /// Get all operands of a memory instruction that contribute to the address, + /// in a form suitable for sorting (to group clusterable mem ops) and for + /// passing into shouldClusterMemOps. On failure, return no operands at all. + virtual void getMemAddressOperands(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &AddrOps, + const TargetRegisterInfo *TRI) const { } /// Return true if the instruction contains a base register and offset. If @@ -1266,8 +1284,8 @@ /// or /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); /// to TargetPassConfig::createMachineScheduler() to have an effect. - virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> AddrOps1, + ArrayRef<const MachineOperand *> AddrOps2, unsigned NumLoads) const { llvm_unreachable("target did not implement shouldClusterMemOps()"); } diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -364,10 +364,10 @@ unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { int64_t Offset; - const MachineOperand *BaseOp; + Register BaseReg; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) || - !BaseOp->isReg() || BaseOp->getReg() != PointerReg) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI) || + BaseReg != PointerReg) return SR_Unsuitable; // We want the mem access to be issued at a sane offset from PointerReg, diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -693,8 +693,8 @@ // offset, then mark the dependence as loop carried potentially.
const MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && - TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { + if (TII->getMemBaseOpWithOffset(LdMI, BaseOp1, Offset1, TRI) && + TII->getMemBaseOpWithOffset(MI, BaseOp2, Offset2, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && @@ -2056,16 +2056,11 @@ /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) return false; - if (!BaseOp->isReg()) - return false; - - Register BaseReg = BaseOp->getReg(); - MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); @@ -2234,19 +2229,19 @@ if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) return true; - const MachineOperand *BaseOpS, *BaseOpD; + Register BaseRegS, BaseRegD; int64_t OffsetS, OffsetD; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI)) + if (!TII->getMemBaseRegWithOffset(*SI, BaseRegS, OffsetS, TRI) || + !TII->getMemBaseRegWithOffset(*DI, BaseRegD, OffsetD, TRI)) return true; - if (!BaseOpS->isIdenticalTo(*BaseOpD)) + if (BaseRegS != BaseRegD) return true; // Check that the base register is incremented by a constant value for each // iteration. 
- MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); + MachineInstr *Def = MRI.getVRegDef(BaseRegS); if (!Def || !Def->isPHI()) return true; unsigned InitVal = 0; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1471,41 +1471,48 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - const MachineOperand *BaseOp; - int64_t Offset; - - MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) - : SU(su), BaseOp(Op), Offset(ofs) {} - - bool operator<(const MemOpInfo &RHS) const { - if (BaseOp->getType() != RHS.BaseOp->getType()) - return BaseOp->getType() < RHS.BaseOp->getType(); - - if (BaseOp->isReg()) - return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < - std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, - RHS.SU->NodeNum); - if (BaseOp->isFI()) { + SmallVector<const MachineOperand *, 4> AddrOps; + + MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> AddrOps) + : SU(SU), AddrOps(AddrOps.begin(), AddrOps.end()) {} + + static bool Compare(const MachineOperand *const &A, + const MachineOperand *const &B) { + if (A->getType() != B->getType()) + return A->getType() < B->getType(); + if (A->isReg()) + return A->getReg() < B->getReg(); + if (A->isImm()) + return A->getImm() < B->getImm(); + if (A->isFI()) { const MachineFunction &MF = - *BaseOp->getParent()->getParent()->getParent(); + *A->getParent()->getParent()->getParent(); const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - // Can't use tuple comparison here since we might need to use a - // different order when the stack grows down. - if (BaseOp->getIndex() != RHS.BaseOp->getIndex()) - return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex() - : BaseOp->getIndex() < RHS.BaseOp->getIndex(); - - if (Offset != RHS.Offset) - return Offset < RHS.Offset; - - return SU->NodeNum < RHS.SU->NodeNum; + return StackGrowsDown ? A->getIndex() > B->getIndex() + : A->getIndex() < B->getIndex(); } llvm_unreachable("MemOpClusterMutation only supports register or frame " "index bases."); } + + bool operator<(const MemOpInfo &RHS) const { + if (std::lexicographical_compare(AddrOps.begin(), + AddrOps.end(), + RHS.AddrOps.begin(), + RHS.AddrOps.end(), + Compare)) + return true; + if (std::lexicographical_compare(RHS.AddrOps.begin(), + RHS.AddrOps.end(), + AddrOps.begin(), + AddrOps.end(), + Compare)) + return false; + return SU->NodeNum < RHS.SU->NodeNum; + } }; const TargetInstrInfo *TII; @@ -1558,12 +1565,18 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { + if (MemOps.size() < 2) + return; SmallVector<MemOpInfo, 32> MemOpRecords; for (SUnit *SU : MemOps) { - const MachineOperand *BaseOp; - int64_t Offset; - if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) - MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); + SmallVector<const MachineOperand *, 4> AddrOps; + TII->getMemAddressOperands(*SU->getInstr(), AddrOps, TRI); +#ifndef NDEBUG + for (auto *Op : AddrOps) + assert(Op); +#endif + if (!AddrOps.empty()) + MemOpRecords.push_back(MemOpInfo(SU, AddrOps)); } if (MemOpRecords.size() < 2) return; @@ -1573,8 +1586,8 @@ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, - *MemOpRecords[Idx + 1].BaseOp, + if (TII->shouldClusterMemOps(MemOpRecords[Idx].AddrOps, + MemOpRecords[Idx + 1].AddrOps, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -762,12 +762,9 @@ !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) - return false; - - if (!BaseOp->isReg()) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) return false; if (!(MI.mayLoad() && !MI.isPredicable())) @@ -780,7 +777,7 @@ return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && (MBP.Predicate == MachineBranchPredicate::PRED_NE || MBP.Predicate == MachineBranchPredicate::PRED_EQ) && - MBP.LHS.getReg() == BaseOp->getReg(); + MBP.LHS.getReg() == BaseReg; } /// If the sunk instruction is a copy, try to forward the copy instead of diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -911,16 +911,11 @@ /// during each iteration. Set Delta to the amount of the change. bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) return false; - if (!BaseOp->isReg()) - return false; - - Register BaseReg = BaseOp->getReg(); - MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop.
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1006,6 +1006,41 @@ return !DisableHazardRecognizer; } +bool TargetInstrInfo::getMemBaseRegWithOffset(const MachineInstr &MI, + Register &BaseReg, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + if (!getMemBaseOpWithOffset(MI, BaseOp, Offset, TRI) || !BaseOp->isReg() || + BaseOp->getSubReg() != 0) + return false; + BaseReg = BaseOp->getReg(); + return true; +} + +bool TargetInstrInfo::getMemBaseOpWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + SmallVector<const MachineOperand *, 4> AddrOps; + getMemAddressOperands(MI, AddrOps, TRI); + if (AddrOps.empty()) + return false; + + BaseOp = AddrOps.front(); + if (!BaseOp->isReg() && !BaseOp->isFI()) + return false; + + Offset = 0; + for (auto *OffsetOp : makeArrayRef(AddrOps).slice(1)) { + if (!OffsetOp->isImm()) + return false; + Offset += OffsetOp->getImm(); + } + + return true; +} + // Default implementation of CreateTargetRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfo:: CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, @@ -1164,9 +1199,17 @@ if (!PSV || PSV->mayAlias(&MFI)) return None; - const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + SmallVector<const MachineOperand *, 4> AddrOps; + TII->getMemAddressOperands(MI, AddrOps, TRI); + if (AddrOps.empty()) return None; + const MachineOperand *BaseOp = AddrOps.front(); + Offset = 0; + for (auto *Op : makeArrayRef(AddrOps).slice(1)) { + if (!Op->isImm()) + return None; + Offset += Op->getImm(); + } assert(MI.getNumExplicitDefs() == 1 && "Can currently only handle mem instructions with a single define"); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -112,16 +112,29 @@ /// Hint that pairing the given load or store is unprofitable. static void suppressLdStPair(MachineInstr &MI); - bool getMemOperandWithOffset(const MachineInstr &MI, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; - - bool getMemOperandWithOffsetWidth(const MachineInstr &MI, - const MachineOperand *&BaseOp, + bool getMemBaseRegWithOffsetWidth(const MachineInstr &MI, + Register &BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const override; + + bool getMemBaseOpWithOffsetWidth(const MachineInstr &MI, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; + + void getMemAddressOperands(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &AddrOps, + const TargetRegisterInfo *TRI) const override; + + void getMemAddressOperandsWidth(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &AddrOps, + unsigned &Width, + const TargetRegisterInfo *TRI) const; + /// Return the immediate offset of the base register in a load/store \p LdSt.
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const; @@ -132,8 +145,8 @@ static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset); - bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + bool shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const override; void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -948,8 +948,8 @@ // base are identical, and the offset of a lower memory access + // the width doesn't overlap the offset of a higher memory access, // then the memory accesses are different. - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; @@ -1978,61 +1978,89 @@ return true; } -bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore()) +bool AArch64InstrInfo::getMemBaseRegWithOffsetWidth( + const MachineInstr &MI, Register &BaseReg, int64_t &Offset, + unsigned &Width, const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + if (!getMemBaseOpWithOffsetWidth(MI, BaseOp, Offset, Width, TRI) + || !BaseOp->isReg() || BaseOp->getSubReg() != 0) return false; + BaseReg = BaseOp->getReg(); + return true; +} +bool AArch64InstrInfo::getMemBaseOpWithOffset( + const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { unsigned Width; - return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); + return getMemBaseOpWithOffsetWidth(MI, BaseOp, Offset, Width, TRI); } -bool AArch64InstrInfo::getMemOperandWithOffsetWidth( - const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, +bool AArch64InstrInfo::getMemBaseOpWithOffsetWidth( + const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { - assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); + if (!TargetInstrInfo::getMemBaseOpWithOffset(MI, BaseOp, Offset, TRI)) + return false; + unsigned Scale = 0; + int64_t Dummy1, Dummy2; + if (!getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2)) + return false; + + // Compute the byte offset. Offset is calculated as the immediate operand + // multiplied by the scaling factor. Unscaled instructions have scaling factor + // set to 1. 
+ Offset *= Scale; + return true; +} + +void AArch64InstrInfo::getMemAddressOperands( + const MachineInstr &LdSt, SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const { + unsigned Width; + getMemAddressOperandsWidth(LdSt, AddrOps, Width, TRI); +} + +void AArch64InstrInfo::getMemAddressOperandsWidth( + const MachineInstr &LdSt, SmallVectorImpl &AddrOps, + unsigned &Width, const TargetRegisterInfo *TRI) const { + if (!LdSt.mayLoadOrStore()) + return; + // Handle only loads/stores with base register followed by immediate offset. if (LdSt.getNumExplicitOperands() == 3) { // Non-paired instruction (e.g., ldr x1, [x0, #8]). if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || !LdSt.getOperand(2).isImm()) - return false; + return; } else if (LdSt.getNumExplicitOperands() == 4) { // Paired instruction (e.g., ldp x1, x2, [x0, #8]). if (!LdSt.getOperand(1).isReg() || (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || !LdSt.getOperand(3).isImm()) - return false; + return; } else - return false; + return; - // Get the scaling factor for the instruction and set the width for the - // instruction. - unsigned Scale = 0; + // Get the width for the instruction. + unsigned Scale; int64_t Dummy1, Dummy2; // If this returns false, then it's an instruction we don't want to handle. if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) - return false; + return; - // Compute the offset. Offset is calculated as the immediate operand - // multiplied by the scaling factor. Unscaled instructions have scaling factor - // set to 1. + const MachineOperand *BaseOp, *OffsetOp; if (LdSt.getNumExplicitOperands() == 3) { BaseOp = &LdSt.getOperand(1); - Offset = LdSt.getOperand(2).getImm() * Scale; + OffsetOp = &LdSt.getOperand(2); } else { assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); BaseOp = &LdSt.getOperand(2); - Offset = LdSt.getOperand(3).getImm() * Scale; + OffsetOp = &LdSt.getOperand(3); } - if (!BaseOp->isReg() && !BaseOp->isFI()) - return false; - - return true; + AddrOps.push_back(BaseOp); + AddrOps.push_back(OffsetOp); } MachineOperand & @@ -2355,10 +2383,13 @@ /// Detect opportunities for ldp/stp formation. /// -/// Only called for LdSt for which getMemOperandWithOffset returns true. -bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, +/// Only called for LdSt for which getMemBaseRegWithOffset returns true. +bool AArch64InstrInfo::shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const { + assert(AddrOps1.size() == 2 && AddrOps2.size() == 2); + const MachineOperand &BaseOp1 = *AddrOps1[0]; + const MachineOperand &BaseOp2 = *AddrOps2[0]; const MachineInstr &FirstLdSt = *BaseOp1.getParent(); const MachineInstr &SecondLdSt = *BaseOp2.getParent(); if (BaseOp1.getType() != BaseOp2.getType()) @@ -5802,13 +5833,13 @@ // At this point, we have a stack instruction that we might need to // fix up. We'll handle it if it's a load or store. if (MI.mayLoadOrStore()) { - const MachineOperand *Base; // Filled with the base operand of MI. + Register Base; // Filled with the base register of MI. int64_t Offset; // Filled with the offset of MI. // Does it allow us to offset the base operand and is the base the // register SP? 
- if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() || - Base->getReg() != AArch64::SP) + if (!getMemBaseRegWithOffset(MI, Base, Offset, &TRI) || + Base != AArch64::SP) return false; // Find the minimum/maximum offset for this instruction and check @@ -6185,14 +6216,14 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { for (MachineInstr &MI : MBB) { - const MachineOperand *Base; + Register Base; unsigned Width; int64_t Offset; // Is this a load or store with an immediate offset with SP as the base? if (!MI.mayLoadOrStore() || - !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) || - (Base->isReg() && Base->getReg() != AArch64::SP)) + !getMemBaseRegWithOffsetWidth(MI, Base, Offset, Width, &RI) || + Base != AArch64::SP) continue; // It is, so we have to fix it up. diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp --- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -147,11 +147,9 @@ for (auto &MI : MBB) { if (!isNarrowFPStore(MI)) continue; - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) && - BaseOp->isReg()) { - Register BaseReg = BaseOp->getReg(); + if (TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) { if (PrevBaseReg == BaseReg) { // If this block can take STPs, skip ahead to the next block. if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -179,13 +179,17 @@ int64_t &Offset1, int64_t &Offset2) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const final; + bool getMemBaseOpWithOffset(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const final; - bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + void getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const final; + + bool shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const override; bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -256,144 +256,127 @@ } } -bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore()) +bool SIInstrInfo::getMemBaseOpWithOffset(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + if (!TargetInstrInfo::getMemBaseOpWithOffset(LdSt, BaseOp, Offset, TRI)) return false; - unsigned Opc = LdSt.getOpcode(); + // If getMemAddressOperands returned an offset in words, convert it to bytes. 
+ if (isDS(LdSt) && !getNamedOperand(LdSt, AMDGPU::OpName::offset)) { + unsigned Opc = LdSt.getOpcode(); + + int64_t EltSize; + if (LdSt.mayLoad()) + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; + else { + assert(LdSt.mayStore()); + int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; + } + + if (isStride64(Opc)) + EltSize *= 64; + + Offset *= EltSize; + } + return true; +} + +void SIInstrInfo::getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + const MachineOperand *OffsetOp; + + if (!LdSt.mayLoadOrStore()) + return; if (isDS(LdSt)) { - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - if (OffsetImm) { + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + if (OffsetOp) { // Normal, single offset LDS instruction. BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to // report that here? - if (!BaseOp || !BaseOp->isReg()) - return false; - - Offset = OffsetImm->getImm(); - - return true; + if (!BaseOp) + return; + AddrOps.push_back(BaseOp); + AddrOps.push_back(OffsetOp); + return; } // The 2 offset instructions use offset0 and offset1 instead. We can treat // these as a load with a single offset if the 2 offsets are consecutive. We // will use this for some partially aligned loads. - const MachineOperand *Offset0Imm = + const MachineOperand *Offset0Op = getNamedOperand(LdSt, AMDGPU::OpName::offset0); - const MachineOperand *Offset1Imm = + const MachineOperand *Offset1Op = getNamedOperand(LdSt, AMDGPU::OpName::offset1); - uint8_t Offset0 = Offset0Imm->getImm(); - uint8_t Offset1 = Offset1Imm->getImm(); - - if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { - // Each of these offsets is in element sized units, so we need to convert - // to bytes of the individual reads. - - unsigned EltSize; - if (LdSt.mayLoad()) - EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; - else { - assert(LdSt.mayStore()); - int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); - EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; - } - - if (isStride64(Opc)) - EltSize *= 64; + unsigned Offset0 = (uint8_t)Offset0Op->getImm(); + unsigned Offset1 = (uint8_t)Offset1Op->getImm(); + if (Offset0 + 1 == Offset1) { BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); - if (!BaseOp->isReg()) - return false; - - Offset = EltSize * Offset0; - - return true; + AddrOps.push_back(BaseOp); + AddrOps.push_back(Offset0Op); } - - return false; - } - - if (isMUBUF(LdSt) || isMTBUF(LdSt)) { - const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); - if (SOffset && SOffset->isReg()) { + } else if (isMUBUF(LdSt) || isMTBUF(LdSt)) { + const MachineOperand *SOffsetOp = + getNamedOperand(LdSt, AMDGPU::OpName::soffset); + if (SOffsetOp && SOffsetOp->isReg()) { // We can only handle this if it's a stack access, as any other resource // would require reporting multiple base registers. 
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (AddrReg && !AddrReg->isFI()) - return false; + return; const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); const SIMachineFunctionInfo *MFI = LdSt.getParent()->getParent()->getInfo(); if (RSrc->getReg() != MFI->getScratchRSrcReg()) - return false; + return; - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseOp = SOffset; - Offset = OffsetImm->getImm(); - return true; + AddrOps.push_back(SOffsetOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + AddrOps.push_back(OffsetOp); + return; } - const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); - if (!AddrReg) - return false; - - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseOp = AddrReg; - Offset = OffsetImm->getImm(); - if (SOffset) // soffset can be an inline immediate. - Offset += SOffset->getImm(); - - if (!BaseOp->isReg()) - return false; - - return true; - } - - if (isSMRD(LdSt)) { - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - if (!OffsetImm) - return false; - - const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase); - BaseOp = SBaseReg; - Offset = OffsetImm->getImm(); - if (!BaseOp->isReg()) - return false; - - return true; - } - - if (isFLAT(LdSt)) { - const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); - if (VAddr) { - // Can't analyze 2 offsets. + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (!BaseOp) + return; + AddrOps.push_back(BaseOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + AddrOps.push_back(OffsetOp); + if (SOffsetOp) // soffset can be an inline immediate. + AddrOps.push_back(SOffsetOp); + } else if (isSMRD(LdSt)) { + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase); + if (!BaseOp) // e.g. S_MEMTIME + return; + AddrOps.push_back(BaseOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + if (OffsetOp) + AddrOps.push_back(OffsetOp); + } else if (isFLAT(LdSt)) { + // Instructions have either vaddr or saddr or both. + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (BaseOp) { if (getNamedOperand(LdSt, AMDGPU::OpName::saddr)) - return false; - - BaseOp = VAddr; - } else { - // scratch instructions have either vaddr or saddr. - BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); + // For compatibility with the old code, don't handle this case yet. + // FIXME remove this restriction! 
+ return; + AddrOps.push_back(BaseOp); } - - Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); - if (!BaseOp->isReg()) - return false; - return true; + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); + if (BaseOp) + AddrOps.push_back(BaseOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + AddrOps.push_back(OffsetOp); } - - return false; } static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, @@ -431,9 +414,11 @@ return Base1 == Base2; } -bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, +bool SIInstrInfo::shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const { + const MachineOperand &BaseOp1 = *AddrOps1[0]; + const MachineOperand &BaseOp2 = *AddrOps2[0]; const MachineInstr &FirstLdSt = *BaseOp1.getParent(); const MachineInstr &SecondLdSt = *BaseOp2.getParent(); @@ -2528,8 +2513,8 @@ const MachineOperand *BaseOp0, *BaseOp1; int64_t Offset0, Offset1; - if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) && - getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) { + if (getMemBaseOpWithOffset(MIa, BaseOp0, Offset0, &RI) && + getMemBaseOpWithOffset(MIb, BaseOp1, Offset1, &RI)) { if (!BaseOp0->isIdenticalTo(*BaseOp1)) return false; diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1952,8 +1952,8 @@ int64_t OffLatReg; if (SITII->isLowLatencyInstruction(*SU->getInstr())) { IsLowLatencySU[i] = 1; - if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, - TRI)) + if (SITII->getMemBaseOpWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, + TRI)) LowLatencyOffset[i] = OffLatReg; } else if (SITII->isHighLatencyInstruction(*SU->getInstr())) IsHighLatencySU[i] = 1; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -204,10 +204,9 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; /// Get the base register and byte offset of a load/store instr. - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + void getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI = nullptr) const override; /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. @@ -427,8 +426,8 @@ bool predOpcodeHasNot(ArrayRef Cond) const; unsigned getAddrMode(const MachineInstr &MI) const; - MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, - unsigned &AccessSize) const; + bool getBaseAndOffset(const MachineInstr &MI, Register &BaseReg, + int64_t &Offset) const; SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; unsigned getCExtOpNum(const MachineInstr &MI) const; HexagonII::CompoundGroup diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2939,15 +2939,6 @@ return false; } -/// Get the base register and byte offset of a load/store instr. 
-bool HexagonInstrInfo::getMemOperandWithOffset( - const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, - const TargetRegisterInfo *TRI) const { - unsigned AccessSize = 0; - BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize); - return BaseOp != nullptr && BaseOp->isReg(); -} - /// Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { @@ -3150,40 +3141,22 @@ return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } -// Returns the base register in a memory access (load/store). The offset is -// returned in Offset and the access size is returned in AccessSize. -// If the base operand has a subregister or the offset field does not contain -// an immediate value, return nullptr. -MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, - int64_t &Offset, - unsigned &AccessSize) const { - // Return if it is not a base+offset type instruction or a MemOp. - if (getAddrMode(MI) != HexagonII::BaseImmOffset && - getAddrMode(MI) != HexagonII::BaseLongOffset && - !isMemOp(MI) && !isPostIncrement(MI)) - return nullptr; - - AccessSize = getMemAccessSize(MI); - +void HexagonInstrInfo::getMemAddressOperands(const MachineInstr &MI, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const { unsigned BasePos = 0, OffsetPos = 0; if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos)) - return nullptr; + return; + + const MachineOperand *BaseOp = &MI.getOperand(BasePos); + AddrOps.push_back(BaseOp); // Post increment updates its EA after the mem access, // so we need to treat its offset as zero. - if (isPostIncrement(MI)) { - Offset = 0; - } else { - const MachineOperand &OffsetOp = MI.getOperand(OffsetPos); - if (!OffsetOp.isImm()) - return nullptr; - Offset = OffsetOp.getImm(); + if (!isPostIncrement(MI)) { + const MachineOperand *OffsetOp = &MI.getOperand(OffsetPos); + AddrOps.push_back(OffsetOp); } - - const MachineOperand &BaseOp = MI.getOperand(BasePos); - if (BaseOp.getSubReg() != 0) - return nullptr; - return &const_cast(BaseOp); } /// Return the position of the base and offset operands for this instruction. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -274,11 +274,13 @@ if (!L0.mayLoad() || L0.mayStore() || HII.getAddrMode(L0) != HexagonII::BaseImmOffset) continue; + Register BaseReg0; int64_t Offset0; - unsigned Size0; - MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0); + if (!HII.getMemBaseRegWithOffset(L0, BaseReg0, Offset0, nullptr)) + continue; // Is the access size is longer than the L1 cache line, skip the check. - if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32) + unsigned Size0 = HII.getMemAccessSize(L0); + if (Size0 >= 32) continue; // Scan only up to 32 instructions ahead (to avoid n^2 complexity). 
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) { @@ -287,11 +289,13 @@ if (!L1.mayLoad() || L1.mayStore() || HII.getAddrMode(L1) != HexagonII::BaseImmOffset) continue; + Register BaseReg1; int64_t Offset1; - unsigned Size1; - MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1); - if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 || - BaseOp0->getReg() != BaseOp1->getReg()) + if (!HII.getMemBaseRegWithOffset(L1, BaseReg1, Offset1, nullptr) || + BaseReg0 != BaseReg1) + continue; + unsigned Size1 = HII.getMemAccessSize(L1); + if (Size1 >= 32) continue; // Check bits 3 and 4 of the offset: if they differ, a bank conflict // is unlikely. diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -67,15 +67,15 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; - - bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffset(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; + + bool getMemBaseOpWithOffsetWidth(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -103,8 +103,8 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); @@ -754,7 +754,7 @@ return 0; } -bool LanaiInstrInfo::getMemOperandWithOffsetWidth( +bool LanaiInstrInfo::getMemBaseOpWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, unsigned &Width, const TargetRegisterInfo * /*TRI*/) const { // Handle only loads/stores with base register followed by immediate offset @@ -795,10 +795,9 @@ return true; } -bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool LanaiInstrInfo::getMemBaseOpWithOffset( + const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { switch (LdSt.getOpcode()) { default: return false; @@ -812,6 +811,6 @@ case Lanai::LDBs_RI: case Lanai::LDBz_RI: unsigned Width; - return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); + return getMemBaseOpWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- 
a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -361,10 +361,10 @@ /// Return true if get the base operand, byte offset of an instruction and /// the memory width. Width is the size of memory that is being /// loaded/stored (e.g. 1, 2, 4, 8). - bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffsetWidth(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; /// Return true if two MIs access different memory addresses and false /// otherwise diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -4262,9 +4262,12 @@ // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. -bool PPCInstrInfo::getMemOperandWithOffsetWidth( - const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, - unsigned &Width, const TargetRegisterInfo *TRI) const { +bool PPCInstrInfo::getMemBaseOpWithOffsetWidth( + const MachineInstr &LdSt, + const MachineOperand *&BaseReg, + int64_t &Offset, + unsigned &Width, + const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; @@ -4301,8 +4304,8 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -86,10 +86,10 @@ bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; - bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffsetWidth(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -559,7 +559,7 @@ // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. 
-bool RISCVInstrInfo::getMemOperandWithOffsetWidth( +bool RISCVInstrInfo::getMemBaseOpWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) @@ -600,8 +600,8 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -291,10 +291,9 @@ SmallVectorImpl &Cond, bool AllowModify) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + void getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const override; bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify = false) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3189,39 +3189,36 @@ } } -bool X86InstrInfo::getMemOperandWithOffset( - const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset, +void X86InstrInfo::getMemAddressOperands( + const MachineInstr &MemOp, SmallVectorImpl &AddrOps, const TargetRegisterInfo *TRI) const { const MCInstrDesc &Desc = MemOp.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); if (MemRefBegin < 0) - return false; + return; MemRefBegin += X86II::getOperandBias(Desc); - BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); + const MachineOperand *BaseOp = + &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); if (!BaseOp->isReg()) // Can be an MO_FrameIndex - return false; + return; if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1) - return false; + return; if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() != X86::NoRegister) - return false; + return; const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp); // Displacement can be symbolic if (!DispMO.isImm()) - return false; - - Offset = DispMO.getImm(); - - if (!BaseOp->isReg()) - return false; + return; - return true; + AddrOps.push_back(BaseOp); + AddrOps.push_back(&DispMO); } static unsigned getStoreRegOpcode(unsigned SrcReg, diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll --- a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll @@ -187,8 +187,8 @@ ; CI: s_mov_b32 m0 ; GFX9-NOT: m0 -; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; GCN: s_endpgm define amdgpu_kernel void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 
@llvm.amdgcn.workitem.id.x() #1