diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1233,17 +1233,35 @@ return false; } - /// Get the base operand and byte offset of an instruction that reads/writes - /// memory. + /// Get the base address register and byte offset of an instruction that + /// reads/writes memory. /// It returns false if MI does not read/write memory. /// It returns false if no base operand and offset was found. /// It is not guaranteed to always recognize base operand and offsets in all /// cases. - virtual bool getMemOperandWithOffset(const MachineInstr &MI, - const MachineOperand *&BaseOp, + /// This default implementation calls getMemBaseOpWithOffset to see if it + /// returns a base operand that is a register (not a frame index). + virtual bool getMemBaseRegWithOffset(const MachineInstr &MI, + Register &BaseReg, int64_t &Offset, - const TargetRegisterInfo *TRI) const { - return false; + const TargetRegisterInfo *TRI) const; + + /// Get the base address operand and byte offset of an instruction that + /// reads/writes memory. This default implementation calls + /// getMemAddressOperands to see if it returns a single base register operand + /// followed by zero or more immediate offset operands, assumed to be in + /// bytes. + virtual bool getMemBaseOpWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const; + + /// Get all operands of a memory instruction that contribute to the address, + /// in a form suitable for sorting (to group clusterable mem ops) and for + /// passing into shouldClusterMemOps. On failure, return no operands at all. + virtual void getMemAddressOperands(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &AddrOps, + const TargetRegisterInfo *TRI) const { } /// Return true if the instruction contains a base register and offset. If @@ -1266,8 +1284,8 @@ /// or /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); /// to TargetPassConfig::createMachineScheduler() to have an effect. - virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> AddrOps1, + ArrayRef<const MachineOperand *> AddrOps2, unsigned NumLoads) const { llvm_unreachable("target did not implement shouldClusterMemOps()"); } diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -364,10 +364,10 @@ unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { int64_t Offset; - const MachineOperand *BaseOp; + Register BaseReg; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) || - !BaseOp->isReg() || BaseOp->getReg() != PointerReg) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI) || + BaseReg != PointerReg) return SR_Unsuitable; // We want the mem access to be issued at a sane offset from PointerReg, diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -693,8 +693,8 @@ // offset, then mark the dependence as loop carried potentially.
const MachineOperand *BaseOp1, *BaseOp2; int64_t Offset1, Offset2; - if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) && - TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { + if (TII->getMemBaseOpWithOffset(LdMI, BaseOp1, Offset1, TRI) && + TII->getMemBaseOpWithOffset(MI, BaseOp2, Offset2, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && (int)Offset1 < (int)Offset2) { assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && @@ -2056,16 +2056,11 @@ /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) return false; - if (!BaseOp->isReg()) - return false; - - Register BaseReg = BaseOp->getReg(); - MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); @@ -2234,19 +2229,19 @@ if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) return true; - const MachineOperand *BaseOpS, *BaseOpD; + Register BaseRegS, BaseRegD; int64_t OffsetS, OffsetD; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI)) + if (!TII->getMemBaseRegWithOffset(*SI, BaseRegS, OffsetS, TRI) || + !TII->getMemBaseRegWithOffset(*DI, BaseRegD, OffsetD, TRI)) return true; - if (!BaseOpS->isIdenticalTo(*BaseOpD)) + if (BaseRegS != BaseRegD) return true; // Check that the base register is incremented by a constant value for each // iteration. 
- MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); + MachineInstr *Def = MRI.getVRegDef(BaseRegS); if (!Def || !Def->isPHI()) return true; unsigned InitVal = 0; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1471,41 +1471,48 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { struct MemOpInfo { SUnit *SU; - const MachineOperand *BaseOp; - int64_t Offset; - - MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs) - : SU(su), BaseOp(Op), Offset(ofs) {} - - bool operator<(const MemOpInfo &RHS) const { - if (BaseOp->getType() != RHS.BaseOp->getType()) - return BaseOp->getType() < RHS.BaseOp->getType(); - - if (BaseOp->isReg()) - return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) < - std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, - RHS.SU->NodeNum); - if (BaseOp->isFI()) { + SmallVector<const MachineOperand *, 4> AddrOps; + + MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> AddrOps) + : SU(SU), AddrOps(AddrOps.begin(), AddrOps.end()) {} + + static bool Compare(const MachineOperand *const &A, + const MachineOperand *const &B) { + if (A->getType() != B->getType()) + return A->getType() < B->getType(); + if (A->isReg()) + return A->getReg() < B->getReg(); + if (A->isImm()) + return A->getImm() < B->getImm(); + if (A->isFI()) { const MachineFunction &MF = - *BaseOp->getParent()->getParent()->getParent(); + *A->getParent()->getParent()->getParent(); const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - // Can't use tuple comparison here since we might need to use a - // different order when the stack grows down. - if (BaseOp->getIndex() != RHS.BaseOp->getIndex()) - return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex() - : BaseOp->getIndex() < RHS.BaseOp->getIndex(); - - if (Offset != RHS.Offset) - return Offset < RHS.Offset; - - return SU->NodeNum < RHS.SU->NodeNum; + return StackGrowsDown ? A->getIndex() > B->getIndex() + : A->getIndex() < B->getIndex(); } llvm_unreachable("MemOpClusterMutation only supports register or frame " "index bases."); } + + bool operator<(const MemOpInfo &RHS) const { + if (std::lexicographical_compare(AddrOps.begin(), + AddrOps.end(), + RHS.AddrOps.begin(), + RHS.AddrOps.end(), + Compare)) + return true; + if (std::lexicographical_compare(RHS.AddrOps.begin(), + RHS.AddrOps.end(), + AddrOps.begin(), + AddrOps.end(), + Compare)) + return false; + return SU->NodeNum < RHS.SU->NodeNum; + } }; const TargetInstrInfo *TII; @@ -1558,12 +1565,18 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { + if (MemOps.size() < 2) + return; SmallVector<MemOpInfo, 32> MemOpRecords; for (SUnit *SU : MemOps) { - const MachineOperand *BaseOp; - int64_t Offset; - if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) - MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset)); + SmallVector<const MachineOperand *, 4> AddrOps; + TII->getMemAddressOperands(*SU->getInstr(), AddrOps, TRI); +#ifndef NDEBUG + for (auto *Op : AddrOps) + assert(Op); +#endif + if (!AddrOps.empty()) + MemOpRecords.push_back(MemOpInfo(SU, AddrOps)); } if (MemOpRecords.size() < 2) return; @@ -1573,8 +1586,8 @@ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; - if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, - *MemOpRecords[Idx + 1].BaseOp, + if (TII->shouldClusterMemOps(MemOpRecords[Idx].AddrOps, + MemOpRecords[Idx + 1].AddrOps, ClusterLength) && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -762,12 +762,9 @@ !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) - return false; - - if (!BaseOp->isReg()) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) return false; if (!(MI.mayLoad() && !MI.isPredicable())) @@ -780,7 +777,7 @@ return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && (MBP.Predicate == MachineBranchPredicate::PRED_NE || MBP.Predicate == MachineBranchPredicate::PRED_EQ) && - MBP.LHS.getReg() == BaseOp->getReg(); + MBP.LHS.getReg() == BaseReg; } /// If the sunk instruction is a copy, try to forward the copy instead of diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -911,16 +911,11 @@ /// during each iteration. Set Delta to the amount of the change. bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + if (!TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) return false; - if (!BaseOp->isReg()) - return false; - - Register BaseReg = BaseOp->getReg(); - MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop.
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1006,6 +1006,41 @@ return !DisableHazardRecognizer; } +bool TargetInstrInfo::getMemBaseRegWithOffset(const MachineInstr &MI, + Register &BaseReg, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + if (!getMemBaseOpWithOffset(MI, BaseOp, Offset, TRI) || !BaseOp->isReg() || + BaseOp->getSubReg() != 0) + return false; + BaseReg = BaseOp->getReg(); + return true; +} + +bool TargetInstrInfo::getMemBaseOpWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + SmallVector<const MachineOperand *, 4> AddrOps; + getMemAddressOperands(MI, AddrOps, TRI); + if (AddrOps.empty()) + return false; + + BaseOp = AddrOps.front(); + if (!BaseOp->isReg() && !BaseOp->isFI()) + return false; + + Offset = 0; + for (auto *OffsetOp : makeArrayRef(AddrOps).slice(1)) { + if (!OffsetOp->isImm()) + return false; + Offset += OffsetOp->getImm(); + } + + return true; +} + // Default implementation of CreateTargetRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfo:: CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, @@ -1164,9 +1199,17 @@ if (!PSV || PSV->mayAlias(&MFI)) return None; - const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + SmallVector<const MachineOperand *, 4> AddrOps; + TII->getMemAddressOperands(MI, AddrOps, TRI); + if (AddrOps.empty()) return None; + const MachineOperand *BaseOp = AddrOps.front(); + Offset = 0; + for (auto *Op : makeArrayRef(AddrOps).slice(1)) { + if (!Op->isImm()) + return None; + Offset += Op->getImm(); + } assert(MI.getNumExplicitDefs() == 1 && "Can currently only handle mem instructions with a single define"); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -112,16 +112,29 @@ /// Hint that pairing the given load or store is unprofitable. static void suppressLdStPair(MachineInstr &MI); - bool getMemOperandWithOffset(const MachineInstr &MI, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; - - bool getMemOperandWithOffsetWidth(const MachineInstr &MI, - const MachineOperand *&BaseOp, + bool getMemBaseRegWithOffsetWidth(const MachineInstr &MI, + Register &BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffset(const MachineInstr &MI, + const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const override; + + bool getMemBaseOpWithOffsetWidth(const MachineInstr &MI, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; + + void getMemAddressOperands(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &AddrOps, + const TargetRegisterInfo *TRI) const override; + + void getMemAddressOperandsWidth(const MachineInstr &MI, + SmallVectorImpl<const MachineOperand *> &AddrOps, + unsigned &Width, + const TargetRegisterInfo *TRI) const; + /// Return the immediate offset of the base register in a load/store \p LdSt.
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const; @@ -132,8 +145,8 @@ static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset); - bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + bool shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const override; void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -948,8 +948,8 @@ // base are identical, and the offset of a lower memory access + // the width doesn't overlap the offset of a higher memory access, // then the memory accesses are different. - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; @@ -1978,61 +1978,89 @@ return true; } -bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore()) +bool AArch64InstrInfo::getMemBaseRegWithOffsetWidth( + const MachineInstr &MI, Register &BaseReg, int64_t &Offset, + unsigned &Width, const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + if (!getMemBaseOpWithOffsetWidth(MI, BaseOp, Offset, Width, TRI) + || !BaseOp->isReg() || BaseOp->getSubReg() != 0) return false; + BaseReg = BaseOp->getReg(); + return true; +} +bool AArch64InstrInfo::getMemBaseOpWithOffset( + const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { unsigned Width; - return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); + return getMemBaseOpWithOffsetWidth(MI, BaseOp, Offset, Width, TRI); } -bool AArch64InstrInfo::getMemOperandWithOffsetWidth( - const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, +bool AArch64InstrInfo::getMemBaseOpWithOffsetWidth( + const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { - assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); + if (!TargetInstrInfo::getMemBaseOpWithOffset(MI, BaseOp, Offset, TRI)) + return false; + unsigned Scale = 0; + int64_t Dummy1, Dummy2; + if (!getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2)) + return false; + + // Compute the byte offset. Offset is calculated as the immediate operand + // multiplied by the scaling factor. Unscaled instructions have scaling factor + // set to 1. 
+ Offset *= Scale; + return true; +} + +void AArch64InstrInfo::getMemAddressOperands( + const MachineInstr &LdSt, SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const { + unsigned Width; + getMemAddressOperandsWidth(LdSt, AddrOps, Width, TRI); +} + +void AArch64InstrInfo::getMemAddressOperandsWidth( + const MachineInstr &LdSt, SmallVectorImpl &AddrOps, + unsigned &Width, const TargetRegisterInfo *TRI) const { + if (!LdSt.mayLoadOrStore()) + return; + // Handle only loads/stores with base register followed by immediate offset. if (LdSt.getNumExplicitOperands() == 3) { // Non-paired instruction (e.g., ldr x1, [x0, #8]). if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || !LdSt.getOperand(2).isImm()) - return false; + return; } else if (LdSt.getNumExplicitOperands() == 4) { // Paired instruction (e.g., ldp x1, x2, [x0, #8]). if (!LdSt.getOperand(1).isReg() || (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) || !LdSt.getOperand(3).isImm()) - return false; + return; } else - return false; + return; - // Get the scaling factor for the instruction and set the width for the - // instruction. - unsigned Scale = 0; + // Get the width for the instruction. + unsigned Scale; int64_t Dummy1, Dummy2; // If this returns false, then it's an instruction we don't want to handle. if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) - return false; + return; - // Compute the offset. Offset is calculated as the immediate operand - // multiplied by the scaling factor. Unscaled instructions have scaling factor - // set to 1. + const MachineOperand *BaseOp, *OffsetOp; if (LdSt.getNumExplicitOperands() == 3) { BaseOp = &LdSt.getOperand(1); - Offset = LdSt.getOperand(2).getImm() * Scale; + OffsetOp = &LdSt.getOperand(2); } else { assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); BaseOp = &LdSt.getOperand(2); - Offset = LdSt.getOperand(3).getImm() * Scale; + OffsetOp = &LdSt.getOperand(3); } - if (!BaseOp->isReg() && !BaseOp->isFI()) - return false; - - return true; + AddrOps.push_back(BaseOp); + AddrOps.push_back(OffsetOp); } MachineOperand & @@ -2355,10 +2383,13 @@ /// Detect opportunities for ldp/stp formation. /// -/// Only called for LdSt for which getMemOperandWithOffset returns true. -bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, +/// Only called for LdSt for which getMemBaseRegWithOffset returns true. +bool AArch64InstrInfo::shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const { + assert(AddrOps1.size() == 2 && AddrOps2.size() == 2); + const MachineOperand &BaseOp1 = *AddrOps1[0]; + const MachineOperand &BaseOp2 = *AddrOps2[0]; const MachineInstr &FirstLdSt = *BaseOp1.getParent(); const MachineInstr &SecondLdSt = *BaseOp2.getParent(); if (BaseOp1.getType() != BaseOp2.getType()) @@ -5802,13 +5833,13 @@ // At this point, we have a stack instruction that we might need to // fix up. We'll handle it if it's a load or store. if (MI.mayLoadOrStore()) { - const MachineOperand *Base; // Filled with the base operand of MI. + Register Base; // Filled with the base register of MI. int64_t Offset; // Filled with the offset of MI. // Does it allow us to offset the base operand and is the base the // register SP? 
- if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() || - Base->getReg() != AArch64::SP) + if (!getMemBaseRegWithOffset(MI, Base, Offset, &TRI) || + Base != AArch64::SP) return false; // Find the minimum/maximum offset for this instruction and check @@ -6185,14 +6216,14 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { for (MachineInstr &MI : MBB) { - const MachineOperand *Base; + Register Base; unsigned Width; int64_t Offset; // Is this a load or store with an immediate offset with SP as the base? if (!MI.mayLoadOrStore() || - !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) || - (Base->isReg() && Base->getReg() != AArch64::SP)) + !getMemBaseRegWithOffsetWidth(MI, Base, Offset, Width, &RI) || + Base != AArch64::SP) continue; // It is, so we have to fix it up. diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp --- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -147,11 +147,9 @@ for (auto &MI : MBB) { if (!isNarrowFPStore(MI)) continue; - const MachineOperand *BaseOp; + Register BaseReg; int64_t Offset; - if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) && - BaseOp->isReg()) { - Register BaseReg = BaseOp->getReg(); + if (TII->getMemBaseRegWithOffset(MI, BaseReg, Offset, TRI)) { if (PrevBaseReg == BaseReg) { // If this block can take STPs, skip ahead to the next block. if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -179,13 +179,17 @@ int64_t &Offset1, int64_t &Offset2) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const final; + bool getMemBaseOpWithOffset(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const final; - bool shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, + void getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const final; + + bool shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const override; bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -256,144 +256,127 @@ } } -bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore()) +bool SIInstrInfo::getMemBaseOpWithOffset(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + if (!TargetInstrInfo::getMemBaseOpWithOffset(LdSt, BaseOp, Offset, TRI)) return false; - unsigned Opc = LdSt.getOpcode(); + // If getMemAddressOperands returned an offset in words, convert it to bytes. 
+ if (isDS(LdSt) && !getNamedOperand(LdSt, AMDGPU::OpName::offset)) { + unsigned Opc = LdSt.getOpcode(); + + int64_t EltSize; + if (LdSt.mayLoad()) + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; + else { + assert(LdSt.mayStore()); + int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; + } + + if (isStride64(Opc)) + EltSize *= 64; + + Offset *= EltSize; + } + return true; +} + +void SIInstrInfo::getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + const MachineOperand *OffsetOp; + + if (!LdSt.mayLoadOrStore()) + return; if (isDS(LdSt)) { - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - if (OffsetImm) { + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + if (OffsetOp) { // Normal, single offset LDS instruction. BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to // report that here? - if (!BaseOp || !BaseOp->isReg()) - return false; - - Offset = OffsetImm->getImm(); - - return true; + if (!BaseOp) + return; + AddrOps.push_back(BaseOp); + AddrOps.push_back(OffsetOp); + return; } // The 2 offset instructions use offset0 and offset1 instead. We can treat // these as a load with a single offset if the 2 offsets are consecutive. We // will use this for some partially aligned loads. - const MachineOperand *Offset0Imm = + const MachineOperand *Offset0Op = getNamedOperand(LdSt, AMDGPU::OpName::offset0); - const MachineOperand *Offset1Imm = + const MachineOperand *Offset1Op = getNamedOperand(LdSt, AMDGPU::OpName::offset1); - uint8_t Offset0 = Offset0Imm->getImm(); - uint8_t Offset1 = Offset1Imm->getImm(); - - if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { - // Each of these offsets is in element sized units, so we need to convert - // to bytes of the individual reads. - - unsigned EltSize; - if (LdSt.mayLoad()) - EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; - else { - assert(LdSt.mayStore()); - int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); - EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; - } - - if (isStride64(Opc)) - EltSize *= 64; + unsigned Offset0 = (uint8_t)Offset0Op->getImm(); + unsigned Offset1 = (uint8_t)Offset1Op->getImm(); + if (Offset0 + 1 == Offset1) { BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); - if (!BaseOp->isReg()) - return false; - - Offset = EltSize * Offset0; - - return true; + AddrOps.push_back(BaseOp); + AddrOps.push_back(Offset0Op); } - - return false; - } - - if (isMUBUF(LdSt) || isMTBUF(LdSt)) { - const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); - if (SOffset && SOffset->isReg()) { + } else if (isMUBUF(LdSt) || isMTBUF(LdSt)) { + const MachineOperand *SOffsetOp = + getNamedOperand(LdSt, AMDGPU::OpName::soffset); + if (SOffsetOp && SOffsetOp->isReg()) { // We can only handle this if it's a stack access, as any other resource // would require reporting multiple base registers. 
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (AddrReg && !AddrReg->isFI()) - return false; + return; const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); const SIMachineFunctionInfo *MFI = LdSt.getParent()->getParent()->getInfo(); if (RSrc->getReg() != MFI->getScratchRSrcReg()) - return false; + return; - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseOp = SOffset; - Offset = OffsetImm->getImm(); - return true; + AddrOps.push_back(SOffsetOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + AddrOps.push_back(OffsetOp); + return; } - const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); - if (!AddrReg) - return false; - - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseOp = AddrReg; - Offset = OffsetImm->getImm(); - if (SOffset) // soffset can be an inline immediate. - Offset += SOffset->getImm(); - - if (!BaseOp->isReg()) - return false; - - return true; - } - - if (isSMRD(LdSt)) { - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - if (!OffsetImm) - return false; - - const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase); - BaseOp = SBaseReg; - Offset = OffsetImm->getImm(); - if (!BaseOp->isReg()) - return false; - - return true; - } - - if (isFLAT(LdSt)) { - const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); - if (VAddr) { - // Can't analyze 2 offsets. + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (!BaseOp) + return; + AddrOps.push_back(BaseOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + AddrOps.push_back(OffsetOp); + if (SOffsetOp) // soffset can be an inline immediate. + AddrOps.push_back(SOffsetOp); + } else if (isSMRD(LdSt)) { + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase); + if (!BaseOp) // e.g. S_MEMTIME + return; + AddrOps.push_back(BaseOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + if (OffsetOp) + AddrOps.push_back(OffsetOp); + } else if (isFLAT(LdSt)) { + // Instructions have either vaddr or saddr or both. + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (BaseOp) { if (getNamedOperand(LdSt, AMDGPU::OpName::saddr)) - return false; - - BaseOp = VAddr; - } else { - // scratch instructions have either vaddr or saddr. - BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); + // For compatibility with the old code, don't handle this case yet. + // FIXME remove this restriction! 
+ return; + AddrOps.push_back(BaseOp); } - - Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm(); - if (!BaseOp->isReg()) - return false; - return true; + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr); + if (BaseOp) + AddrOps.push_back(BaseOp); + OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset); + AddrOps.push_back(OffsetOp); } - - return false; } static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, @@ -431,9 +414,11 @@ return Base1 == Base2; } -bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, - const MachineOperand &BaseOp2, +bool SIInstrInfo::shouldClusterMemOps(ArrayRef AddrOps1, + ArrayRef AddrOps2, unsigned NumLoads) const { + const MachineOperand &BaseOp1 = *AddrOps1[0]; + const MachineOperand &BaseOp2 = *AddrOps2[0]; const MachineInstr &FirstLdSt = *BaseOp1.getParent(); const MachineInstr &SecondLdSt = *BaseOp2.getParent(); @@ -2528,8 +2513,8 @@ const MachineOperand *BaseOp0, *BaseOp1; int64_t Offset0, Offset1; - if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) && - getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) { + if (getMemBaseOpWithOffset(MIa, BaseOp0, Offset0, &RI) && + getMemBaseOpWithOffset(MIb, BaseOp1, Offset1, &RI)) { if (!BaseOp0->isIdenticalTo(*BaseOp1)) return false; diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1952,8 +1952,8 @@ int64_t OffLatReg; if (SITII->isLowLatencyInstruction(*SU->getInstr())) { IsLowLatencySU[i] = 1; - if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, - TRI)) + if (SITII->getMemBaseOpWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, + TRI)) LowLatencyOffset[i] = OffLatReg; } else if (SITII->isHighLatencyInstruction(*SU->getInstr())) IsHighLatencySU[i] = 1; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -204,10 +204,9 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; /// Get the base register and byte offset of a load/store instr. - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + void getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI = nullptr) const override; /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. @@ -427,8 +426,8 @@ bool predOpcodeHasNot(ArrayRef Cond) const; unsigned getAddrMode(const MachineInstr &MI) const; - MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, - unsigned &AccessSize) const; + bool getBaseAndOffset(const MachineInstr &MI, Register &BaseReg, + int64_t &Offset) const; SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; unsigned getCExtOpNum(const MachineInstr &MI) const; HexagonII::CompoundGroup diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2939,15 +2939,6 @@ return false; } -/// Get the base register and byte offset of a load/store instr. 
-bool HexagonInstrInfo::getMemOperandWithOffset( - const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, - const TargetRegisterInfo *TRI) const { - unsigned AccessSize = 0; - BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize); - return BaseOp != nullptr && BaseOp->isReg(); -} - /// Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { @@ -3150,40 +3141,22 @@ return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } -// Returns the base register in a memory access (load/store). The offset is -// returned in Offset and the access size is returned in AccessSize. -// If the base operand has a subregister or the offset field does not contain -// an immediate value, return nullptr. -MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, - int64_t &Offset, - unsigned &AccessSize) const { - // Return if it is not a base+offset type instruction or a MemOp. - if (getAddrMode(MI) != HexagonII::BaseImmOffset && - getAddrMode(MI) != HexagonII::BaseLongOffset && - !isMemOp(MI) && !isPostIncrement(MI)) - return nullptr; - - AccessSize = getMemAccessSize(MI); - +void HexagonInstrInfo::getMemAddressOperands(const MachineInstr &MI, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const { unsigned BasePos = 0, OffsetPos = 0; if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos)) - return nullptr; + return; + + const MachineOperand *BaseOp = &MI.getOperand(BasePos); + AddrOps.push_back(BaseOp); // Post increment updates its EA after the mem access, // so we need to treat its offset as zero. - if (isPostIncrement(MI)) { - Offset = 0; - } else { - const MachineOperand &OffsetOp = MI.getOperand(OffsetPos); - if (!OffsetOp.isImm()) - return nullptr; - Offset = OffsetOp.getImm(); + if (!isPostIncrement(MI)) { + const MachineOperand *OffsetOp = &MI.getOperand(OffsetPos); + AddrOps.push_back(OffsetOp); } - - const MachineOperand &BaseOp = MI.getOperand(BasePos); - if (BaseOp.getSubReg() != 0) - return nullptr; - return &const_cast(BaseOp); } /// Return the position of the base and offset operands for this instruction. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -274,11 +274,13 @@ if (!L0.mayLoad() || L0.mayStore() || HII.getAddrMode(L0) != HexagonII::BaseImmOffset) continue; + Register BaseReg0; int64_t Offset0; - unsigned Size0; - MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0); + if (!HII.getMemBaseRegWithOffset(L0, BaseReg0, Offset0, nullptr)) + continue; // Is the access size is longer than the L1 cache line, skip the check. - if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32) + unsigned Size0 = HII.getMemAccessSize(L0); + if (Size0 >= 32) continue; // Scan only up to 32 instructions ahead (to avoid n^2 complexity). 
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) { @@ -287,11 +289,13 @@ if (!L1.mayLoad() || L1.mayStore() || HII.getAddrMode(L1) != HexagonII::BaseImmOffset) continue; + Register BaseReg1; int64_t Offset1; - unsigned Size1; - MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1); - if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 || - BaseOp0->getReg() != BaseOp1->getReg()) + if (!HII.getMemBaseRegWithOffset(L1, BaseReg1, Offset1, nullptr) || + BaseReg0 != BaseReg1) + continue; + unsigned Size1 = HII.getMemAccessSize(L1); + if (Size1 >= 32) continue; // Check bits 3 and 4 of the offset: if they differ, a bank conflict // is unlikely. diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -67,15 +67,15 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; - - bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffset(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, + const TargetRegisterInfo *TRI) const override; + + bool getMemBaseOpWithOffsetWidth(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -103,8 +103,8 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); @@ -754,7 +754,7 @@ return 0; } -bool LanaiInstrInfo::getMemOperandWithOffsetWidth( +bool LanaiInstrInfo::getMemBaseOpWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, unsigned &Width, const TargetRegisterInfo * /*TRI*/) const { // Handle only loads/stores with base register followed by immediate offset @@ -795,10 +795,9 @@ return true; } -bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const { +bool LanaiInstrInfo::getMemBaseOpWithOffset( + const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, + const TargetRegisterInfo *TRI) const { switch (LdSt.getOpcode()) { default: return false; @@ -812,6 +811,6 @@ case Lanai::LDBs_RI: case Lanai::LDBz_RI: unsigned Width; - return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); + return getMemBaseOpWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI); } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- 
a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -361,10 +361,10 @@ /// Return true if get the base operand, byte offset of an instruction and /// the memory width. Width is the size of memory that is being /// loaded/stored (e.g. 1, 2, 4, 8). - bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffsetWidth(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; /// Return true if two MIs access different memory addresses and false /// otherwise diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -4262,9 +4262,12 @@ // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. -bool PPCInstrInfo::getMemOperandWithOffsetWidth( - const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, - unsigned &Width, const TargetRegisterInfo *TRI) const { +bool PPCInstrInfo::getMemBaseOpWithOffsetWidth( + const MachineInstr &LdSt, + const MachineOperand *&BaseReg, + int64_t &Offset, + unsigned &Width, + const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; @@ -4301,8 +4304,8 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -86,10 +86,10 @@ bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; - bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, - const TargetRegisterInfo *TRI) const; + bool getMemBaseOpWithOffsetWidth(const MachineInstr &LdSt, + const MachineOperand *&BaseOp, + int64_t &Offset, unsigned &Width, + const TargetRegisterInfo *TRI) const; bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -559,7 +559,7 @@ // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. 
-bool RISCVInstrInfo::getMemOperandWithOffsetWidth( +bool RISCVInstrInfo::getMemBaseOpWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) @@ -600,8 +600,8 @@ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; unsigned int WidthA = 0, WidthB = 0; - if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && - getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { + if (getMemBaseOpWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && + getMemBaseOpWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -291,10 +291,9 @@ SmallVectorImpl &Cond, bool AllowModify) const override; - bool getMemOperandWithOffset(const MachineInstr &LdSt, - const MachineOperand *&BaseOp, - int64_t &Offset, - const TargetRegisterInfo *TRI) const override; + void getMemAddressOperands(const MachineInstr &LdSt, + SmallVectorImpl &AddrOps, + const TargetRegisterInfo *TRI) const override; bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify = false) const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3189,39 +3189,36 @@ } } -bool X86InstrInfo::getMemOperandWithOffset( - const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset, +void X86InstrInfo::getMemAddressOperands( + const MachineInstr &MemOp, SmallVectorImpl &AddrOps, const TargetRegisterInfo *TRI) const { const MCInstrDesc &Desc = MemOp.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); if (MemRefBegin < 0) - return false; + return; MemRefBegin += X86II::getOperandBias(Desc); - BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); + const MachineOperand *BaseOp = + &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg); if (!BaseOp->isReg()) // Can be an MO_FrameIndex - return false; + return; if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1) - return false; + return; if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() != X86::NoRegister) - return false; + return; const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp); // Displacement can be symbolic if (!DispMO.isImm()) - return false; - - Offset = DispMO.getImm(); - - if (!BaseOp->isReg()) - return false; + return; - return true; + AddrOps.push_back(BaseOp); + AddrOps.push_back(&DispMO); } static unsigned getStoreRegOpcode(unsigned SrcReg, diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll --- a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll @@ -187,8 +187,8 @@ ; CI: s_mov_b32 m0 ; GFX9-NOT: m0 -; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; GCN: s_endpgm define amdgpu_kernel void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 
@llvm.amdgcn.workitem.id.x() #1