// LLVM API Documentation
00001 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains the Base ARM implementation of the TargetInstrInfo class. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "ARMBaseInstrInfo.h" 00015 #include "ARM.h" 00016 #include "ARMBaseRegisterInfo.h" 00017 #include "ARMConstantPoolValue.h" 00018 #include "ARMHazardRecognizer.h" 00019 #include "ARMMachineFunctionInfo.h" 00020 #include "MCTargetDesc/ARMAddressingModes.h" 00021 #include "llvm/ADT/STLExtras.h" 00022 #include "llvm/CodeGen/LiveVariables.h" 00023 #include "llvm/CodeGen/MachineConstantPool.h" 00024 #include "llvm/CodeGen/MachineFrameInfo.h" 00025 #include "llvm/CodeGen/MachineInstrBuilder.h" 00026 #include "llvm/CodeGen/MachineJumpTableInfo.h" 00027 #include "llvm/CodeGen/MachineMemOperand.h" 00028 #include "llvm/CodeGen/MachineRegisterInfo.h" 00029 #include "llvm/CodeGen/SelectionDAGNodes.h" 00030 #include "llvm/IR/Constants.h" 00031 #include "llvm/IR/Function.h" 00032 #include "llvm/IR/GlobalValue.h" 00033 #include "llvm/MC/MCAsmInfo.h" 00034 #include "llvm/Support/BranchProbability.h" 00035 #include "llvm/Support/CommandLine.h" 00036 #include "llvm/Support/Debug.h" 00037 #include "llvm/Support/ErrorHandling.h" 00038 00039 #define GET_INSTRINFO_CTOR 00040 #include "ARMGenInstrInfo.inc" 00041 00042 using namespace llvm; 00043 00044 static cl::opt<bool> 00045 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, 00046 cl::desc("Enable ARM 2-addr to 3-addr conv")); 00047 00048 static cl::opt<bool> 00049 WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), 00050 cl::desc("Widen ARM vmovs to vmovd when 
possible")); 00051 00052 static cl::opt<unsigned> 00053 SwiftPartialUpdateClearance("swift-partial-update-clearance", 00054 cl::Hidden, cl::init(12), 00055 cl::desc("Clearance before partial register updates")); 00056 00057 /// ARM_MLxEntry - Record information about MLA / MLS instructions. 00058 struct ARM_MLxEntry { 00059 uint16_t MLxOpc; // MLA / MLS opcode 00060 uint16_t MulOpc; // Expanded multiplication opcode 00061 uint16_t AddSubOpc; // Expanded add / sub opcode 00062 bool NegAcc; // True if the acc is negated before the add / sub. 00063 bool HasLane; // True if instruction has an extra "lane" operand. 00064 }; 00065 00066 static const ARM_MLxEntry ARM_MLxTable[] = { 00067 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 00068 // fp scalar ops 00069 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 00070 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 00071 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 00072 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 00073 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 00074 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 00075 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 00076 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 00077 00078 // fp SIMD ops 00079 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 00080 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 00081 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 00082 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 00083 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 00084 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 00085 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 00086 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 00087 }; 00088 00089 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 00090 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 00091 Subtarget(STI) { 00092 for (unsigned i = 0, e = 
array_lengthof(ARM_MLxTable); i != e; ++i) { 00093 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 00094 assert(false && "Duplicated entries?"); 00095 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 00096 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 00097 } 00098 } 00099 00100 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl 00101 // currently defaults to no prepass hazard recognizer. 00102 ScheduleHazardRecognizer *ARMBaseInstrInfo:: 00103 CreateTargetHazardRecognizer(const TargetMachine *TM, 00104 const ScheduleDAG *DAG) const { 00105 if (usePreRAHazardRecognizer()) { 00106 const InstrItineraryData *II = TM->getInstrItineraryData(); 00107 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 00108 } 00109 return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); 00110 } 00111 00112 ScheduleHazardRecognizer *ARMBaseInstrInfo:: 00113 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 00114 const ScheduleDAG *DAG) const { 00115 if (Subtarget.isThumb2() || Subtarget.hasVFP2()) 00116 return (ScheduleHazardRecognizer *) 00117 new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); 00118 return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); 00119 } 00120 00121 MachineInstr * 00122 ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 00123 MachineBasicBlock::iterator &MBBI, 00124 LiveVariables *LV) const { 00125 // FIXME: Thumb2 support. 
00126 00127 if (!EnableARM3Addr) 00128 return NULL; 00129 00130 MachineInstr *MI = MBBI; 00131 MachineFunction &MF = *MI->getParent()->getParent(); 00132 uint64_t TSFlags = MI->getDesc().TSFlags; 00133 bool isPre = false; 00134 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 00135 default: return NULL; 00136 case ARMII::IndexModePre: 00137 isPre = true; 00138 break; 00139 case ARMII::IndexModePost: 00140 break; 00141 } 00142 00143 // Try splitting an indexed load/store to an un-indexed one plus an add/sub 00144 // operation. 00145 unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); 00146 if (MemOpc == 0) 00147 return NULL; 00148 00149 MachineInstr *UpdateMI = NULL; 00150 MachineInstr *MemMI = NULL; 00151 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 00152 const MCInstrDesc &MCID = MI->getDesc(); 00153 unsigned NumOps = MCID.getNumOperands(); 00154 bool isLoad = !MI->mayStore(); 00155 const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); 00156 const MachineOperand &Base = MI->getOperand(2); 00157 const MachineOperand &Offset = MI->getOperand(NumOps-3); 00158 unsigned WBReg = WB.getReg(); 00159 unsigned BaseReg = Base.getReg(); 00160 unsigned OffReg = Offset.getReg(); 00161 unsigned OffImm = MI->getOperand(NumOps-2).getImm(); 00162 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); 00163 switch (AddrMode) { 00164 default: llvm_unreachable("Unknown indexed op!"); 00165 case ARMII::AddrMode2: { 00166 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 00167 unsigned Amt = ARM_AM::getAM2Offset(OffImm); 00168 if (OffReg == 0) { 00169 if (ARM_AM::getSOImmVal(Amt) == -1) 00170 // Can't encode it in a so_imm operand. This transformation will 00171 // add more than 1 instruction. Abandon! 00172 return NULL; 00173 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 00174 get(isSub ? 
ARM::SUBri : ARM::ADDri), WBReg) 00175 .addReg(BaseReg).addImm(Amt) 00176 .addImm(Pred).addReg(0).addReg(0); 00177 } else if (Amt != 0) { 00178 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 00179 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 00180 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 00181 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 00182 .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) 00183 .addImm(Pred).addReg(0).addReg(0); 00184 } else 00185 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 00186 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 00187 .addReg(BaseReg).addReg(OffReg) 00188 .addImm(Pred).addReg(0).addReg(0); 00189 break; 00190 } 00191 case ARMII::AddrMode3 : { 00192 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 00193 unsigned Amt = ARM_AM::getAM3Offset(OffImm); 00194 if (OffReg == 0) 00195 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 00196 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 00197 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 00198 .addReg(BaseReg).addImm(Amt) 00199 .addImm(Pred).addReg(0).addReg(0); 00200 else 00201 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 00202 get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) 00203 .addReg(BaseReg).addReg(OffReg) 00204 .addImm(Pred).addReg(0).addReg(0); 00205 break; 00206 } 00207 } 00208 00209 std::vector<MachineInstr*> NewMIs; 00210 if (isPre) { 00211 if (isLoad) 00212 MemMI = BuildMI(MF, MI->getDebugLoc(), 00213 get(MemOpc), MI->getOperand(0).getReg()) 00214 .addReg(WBReg).addImm(0).addImm(Pred); 00215 else 00216 MemMI = BuildMI(MF, MI->getDebugLoc(), 00217 get(MemOpc)).addReg(MI->getOperand(1).getReg()) 00218 .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); 00219 NewMIs.push_back(MemMI); 00220 NewMIs.push_back(UpdateMI); 00221 } else { 00222 if (isLoad) 00223 MemMI = BuildMI(MF, MI->getDebugLoc(), 00224 get(MemOpc), MI->getOperand(0).getReg()) 00225 .addReg(BaseReg).addImm(0).addImm(Pred); 00226 else 00227 MemMI = BuildMI(MF, MI->getDebugLoc(), 00228 get(MemOpc)).addReg(MI->getOperand(1).getReg()) 00229 .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); 00230 if (WB.isDead()) 00231 UpdateMI->getOperand(0).setIsDead(); 00232 NewMIs.push_back(UpdateMI); 00233 NewMIs.push_back(MemMI); 00234 } 00235 00236 // Transfer LiveVariables states, kill / dead info. 00237 if (LV) { 00238 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00239 MachineOperand &MO = MI->getOperand(i); 00240 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 00241 unsigned Reg = MO.getReg(); 00242 00243 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 00244 if (MO.isDef()) { 00245 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; 00246 if (MO.isDead()) 00247 LV->addVirtualRegisterDead(Reg, NewMI); 00248 } 00249 if (MO.isUse() && MO.isKill()) { 00250 for (unsigned j = 0; j < 2; ++j) { 00251 // Look at the two new MI's in reverse order. 
00252 MachineInstr *NewMI = NewMIs[j]; 00253 if (!NewMI->readsRegister(Reg)) 00254 continue; 00255 LV->addVirtualRegisterKilled(Reg, NewMI); 00256 if (VI.removeKill(MI)) 00257 VI.Kills.push_back(NewMI); 00258 break; 00259 } 00260 } 00261 } 00262 } 00263 } 00264 00265 MFI->insert(MBBI, NewMIs[1]); 00266 MFI->insert(MBBI, NewMIs[0]); 00267 return NewMIs[0]; 00268 } 00269 00270 // Branch analysis. 00271 bool 00272 ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, 00273 MachineBasicBlock *&FBB, 00274 SmallVectorImpl<MachineOperand> &Cond, 00275 bool AllowModify) const { 00276 // If the block has no terminators, it just falls into the block after it. 00277 MachineBasicBlock::iterator I = MBB.end(); 00278 if (I == MBB.begin()) 00279 return false; 00280 --I; 00281 while (I->isDebugValue()) { 00282 if (I == MBB.begin()) 00283 return false; 00284 --I; 00285 } 00286 00287 // Get the last instruction in the block. 00288 MachineInstr *LastInst = I; 00289 unsigned LastOpc = LastInst->getOpcode(); 00290 00291 // Check if it's an indirect branch first, this should return 'unanalyzable' 00292 // even if it's predicated. 00293 if (isIndirectBranchOpcode(LastOpc)) 00294 return true; 00295 00296 if (!isUnpredicatedTerminator(I)) 00297 return false; 00298 00299 // If there is only one terminator instruction, process it. 00300 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 00301 if (isUncondBranchOpcode(LastOpc)) { 00302 TBB = LastInst->getOperand(0).getMBB(); 00303 return false; 00304 } 00305 if (isCondBranchOpcode(LastOpc)) { 00306 // Block ends with fall-through condbranch. 00307 TBB = LastInst->getOperand(0).getMBB(); 00308 Cond.push_back(LastInst->getOperand(1)); 00309 Cond.push_back(LastInst->getOperand(2)); 00310 return false; 00311 } 00312 return true; // Can't handle indirect branch. 00313 } 00314 00315 // Get the instruction before it if it is a terminator. 
00316 MachineInstr *SecondLastInst = I; 00317 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 00318 00319 // If AllowModify is true and the block ends with two or more unconditional 00320 // branches, delete all but the first unconditional branch. 00321 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 00322 while (isUncondBranchOpcode(SecondLastOpc)) { 00323 LastInst->eraseFromParent(); 00324 LastInst = SecondLastInst; 00325 LastOpc = LastInst->getOpcode(); 00326 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 00327 // Return now the only terminator is an unconditional branch. 00328 TBB = LastInst->getOperand(0).getMBB(); 00329 return false; 00330 } else { 00331 SecondLastInst = I; 00332 SecondLastOpc = SecondLastInst->getOpcode(); 00333 } 00334 } 00335 } 00336 00337 // If there are three terminators, we don't know what sort of block this is. 00338 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) 00339 return true; 00340 00341 // If the block ends with a B and a Bcc, handle it. 00342 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 00343 TBB = SecondLastInst->getOperand(0).getMBB(); 00344 Cond.push_back(SecondLastInst->getOperand(1)); 00345 Cond.push_back(SecondLastInst->getOperand(2)); 00346 FBB = LastInst->getOperand(0).getMBB(); 00347 return false; 00348 } 00349 00350 // If the block ends with two unconditional branches, handle it. The second 00351 // one is not executed, so remove it. 00352 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 00353 TBB = SecondLastInst->getOperand(0).getMBB(); 00354 I = LastInst; 00355 if (AllowModify) 00356 I->eraseFromParent(); 00357 return false; 00358 } 00359 00360 // ...likewise if it ends with a branch table followed by an unconditional 00361 // branch. The branch folder can create these, and we must get rid of them for 00362 // correctness of Thumb constant islands. 
00363 if ((isJumpTableBranchOpcode(SecondLastOpc) || 00364 isIndirectBranchOpcode(SecondLastOpc)) && 00365 isUncondBranchOpcode(LastOpc)) { 00366 I = LastInst; 00367 if (AllowModify) 00368 I->eraseFromParent(); 00369 return true; 00370 } 00371 00372 // Otherwise, can't handle this. 00373 return true; 00374 } 00375 00376 00377 unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 00378 MachineBasicBlock::iterator I = MBB.end(); 00379 if (I == MBB.begin()) return 0; 00380 --I; 00381 while (I->isDebugValue()) { 00382 if (I == MBB.begin()) 00383 return 0; 00384 --I; 00385 } 00386 if (!isUncondBranchOpcode(I->getOpcode()) && 00387 !isCondBranchOpcode(I->getOpcode())) 00388 return 0; 00389 00390 // Remove the branch. 00391 I->eraseFromParent(); 00392 00393 I = MBB.end(); 00394 00395 if (I == MBB.begin()) return 1; 00396 --I; 00397 if (!isCondBranchOpcode(I->getOpcode())) 00398 return 1; 00399 00400 // Remove the branch. 00401 I->eraseFromParent(); 00402 return 2; 00403 } 00404 00405 unsigned 00406 ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 00407 MachineBasicBlock *FBB, 00408 const SmallVectorImpl<MachineOperand> &Cond, 00409 DebugLoc DL) const { 00410 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 00411 int BOpc = !AFI->isThumbFunction() 00412 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 00413 int BccOpc = !AFI->isThumbFunction() 00414 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 00415 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 00416 00417 // Shouldn't be a fall through. 00418 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 00419 assert((Cond.size() == 2 || Cond.size() == 0) && 00420 "ARM branch conditions have two components!"); 00421 00422 if (FBB == 0) { 00423 if (Cond.empty()) { // Unconditional branch? 
00424 if (isThumb) 00425 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); 00426 else 00427 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 00428 } else 00429 BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 00430 .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 00431 return 1; 00432 } 00433 00434 // Two-way conditional branch. 00435 BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 00436 .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 00437 if (isThumb) 00438 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); 00439 else 00440 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 00441 return 2; 00442 } 00443 00444 bool ARMBaseInstrInfo:: 00445 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 00446 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 00447 Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 00448 return false; 00449 } 00450 00451 bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { 00452 if (MI->isBundle()) { 00453 MachineBasicBlock::const_instr_iterator I = MI; 00454 MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 00455 while (++I != E && I->isInsideBundle()) { 00456 int PIdx = I->findFirstPredOperandIdx(); 00457 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) 00458 return true; 00459 } 00460 return false; 00461 } 00462 00463 int PIdx = MI->findFirstPredOperandIdx(); 00464 return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; 00465 } 00466 00467 bool ARMBaseInstrInfo:: 00468 PredicateInstruction(MachineInstr *MI, 00469 const SmallVectorImpl<MachineOperand> &Pred) const { 00470 unsigned Opc = MI->getOpcode(); 00471 if (isUncondBranchOpcode(Opc)) { 00472 MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); 00473 MachineInstrBuilder(*MI->getParent()->getParent(), MI) 00474 .addImm(Pred[0].getImm()) 00475 .addReg(Pred[1].getReg()); 00476 return true; 00477 } 00478 00479 int PIdx = MI->findFirstPredOperandIdx(); 00480 if (PIdx != -1) { 00481 
MachineOperand &PMO = MI->getOperand(PIdx); 00482 PMO.setImm(Pred[0].getImm()); 00483 MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); 00484 return true; 00485 } 00486 return false; 00487 } 00488 00489 bool ARMBaseInstrInfo:: 00490 SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 00491 const SmallVectorImpl<MachineOperand> &Pred2) const { 00492 if (Pred1.size() > 2 || Pred2.size() > 2) 00493 return false; 00494 00495 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 00496 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 00497 if (CC1 == CC2) 00498 return true; 00499 00500 switch (CC1) { 00501 default: 00502 return false; 00503 case ARMCC::AL: 00504 return true; 00505 case ARMCC::HS: 00506 return CC2 == ARMCC::HI; 00507 case ARMCC::LS: 00508 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 00509 case ARMCC::GE: 00510 return CC2 == ARMCC::GT; 00511 case ARMCC::LE: 00512 return CC2 == ARMCC::LT; 00513 } 00514 } 00515 00516 bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, 00517 std::vector<MachineOperand> &Pred) const { 00518 bool Found = false; 00519 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00520 const MachineOperand &MO = MI->getOperand(i); 00521 if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || 00522 (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { 00523 Pred.push_back(MO); 00524 Found = true; 00525 } 00526 } 00527 00528 return Found; 00529 } 00530 00531 /// isPredicable - Return true if the specified instruction can be predicated. 00532 /// By default, this returns true for every instruction with a 00533 /// PredicateOperand. 
00534 bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { 00535 if (!MI->isPredicable()) 00536 return false; 00537 00538 if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { 00539 ARMFunctionInfo *AFI = 00540 MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); 00541 return AFI->isThumb2Function(); 00542 } 00543 return true; 00544 } 00545 00546 /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 00547 LLVM_ATTRIBUTE_NOINLINE 00548 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 00549 unsigned JTI); 00550 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 00551 unsigned JTI) { 00552 assert(JTI < JT.size()); 00553 return JT[JTI].MBBs.size(); 00554 } 00555 00556 /// GetInstSize - Return the size of the specified MachineInstr. 00557 /// 00558 unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { 00559 const MachineBasicBlock &MBB = *MI->getParent(); 00560 const MachineFunction *MF = MBB.getParent(); 00561 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 00562 00563 const MCInstrDesc &MCID = MI->getDesc(); 00564 if (MCID.getSize()) 00565 return MCID.getSize(); 00566 00567 // If this machine instr is an inline asm, measure it. 
00568 if (MI->getOpcode() == ARM::INLINEASM) 00569 return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); 00570 if (MI->isLabel()) 00571 return 0; 00572 unsigned Opc = MI->getOpcode(); 00573 switch (Opc) { 00574 case TargetOpcode::IMPLICIT_DEF: 00575 case TargetOpcode::KILL: 00576 case TargetOpcode::PROLOG_LABEL: 00577 case TargetOpcode::EH_LABEL: 00578 case TargetOpcode::DBG_VALUE: 00579 return 0; 00580 case TargetOpcode::BUNDLE: 00581 return getInstBundleLength(MI); 00582 case ARM::MOVi16_ga_pcrel: 00583 case ARM::MOVTi16_ga_pcrel: 00584 case ARM::t2MOVi16_ga_pcrel: 00585 case ARM::t2MOVTi16_ga_pcrel: 00586 return 4; 00587 case ARM::MOVi32imm: 00588 case ARM::t2MOVi32imm: 00589 return 8; 00590 case ARM::CONSTPOOL_ENTRY: 00591 // If this machine instr is a constant pool entry, its size is recorded as 00592 // operand #2. 00593 return MI->getOperand(2).getImm(); 00594 case ARM::Int_eh_sjlj_longjmp: 00595 return 16; 00596 case ARM::tInt_eh_sjlj_longjmp: 00597 return 10; 00598 case ARM::Int_eh_sjlj_setjmp: 00599 case ARM::Int_eh_sjlj_setjmp_nofp: 00600 return 20; 00601 case ARM::tInt_eh_sjlj_setjmp: 00602 case ARM::t2Int_eh_sjlj_setjmp: 00603 case ARM::t2Int_eh_sjlj_setjmp_nofp: 00604 return 12; 00605 case ARM::BR_JTr: 00606 case ARM::BR_JTm: 00607 case ARM::BR_JTadd: 00608 case ARM::tBR_JTr: 00609 case ARM::t2BR_JT: 00610 case ARM::t2TBB_JT: 00611 case ARM::t2TBH_JT: { 00612 // These are jumptable branches, i.e. a branch followed by an inlined 00613 // jumptable. The size is 4 + 4 * number of entries. For TBB, each 00614 // entry is one byte; TBH two byte each. 00615 unsigned EntrySize = (Opc == ARM::t2TBB_JT) 00616 ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); 00617 unsigned NumOps = MCID.getNumOperands(); 00618 MachineOperand JTOP = 00619 MI->getOperand(NumOps - (MI->isPredicable() ? 
3 : 2)); 00620 unsigned JTI = JTOP.getIndex(); 00621 const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); 00622 assert(MJTI != 0); 00623 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); 00624 assert(JTI < JT.size()); 00625 // Thumb instructions are 2 byte aligned, but JT entries are 4 byte 00626 // 4 aligned. The assembler / linker may add 2 byte padding just before 00627 // the JT entries. The size does not include this padding; the 00628 // constant islands pass does separate bookkeeping for it. 00629 // FIXME: If we know the size of the function is less than (1 << 16) *2 00630 // bytes, we can use 16-bit entries instead. Then there won't be an 00631 // alignment issue. 00632 unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; 00633 unsigned NumEntries = getNumJTEntries(JT, JTI); 00634 if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) 00635 // Make sure the instruction that follows TBB is 2-byte aligned. 00636 // FIXME: Constant island pass should insert an "ALIGN" instruction 00637 // instead. 00638 ++NumEntries; 00639 return NumEntries * EntrySize + InstSize; 00640 } 00641 default: 00642 // Otherwise, pseudo-instruction sizes are zero. 
00643 return 0; 00644 } 00645 } 00646 00647 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { 00648 unsigned Size = 0; 00649 MachineBasicBlock::const_instr_iterator I = MI; 00650 MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 00651 while (++I != E && I->isInsideBundle()) { 00652 assert(!I->isBundle() && "No nested bundle!"); 00653 Size += GetInstSizeInBytes(&*I); 00654 } 00655 return Size; 00656 } 00657 00658 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 00659 MachineBasicBlock::iterator I, DebugLoc DL, 00660 unsigned DestReg, unsigned SrcReg, 00661 bool KillSrc) const { 00662 bool GPRDest = ARM::GPRRegClass.contains(DestReg); 00663 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); 00664 00665 if (GPRDest && GPRSrc) { 00666 AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) 00667 .addReg(SrcReg, getKillRegState(KillSrc)))); 00668 return; 00669 } 00670 00671 bool SPRDest = ARM::SPRRegClass.contains(DestReg); 00672 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); 00673 00674 unsigned Opc = 0; 00675 if (SPRDest && SPRSrc) 00676 Opc = ARM::VMOVS; 00677 else if (GPRDest && SPRSrc) 00678 Opc = ARM::VMOVRS; 00679 else if (SPRDest && GPRSrc) 00680 Opc = ARM::VMOVSR; 00681 else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) 00682 Opc = ARM::VMOVD; 00683 else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) 00684 Opc = ARM::VORRq; 00685 00686 if (Opc) { 00687 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); 00688 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 00689 if (Opc == ARM::VORRq) 00690 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 00691 AddDefaultPred(MIB); 00692 return; 00693 } 00694 00695 // Handle register classes that require multiple instructions. 00696 unsigned BeginIdx = 0; 00697 unsigned SubRegs = 0; 00698 int Spacing = 1; 00699 00700 // Use VORRq when possible. 
00701 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) 00702 Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; 00703 else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) 00704 Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; 00705 // Fall back to VMOVD. 00706 else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) 00707 Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; 00708 else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) 00709 Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; 00710 else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) 00711 Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; 00712 else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) 00713 Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; 00714 00715 else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) 00716 Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; 00717 else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) 00718 Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; 00719 else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) 00720 Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; 00721 00722 assert(Opc && "Impossible reg-to-reg copy"); 00723 00724 const TargetRegisterInfo *TRI = &getRegisterInfo(); 00725 MachineInstrBuilder Mov; 00726 00727 // Copy register tuples backward when the first Dest reg overlaps with SrcReg. 
00728 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { 00729 BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); 00730 Spacing = -Spacing; 00731 } 00732 #ifndef NDEBUG 00733 SmallSet<unsigned, 4> DstRegs; 00734 #endif 00735 for (unsigned i = 0; i != SubRegs; ++i) { 00736 unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); 00737 unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); 00738 assert(Dst && Src && "Bad sub-register"); 00739 #ifndef NDEBUG 00740 assert(!DstRegs.count(Src) && "destructive vector copy"); 00741 DstRegs.insert(Dst); 00742 #endif 00743 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) 00744 .addReg(Src); 00745 // VORR takes two source operands. 00746 if (Opc == ARM::VORRq) 00747 Mov.addReg(Src); 00748 Mov = AddDefaultPred(Mov); 00749 } 00750 // Add implicit super-register defs and kills to the last instruction. 00751 Mov->addRegisterDefined(DestReg, TRI); 00752 if (KillSrc) 00753 Mov->addRegisterKilled(SrcReg, TRI); 00754 } 00755 00756 const MachineInstrBuilder & 00757 ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, 00758 unsigned SubIdx, unsigned State, 00759 const TargetRegisterInfo *TRI) const { 00760 if (!SubIdx) 00761 return MIB.addReg(Reg, State); 00762 00763 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 00764 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 00765 return MIB.addReg(Reg, State, SubIdx); 00766 } 00767 00768 void ARMBaseInstrInfo:: 00769 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 00770 unsigned SrcReg, bool isKill, int FI, 00771 const TargetRegisterClass *RC, 00772 const TargetRegisterInfo *TRI) const { 00773 DebugLoc DL; 00774 if (I != MBB.end()) DL = I->getDebugLoc(); 00775 MachineFunction &MF = *MBB.getParent(); 00776 MachineFrameInfo &MFI = *MF.getFrameInfo(); 00777 unsigned Align = MFI.getObjectAlignment(FI); 00778 00779 MachineMemOperand *MMO = 00780 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 00781 
MachineMemOperand::MOStore, 00782 MFI.getObjectSize(FI), 00783 Align); 00784 00785 switch (RC->getSize()) { 00786 case 4: 00787 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 00788 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) 00789 .addReg(SrcReg, getKillRegState(isKill)) 00790 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 00791 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 00792 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) 00793 .addReg(SrcReg, getKillRegState(isKill)) 00794 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 00795 } else 00796 llvm_unreachable("Unknown reg class!"); 00797 break; 00798 case 8: 00799 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 00800 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) 00801 .addReg(SrcReg, getKillRegState(isKill)) 00802 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 00803 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { 00804 if (Subtarget.hasV5TEOps()) { 00805 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); 00806 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); 00807 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); 00808 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); 00809 00810 AddDefaultPred(MIB); 00811 } else { 00812 // Fallback to STM instruction, which has existed since the dawn of 00813 // time. 00814 MachineInstrBuilder MIB = 00815 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) 00816 .addFrameIndex(FI).addMemOperand(MMO)); 00817 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); 00818 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); 00819 } 00820 } else 00821 llvm_unreachable("Unknown reg class!"); 00822 break; 00823 case 16: 00824 if (ARM::DPairRegClass.hasSubClassEq(RC)) { 00825 // Use aligned spills if the stack can be realigned. 
00826 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 00827 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) 00828 .addFrameIndex(FI).addImm(16) 00829 .addReg(SrcReg, getKillRegState(isKill)) 00830 .addMemOperand(MMO)); 00831 } else { 00832 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) 00833 .addReg(SrcReg, getKillRegState(isKill)) 00834 .addFrameIndex(FI) 00835 .addMemOperand(MMO)); 00836 } 00837 } else 00838 llvm_unreachable("Unknown reg class!"); 00839 break; 00840 case 24: 00841 if (ARM::DTripleRegClass.hasSubClassEq(RC)) { 00842 // Use aligned spills if the stack can be realigned. 00843 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 00844 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) 00845 .addFrameIndex(FI).addImm(16) 00846 .addReg(SrcReg, getKillRegState(isKill)) 00847 .addMemOperand(MMO)); 00848 } else { 00849 MachineInstrBuilder MIB = 00850 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 00851 .addFrameIndex(FI)) 00852 .addMemOperand(MMO); 00853 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 00854 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 00855 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 00856 } 00857 } else 00858 llvm_unreachable("Unknown reg class!"); 00859 break; 00860 case 32: 00861 if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { 00862 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 00863 // FIXME: It's possible to only store part of the QQ register if the 00864 // spilled def has a sub-register index. 
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                       .addFrameIndex(FI).addImm(16)
                       .addReg(SrcReg, getKillRegState(isKill))
                       .addMemOperand(MMO));
      } else {
        // Unaligned path: one VSTMDIA listing the four D sub-registers. Only
        // the first sub-register operand carries the kill state.
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
          .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    // 64-byte register classes: always a single VSTMDIA over dsub_0..dsub_7.
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
  }
}

/// isStoreToStackSlot - If \p MI is one of the store opcodes handled below and
/// its address operand is a frame index with a zero offset (and, for the
/// register-offset forms, a null index register), set \p FrameIndex to that
/// index and return the stored register. Returns 0 in every other case, in
/// which \p FrameIndex is left unmodified.
unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
00911 if (MI->getOperand(1).isFI() && 00912 MI->getOperand(2).isReg() && 00913 MI->getOperand(3).isImm() && 00914 MI->getOperand(2).getReg() == 0 && 00915 MI->getOperand(3).getImm() == 0) { 00916 FrameIndex = MI->getOperand(1).getIndex(); 00917 return MI->getOperand(0).getReg(); 00918 } 00919 break; 00920 case ARM::STRi12: 00921 case ARM::t2STRi12: 00922 case ARM::tSTRspi: 00923 case ARM::VSTRD: 00924 case ARM::VSTRS: 00925 if (MI->getOperand(1).isFI() && 00926 MI->getOperand(2).isImm() && 00927 MI->getOperand(2).getImm() == 0) { 00928 FrameIndex = MI->getOperand(1).getIndex(); 00929 return MI->getOperand(0).getReg(); 00930 } 00931 break; 00932 case ARM::VST1q64: 00933 case ARM::VST1d64TPseudo: 00934 case ARM::VST1d64QPseudo: 00935 if (MI->getOperand(0).isFI() && 00936 MI->getOperand(2).getSubReg() == 0) { 00937 FrameIndex = MI->getOperand(0).getIndex(); 00938 return MI->getOperand(2).getReg(); 00939 } 00940 break; 00941 case ARM::VSTMQIA: 00942 if (MI->getOperand(1).isFI() && 00943 MI->getOperand(0).getSubReg() == 0) { 00944 FrameIndex = MI->getOperand(1).getIndex(); 00945 return MI->getOperand(0).getReg(); 00946 } 00947 break; 00948 } 00949 00950 return 0; 00951 } 00952 00953 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 00954 int &FrameIndex) const { 00955 const MachineMemOperand *Dummy; 00956 return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); 00957 } 00958 00959 void ARMBaseInstrInfo:: 00960 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 00961 unsigned DestReg, int FI, 00962 const TargetRegisterClass *RC, 00963 const TargetRegisterInfo *TRI) const { 00964 DebugLoc DL; 00965 if (I != MBB.end()) DL = I->getDebugLoc(); 00966 MachineFunction &MF = *MBB.getParent(); 00967 MachineFrameInfo &MFI = *MF.getFrameInfo(); 00968 unsigned Align = MFI.getObjectAlignment(FI); 00969 MachineMemOperand *MMO = 00970 MF.getMachineMemOperand( 00971 MachinePointerInfo::getFixedStack(FI), 00972 
MachineMemOperand::MOLoad,
                    MFI.getObjectSize(FI),
                    Align);

  // Pick the reload instruction from the spill size of the register class,
  // then from the concrete class within that size.
  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        // LDRD defines both halves of the pair (gsub_0, gsub_1) explicitly;
        // the predicate operands are appended last.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                       .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      // For a physical destination, also add the whole register as an
      // implicit def alongside the two sub-register defs.
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      // The VLD1q64 form (alignment operand 16) is used only when the slot is
      // at least 16-byte aligned and the stack can be realigned; otherwise
      // fall back to VLDMQIA.
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        // Unaligned path: VLDMDIA defining the three D sub-registers.
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI)
                         .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        // Unaligned path: VLDMDIA defining the four D sub-registers.
        MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
          .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    // 64-byte classes are always reloaded with VLDMDIA over dsub_0..dsub_7.
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

/// isLoadFromStackSlot - If \p MI is one of the load opcodes handled in the
/// body and its address is a frame index with zero offset, set \p FrameIndex
/// and return the loaded register (operand 0); otherwise return 0.
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const
{ 01098 switch (MI->getOpcode()) { 01099 default: break; 01100 case ARM::LDRrs: 01101 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. 01102 if (MI->getOperand(1).isFI() && 01103 MI->getOperand(2).isReg() && 01104 MI->getOperand(3).isImm() && 01105 MI->getOperand(2).getReg() == 0 && 01106 MI->getOperand(3).getImm() == 0) { 01107 FrameIndex = MI->getOperand(1).getIndex(); 01108 return MI->getOperand(0).getReg(); 01109 } 01110 break; 01111 case ARM::LDRi12: 01112 case ARM::t2LDRi12: 01113 case ARM::tLDRspi: 01114 case ARM::VLDRD: 01115 case ARM::VLDRS: 01116 if (MI->getOperand(1).isFI() && 01117 MI->getOperand(2).isImm() && 01118 MI->getOperand(2).getImm() == 0) { 01119 FrameIndex = MI->getOperand(1).getIndex(); 01120 return MI->getOperand(0).getReg(); 01121 } 01122 break; 01123 case ARM::VLD1q64: 01124 case ARM::VLD1d64TPseudo: 01125 case ARM::VLD1d64QPseudo: 01126 if (MI->getOperand(1).isFI() && 01127 MI->getOperand(0).getSubReg() == 0) { 01128 FrameIndex = MI->getOperand(1).getIndex(); 01129 return MI->getOperand(0).getReg(); 01130 } 01131 break; 01132 case ARM::VLDMQIA: 01133 if (MI->getOperand(1).isFI() && 01134 MI->getOperand(0).getSubReg() == 0) { 01135 FrameIndex = MI->getOperand(1).getIndex(); 01136 return MI->getOperand(0).getReg(); 01137 } 01138 break; 01139 } 01140 01141 return 0; 01142 } 01143 01144 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 01145 int &FrameIndex) const { 01146 const MachineMemOperand *Dummy; 01147 return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); 01148 } 01149 01150 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ 01151 // This hook gets to expand COPY instructions before they become 01152 // copyPhysReg() calls. Look for VMOVS instructions that can legally be 01153 // widened to VMOVD. We prefer the VMOVD when possible because it may be 01154 // changed into a VORR that can go down the NEON pipeline. 
// Nothing to do when widening is disabled by the command-line option, when
  // MI is not a COPY, or on Cortex-A15.
  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
    return false;

  // Look for a copy between even S-registers. That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  unsigned DstRegS = MI->getOperand(0).getReg();
  unsigned SrcRegS = MI->getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  // Map each S register to the D register that has it as ssub_0; a zero
  // result means there is no such D register, so the copy cannot be widened.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI->getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening: " << *MI);
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI->RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI->setDesc(get(ARM::VMOVD));
  MI->getOperand(0).setReg(DstRegD);
  MI->getOperand(1).setReg(SrcRegD);
  AddDefaultPred(MIB);

  // We are now reading SrcRegD instead of SrcRegS.
This may upset the 01200 // register scavenger and machine verifier, so we need to indicate that we 01201 // are reading an undefined value from SrcRegD, but a proper value from 01202 // SrcRegS. 01203 MI->getOperand(1).setIsUndef(); 01204 MIB.addReg(SrcRegS, RegState::Implicit); 01205 01206 // SrcRegD may actually contain an unrelated value in the ssub_1 01207 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. 01208 if (MI->getOperand(1).isKill()) { 01209 MI->getOperand(1).setIsKill(false); 01210 MI->addRegisterKilled(SrcRegS, TRI, true); 01211 } 01212 01213 DEBUG(dbgs() << "replaced by: " << *MI); 01214 return true; 01215 } 01216 01217 MachineInstr* 01218 ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, 01219 int FrameIx, uint64_t Offset, 01220 const MDNode *MDPtr, 01221 DebugLoc DL) const { 01222 MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) 01223 .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); 01224 return &*MIB; 01225 } 01226 01227 /// Create a copy of a const pool value. Update CPI to the new index and return 01228 /// the label UID. 01229 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 01230 MachineConstantPool *MCP = MF.getConstantPool(); 01231 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 01232 01233 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; 01234 assert(MCPE.isMachineConstantPoolEntry() && 01235 "Expecting a machine constantpool entry!"); 01236 ARMConstantPoolValue *ACPV = 01237 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); 01238 01239 unsigned PCLabelId = AFI->createPICLabelUId(); 01240 ARMConstantPoolValue *NewCPV = 0; 01241 // FIXME: The below assumes PIC relocation model and that the function 01242 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and 01243 // zero for non-PIC in ARM or Thumb. 
// The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  // Re-create the value with the same payload kind but the fresh PC label id
  // and a PC adjustment of 4.
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
             ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  // Register the new value with the original entry's alignment and hand the
  // resulting index back through CPI.
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

/// reMaterialize - Emit a copy of \p Orig before \p I that defines \p DestReg.
/// The PIC constant-pool loads get a duplicated constant-pool entry with a
/// fresh PC label; every other opcode is cloned and has its operand-0
/// register substituted by \p DestReg / \p SubIdx.
void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
DestReg) 01292 .addConstantPoolIndex(CPI).addImm(PCLabelId); 01293 MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); 01294 break; 01295 } 01296 } 01297 } 01298 01299 MachineInstr * 01300 ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { 01301 MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF); 01302 switch(Orig->getOpcode()) { 01303 case ARM::tLDRpci_pic: 01304 case ARM::t2LDRpci_pic: { 01305 unsigned CPI = Orig->getOperand(1).getIndex(); 01306 unsigned PCLabelId = duplicateCPV(MF, CPI); 01307 Orig->getOperand(1).setIndex(CPI); 01308 Orig->getOperand(2).setImm(PCLabelId); 01309 break; 01310 } 01311 } 01312 return MI; 01313 } 01314 01315 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, 01316 const MachineInstr *MI1, 01317 const MachineRegisterInfo *MRI) const { 01318 int Opcode = MI0->getOpcode(); 01319 if (Opcode == ARM::t2LDRpci || 01320 Opcode == ARM::t2LDRpci_pic || 01321 Opcode == ARM::tLDRpci || 01322 Opcode == ARM::tLDRpci_pic || 01323 Opcode == ARM::MOV_ga_dyn || 01324 Opcode == ARM::MOV_ga_pcrel || 01325 Opcode == ARM::MOV_ga_pcrel_ldr || 01326 Opcode == ARM::t2MOV_ga_dyn || 01327 Opcode == ARM::t2MOV_ga_pcrel) { 01328 if (MI1->getOpcode() != Opcode) 01329 return false; 01330 if (MI0->getNumOperands() != MI1->getNumOperands()) 01331 return false; 01332 01333 const MachineOperand &MO0 = MI0->getOperand(1); 01334 const MachineOperand &MO1 = MI1->getOperand(1); 01335 if (MO0.getOffset() != MO1.getOffset()) 01336 return false; 01337 01338 if (Opcode == ARM::MOV_ga_dyn || 01339 Opcode == ARM::MOV_ga_pcrel || 01340 Opcode == ARM::MOV_ga_pcrel_ldr || 01341 Opcode == ARM::t2MOV_ga_dyn || 01342 Opcode == ARM::t2MOV_ga_pcrel) 01343 // Ignore the PC labels. 
return MO0.getGlobal() == MO1.getGlobal();

    // Constant-pool loads: compare the two pool entries. ARM-specific entries
    // are compared with hasSameValue(), plain entries by constant pointer; a
    // mixed pair never matches.
    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    // Different address registers can still match if both are virtual
    // registers whose defining instructions produce the same value.
    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool of a global address, are
      // the same.
if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    // The remaining operands (index 3 onwards) must be identical.
    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  // Any other opcode: plain structural comparison that ignores virtual
  // register defs.
  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
if (Subtarget.isThumb1Only()) return false;

  // Both nodes must already be selected machine nodes.
  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  // Load1 must be one of the supported load opcodes.
  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Load2 must be one of the supported load opcodes.
  // NOTE(review): this list omits ARM::t2LDRDi8, which the Load1 list above
  // accepts, so the result can depend on argument order -- confirm whether
  // the asymmetry is intentional.
  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  // Non-constant offsets: report no match; Offset1/Offset2 are not written.
  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together.
This function takes two integers that represent the load offsets 01479 /// from the common base address. It returns true if it decides it's desirable 01480 /// to schedule the two loads together. "NumLoads" is the number of loads that 01481 /// have already been scheduled after Load1. 01482 /// 01483 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched 01484 /// is permanently disabled. 01485 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, 01486 int64_t Offset1, int64_t Offset2, 01487 unsigned NumLoads) const { 01488 // Don't worry about Thumb: just ARM and Thumb2. 01489 if (Subtarget.isThumb1Only()) return false; 01490 01491 assert(Offset2 > Offset1); 01492 01493 if ((Offset2 - Offset1) / 8 > 64) 01494 return false; 01495 01496 if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) 01497 return false; // FIXME: overly conservative? 01498 01499 // Four loads in a row should be sufficient. 01500 if (NumLoads >= 3) 01501 return false; 01502 01503 return true; 01504 } 01505 01506 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, 01507 const MachineBasicBlock *MBB, 01508 const MachineFunction &MF) const { 01509 // Debug info is never a scheduling boundary. It's necessary to be explicit 01510 // due to the special treatment of IT instructions below, otherwise a 01511 // dbg_value followed by an IT will result in the IT instruction being 01512 // considered a scheduling hazard, which is wrong. It should be the actual 01513 // instruction preceding the dbg_value instruction(s), just like it is 01514 // when debug info is not present. 01515 if (MI->isDebugValue()) 01516 return false; 01517 01518 // Terminators and labels can't be scheduled around. 01519 if (MI->isTerminator() || MI->isLabel()) 01520 return true; 01521 01522 // Treat the start of the IT block as a scheduling boundary, but schedule 01523 // t2IT along with all instructions following it. 01524 // FIXME: This is a big hammer. 
// But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any dbg_value instructions
  while (++I != MBB->end() && I->isDebugValue())
    ;
  // MI is a boundary when the next non-debug instruction is a t2IT.
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  // Calls don't actually change the stack pointer, even if they have imp-defs.
  // No ARM calling conventions change the stack pointer. (X86 calling
  // conventions sometimes do).
  if (!MI->isCall() && MI->definesRegister(ARM::SP))
    return true;

  return false;
}

/// isProfitableToIfCvt (single block) - Returns true when the predicated cost
/// (NumCycles + ExtraPredCycles) does not exceed the estimated cost of keeping
/// the branch. A block with zero cycles is never considered profitable.
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
                    unsigned NumCycles, unsigned ExtraPredCycles,
                    const BranchProbability &Probability) const {
  if (!NumCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
01557 unsigned UnpredCost = Probability.getNumerator() * NumCycles; 01558 UnpredCost /= Probability.getDenominator(); 01559 UnpredCost += 1; // The branch itself 01560 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 01561 01562 return (NumCycles + ExtraPredCycles) <= UnpredCost; 01563 } 01564 01565 bool ARMBaseInstrInfo:: 01566 isProfitableToIfCvt(MachineBasicBlock &TMBB, 01567 unsigned TCycles, unsigned TExtra, 01568 MachineBasicBlock &FMBB, 01569 unsigned FCycles, unsigned FExtra, 01570 const BranchProbability &Probability) const { 01571 if (!TCycles || !FCycles) 01572 return false; 01573 01574 // Attempt to estimate the relative costs of predication versus branching. 01575 unsigned TUnpredCost = Probability.getNumerator() * TCycles; 01576 TUnpredCost /= Probability.getDenominator(); 01577 01578 uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); 01579 unsigned FUnpredCost = Comp * FCycles; 01580 FUnpredCost /= Probability.getDenominator(); 01581 01582 unsigned UnpredCost = TUnpredCost + FUnpredCost; 01583 UnpredCost += 1; // The branch itself 01584 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 01585 01586 return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; 01587 } 01588 01589 bool 01590 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 01591 MachineBasicBlock &FMBB) const { 01592 // Reduce false anti-dependencies to let Swift's out-of-order execution 01593 // engine do its thing. 01594 return Subtarget.isSwift(); 01595 } 01596 01597 /// getInstrPredicate - If instruction is predicated, returns its predicate 01598 /// condition, otherwise returns AL. It also returns the condition code 01599 /// register by reference. 
01600 ARMCC::CondCodes 01601 llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { 01602 int PIdx = MI->findFirstPredOperandIdx(); 01603 if (PIdx == -1) { 01604 PredReg = 0; 01605 return ARMCC::AL; 01606 } 01607 01608 PredReg = MI->getOperand(PIdx+1).getReg(); 01609 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 01610 } 01611 01612 01613 int llvm::getMatchingCondBranchOpcode(int Opc) { 01614 if (Opc == ARM::B) 01615 return ARM::Bcc; 01616 if (Opc == ARM::tB) 01617 return ARM::tBcc; 01618 if (Opc == ARM::t2B) 01619 return ARM::t2Bcc; 01620 01621 llvm_unreachable("Unknown unconditional branch opcode!"); 01622 } 01623 01624 /// commuteInstruction - Handle commutable instructions. 01625 MachineInstr * 01626 ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 01627 switch (MI->getOpcode()) { 01628 case ARM::MOVCCr: 01629 case ARM::t2MOVCCr: { 01630 // MOVCC can be commuted by inverting the condition. 01631 unsigned PredReg = 0; 01632 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); 01633 // MOVCC AL can't be inverted. Shouldn't happen. 01634 if (CC == ARMCC::AL || PredReg != ARM::CPSR) 01635 return NULL; 01636 MI = TargetInstrInfo::commuteInstruction(MI, NewMI); 01637 if (!MI) 01638 return NULL; 01639 // After swapping the MOVCC operands, also invert the condition. 01640 MI->getOperand(MI->findFirstPredOperandIdx()) 01641 .setImm(ARMCC::getOppositeCondition(CC)); 01642 return MI; 01643 } 01644 } 01645 return TargetInstrInfo::commuteInstruction(MI, NewMI); 01646 } 01647 01648 /// Identify instructions that can be folded into a MOVCC instruction, and 01649 /// return the defining instruction. 
01650 static MachineInstr *canFoldIntoMOVCC(unsigned Reg, 01651 const MachineRegisterInfo &MRI, 01652 const TargetInstrInfo *TII) { 01653 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01654 return 0; 01655 if (!MRI.hasOneNonDBGUse(Reg)) 01656 return 0; 01657 MachineInstr *MI = MRI.getVRegDef(Reg); 01658 if (!MI) 01659 return 0; 01660 // MI is folded into the MOVCC by predicating it. 01661 if (!MI->isPredicable()) 01662 return 0; 01663 // Check if MI has any non-dead defs or physreg uses. This also detects 01664 // predicated instructions which will be reading CPSR. 01665 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 01666 const MachineOperand &MO = MI->getOperand(i); 01667 // Reject frame index operands, PEI can't handle the predicated pseudos. 01668 if (MO.isFI() || MO.isCPI() || MO.isJTI()) 01669 return 0; 01670 if (!MO.isReg()) 01671 continue; 01672 // MI can't have any tied operands, that would conflict with predication. 01673 if (MO.isTied()) 01674 return 0; 01675 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 01676 return 0; 01677 if (MO.isDef() && !MO.isDead()) 01678 return 0; 01679 } 01680 bool DontMoveAcrossStores = true; 01681 if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) 01682 return 0; 01683 return MI; 01684 } 01685 01686 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, 01687 SmallVectorImpl<MachineOperand> &Cond, 01688 unsigned &TrueOp, unsigned &FalseOp, 01689 bool &Optimizable) const { 01690 assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && 01691 "Unknown select instruction"); 01692 // MOVCC operands: 01693 // 0: Def. 01694 // 1: True use. 01695 // 2: False use. 01696 // 3: Condition code. 01697 // 4: CPSR use. 01698 TrueOp = 1; 01699 FalseOp = 2; 01700 Cond.push_back(MI->getOperand(3)); 01701 Cond.push_back(MI->getOperand(4)); 01702 // We can always fold a def. 
Optimizable = true;
  // Returning false here accompanies filled-in outputs.
  // NOTE(review): presumably false means "analysis succeeded" -- confirm
  // against the TargetInstrInfo::analyzeSelect contract.
  return false;
}

/// optimizeSelect - Try to replace the MOVCC \p MI by a predicated copy of the
/// instruction defining one of its inputs. Operand 2 is tried first; if it
/// cannot be folded, operand 1 is tried and the condition is inverted.
/// Returns the new instruction, or 0 if neither input can be folded.
/// \p PreferFalse is not consulted by this implementation.
MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
                                               bool PreferFalse) const {
  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
  // Invert is set when operand 2 could not be folded and operand 1 is used.
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
  if (!DefMI)
    return 0;

  // Create a new predicated version of DefMI.
  // Rfalse is the first use.
  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                                      DefMI->getDesc(),
                                      MI->getOperand(0).getReg());

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    NewMI.addOperand(DefMI->getOperand(i));

  // Append the MOVCC's predicate: its condition code (inverted if needed)
  // followed by its operand 4.
  unsigned CondCode = MI->getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.addOperand(MI->getOperand(4));

  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
  if (NewMI->hasOptionalDef())
    AddDefaultCC(NewMI);

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def.
  // The tie makes the register allocator ensure the FalseReg is allocated the
  // same register as operand 0.
  MachineOperand FalseReg = MI->getOperand(Invert ?
2 : 1); 01747 FalseReg.setImplicit(); 01748 NewMI.addOperand(FalseReg); 01749 NewMI->tieOperands(0, NewMI->getNumOperands() - 1); 01750 01751 // The caller will erase MI, but not DefMI. 01752 DefMI->eraseFromParent(); 01753 return NewMI; 01754 } 01755 01756 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the 01757 /// instruction is encoded with an 'S' bit is determined by the optional CPSR 01758 /// def operand. 01759 /// 01760 /// This will go away once we can teach tblgen how to set the optional CPSR def 01761 /// operand itself. 01762 struct AddSubFlagsOpcodePair { 01763 uint16_t PseudoOpc; 01764 uint16_t MachineOpc; 01765 }; 01766 01767 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 01768 {ARM::ADDSri, ARM::ADDri}, 01769 {ARM::ADDSrr, ARM::ADDrr}, 01770 {ARM::ADDSrsi, ARM::ADDrsi}, 01771 {ARM::ADDSrsr, ARM::ADDrsr}, 01772 01773 {ARM::SUBSri, ARM::SUBri}, 01774 {ARM::SUBSrr, ARM::SUBrr}, 01775 {ARM::SUBSrsi, ARM::SUBrsi}, 01776 {ARM::SUBSrsr, ARM::SUBrsr}, 01777 01778 {ARM::RSBSri, ARM::RSBri}, 01779 {ARM::RSBSrsi, ARM::RSBrsi}, 01780 {ARM::RSBSrsr, ARM::RSBrsr}, 01781 01782 {ARM::t2ADDSri, ARM::t2ADDri}, 01783 {ARM::t2ADDSrr, ARM::t2ADDrr}, 01784 {ARM::t2ADDSrs, ARM::t2ADDrs}, 01785 01786 {ARM::t2SUBSri, ARM::t2SUBri}, 01787 {ARM::t2SUBSrr, ARM::t2SUBrr}, 01788 {ARM::t2SUBSrs, ARM::t2SUBrs}, 01789 01790 {ARM::t2RSBSri, ARM::t2RSBri}, 01791 {ARM::t2RSBSrs, ARM::t2RSBrs}, 01792 }; 01793 01794 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { 01795 for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) 01796 if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) 01797 return AddSubFlagsOpcodeMap[i].MachineOpc; 01798 return 0; 01799 } 01800 01801 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, 01802 MachineBasicBlock::iterator &MBBI, DebugLoc dl, 01803 unsigned DestReg, unsigned BaseReg, int NumBytes, 01804 ARMCC::CondCodes Pred, unsigned PredReg, 01805 const ARMBaseInstrInfo &TII, 
unsigned MIFlags) { 01806 bool isSub = NumBytes < 0; 01807 if (isSub) NumBytes = -NumBytes; 01808 01809 while (NumBytes) { 01810 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); 01811 unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); 01812 assert(ThisVal && "Didn't extract field correctly"); 01813 01814 // We will handle these bits from offset, clear them. 01815 NumBytes &= ~ThisVal; 01816 01817 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); 01818 01819 // Build the new ADD / SUB. 01820 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; 01821 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) 01822 .addReg(BaseReg, RegState::Kill).addImm(ThisVal) 01823 .addImm((unsigned)Pred).addReg(PredReg).addReg(0) 01824 .setMIFlags(MIFlags); 01825 BaseReg = DestReg; 01826 } 01827 } 01828 01829 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 01830 unsigned FrameReg, int &Offset, 01831 const ARMBaseInstrInfo &TII) { 01832 unsigned Opcode = MI.getOpcode(); 01833 const MCInstrDesc &Desc = MI.getDesc(); 01834 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); 01835 bool isSub = false; 01836 01837 // Memory operands in inline assembly always use AddrMode2. 01838 if (Opcode == ARM::INLINEASM) 01839 AddrMode = ARMII::AddrMode2; 01840 01841 if (Opcode == ARM::ADDri) { 01842 Offset += MI.getOperand(FrameRegIdx+1).getImm(); 01843 if (Offset == 0) { 01844 // Turn it into a move. 01845 MI.setDesc(TII.get(ARM::MOVr)); 01846 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 01847 MI.RemoveOperand(FrameRegIdx+1); 01848 Offset = 0; 01849 return true; 01850 } else if (Offset < 0) { 01851 Offset = -Offset; 01852 isSub = true; 01853 MI.setDesc(TII.get(ARM::SUBri)); 01854 } 01855 01856 // Common case: small offset, fits into instruction. 
01857 if (ARM_AM::getSOImmVal(Offset) != -1) { 01858 // Replace the FrameIndex with sp / fp 01859 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 01860 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); 01861 Offset = 0; 01862 return true; 01863 } 01864 01865 // Otherwise, pull as much of the immedidate into this ADDri/SUBri 01866 // as possible. 01867 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); 01868 unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); 01869 01870 // We will handle these bits from offset, clear them. 01871 Offset &= ~ThisImmVal; 01872 01873 // Get the properly encoded SOImmVal field. 01874 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && 01875 "Bit extraction didn't work?"); 01876 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); 01877 } else { 01878 unsigned ImmIdx = 0; 01879 int InstrOffs = 0; 01880 unsigned NumBits = 0; 01881 unsigned Scale = 1; 01882 switch (AddrMode) { 01883 case ARMII::AddrMode_i12: { 01884 ImmIdx = FrameRegIdx + 1; 01885 InstrOffs = MI.getOperand(ImmIdx).getImm(); 01886 NumBits = 12; 01887 break; 01888 } 01889 case ARMII::AddrMode2: { 01890 ImmIdx = FrameRegIdx+2; 01891 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); 01892 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 01893 InstrOffs *= -1; 01894 NumBits = 12; 01895 break; 01896 } 01897 case ARMII::AddrMode3: { 01898 ImmIdx = FrameRegIdx+2; 01899 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); 01900 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 01901 InstrOffs *= -1; 01902 NumBits = 8; 01903 break; 01904 } 01905 case ARMII::AddrMode4: 01906 case ARMII::AddrMode6: 01907 // Can't fold any offset even if it's zero. 
01908 return false; 01909 case ARMII::AddrMode5: { 01910 ImmIdx = FrameRegIdx+1; 01911 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); 01912 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 01913 InstrOffs *= -1; 01914 NumBits = 8; 01915 Scale = 4; 01916 break; 01917 } 01918 default: 01919 llvm_unreachable("Unsupported addressing mode!"); 01920 } 01921 01922 Offset += InstrOffs * Scale; 01923 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); 01924 if (Offset < 0) { 01925 Offset = -Offset; 01926 isSub = true; 01927 } 01928 01929 // Attempt to fold address comp. if opcode has offset bits 01930 if (NumBits > 0) { 01931 // Common case: small offset, fits into instruction. 01932 MachineOperand &ImmOp = MI.getOperand(ImmIdx); 01933 int ImmedOffset = Offset / Scale; 01934 unsigned Mask = (1 << NumBits) - 1; 01935 if ((unsigned)Offset <= Mask * Scale) { 01936 // Replace the FrameIndex with sp 01937 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 01938 // FIXME: When addrmode2 goes away, this will simplify (like the 01939 // T2 version), as the LDR.i12 versions don't need the encoding 01940 // tricks for the offset value. 01941 if (isSub) { 01942 if (AddrMode == ARMII::AddrMode_i12) 01943 ImmedOffset = -ImmedOffset; 01944 else 01945 ImmedOffset |= 1 << NumBits; 01946 } 01947 ImmOp.ChangeToImmediate(ImmedOffset); 01948 Offset = 0; 01949 return true; 01950 } 01951 01952 // Otherwise, it didn't fit. Pull in what we can to simplify the immed. 01953 ImmedOffset = ImmedOffset & Mask; 01954 if (isSub) { 01955 if (AddrMode == ARMII::AddrMode_i12) 01956 ImmedOffset = -ImmedOffset; 01957 else 01958 ImmedOffset |= 1 << NumBits; 01959 } 01960 ImmOp.ChangeToImmediate(ImmedOffset); 01961 Offset &= ~(Mask*Scale); 01962 } 01963 } 01964 01965 Offset = (isSub) ? 
-Offset : Offset; 01966 return Offset == 0; 01967 } 01968 01969 /// analyzeCompare - For a comparison instruction, return the source registers 01970 /// in SrcReg and SrcReg2 if having two register operands, and the value it 01971 /// compares against in CmpValue. Return true if the comparison instruction 01972 /// can be analyzed. 01973 bool ARMBaseInstrInfo:: 01974 analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, 01975 int &CmpMask, int &CmpValue) const { 01976 switch (MI->getOpcode()) { 01977 default: break; 01978 case ARM::CMPri: 01979 case ARM::t2CMPri: 01980 SrcReg = MI->getOperand(0).getReg(); 01981 SrcReg2 = 0; 01982 CmpMask = ~0; 01983 CmpValue = MI->getOperand(1).getImm(); 01984 return true; 01985 case ARM::CMPrr: 01986 case ARM::t2CMPrr: 01987 SrcReg = MI->getOperand(0).getReg(); 01988 SrcReg2 = MI->getOperand(1).getReg(); 01989 CmpMask = ~0; 01990 CmpValue = 0; 01991 return true; 01992 case ARM::TSTri: 01993 case ARM::t2TSTri: 01994 SrcReg = MI->getOperand(0).getReg(); 01995 SrcReg2 = 0; 01996 CmpMask = MI->getOperand(1).getImm(); 01997 CmpValue = 0; 01998 return true; 01999 } 02000 02001 return false; 02002 } 02003 02004 /// isSuitableForMask - Identify a suitable 'and' instruction that 02005 /// operates on the given source register and applies the same mask 02006 /// as a 'tst' instruction. Provide a limited look-through for copies. 02007 /// When successful, MI will hold the found instruction. 02008 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, 02009 int CmpMask, bool CommonUse) { 02010 switch (MI->getOpcode()) { 02011 case ARM::ANDri: 02012 case ARM::t2ANDri: 02013 if (CmpMask != MI->getOperand(2).getImm()) 02014 return false; 02015 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) 02016 return true; 02017 break; 02018 case ARM::COPY: { 02019 // Walk down one instruction which is potentially an 'and'. 
02020 const MachineInstr &Copy = *MI; 02021 MachineBasicBlock::iterator AND( 02022 llvm::next(MachineBasicBlock::iterator(MI))); 02023 if (AND == MI->getParent()->end()) return false; 02024 MI = AND; 02025 return isSuitableForMask(MI, Copy.getOperand(0).getReg(), 02026 CmpMask, true); 02027 } 02028 } 02029 02030 return false; 02031 } 02032 02033 /// getSwappedCondition - assume the flags are set by MI(a,b), return 02034 /// the condition code if we modify the instructions such that flags are 02035 /// set by MI(b,a). 02036 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { 02037 switch (CC) { 02038 default: return ARMCC::AL; 02039 case ARMCC::EQ: return ARMCC::EQ; 02040 case ARMCC::NE: return ARMCC::NE; 02041 case ARMCC::HS: return ARMCC::LS; 02042 case ARMCC::LO: return ARMCC::HI; 02043 case ARMCC::HI: return ARMCC::LO; 02044 case ARMCC::LS: return ARMCC::HS; 02045 case ARMCC::GE: return ARMCC::LE; 02046 case ARMCC::LT: return ARMCC::GT; 02047 case ARMCC::GT: return ARMCC::LT; 02048 case ARMCC::LE: return ARMCC::GE; 02049 } 02050 } 02051 02052 /// isRedundantFlagInstr - check whether the first instruction, whose only 02053 /// purpose is to update flags, can be made redundant. 02054 /// CMPrr can be made redundant by SUBrr if the operands are the same. 02055 /// CMPri can be made redundant by SUBri if the operands are the same. 02056 /// This function can be extended later on. 
02057 inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, 02058 unsigned SrcReg2, int ImmValue, 02059 MachineInstr *OI) { 02060 if ((CmpI->getOpcode() == ARM::CMPrr || 02061 CmpI->getOpcode() == ARM::t2CMPrr) && 02062 (OI->getOpcode() == ARM::SUBrr || 02063 OI->getOpcode() == ARM::t2SUBrr) && 02064 ((OI->getOperand(1).getReg() == SrcReg && 02065 OI->getOperand(2).getReg() == SrcReg2) || 02066 (OI->getOperand(1).getReg() == SrcReg2 && 02067 OI->getOperand(2).getReg() == SrcReg))) 02068 return true; 02069 02070 if ((CmpI->getOpcode() == ARM::CMPri || 02071 CmpI->getOpcode() == ARM::t2CMPri) && 02072 (OI->getOpcode() == ARM::SUBri || 02073 OI->getOpcode() == ARM::t2SUBri) && 02074 OI->getOperand(1).getReg() == SrcReg && 02075 OI->getOperand(2).getImm() == ImmValue) 02076 return true; 02077 return false; 02078 } 02079 02080 /// optimizeCompareInstr - Convert the instruction supplying the argument to the 02081 /// comparison into one that sets the zero bit in the flags register; 02082 /// Remove a redundant Compare instruction if an earlier instruction can set the 02083 /// flags in the same way as Compare. 02084 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two 02085 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the 02086 /// condition code of instructions which use the flags. 02087 bool ARMBaseInstrInfo:: 02088 optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, 02089 int CmpMask, int CmpValue, 02090 const MachineRegisterInfo *MRI) const { 02091 // Get the unique definition of SrcReg. 02092 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 02093 if (!MI) return false; 02094 02095 // Masked compares sometimes use the same register as the corresponding 'and'. 
02096 if (CmpMask != ~0) { 02097 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { 02098 MI = 0; 02099 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), 02100 UE = MRI->use_end(); UI != UE; ++UI) { 02101 if (UI->getParent() != CmpInstr->getParent()) continue; 02102 MachineInstr *PotentialAND = &*UI; 02103 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || 02104 isPredicated(PotentialAND)) 02105 continue; 02106 MI = PotentialAND; 02107 break; 02108 } 02109 if (!MI) return false; 02110 } 02111 } 02112 02113 // Get ready to iterate backward from CmpInstr. 02114 MachineBasicBlock::iterator I = CmpInstr, E = MI, 02115 B = CmpInstr->getParent()->begin(); 02116 02117 // Early exit if CmpInstr is at the beginning of the BB. 02118 if (I == B) return false; 02119 02120 // There are two possible candidates which can be changed to set CPSR: 02121 // One is MI, the other is a SUB instruction. 02122 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). 02123 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). 02124 MachineInstr *Sub = NULL; 02125 if (SrcReg2 != 0) 02126 // MI is not a candidate for CMPrr. 02127 MI = NULL; 02128 else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { 02129 // Conservatively refuse to convert an instruction which isn't in the same 02130 // BB as the comparison. 02131 // For CMPri, we need to check Sub, thus we can't return here. 02132 if (CmpInstr->getOpcode() == ARM::CMPri || 02133 CmpInstr->getOpcode() == ARM::t2CMPri) 02134 MI = NULL; 02135 else 02136 return false; 02137 } 02138 02139 // Check that CPSR isn't set between the comparison instruction and the one we 02140 // want to change. At the same time, search for Sub. 
02141 const TargetRegisterInfo *TRI = &getRegisterInfo(); 02142 --I; 02143 for (; I != E; --I) { 02144 const MachineInstr &Instr = *I; 02145 02146 if (Instr.modifiesRegister(ARM::CPSR, TRI) || 02147 Instr.readsRegister(ARM::CPSR, TRI)) 02148 // This instruction modifies or uses CPSR after the one we want to 02149 // change. We can't do this transformation. 02150 return false; 02151 02152 // Check whether CmpInstr can be made redundant by the current instruction. 02153 if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { 02154 Sub = &*I; 02155 break; 02156 } 02157 02158 if (I == B) 02159 // The 'and' is below the comparison instruction. 02160 return false; 02161 } 02162 02163 // Return false if no candidates exist. 02164 if (!MI && !Sub) 02165 return false; 02166 02167 // The single candidate is called MI. 02168 if (!MI) MI = Sub; 02169 02170 // We can't use a predicated instruction - it doesn't always write the flags. 02171 if (isPredicated(MI)) 02172 return false; 02173 02174 switch (MI->getOpcode()) { 02175 default: break; 02176 case ARM::RSBrr: 02177 case ARM::RSBri: 02178 case ARM::RSCrr: 02179 case ARM::RSCri: 02180 case ARM::ADDrr: 02181 case ARM::ADDri: 02182 case ARM::ADCrr: 02183 case ARM::ADCri: 02184 case ARM::SUBrr: 02185 case ARM::SUBri: 02186 case ARM::SBCrr: 02187 case ARM::SBCri: 02188 case ARM::t2RSBri: 02189 case ARM::t2ADDrr: 02190 case ARM::t2ADDri: 02191 case ARM::t2ADCrr: 02192 case ARM::t2ADCri: 02193 case ARM::t2SUBrr: 02194 case ARM::t2SUBri: 02195 case ARM::t2SBCrr: 02196 case ARM::t2SBCri: 02197 case ARM::ANDrr: 02198 case ARM::ANDri: 02199 case ARM::t2ANDrr: 02200 case ARM::t2ANDri: 02201 case ARM::ORRrr: 02202 case ARM::ORRri: 02203 case ARM::t2ORRrr: 02204 case ARM::t2ORRri: 02205 case ARM::EORrr: 02206 case ARM::EORri: 02207 case ARM::t2EORrr: 02208 case ARM::t2EORri: { 02209 // Scan forward for the use of CPSR 02210 // When checking against MI: if it's a conditional code requires 02211 // checking of V bit, then this 
is not safe to do. 02212 // It is safe to remove CmpInstr if CPSR is redefined or killed. 02213 // If we are done with the basic block, we need to check whether CPSR is 02214 // live-out. 02215 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> 02216 OperandsToUpdate; 02217 bool isSafe = false; 02218 I = CmpInstr; 02219 E = CmpInstr->getParent()->end(); 02220 while (!isSafe && ++I != E) { 02221 const MachineInstr &Instr = *I; 02222 for (unsigned IO = 0, EO = Instr.getNumOperands(); 02223 !isSafe && IO != EO; ++IO) { 02224 const MachineOperand &MO = Instr.getOperand(IO); 02225 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { 02226 isSafe = true; 02227 break; 02228 } 02229 if (!MO.isReg() || MO.getReg() != ARM::CPSR) 02230 continue; 02231 if (MO.isDef()) { 02232 isSafe = true; 02233 break; 02234 } 02235 // Condition code is after the operand before CPSR. 02236 ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); 02237 if (Sub) { 02238 ARMCC::CondCodes NewCC = getSwappedCondition(CC); 02239 if (NewCC == ARMCC::AL) 02240 return false; 02241 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based 02242 // on CMP needs to be updated to be based on SUB. 02243 // Push the condition code operands to OperandsToUpdate. 02244 // If it is safe to remove CmpInstr, the condition code of these 02245 // operands will be modified. 02246 if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && 02247 Sub->getOperand(2).getReg() == SrcReg) 02248 OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), 02249 NewCC)); 02250 } 02251 else 02252 switch (CC) { 02253 default: 02254 // CPSR can be used multiple times, we should continue. 02255 break; 02256 case ARMCC::VS: 02257 case ARMCC::VC: 02258 case ARMCC::GE: 02259 case ARMCC::LT: 02260 case ARMCC::GT: 02261 case ARMCC::LE: 02262 return false; 02263 } 02264 } 02265 } 02266 02267 // If CPSR is not killed nor re-defined, we should check whether it is 02268 // live-out. 
If it is live-out, do not optimize. 02269 if (!isSafe) { 02270 MachineBasicBlock *MBB = CmpInstr->getParent(); 02271 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 02272 SE = MBB->succ_end(); SI != SE; ++SI) 02273 if ((*SI)->isLiveIn(ARM::CPSR)) 02274 return false; 02275 } 02276 02277 // Toggle the optional operand to CPSR. 02278 MI->getOperand(5).setReg(ARM::CPSR); 02279 MI->getOperand(5).setIsDef(true); 02280 assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); 02281 CmpInstr->eraseFromParent(); 02282 02283 // Modify the condition code of operands in OperandsToUpdate. 02284 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to 02285 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 02286 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) 02287 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); 02288 return true; 02289 } 02290 } 02291 02292 return false; 02293 } 02294 02295 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, 02296 MachineInstr *DefMI, unsigned Reg, 02297 MachineRegisterInfo *MRI) const { 02298 // Fold large immediates into add, sub, or, xor. 02299 unsigned DefOpc = DefMI->getOpcode(); 02300 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) 02301 return false; 02302 if (!DefMI->getOperand(1).isImm()) 02303 // Could be t2MOVi32imm <ga:xx> 02304 return false; 02305 02306 if (!MRI->hasOneNonDBGUse(Reg)) 02307 return false; 02308 02309 const MCInstrDesc &DefMCID = DefMI->getDesc(); 02310 if (DefMCID.hasOptionalDef()) { 02311 unsigned NumOps = DefMCID.getNumOperands(); 02312 const MachineOperand &MO = DefMI->getOperand(NumOps-1); 02313 if (MO.getReg() == ARM::CPSR && !MO.isDead()) 02314 // If DefMI defines CPSR and it is not dead, it's obviously not safe 02315 // to delete DefMI. 
02316 return false; 02317 } 02318 02319 const MCInstrDesc &UseMCID = UseMI->getDesc(); 02320 if (UseMCID.hasOptionalDef()) { 02321 unsigned NumOps = UseMCID.getNumOperands(); 02322 if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) 02323 // If the instruction sets the flag, do not attempt this optimization 02324 // since it may change the semantics of the code. 02325 return false; 02326 } 02327 02328 unsigned UseOpc = UseMI->getOpcode(); 02329 unsigned NewUseOpc = 0; 02330 uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); 02331 uint32_t SOImmValV1 = 0, SOImmValV2 = 0; 02332 bool Commute = false; 02333 switch (UseOpc) { 02334 default: return false; 02335 case ARM::SUBrr: 02336 case ARM::ADDrr: 02337 case ARM::ORRrr: 02338 case ARM::EORrr: 02339 case ARM::t2SUBrr: 02340 case ARM::t2ADDrr: 02341 case ARM::t2ORRrr: 02342 case ARM::t2EORrr: { 02343 Commute = UseMI->getOperand(2).getReg() != Reg; 02344 switch (UseOpc) { 02345 default: break; 02346 case ARM::SUBrr: { 02347 if (Commute) 02348 return false; 02349 ImmVal = -ImmVal; 02350 NewUseOpc = ARM::SUBri; 02351 // Fallthrough 02352 } 02353 case ARM::ADDrr: 02354 case ARM::ORRrr: 02355 case ARM::EORrr: { 02356 if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) 02357 return false; 02358 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); 02359 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); 02360 switch (UseOpc) { 02361 default: break; 02362 case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; 02363 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; 02364 case ARM::EORrr: NewUseOpc = ARM::EORri; break; 02365 } 02366 break; 02367 } 02368 case ARM::t2SUBrr: { 02369 if (Commute) 02370 return false; 02371 ImmVal = -ImmVal; 02372 NewUseOpc = ARM::t2SUBri; 02373 // Fallthrough 02374 } 02375 case ARM::t2ADDrr: 02376 case ARM::t2ORRrr: 02377 case ARM::t2EORrr: { 02378 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) 02379 return false; 02380 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); 02381 
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); 02382 switch (UseOpc) { 02383 default: break; 02384 case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; 02385 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; 02386 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; 02387 } 02388 break; 02389 } 02390 } 02391 } 02392 } 02393 02394 unsigned OpIdx = Commute ? 2 : 1; 02395 unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); 02396 bool isKill = UseMI->getOperand(OpIdx).isKill(); 02397 unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); 02398 AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), 02399 UseMI, UseMI->getDebugLoc(), 02400 get(NewUseOpc), NewReg) 02401 .addReg(Reg1, getKillRegState(isKill)) 02402 .addImm(SOImmValV1))); 02403 UseMI->setDesc(get(NewUseOpc)); 02404 UseMI->getOperand(1).setReg(NewReg); 02405 UseMI->getOperand(1).setIsKill(); 02406 UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); 02407 DefMI->eraseFromParent(); 02408 return true; 02409 } 02410 02411 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, 02412 const MachineInstr *MI) { 02413 switch (MI->getOpcode()) { 02414 default: { 02415 const MCInstrDesc &Desc = MI->getDesc(); 02416 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); 02417 assert(UOps >= 0 && "bad # UOps"); 02418 return UOps; 02419 } 02420 02421 case ARM::LDRrs: 02422 case ARM::LDRBrs: 02423 case ARM::STRrs: 02424 case ARM::STRBrs: { 02425 unsigned ShOpVal = MI->getOperand(3).getImm(); 02426 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02427 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02428 if (!isSub && 02429 (ShImm == 0 || 02430 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02431 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02432 return 1; 02433 return 2; 02434 } 02435 02436 case ARM::LDRH: 02437 case ARM::STRH: { 02438 if (!MI->getOperand(2).getReg()) 02439 return 1; 02440 02441 unsigned ShOpVal = MI->getOperand(3).getImm(); 02442 bool 
isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02443 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02444 if (!isSub && 02445 (ShImm == 0 || 02446 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02447 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02448 return 1; 02449 return 2; 02450 } 02451 02452 case ARM::LDRSB: 02453 case ARM::LDRSH: 02454 return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2; 02455 02456 case ARM::LDRSB_POST: 02457 case ARM::LDRSH_POST: { 02458 unsigned Rt = MI->getOperand(0).getReg(); 02459 unsigned Rm = MI->getOperand(3).getReg(); 02460 return (Rt == Rm) ? 4 : 3; 02461 } 02462 02463 case ARM::LDR_PRE_REG: 02464 case ARM::LDRB_PRE_REG: { 02465 unsigned Rt = MI->getOperand(0).getReg(); 02466 unsigned Rm = MI->getOperand(3).getReg(); 02467 if (Rt == Rm) 02468 return 3; 02469 unsigned ShOpVal = MI->getOperand(4).getImm(); 02470 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02471 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02472 if (!isSub && 02473 (ShImm == 0 || 02474 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02475 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02476 return 2; 02477 return 3; 02478 } 02479 02480 case ARM::STR_PRE_REG: 02481 case ARM::STRB_PRE_REG: { 02482 unsigned ShOpVal = MI->getOperand(4).getImm(); 02483 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02484 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02485 if (!isSub && 02486 (ShImm == 0 || 02487 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02488 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02489 return 2; 02490 return 3; 02491 } 02492 02493 case ARM::LDRH_PRE: 02494 case ARM::STRH_PRE: { 02495 unsigned Rt = MI->getOperand(0).getReg(); 02496 unsigned Rm = MI->getOperand(3).getReg(); 02497 if (!Rm) 02498 return 2; 02499 if (Rt == Rm) 02500 return 3; 02501 return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) 02502 ? 
3 : 2; 02503 } 02504 02505 case ARM::LDR_POST_REG: 02506 case ARM::LDRB_POST_REG: 02507 case ARM::LDRH_POST: { 02508 unsigned Rt = MI->getOperand(0).getReg(); 02509 unsigned Rm = MI->getOperand(3).getReg(); 02510 return (Rt == Rm) ? 3 : 2; 02511 } 02512 02513 case ARM::LDR_PRE_IMM: 02514 case ARM::LDRB_PRE_IMM: 02515 case ARM::LDR_POST_IMM: 02516 case ARM::LDRB_POST_IMM: 02517 case ARM::STRB_POST_IMM: 02518 case ARM::STRB_POST_REG: 02519 case ARM::STRB_PRE_IMM: 02520 case ARM::STRH_POST: 02521 case ARM::STR_POST_IMM: 02522 case ARM::STR_POST_REG: 02523 case ARM::STR_PRE_IMM: 02524 return 2; 02525 02526 case ARM::LDRSB_PRE: 02527 case ARM::LDRSH_PRE: { 02528 unsigned Rm = MI->getOperand(3).getReg(); 02529 if (Rm == 0) 02530 return 3; 02531 unsigned Rt = MI->getOperand(0).getReg(); 02532 if (Rt == Rm) 02533 return 4; 02534 unsigned ShOpVal = MI->getOperand(4).getImm(); 02535 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 02536 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 02537 if (!isSub && 02538 (ShImm == 0 || 02539 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 02540 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 02541 return 3; 02542 return 4; 02543 } 02544 02545 case ARM::LDRD: { 02546 unsigned Rt = MI->getOperand(0).getReg(); 02547 unsigned Rn = MI->getOperand(2).getReg(); 02548 unsigned Rm = MI->getOperand(3).getReg(); 02549 if (Rm) 02550 return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; 02551 return (Rt == Rn) ? 
3 : 2; 02552 } 02553 02554 case ARM::STRD: { 02555 unsigned Rm = MI->getOperand(3).getReg(); 02556 if (Rm) 02557 return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; 02558 return 2; 02559 } 02560 02561 case ARM::LDRD_POST: 02562 case ARM::t2LDRD_POST: 02563 return 3; 02564 02565 case ARM::STRD_POST: 02566 case ARM::t2STRD_POST: 02567 return 4; 02568 02569 case ARM::LDRD_PRE: { 02570 unsigned Rt = MI->getOperand(0).getReg(); 02571 unsigned Rn = MI->getOperand(3).getReg(); 02572 unsigned Rm = MI->getOperand(4).getReg(); 02573 if (Rm) 02574 return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; 02575 return (Rt == Rn) ? 4 : 3; 02576 } 02577 02578 case ARM::t2LDRD_PRE: { 02579 unsigned Rt = MI->getOperand(0).getReg(); 02580 unsigned Rn = MI->getOperand(3).getReg(); 02581 return (Rt == Rn) ? 4 : 3; 02582 } 02583 02584 case ARM::STRD_PRE: { 02585 unsigned Rm = MI->getOperand(4).getReg(); 02586 if (Rm) 02587 return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; 02588 return 3; 02589 } 02590 02591 case ARM::t2STRD_PRE: 02592 return 3; 02593 02594 case ARM::t2LDR_POST: 02595 case ARM::t2LDRB_POST: 02596 case ARM::t2LDRB_PRE: 02597 case ARM::t2LDRSBi12: 02598 case ARM::t2LDRSBi8: 02599 case ARM::t2LDRSBpci: 02600 case ARM::t2LDRSBs: 02601 case ARM::t2LDRH_POST: 02602 case ARM::t2LDRH_PRE: 02603 case ARM::t2LDRSBT: 02604 case ARM::t2LDRSB_POST: 02605 case ARM::t2LDRSB_PRE: 02606 case ARM::t2LDRSH_POST: 02607 case ARM::t2LDRSH_PRE: 02608 case ARM::t2LDRSHi12: 02609 case ARM::t2LDRSHi8: 02610 case ARM::t2LDRSHpci: 02611 case ARM::t2LDRSHs: 02612 return 2; 02613 02614 case ARM::t2LDRDi8: { 02615 unsigned Rt = MI->getOperand(0).getReg(); 02616 unsigned Rn = MI->getOperand(2).getReg(); 02617 return (Rt == Rn) ? 
3 : 2; 02618 } 02619 02620 case ARM::t2STRB_POST: 02621 case ARM::t2STRB_PRE: 02622 case ARM::t2STRBs: 02623 case ARM::t2STRDi8: 02624 case ARM::t2STRH_POST: 02625 case ARM::t2STRH_PRE: 02626 case ARM::t2STRHs: 02627 case ARM::t2STR_POST: 02628 case ARM::t2STR_PRE: 02629 case ARM::t2STRs: 02630 return 2; 02631 } 02632 } 02633 02634 // Return the number of 32-bit words loaded by LDM or stored by STM. If this 02635 // can't be easily determined return 0 (missing MachineMemOperand). 02636 // 02637 // FIXME: The current MachineInstr design does not support relying on machine 02638 // mem operands to determine the width of a memory access. Instead, we expect 02639 // the target to provide this information based on the instruction opcode and 02640 // operands. However, using MachineMemOperand is a the best solution now for 02641 // two reasons: 02642 // 02643 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI 02644 // operands. This is much more dangerous than using the MachineMemOperand 02645 // sizes because CodeGen passes can insert/remove optional machine operands. In 02646 // fact, it's totally incorrect for preRA passes and appears to be wrong for 02647 // postRA passes as well. 02648 // 02649 // 2) getNumLDMAddresses is only used by the scheduling machine model and any 02650 // machine model that calls this should handle the unknown (zero size) case. 02651 // 02652 // Long term, we should require a target hook that verifies MachineMemOperand 02653 // sizes during MC lowering. That target hook should be local to MC lowering 02654 // because we can't ensure that it is aware of other MI forms. Doing this will 02655 // ensure that MachineMemOperands are correctly propagated through all passes. 
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
  // Sum the sizes of all attached memory operands and convert to 32-bit
  // words. An instruction without memory operands yields 0.
  unsigned Size = 0;
  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
         E = MI->memoperands_end(); I != E; ++I) {
    Size += (*I)->getSize();
  }
  return Size / 4;
}

/// getNumMicroOps - Return the number of micro-ops MI is modelled as.
///
/// Returns 1 when there is no (or an empty) itinerary. When the itinerary
/// gives a non-negative count for MI's scheduling class that count is used,
/// except that loads and stores on Swift are delegated to
/// getNumMicroOpsSwiftLdSt. A negative itinerary count marks a
/// variable-length instruction; its count is derived from the number of
/// register operands beyond those in the instruction descriptor, using a
/// subtarget-specific formula. Any other opcode with a negative count is
/// unexpected.
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isSwift()) {
      int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
      // Only the integer LDM / STM opcodes accepted by the enclosing case
      // labels can reach this switch.
      switch (Opc) {
      default: break;
      case ARM::LDMIA_UPD:
      case ARM::LDMDA_UPD:
      case ARM::LDMDB_UPD:
      case ARM::LDMIB_UPD:
      case ARM::STMIA_UPD:
      case ARM::STMDA_UPD:
      case ARM::STMDB_UPD:
      case ARM::STMIB_UPD:
      case ARM::tLDMIA_UPD:
      case ARM::tSTMIA_UPD:
      case ARM::t2LDMIA_UPD:
      case ARM::t2LDMDB_UPD:
      case ARM::t2STMIA_UPD:
      case ARM::t2STMDB_UPD:
        ++UOps; // One for base register writeback.
        break;
      case ARM::LDMIA_RET:
      case ARM::tPOP_RET:
      case ARM::t2LDMIA_RET:
        UOps += 2; // One for base reg wb, one for write to pc.
        break;
      }
      return UOps;
    } else if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      int A8UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++A8UOps;
      return A8UOps;
    } else if (Subtarget.isLikeA9()) {
      // Swift has already returned above, so it needs no test here.
      int A9UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++A9UOps;
      return A9UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}

/// getVLDMDefCycle - Return the cycle in which operand DefIdx of a VLDM-style
/// instruction described by DefMCID is defined.
///
/// The position of the def within the variable register list is derived from
/// DefIdx and the descriptor's operand count. Defs that are not in the
/// register list (the address writeback) use the itinerary's operand cycle
/// for DefClass. DefAlign is the alignment of the access; on A9-like and Swift
/// subtargets an alignment below 8 costs one extra cycle.
int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &DefMCID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = RegNo;
    bool isSLoad = false;

    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLDMSIA:
    case ARM::VLDMSIA_UPD:
    case ARM::VLDMSDB_UPD:
      isSLoad = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

02850 int 02851 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, 02852 const MCInstrDesc &DefMCID, 02853 unsigned DefClass, 02854 unsigned DefIdx, unsigned DefAlign) const { 02855 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 02856 if (RegNo <= 0) 02857 // Def is the address writeback. 02858 return ItinData->getOperandCycle(DefClass, DefIdx); 02859 02860 int DefCycle; 02861 if (Subtarget.isCortexA8()) { 02862 // 4 registers would be issued: 1, 2, 1. 02863 // 5 registers would be issued: 1, 2, 2. 02864 DefCycle = RegNo / 2; 02865 if (DefCycle < 1) 02866 DefCycle = 1; 02867 // Result latency is issue cycle + 2: E2. 02868 DefCycle += 2; 02869 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 02870 DefCycle = (RegNo / 2); 02871 // If there are odd number of registers or if it's not 64-bit aligned, 02872 // then it takes an extra AGU (Address Generation Unit) cycle. 02873 if ((RegNo % 2) || DefAlign < 8) 02874 ++DefCycle; 02875 // Result latency is AGU cycles + 2. 02876 DefCycle += 2; 02877 } else { 02878 // Assume the worst. 
02879 DefCycle = RegNo + 2; 02880 } 02881 02882 return DefCycle; 02883 } 02884 02885 int 02886 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 02887 const MCInstrDesc &UseMCID, 02888 unsigned UseClass, 02889 unsigned UseIdx, unsigned UseAlign) const { 02890 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 02891 if (RegNo <= 0) 02892 return ItinData->getOperandCycle(UseClass, UseIdx); 02893 02894 int UseCycle; 02895 if (Subtarget.isCortexA8()) { 02896 // (regno / 2) + (regno % 2) + 1 02897 UseCycle = RegNo / 2 + 1; 02898 if (RegNo % 2) 02899 ++UseCycle; 02900 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 02901 UseCycle = RegNo; 02902 bool isSStore = false; 02903 02904 switch (UseMCID.getOpcode()) { 02905 default: break; 02906 case ARM::VSTMSIA: 02907 case ARM::VSTMSIA_UPD: 02908 case ARM::VSTMSDB_UPD: 02909 isSStore = true; 02910 break; 02911 } 02912 02913 // If there are odd number of 'S' registers or if it's not 64-bit aligned, 02914 // then it takes an extra cycle. 02915 if ((isSStore && (RegNo % 2)) || UseAlign < 8) 02916 ++UseCycle; 02917 } else { 02918 // Assume the worst. 02919 UseCycle = RegNo + 2; 02920 } 02921 02922 return UseCycle; 02923 } 02924 02925 int 02926 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, 02927 const MCInstrDesc &UseMCID, 02928 unsigned UseClass, 02929 unsigned UseIdx, unsigned UseAlign) const { 02930 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 02931 if (RegNo <= 0) 02932 return ItinData->getOperandCycle(UseClass, UseIdx); 02933 02934 int UseCycle; 02935 if (Subtarget.isCortexA8()) { 02936 UseCycle = RegNo / 2; 02937 if (UseCycle < 2) 02938 UseCycle = 2; 02939 // Read in E3. 02940 UseCycle += 2; 02941 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 02942 UseCycle = (RegNo / 2); 02943 // If there are odd number of registers or if it's not 64-bit aligned, 02944 // then it takes an extra AGU (Address Generation Unit) cycle. 
02945 if ((RegNo % 2) || UseAlign < 8) 02946 ++UseCycle; 02947 } else { 02948 // Assume the worst. 02949 UseCycle = 1; 02950 } 02951 return UseCycle; 02952 } 02953 02954 int 02955 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 02956 const MCInstrDesc &DefMCID, 02957 unsigned DefIdx, unsigned DefAlign, 02958 const MCInstrDesc &UseMCID, 02959 unsigned UseIdx, unsigned UseAlign) const { 02960 unsigned DefClass = DefMCID.getSchedClass(); 02961 unsigned UseClass = UseMCID.getSchedClass(); 02962 02963 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) 02964 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); 02965 02966 // This may be a def / use of a variable_ops instruction, the operand 02967 // latency might be determinable dynamically. Let the target try to 02968 // figure it out. 02969 int DefCycle = -1; 02970 bool LdmBypass = false; 02971 switch (DefMCID.getOpcode()) { 02972 default: 02973 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 02974 break; 02975 02976 case ARM::VLDMDIA: 02977 case ARM::VLDMDIA_UPD: 02978 case ARM::VLDMDDB_UPD: 02979 case ARM::VLDMSIA: 02980 case ARM::VLDMSIA_UPD: 02981 case ARM::VLDMSDB_UPD: 02982 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 02983 break; 02984 02985 case ARM::LDMIA_RET: 02986 case ARM::LDMIA: 02987 case ARM::LDMDA: 02988 case ARM::LDMDB: 02989 case ARM::LDMIB: 02990 case ARM::LDMIA_UPD: 02991 case ARM::LDMDA_UPD: 02992 case ARM::LDMDB_UPD: 02993 case ARM::LDMIB_UPD: 02994 case ARM::tLDMIA: 02995 case ARM::tLDMIA_UPD: 02996 case ARM::tPUSH: 02997 case ARM::t2LDMIA_RET: 02998 case ARM::t2LDMIA: 02999 case ARM::t2LDMDB: 03000 case ARM::t2LDMIA_UPD: 03001 case ARM::t2LDMDB_UPD: 03002 LdmBypass = 1; 03003 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 03004 break; 03005 } 03006 03007 if (DefCycle == -1) 03008 // We can't seem to determine the result latency of the def, assume it's 2. 
03009 DefCycle = 2; 03010 03011 int UseCycle = -1; 03012 switch (UseMCID.getOpcode()) { 03013 default: 03014 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); 03015 break; 03016 03017 case ARM::VSTMDIA: 03018 case ARM::VSTMDIA_UPD: 03019 case ARM::VSTMDDB_UPD: 03020 case ARM::VSTMSIA: 03021 case ARM::VSTMSIA_UPD: 03022 case ARM::VSTMSDB_UPD: 03023 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 03024 break; 03025 03026 case ARM::STMIA: 03027 case ARM::STMDA: 03028 case ARM::STMDB: 03029 case ARM::STMIB: 03030 case ARM::STMIA_UPD: 03031 case ARM::STMDA_UPD: 03032 case ARM::STMDB_UPD: 03033 case ARM::STMIB_UPD: 03034 case ARM::tSTMIA_UPD: 03035 case ARM::tPOP_RET: 03036 case ARM::tPOP: 03037 case ARM::t2STMIA: 03038 case ARM::t2STMDB: 03039 case ARM::t2STMIA_UPD: 03040 case ARM::t2STMDB_UPD: 03041 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 03042 break; 03043 } 03044 03045 if (UseCycle == -1) 03046 // Assume it's read in the first stage. 03047 UseCycle = 1; 03048 03049 UseCycle = DefCycle - UseCycle + 1; 03050 if (UseCycle > 0) { 03051 if (LdmBypass) { 03052 // It's a variable_ops instruction so we can't use DefIdx here. Just use 03053 // first def operand. 
03054 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, 03055 UseClass, UseIdx)) 03056 --UseCycle; 03057 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, 03058 UseClass, UseIdx)) { 03059 --UseCycle; 03060 } 03061 } 03062 03063 return UseCycle; 03064 } 03065 03066 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, 03067 const MachineInstr *MI, unsigned Reg, 03068 unsigned &DefIdx, unsigned &Dist) { 03069 Dist = 0; 03070 03071 MachineBasicBlock::const_iterator I = MI; ++I; 03072 MachineBasicBlock::const_instr_iterator II = 03073 llvm::prior(I.getInstrIterator()); 03074 assert(II->isInsideBundle() && "Empty bundle?"); 03075 03076 int Idx = -1; 03077 while (II->isInsideBundle()) { 03078 Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); 03079 if (Idx != -1) 03080 break; 03081 --II; 03082 ++Dist; 03083 } 03084 03085 assert(Idx != -1 && "Cannot find bundled definition!"); 03086 DefIdx = Idx; 03087 return II; 03088 } 03089 03090 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, 03091 const MachineInstr *MI, unsigned Reg, 03092 unsigned &UseIdx, unsigned &Dist) { 03093 Dist = 0; 03094 03095 MachineBasicBlock::const_instr_iterator II = MI; ++II; 03096 assert(II->isInsideBundle() && "Empty bundle?"); 03097 MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 03098 03099 // FIXME: This doesn't properly handle multiple uses. 03100 int Idx = -1; 03101 while (II != E && II->isInsideBundle()) { 03102 Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); 03103 if (Idx != -1) 03104 break; 03105 if (II->getOpcode() != ARM::t2IT) 03106 ++Dist; 03107 ++II; 03108 } 03109 03110 if (Idx == -1) { 03111 Dist = 0; 03112 return 0; 03113 } 03114 03115 UseIdx = Idx; 03116 return II; 03117 } 03118 03119 /// Return the number of cycles to add to (or subtract from) the static 03120 /// itinerary based on the def opcode and alignment. 
The caller will ensure that 03121 /// adjusted latency is at least one cycle. 03122 static int adjustDefLatency(const ARMSubtarget &Subtarget, 03123 const MachineInstr *DefMI, 03124 const MCInstrDesc *DefMCID, unsigned DefAlign) { 03125 int Adjust = 0; 03126 if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { 03127 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 03128 // variants are one cycle cheaper. 03129 switch (DefMCID->getOpcode()) { 03130 default: break; 03131 case ARM::LDRrs: 03132 case ARM::LDRBrs: { 03133 unsigned ShOpVal = DefMI->getOperand(3).getImm(); 03134 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03135 if (ShImm == 0 || 03136 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 03137 --Adjust; 03138 break; 03139 } 03140 case ARM::t2LDRs: 03141 case ARM::t2LDRBs: 03142 case ARM::t2LDRHs: 03143 case ARM::t2LDRSHs: { 03144 // Thumb2 mode: lsl only. 03145 unsigned ShAmt = DefMI->getOperand(3).getImm(); 03146 if (ShAmt == 0 || ShAmt == 2) 03147 --Adjust; 03148 break; 03149 } 03150 } 03151 } else if (Subtarget.isSwift()) { 03152 // FIXME: Properly handle all of the latency adjustments for address 03153 // writeback. 03154 switch (DefMCID->getOpcode()) { 03155 default: break; 03156 case ARM::LDRrs: 03157 case ARM::LDRBrs: { 03158 unsigned ShOpVal = DefMI->getOperand(3).getImm(); 03159 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 03160 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03161 if (!isSub && 03162 (ShImm == 0 || 03163 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 03164 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 03165 Adjust -= 2; 03166 else if (!isSub && 03167 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 03168 --Adjust; 03169 break; 03170 } 03171 case ARM::t2LDRs: 03172 case ARM::t2LDRBs: 03173 case ARM::t2LDRHs: 03174 case ARM::t2LDRSHs: { 03175 // Thumb2 mode: lsl only. 
03176 unsigned ShAmt = DefMI->getOperand(3).getImm(); 03177 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) 03178 Adjust -= 2; 03179 break; 03180 } 03181 } 03182 } 03183 03184 if (DefAlign < 8 && Subtarget.isLikeA9()) { 03185 switch (DefMCID->getOpcode()) { 03186 default: break; 03187 case ARM::VLD1q8: 03188 case ARM::VLD1q16: 03189 case ARM::VLD1q32: 03190 case ARM::VLD1q64: 03191 case ARM::VLD1q8wb_fixed: 03192 case ARM::VLD1q16wb_fixed: 03193 case ARM::VLD1q32wb_fixed: 03194 case ARM::VLD1q64wb_fixed: 03195 case ARM::VLD1q8wb_register: 03196 case ARM::VLD1q16wb_register: 03197 case ARM::VLD1q32wb_register: 03198 case ARM::VLD1q64wb_register: 03199 case ARM::VLD2d8: 03200 case ARM::VLD2d16: 03201 case ARM::VLD2d32: 03202 case ARM::VLD2q8: 03203 case ARM::VLD2q16: 03204 case ARM::VLD2q32: 03205 case ARM::VLD2d8wb_fixed: 03206 case ARM::VLD2d16wb_fixed: 03207 case ARM::VLD2d32wb_fixed: 03208 case ARM::VLD2q8wb_fixed: 03209 case ARM::VLD2q16wb_fixed: 03210 case ARM::VLD2q32wb_fixed: 03211 case ARM::VLD2d8wb_register: 03212 case ARM::VLD2d16wb_register: 03213 case ARM::VLD2d32wb_register: 03214 case ARM::VLD2q8wb_register: 03215 case ARM::VLD2q16wb_register: 03216 case ARM::VLD2q32wb_register: 03217 case ARM::VLD3d8: 03218 case ARM::VLD3d16: 03219 case ARM::VLD3d32: 03220 case ARM::VLD1d64T: 03221 case ARM::VLD3d8_UPD: 03222 case ARM::VLD3d16_UPD: 03223 case ARM::VLD3d32_UPD: 03224 case ARM::VLD1d64Twb_fixed: 03225 case ARM::VLD1d64Twb_register: 03226 case ARM::VLD3q8_UPD: 03227 case ARM::VLD3q16_UPD: 03228 case ARM::VLD3q32_UPD: 03229 case ARM::VLD4d8: 03230 case ARM::VLD4d16: 03231 case ARM::VLD4d32: 03232 case ARM::VLD1d64Q: 03233 case ARM::VLD4d8_UPD: 03234 case ARM::VLD4d16_UPD: 03235 case ARM::VLD4d32_UPD: 03236 case ARM::VLD1d64Qwb_fixed: 03237 case ARM::VLD1d64Qwb_register: 03238 case ARM::VLD4q8_UPD: 03239 case ARM::VLD4q16_UPD: 03240 case ARM::VLD4q32_UPD: 03241 case ARM::VLD1DUPq8: 03242 case ARM::VLD1DUPq16: 03243 case ARM::VLD1DUPq32: 03244 
case ARM::VLD1DUPq8wb_fixed: 03245 case ARM::VLD1DUPq16wb_fixed: 03246 case ARM::VLD1DUPq32wb_fixed: 03247 case ARM::VLD1DUPq8wb_register: 03248 case ARM::VLD1DUPq16wb_register: 03249 case ARM::VLD1DUPq32wb_register: 03250 case ARM::VLD2DUPd8: 03251 case ARM::VLD2DUPd16: 03252 case ARM::VLD2DUPd32: 03253 case ARM::VLD2DUPd8wb_fixed: 03254 case ARM::VLD2DUPd16wb_fixed: 03255 case ARM::VLD2DUPd32wb_fixed: 03256 case ARM::VLD2DUPd8wb_register: 03257 case ARM::VLD2DUPd16wb_register: 03258 case ARM::VLD2DUPd32wb_register: 03259 case ARM::VLD4DUPd8: 03260 case ARM::VLD4DUPd16: 03261 case ARM::VLD4DUPd32: 03262 case ARM::VLD4DUPd8_UPD: 03263 case ARM::VLD4DUPd16_UPD: 03264 case ARM::VLD4DUPd32_UPD: 03265 case ARM::VLD1LNd8: 03266 case ARM::VLD1LNd16: 03267 case ARM::VLD1LNd32: 03268 case ARM::VLD1LNd8_UPD: 03269 case ARM::VLD1LNd16_UPD: 03270 case ARM::VLD1LNd32_UPD: 03271 case ARM::VLD2LNd8: 03272 case ARM::VLD2LNd16: 03273 case ARM::VLD2LNd32: 03274 case ARM::VLD2LNq16: 03275 case ARM::VLD2LNq32: 03276 case ARM::VLD2LNd8_UPD: 03277 case ARM::VLD2LNd16_UPD: 03278 case ARM::VLD2LNd32_UPD: 03279 case ARM::VLD2LNq16_UPD: 03280 case ARM::VLD2LNq32_UPD: 03281 case ARM::VLD4LNd8: 03282 case ARM::VLD4LNd16: 03283 case ARM::VLD4LNd32: 03284 case ARM::VLD4LNq16: 03285 case ARM::VLD4LNq32: 03286 case ARM::VLD4LNd8_UPD: 03287 case ARM::VLD4LNd16_UPD: 03288 case ARM::VLD4LNd32_UPD: 03289 case ARM::VLD4LNq16_UPD: 03290 case ARM::VLD4LNq32_UPD: 03291 // If the address is not 64-bit aligned, the latencies of these 03292 // instructions increases by one. 03293 ++Adjust; 03294 break; 03295 } 03296 } 03297 return Adjust; 03298 } 03299 03300 03301 03302 int 03303 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 03304 const MachineInstr *DefMI, unsigned DefIdx, 03305 const MachineInstr *UseMI, 03306 unsigned UseIdx) const { 03307 // No operand latency. The caller may fall back to getInstrLatency. 
03308 if (!ItinData || ItinData->isEmpty()) 03309 return -1; 03310 03311 const MachineOperand &DefMO = DefMI->getOperand(DefIdx); 03312 unsigned Reg = DefMO.getReg(); 03313 const MCInstrDesc *DefMCID = &DefMI->getDesc(); 03314 const MCInstrDesc *UseMCID = &UseMI->getDesc(); 03315 03316 unsigned DefAdj = 0; 03317 if (DefMI->isBundle()) { 03318 DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); 03319 DefMCID = &DefMI->getDesc(); 03320 } 03321 if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || 03322 DefMI->isRegSequence() || DefMI->isImplicitDef()) { 03323 return 1; 03324 } 03325 03326 unsigned UseAdj = 0; 03327 if (UseMI->isBundle()) { 03328 unsigned NewUseIdx; 03329 const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, 03330 Reg, NewUseIdx, UseAdj); 03331 if (!NewUseMI) 03332 return -1; 03333 03334 UseMI = NewUseMI; 03335 UseIdx = NewUseIdx; 03336 UseMCID = &UseMI->getDesc(); 03337 } 03338 03339 if (Reg == ARM::CPSR) { 03340 if (DefMI->getOpcode() == ARM::FMSTAT) { 03341 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) 03342 return Subtarget.isLikeA9() ? 1 : 20; 03343 } 03344 03345 // CPSR set and branch can be paired in the same cycle. 03346 if (UseMI->isBranch()) 03347 return 0; 03348 03349 // Otherwise it takes the instruction latency (generally one). 03350 unsigned Latency = getInstrLatency(ItinData, DefMI); 03351 03352 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to 03353 // its uses. Instructions which are otherwise scheduled between them may 03354 // incur a code size penalty (not able to use the CPSR setting 16-bit 03355 // instructions). 03356 if (Latency > 0 && Subtarget.isThumb2()) { 03357 const MachineFunction *MF = DefMI->getParent()->getParent(); 03358 if (MF->getFunction()->getAttributes(). 
03359 hasAttribute(AttributeSet::FunctionIndex, 03360 Attribute::OptimizeForSize)) 03361 --Latency; 03362 } 03363 return Latency; 03364 } 03365 03366 if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) 03367 return -1; 03368 03369 unsigned DefAlign = DefMI->hasOneMemOperand() 03370 ? (*DefMI->memoperands_begin())->getAlignment() : 0; 03371 unsigned UseAlign = UseMI->hasOneMemOperand() 03372 ? (*UseMI->memoperands_begin())->getAlignment() : 0; 03373 03374 // Get the itinerary's latency if possible, and handle variable_ops. 03375 int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, 03376 *UseMCID, UseIdx, UseAlign); 03377 // Unable to find operand latency. The caller may resort to getInstrLatency. 03378 if (Latency < 0) 03379 return Latency; 03380 03381 // Adjust for IT block position. 03382 int Adj = DefAdj + UseAdj; 03383 03384 // Adjust for dynamic def-side opcode variants not captured by the itinerary. 03385 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); 03386 if (Adj >= 0 || (int)Latency > -Adj) { 03387 return Latency + Adj; 03388 } 03389 // Return the itinerary latency, which may be zero but not less than zero. 03390 return Latency; 03391 } 03392 03393 int 03394 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 03395 SDNode *DefNode, unsigned DefIdx, 03396 SDNode *UseNode, unsigned UseIdx) const { 03397 if (!DefNode->isMachineOpcode()) 03398 return 1; 03399 03400 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 03401 03402 if (isZeroCost(DefMCID.Opcode)) 03403 return 0; 03404 03405 if (!ItinData || ItinData->isEmpty()) 03406 return DefMCID.mayLoad() ? 3 : 1; 03407 03408 if (!UseNode->isMachineOpcode()) { 03409 int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 03410 if (Subtarget.isLikeA9() || Subtarget.isSwift()) 03411 return Latency <= 2 ? 1 : Latency - 1; 03412 else 03413 return Latency <= 3 ? 
1 : Latency - 2; 03414 } 03415 03416 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 03417 const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); 03418 unsigned DefAlign = !DefMN->memoperands_empty() 03419 ? (*DefMN->memoperands_begin())->getAlignment() : 0; 03420 const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); 03421 unsigned UseAlign = !UseMN->memoperands_empty() 03422 ? (*UseMN->memoperands_begin())->getAlignment() : 0; 03423 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 03424 UseMCID, UseIdx, UseAlign); 03425 03426 if (Latency > 1 && 03427 (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { 03428 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 03429 // variants are one cycle cheaper. 03430 switch (DefMCID.getOpcode()) { 03431 default: break; 03432 case ARM::LDRrs: 03433 case ARM::LDRBrs: { 03434 unsigned ShOpVal = 03435 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 03436 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03437 if (ShImm == 0 || 03438 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 03439 --Latency; 03440 break; 03441 } 03442 case ARM::t2LDRs: 03443 case ARM::t2LDRBs: 03444 case ARM::t2LDRHs: 03445 case ARM::t2LDRSHs: { 03446 // Thumb2 mode: lsl only. 03447 unsigned ShAmt = 03448 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 03449 if (ShAmt == 0 || ShAmt == 2) 03450 --Latency; 03451 break; 03452 } 03453 } 03454 } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { 03455 // FIXME: Properly handle all of the latency adjustments for address 03456 // writeback. 
03457 switch (DefMCID.getOpcode()) { 03458 default: break; 03459 case ARM::LDRrs: 03460 case ARM::LDRBrs: { 03461 unsigned ShOpVal = 03462 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 03463 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 03464 if (ShImm == 0 || 03465 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 03466 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 03467 Latency -= 2; 03468 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 03469 --Latency; 03470 break; 03471 } 03472 case ARM::t2LDRs: 03473 case ARM::t2LDRBs: 03474 case ARM::t2LDRHs: 03475 case ARM::t2LDRSHs: { 03476 // Thumb2 mode: lsl 0-3 only. 03477 Latency -= 2; 03478 break; 03479 } 03480 } 03481 } 03482 03483 if (DefAlign < 8 && Subtarget.isLikeA9()) 03484 switch (DefMCID.getOpcode()) { 03485 default: break; 03486 case ARM::VLD1q8: 03487 case ARM::VLD1q16: 03488 case ARM::VLD1q32: 03489 case ARM::VLD1q64: 03490 case ARM::VLD1q8wb_register: 03491 case ARM::VLD1q16wb_register: 03492 case ARM::VLD1q32wb_register: 03493 case ARM::VLD1q64wb_register: 03494 case ARM::VLD1q8wb_fixed: 03495 case ARM::VLD1q16wb_fixed: 03496 case ARM::VLD1q32wb_fixed: 03497 case ARM::VLD1q64wb_fixed: 03498 case ARM::VLD2d8: 03499 case ARM::VLD2d16: 03500 case ARM::VLD2d32: 03501 case ARM::VLD2q8Pseudo: 03502 case ARM::VLD2q16Pseudo: 03503 case ARM::VLD2q32Pseudo: 03504 case ARM::VLD2d8wb_fixed: 03505 case ARM::VLD2d16wb_fixed: 03506 case ARM::VLD2d32wb_fixed: 03507 case ARM::VLD2q8PseudoWB_fixed: 03508 case ARM::VLD2q16PseudoWB_fixed: 03509 case ARM::VLD2q32PseudoWB_fixed: 03510 case ARM::VLD2d8wb_register: 03511 case ARM::VLD2d16wb_register: 03512 case ARM::VLD2d32wb_register: 03513 case ARM::VLD2q8PseudoWB_register: 03514 case ARM::VLD2q16PseudoWB_register: 03515 case ARM::VLD2q32PseudoWB_register: 03516 case ARM::VLD3d8Pseudo: 03517 case ARM::VLD3d16Pseudo: 03518 case ARM::VLD3d32Pseudo: 03519 case ARM::VLD1d64TPseudo: 03520 case ARM::VLD3d8Pseudo_UPD: 03521 case 
ARM::VLD3d16Pseudo_UPD: 03522 case ARM::VLD3d32Pseudo_UPD: 03523 case ARM::VLD3q8Pseudo_UPD: 03524 case ARM::VLD3q16Pseudo_UPD: 03525 case ARM::VLD3q32Pseudo_UPD: 03526 case ARM::VLD3q8oddPseudo: 03527 case ARM::VLD3q16oddPseudo: 03528 case ARM::VLD3q32oddPseudo: 03529 case ARM::VLD3q8oddPseudo_UPD: 03530 case ARM::VLD3q16oddPseudo_UPD: 03531 case ARM::VLD3q32oddPseudo_UPD: 03532 case ARM::VLD4d8Pseudo: 03533 case ARM::VLD4d16Pseudo: 03534 case ARM::VLD4d32Pseudo: 03535 case ARM::VLD1d64QPseudo: 03536 case ARM::VLD4d8Pseudo_UPD: 03537 case ARM::VLD4d16Pseudo_UPD: 03538 case ARM::VLD4d32Pseudo_UPD: 03539 case ARM::VLD4q8Pseudo_UPD: 03540 case ARM::VLD4q16Pseudo_UPD: 03541 case ARM::VLD4q32Pseudo_UPD: 03542 case ARM::VLD4q8oddPseudo: 03543 case ARM::VLD4q16oddPseudo: 03544 case ARM::VLD4q32oddPseudo: 03545 case ARM::VLD4q8oddPseudo_UPD: 03546 case ARM::VLD4q16oddPseudo_UPD: 03547 case ARM::VLD4q32oddPseudo_UPD: 03548 case ARM::VLD1DUPq8: 03549 case ARM::VLD1DUPq16: 03550 case ARM::VLD1DUPq32: 03551 case ARM::VLD1DUPq8wb_fixed: 03552 case ARM::VLD1DUPq16wb_fixed: 03553 case ARM::VLD1DUPq32wb_fixed: 03554 case ARM::VLD1DUPq8wb_register: 03555 case ARM::VLD1DUPq16wb_register: 03556 case ARM::VLD1DUPq32wb_register: 03557 case ARM::VLD2DUPd8: 03558 case ARM::VLD2DUPd16: 03559 case ARM::VLD2DUPd32: 03560 case ARM::VLD2DUPd8wb_fixed: 03561 case ARM::VLD2DUPd16wb_fixed: 03562 case ARM::VLD2DUPd32wb_fixed: 03563 case ARM::VLD2DUPd8wb_register: 03564 case ARM::VLD2DUPd16wb_register: 03565 case ARM::VLD2DUPd32wb_register: 03566 case ARM::VLD4DUPd8Pseudo: 03567 case ARM::VLD4DUPd16Pseudo: 03568 case ARM::VLD4DUPd32Pseudo: 03569 case ARM::VLD4DUPd8Pseudo_UPD: 03570 case ARM::VLD4DUPd16Pseudo_UPD: 03571 case ARM::VLD4DUPd32Pseudo_UPD: 03572 case ARM::VLD1LNq8Pseudo: 03573 case ARM::VLD1LNq16Pseudo: 03574 case ARM::VLD1LNq32Pseudo: 03575 case ARM::VLD1LNq8Pseudo_UPD: 03576 case ARM::VLD1LNq16Pseudo_UPD: 03577 case ARM::VLD1LNq32Pseudo_UPD: 03578 case ARM::VLD2LNd8Pseudo: 03579 case 
ARM::VLD2LNd16Pseudo: 03580 case ARM::VLD2LNd32Pseudo: 03581 case ARM::VLD2LNq16Pseudo: 03582 case ARM::VLD2LNq32Pseudo: 03583 case ARM::VLD2LNd8Pseudo_UPD: 03584 case ARM::VLD2LNd16Pseudo_UPD: 03585 case ARM::VLD2LNd32Pseudo_UPD: 03586 case ARM::VLD2LNq16Pseudo_UPD: 03587 case ARM::VLD2LNq32Pseudo_UPD: 03588 case ARM::VLD4LNd8Pseudo: 03589 case ARM::VLD4LNd16Pseudo: 03590 case ARM::VLD4LNd32Pseudo: 03591 case ARM::VLD4LNq16Pseudo: 03592 case ARM::VLD4LNq32Pseudo: 03593 case ARM::VLD4LNd8Pseudo_UPD: 03594 case ARM::VLD4LNd16Pseudo_UPD: 03595 case ARM::VLD4LNd32Pseudo_UPD: 03596 case ARM::VLD4LNq16Pseudo_UPD: 03597 case ARM::VLD4LNq32Pseudo_UPD: 03598 // If the address is not 64-bit aligned, the latencies of these 03599 // instructions increases by one. 03600 ++Latency; 03601 break; 03602 } 03603 03604 return Latency; 03605 } 03606 03607 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 03608 const MachineInstr *MI, 03609 unsigned *PredCost) const { 03610 if (MI->isCopyLike() || MI->isInsertSubreg() || 03611 MI->isRegSequence() || MI->isImplicitDef()) 03612 return 1; 03613 03614 // An instruction scheduler typically runs on unbundled instructions, however 03615 // other passes may query the latency of a bundled instruction. 03616 if (MI->isBundle()) { 03617 unsigned Latency = 0; 03618 MachineBasicBlock::const_instr_iterator I = MI; 03619 MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 03620 while (++I != E && I->isInsideBundle()) { 03621 if (I->getOpcode() != ARM::t2IT) 03622 Latency += getInstrLatency(ItinData, I, PredCost); 03623 } 03624 return Latency; 03625 } 03626 03627 const MCInstrDesc &MCID = MI->getDesc(); 03628 if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { 03629 // When predicated, CPSR is an additional source operand for CPSR updating 03630 // instructions, this apparently increases their latencies. 
03631 *PredCost = 1; 03632 } 03633 // Be sure to call getStageLatency for an empty itinerary in case it has a 03634 // valid MinLatency property. 03635 if (!ItinData) 03636 return MI->mayLoad() ? 3 : 1; 03637 03638 unsigned Class = MCID.getSchedClass(); 03639 03640 // For instructions with variable uops, use uops as latency. 03641 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) 03642 return getNumMicroOps(ItinData, MI); 03643 03644 // For the common case, fall back on the itinerary's latency. 03645 unsigned Latency = ItinData->getStageLatency(Class); 03646 03647 // Adjust for dynamic def-side opcode variants not captured by the itinerary. 03648 unsigned DefAlign = MI->hasOneMemOperand() 03649 ? (*MI->memoperands_begin())->getAlignment() : 0; 03650 int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign); 03651 if (Adj >= 0 || (int)Latency > -Adj) { 03652 return Latency + Adj; 03653 } 03654 return Latency; 03655 } 03656 03657 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 03658 SDNode *Node) const { 03659 if (!Node->isMachineOpcode()) 03660 return 1; 03661 03662 if (!ItinData || ItinData->isEmpty()) 03663 return 1; 03664 03665 unsigned Opcode = Node->getMachineOpcode(); 03666 switch (Opcode) { 03667 default: 03668 return ItinData->getStageLatency(get(Opcode).getSchedClass()); 03669 case ARM::VLDMQIA: 03670 case ARM::VSTMQIA: 03671 return 2; 03672 } 03673 } 03674 03675 bool ARMBaseInstrInfo:: 03676 hasHighOperandLatency(const InstrItineraryData *ItinData, 03677 const MachineRegisterInfo *MRI, 03678 const MachineInstr *DefMI, unsigned DefIdx, 03679 const MachineInstr *UseMI, unsigned UseIdx) const { 03680 unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 03681 unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; 03682 if (Subtarget.isCortexA8() && 03683 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 03684 // CortexA8 VFP instructions are not pipelined. 
03685 return true; 03686 03687 // Hoist VFP / NEON instructions with 4 or higher latency. 03688 int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, 03689 /*FindMin=*/false); 03690 if (Latency < 0) 03691 Latency = getInstrLatency(ItinData, DefMI); 03692 if (Latency <= 3) 03693 return false; 03694 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 03695 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 03696 } 03697 03698 bool ARMBaseInstrInfo:: 03699 hasLowDefLatency(const InstrItineraryData *ItinData, 03700 const MachineInstr *DefMI, unsigned DefIdx) const { 03701 if (!ItinData || ItinData->isEmpty()) 03702 return false; 03703 03704 unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 03705 if (DDomain == ARMII::DomainGeneral) { 03706 unsigned DefClass = DefMI->getDesc().getSchedClass(); 03707 int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 03708 return (DefCycle != -1 && DefCycle <= 2); 03709 } 03710 return false; 03711 } 03712 03713 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, 03714 StringRef &ErrInfo) const { 03715 if (convertAddSubFlagsOpcode(MI->getOpcode())) { 03716 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 03717 return false; 03718 } 03719 return true; 03720 } 03721 03722 bool 03723 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 03724 unsigned &AddSubOpc, 03725 bool &NegAcc, bool &HasLane) const { 03726 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 03727 if (I == MLxEntryMap.end()) 03728 return false; 03729 03730 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 03731 MulOpc = Entry.MulOpc; 03732 AddSubOpc = Entry.AddSubOpc; 03733 NegAcc = Entry.NegAcc; 03734 HasLane = Entry.HasLane; 03735 return true; 03736 } 03737 03738 //===----------------------------------------------------------------------===// 03739 // Execution domains. 
//===----------------------------------------------------------------------===//
//
// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
// and some can go down both.  The vmov instructions go down the VFP pipeline,
// but they can be changed to vorr equivalents that are executed by the NEON
// pipeline.
//
// We use the following execution domain numbering:
//
enum ARMExeDomain {
  ExeGeneric = 0,
  ExeVFP = 1,
  ExeNEON = 2
};
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//

/// Report the execution domain of \p MI.
///
/// The first element of the returned pair is the domain the instruction is
/// currently in (an ARMExeDomain value). The second element is a bitmask with
/// bit N set for each ARMExeDomain N the instruction can be changed to, or 0
/// when the instruction cannot be changed.
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
  // VMOVD is a VFP instruction, but can be changed to NEON if it is not
  // predicated.
  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));

  // CortexA9 is particularly picky about mixing the two and wants these
  // converted: unpredicated VMOVRS, VMOVSR and VMOVS are also reported as
  // changeable there.
  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
      (MI->getOpcode() == ARM::VMOVRS ||
       MI->getOpcode() == ARM::VMOVSR ||
       MI->getOpcode() == ARM::VMOVS))
    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));

  // No other instructions can be swizzled, so just determine their domain.
  unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;

  if (Domain & ARMII::DomainNEON)
    return std::make_pair(ExeNEON, 0);

  // Certain instructions can go either way on Cortex-A8.
  // Treat them as NEON instructions.
  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
    return std::make_pair(ExeNEON, 0);

  if (Domain & ARMII::DomainVFP)
    return std::make_pair(ExeVFP, 0);

  return std::make_pair(ExeGeneric, 0);
}

/// Map the S-register \p SReg to the DPR-class register that contains it.
///
/// \p Lane is set to 0 if \p SReg is the ssub_0 sub-register of the returned
/// D-register, and to 1 if it is the ssub_1 sub-register. Asserts that a
/// containing D-register exists.
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
                                            unsigned SReg, unsigned &Lane) {
  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
  Lane = 0;

  if (DReg != ARM::NoRegister)
   return DReg;

  // Not lane 0, so try the other sub-register index.
  Lane = 1;
  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);

  assert(DReg && "S-register with no D super-register?");
  return DReg;
}

/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
/// set ImplicitSReg to a register number that must be marked as implicit-use or
/// zero if no register needs to be defined as implicit-use.
///
/// If the function cannot determine if an SPR should be marked implicit use or
/// not, it returns false. (In that case ImplicitSReg is left holding the
/// candidate SPR; callers in this file abandon the rewrite.)
///
/// This function handles cases where an instruction is being modified from taking
/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
/// lane of the DPR).
///
/// If the other SPR is defined, an implicit-use of it should be added. Else,
/// (including the case where the DPR itself is defined), it should not.
///
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
                                       MachineInstr *MI,
                                       unsigned DReg, unsigned Lane,
                                       unsigned &ImplicitSReg) {
  // If the DPR is defined or used already, the other SPR lane will be chained
  // correctly, so there is nothing to be done.
  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
    ImplicitSReg = 0;
    return true;
  }

  // Otherwise we need to go searching to see if the SPR is set explicitly.
  // The candidate is the sub-register for the *other* lane: ssub_0 when Lane
  // is odd, ssub_1 when it is even.
  ImplicitSReg = TRI->getSubReg(DReg,
                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
  MachineBasicBlock::LivenessQueryResult LQR =
    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);

  if (LQR == MachineBasicBlock::LQR_Live)
    return true;
  else if (LQR == MachineBasicBlock::LQR_Unknown)
    return false;

  // If the register is known not to be live, there is no need to add an
  // implicit-use.
  ImplicitSReg = 0;
  return true;
}

/// Rewrite \p MI in place so that it executes in \p Domain.
///
/// Only a request for ExeNEON changes anything; for any other \p Domain each
/// handled opcode is left as it is. The handled opcodes and their NEON
/// replacements are:
///   VMOVD  -> VORRd
///   VMOVRS -> VGETLNi32
///   VMOVSR -> VSETLNi32
///   VMOVS  -> VDUPLN32d when source and destination share a D-register,
///             otherwise a new VEXTd32 inserted before MI plus MI itself
///             rewritten to VEXTd32.
/// VMOVSR and VMOVS are left untouched when getImplicitSPRUseForDPRUse
/// returns false. Any other opcode reaches llvm_unreachable.
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
  unsigned DstReg, SrcReg, DReg;
  unsigned Lane;
  // Builder that appends operands to the existing instruction MI.
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  switch (MI->getOpcode()) {
    default:
      llvm_unreachable("cannot handle opcode!");
      break;
    case ARM::VMOVD:
      if (Domain != ExeNEON)
        break;

      // Zap the predicate operands.
      assert(!isPredicated(MI) && "Cannot predicate a VORRd");

      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      // Remove the operands counted by the instruction description, last
      // first; they are re-added below in the new instruction's layout.
      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
      MI->setDesc(get(ARM::VORRd));
      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
                        .addReg(SrcReg)
                        .addReg(SrcReg));
      break;
    case ARM::VMOVRS:
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");

      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);

      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
      // Note that DSrc has been widened and the other lane may be undef, which
      // contaminates the entire register.
      MI->setDesc(get(ARM::VGETLNi32));
      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
                        .addReg(DReg, RegState::Undef)
                        .addImm(Lane));

      // The old source should be an implicit use, otherwise we might think it
      // was dead before here.
      MIB.addReg(SrcReg, RegState::Implicit);
      break;
    case ARM::VMOVSR: {
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");

      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);

      // Give up (leaving MI unchanged) if liveness of the other lane cannot be
      // determined. This must happen before any operand is removed.
      unsigned ImplicitSReg;
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
        break;

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
      // Again DDst may be undefined at the beginning of this instruction.
      MI->setDesc(get(ARM::VSETLNi32));
      MIB.addReg(DReg, RegState::Define)
         .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
         .addReg(SrcReg)
         .addImm(Lane);
      AddDefaultPred(MIB);

      // The narrower destination must be marked as set to keep previous chains
      // in place.
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }
    case ARM::VMOVS: {
      if (Domain != ExeNEON)
        break;

      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);

      // As for VMOVSR: bail out before modifying MI if liveness is unknown.
      unsigned ImplicitSReg;
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
        break;

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      if (DSrc == DDst) {
        // Destination can be:
        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
        MI->setDesc(get(ARM::VDUPLN32d));
        MIB.addReg(DDst, RegState::Define)
           .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
           .addImm(SrcLane);
        AddDefaultPred(MIB);

        // Neither the source nor the destination are naturally represented any
        // more, so add them in manually.
        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
        MIB.addReg(SrcReg, RegState::Implicit);
        if (ImplicitSReg != 0)
          MIB.addReg(ImplicitSReg, RegState::Implicit);
        break;
      }

      // In general there's no single instruction that can perform an S <-> S
      // move in NEON space, but a pair of VEXT instructions *can* do the
      // job. It turns out that the VEXTs needed will only use DSrc once, with
      // the position based purely on the combination of lane-0 and lane-1
      // involved. For example
      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
      //
      // Pattern of the MachineInstrs is:
      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
      // The first VEXT is a new instruction inserted before MI; MI itself
      // becomes the second VEXT further down.
      MachineInstrBuilder NewMIB;
      NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                       get(ARM::VEXTd32), DDst);

      // On the first instruction, both DSrc and DDst may be <undef> if present.
      // Specifically when the original instruction didn't have them as an
      // <imp-use>.
      unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
      bool CurUndef = !MI->readsRegister(CurReg, TRI);
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
      CurUndef = !MI->readsRegister(CurReg, TRI);
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

      NewMIB.addImm(1);
      AddDefaultPred(NewMIB);

      // The original S source is recorded as an implicit use on whichever of
      // the two VEXTs takes DSrc: the first when the lanes match, the second
      // otherwise (see below).
      if (SrcLane == DstLane)
        NewMIB.addReg(SrcReg, RegState::Implicit);

      MI->setDesc(get(ARM::VEXTd32));
      MIB.addReg(DDst, RegState::Define);

      // On the second instruction, DDst has definitely been defined above, so
      // it is not <undef>. DSrc, if present, can be <undef> as above.
      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
      MIB.addReg(CurReg, getUndefRegState(CurUndef));

      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
      MIB.addReg(CurReg, getUndefRegState(CurUndef));

      MIB.addImm(1);
      AddDefaultPred(MIB);

      if (SrcLane != DstLane)
        MIB.addReg(SrcReg, RegState::Implicit);

      // As before, the original destination is no longer represented, add it
      // implicitly.
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }
  }

}

//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity.  That means any
// instruction writing an S-reg implicitly reads the containing D-reg.  The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
04053 unsigned ARMBaseInstrInfo:: 04054 getPartialRegUpdateClearance(const MachineInstr *MI, 04055 unsigned OpNum, 04056 const TargetRegisterInfo *TRI) const { 04057 if (!SwiftPartialUpdateClearance || 04058 !(Subtarget.isSwift() || Subtarget.isCortexA15())) 04059 return 0; 04060 04061 assert(TRI && "Need TRI instance"); 04062 04063 const MachineOperand &MO = MI->getOperand(OpNum); 04064 if (MO.readsReg()) 04065 return 0; 04066 unsigned Reg = MO.getReg(); 04067 int UseOp = -1; 04068 04069 switch(MI->getOpcode()) { 04070 // Normal instructions writing only an S-register. 04071 case ARM::VLDRS: 04072 case ARM::FCONSTS: 04073 case ARM::VMOVSR: 04074 case ARM::VMOVv8i8: 04075 case ARM::VMOVv4i16: 04076 case ARM::VMOVv2i32: 04077 case ARM::VMOVv2f32: 04078 case ARM::VMOVv1i64: 04079 UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); 04080 break; 04081 04082 // Explicitly reads the dependency. 04083 case ARM::VLD1LNd32: 04084 UseOp = 3; 04085 break; 04086 default: 04087 return 0; 04088 } 04089 04090 // If this instruction actually reads a value from Reg, there is no unwanted 04091 // dependency. 04092 if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) 04093 return 0; 04094 04095 // We must be able to clobber the whole D-reg. 04096 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 04097 // Virtual register must be a foo:ssub_0<def,undef> operand. 04098 if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) 04099 return 0; 04100 } else if (ARM::SPRRegClass.contains(Reg)) { 04101 // Physical register: MI must define the full D-reg. 04102 unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, 04103 &ARM::DPRRegClass); 04104 if (!DReg || !MI->definesRegister(DReg, TRI)) 04105 return 0; 04106 } 04107 04108 // MI has an unwanted D-register dependency. 04109 // Avoid defs in the previous N instructrions. 04110 return SwiftPartialUpdateClearance; 04111 } 04112 04113 // Break a partial register dependency after getPartialRegUpdateClearance 04114 // returned non-zero. 
04115 void ARMBaseInstrInfo:: 04116 breakPartialRegDependency(MachineBasicBlock::iterator MI, 04117 unsigned OpNum, 04118 const TargetRegisterInfo *TRI) const { 04119 assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); 04120 assert(TRI && "Need TRI instance"); 04121 04122 const MachineOperand &MO = MI->getOperand(OpNum); 04123 unsigned Reg = MO.getReg(); 04124 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 04125 "Can't break virtual register dependencies."); 04126 unsigned DReg = Reg; 04127 04128 // If MI defines an S-reg, find the corresponding D super-register. 04129 if (ARM::SPRRegClass.contains(Reg)) { 04130 DReg = ARM::D0 + (Reg - ARM::S0) / 2; 04131 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); 04132 } 04133 04134 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); 04135 assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); 04136 04137 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines 04138 // the full D-register by loading the same value to both lanes. The 04139 // instruction is micro-coded with 2 uops, so don't do this until we can 04140 // properly schedule micro-coded instuctions. The dispatcher stalls cause 04141 // too big regressions. 04142 04143 // Insert the dependency-breaking FCONSTD before MI. 04144 // 96 is the encoding of 0.5, but the actual value doesn't matter here. 04145 AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 04146 get(ARM::FCONSTD), DReg).addImm(96)); 04147 MI->addRegisterKilled(DReg, TRI, true); 04148 } 04149 04150 bool ARMBaseInstrInfo::hasNOP() const { 04151 return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; 04152 } 04153 04154 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { 04155 unsigned ShOpVal = MI->getOperand(3).getImm(); 04156 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); 04157 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 
04158 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || 04159 ((ShImm == 1 || ShImm == 2) && 04160 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) 04161 return true; 04162 04163 return false; 04164 }