File: build/source/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 6548, column 65: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET


class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
               CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
               BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I);

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI);

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if
  /// std::nullopt is provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;

  /// Emit a sequence of instructions representing a constant \p CV for a
  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  ///
  /// \returns the last instruction in the sequence on success, and nullptr
  /// otherwise.
  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
                                   MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI);

  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI);
  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
  /// SUBREG_TO_REG.
  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Helper function to select vector load intrinsics like
  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
  /// \p Opc is the opcode that the selected instruction should use.
  /// \p NumVecs is the number of vector destinations for the instruction.
  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI);
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred if given is the intended predicate to use.
  MachineInstr *
  emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
                std::optional<CmpInst::Predicate> = std::nullopt) const;

  MachineInstr *
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            MachineIRBuilder &MIRBuilder,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
  /// in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
                          AArch64CC::CondCode Pred,
                          MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
  /// In some cases this is even possible with OR operations in the expression.
  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
                                MachineIRBuilder &MIB) const;
  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
                                          CmpInst::Predicate CC,
                                          AArch64CC::CondCode Predicate,
                                          AArch64CC::CondCode OutCC,
                                          MachineIRBuilder &MIB) const;
  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
                                   bool Negate, Register CCOp,
                                   AArch64CC::CondCode Predicate,
                                   MachineIRBuilder &MIB) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
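  // For reference (informal; paraphrasing the i32shift_a/i32shift_b
  // SDNodeXForms rather than quoting them): shiftA encodes
  // (RegSize - Imm) % RegSize and shiftB encodes RegSize - 1 - Imm, which are
  // the immr/imms operands that rewrite a left shift as a UBFM.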

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
                                    const MachineInstr &MI,
                                    int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);

  // Optimization methods.
  bool tryOptSelect(GSelect &Sel);
  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

  MachineIRBuilder MIB;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
//
/// Given a register bank, and a type, return the smallest register class that
/// can represent that combination.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    switch (Ty.getSizeInBits()) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
    return nullptr;
  }

  return nullptr;
}
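
// Example query for getRegClassForTypeOnBank (illustrative; the GPRBank
// variable is hypothetical, the class names are real): a 64-bit scalar on the
// GPR bank yields GPR64, or GPR64all when GetAllRegSet is true:
//   const TargetRegisterClass *RC =
//       getRegClassForTypeOnBank(LLT::scalar(64), GPRBank); // &GPR64RegClass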

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}
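
// E.g. (illustrative) getSubRegForClass picks AArch64::sub_32 for a 32-bit
// GPR class but AArch64::ssub for FPR32, so a 64-to-32-bit GPR copy can be
// written as "%w = COPY %x.sub_32" while the FPR equivalent uses ".ssub".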

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
/// Helper function for functions like createDTuple and createQTuple.
///
/// \p RegClassIDs - The list of register class IDs available for some tuple of
/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
/// expected to contain between 2 and 4 tuple classes.
///
/// \p SubRegs - The list of subregister classes associated with each register
/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
/// subregister class. The index of each subregister class is expected to
/// correspond with the index of each register class.
///
/// \returns Either the destination register of REG_SEQUENCE instruction that
/// was created, or the 0th element of \p Regs if \p Regs contains a single
/// element.
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
}

/// Create a tuple of D-registers using the registers in \p Regs.
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

/// Create a tuple of Q-registers using the registers in \p Regs.
static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}
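
// Illustrative use (vreg names hypothetical): for two 128-bit sources,
//   Register Tuple = createQTuple({Src0, Src1}, MIB);
// emits "%tuple:qq = REG_SEQUENCE %src0, %subreg.qsub0, %src1, %subreg.qsub1"
// and returns the tuple register; with a single source it is returned as-is.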

static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return std::nullopt;
    Immed = ValAndVReg->Value.getSExtValue();
  } else
    return std::nullopt;
  return Immed;
}
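
// Note that the register case looks through copies to an underlying
// G_CONSTANT. E.g. (illustrative MIR) with "%c:gpr(s64) = G_CONSTANT i64 42",
// calling getImmedFromMO on an operand that uses %c returns 42.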

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
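
// E.g. selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64) returns
// AArch64::LSLVXr, while an unhandled combination such as G_FADD on the GPR
// bank falls through and returns the generic opcode unchanged, which callers
// treat as "unsupported".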

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    case 128:
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
    }
    break;
  }
  return GenericOpc;
}
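
// E.g. a 32-bit G_LOAD whose value lives on the GPR bank selects
// AArch64::LDRWui ("ldr w0, [x1, #imm]"), while the same access on the FPR
// bank selects AArch64::LDRSui ("ldr s0, [x1, #imm]").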

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g. "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(0).getReg().isPhysical())
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
                             const RegisterBankInfo &RBI) {
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
    if (!RC) {
      const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
      RC = getRegClassForTypeOnBank(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this is a GPR ZEXT, we just reduce it down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}
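
// Rough worked example for the SrcSize > DstSize path above (vreg names
// hypothetical): for "%dst:gpr(s32) = COPY %src:fpr(s64)", copySubReg emits
// "%tmp:gpr32all = COPY %src.ssub" and rewrites the original COPY to read
// %tmp, leaving a same-size copy to constrain.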
| 1032 | ||||
| 1033 | static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { | |||
| 1034 | if (!DstTy.isScalar() || !SrcTy.isScalar()) | |||
| 1035 | return GenericOpc; | |||
| 1036 | ||||
| 1037 | const unsigned DstSize = DstTy.getSizeInBits(); | |||
| 1038 | const unsigned SrcSize = SrcTy.getSizeInBits(); | |||
| 1039 | ||||
| 1040 | switch (DstSize) { | |||
| 1041 | case 32: | |||
| 1042 | switch (SrcSize) { | |||
| 1043 | case 32: | |||
| 1044 | switch (GenericOpc) { | |||
| 1045 | case TargetOpcode::G_SITOFP: | |||
| 1046 | return AArch64::SCVTFUWSri; | |||
| 1047 | case TargetOpcode::G_UITOFP: | |||
| 1048 | return AArch64::UCVTFUWSri; | |||
| 1049 | case TargetOpcode::G_FPTOSI: | |||
| 1050 | return AArch64::FCVTZSUWSr; | |||
| 1051 | case TargetOpcode::G_FPTOUI: | |||
| 1052 | return AArch64::FCVTZUUWSr; | |||
| 1053 | default: | |||
| 1054 | return GenericOpc; | |||
| 1055 | } | |||
| 1056 | case 64: | |||
| 1057 | switch (GenericOpc) { | |||
| 1058 | case TargetOpcode::G_SITOFP: | |||
| 1059 | return AArch64::SCVTFUXSri; | |||
| 1060 | case TargetOpcode::G_UITOFP: | |||
| 1061 | return AArch64::UCVTFUXSri; | |||
| 1062 | case TargetOpcode::G_FPTOSI: | |||
| 1063 | return AArch64::FCVTZSUWDr; | |||
| 1064 | case TargetOpcode::G_FPTOUI: | |||
| 1065 | return AArch64::FCVTZUUWDr; | |||
| 1066 | default: | |||
| 1067 | return GenericOpc; | |||
| 1068 | } | |||
| 1069 | default: | |||
| 1070 | return GenericOpc; | |||
| 1071 | } | |||
| 1072 | case 64: | |||
| 1073 | switch (SrcSize) { | |||
| 1074 | case 32: | |||
| 1075 | switch (GenericOpc) { | |||
| 1076 | case TargetOpcode::G_SITOFP: | |||
| 1077 | return AArch64::SCVTFUWDri; | |||
| 1078 | case TargetOpcode::G_UITOFP: | |||
| 1079 | return AArch64::UCVTFUWDri; | |||
| 1080 | case TargetOpcode::G_FPTOSI: | |||
| 1081 | return AArch64::FCVTZSUXSr; | |||
| 1082 | case TargetOpcode::G_FPTOUI: | |||
| 1083 | return AArch64::FCVTZUUXSr; | |||
| 1084 | default: | |||
| 1085 | return GenericOpc; | |||
| 1086 | } | |||
| 1087 | case 64: | |||
| 1088 | switch (GenericOpc) { | |||
| 1089 | case TargetOpcode::G_SITOFP: | |||
| 1090 | return AArch64::SCVTFUXDri; | |||
| 1091 | case TargetOpcode::G_UITOFP: | |||
| 1092 | return AArch64::UCVTFUXDri; | |||
| 1093 | case TargetOpcode::G_FPTOSI: | |||
| 1094 | return AArch64::FCVTZSUXDr; | |||
| 1095 | case TargetOpcode::G_FPTOUI: | |||
| 1096 | return AArch64::FCVTZUUXDr; | |||
| 1097 | default: | |||
| 1098 | return GenericOpc; | |||
| 1099 | } | |||
| 1100 | default: | |||
| 1101 | return GenericOpc; | |||
| 1102 | } | |||
| 1103 | default: | |||
| 1104 | return GenericOpc; | |||
| 1105 | } | |||
| 1106 | return GenericOpc; | |||
| 1107 | } | |||
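| | // A rough decoding of the opcode names above, for illustration only (our | |||
| | // reading of the AArch64 naming convention, not normative): FCVTZSUWSr | |||
| | // performs FCVTZS into a W (32-bit GPR) result from an S (single-precision | |||
| | // FPR) source, so %dst:gpr(s32) = G_FPTOSI %src:fpr(s32) selects to | |||
| | // %dst = FCVTZSUWSr %src, i.e. "fcvtzs w0, s0". | |||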
| 1108 | ||||
| 1109 | MachineInstr * | |||
| 1110 | AArch64InstructionSelector::emitSelect(Register Dst, Register True, | |||
| 1111 | Register False, AArch64CC::CondCode CC, | |||
| 1112 | MachineIRBuilder &MIB) const { | |||
| 1113 | MachineRegisterInfo &MRI = *MIB.getMRI(); | |||
| 1114 | assert(RBI.getRegBank(False, MRI, TRI)->getID() == | |||
| 1115 | RBI.getRegBank(True, MRI, TRI)->getID() && | |||
| 1116 | "Expected both select operands to have the same regbank?"); | |||
| 1117 | LLT Ty = MRI.getType(True); | |||
| 1118 | if (Ty.isVector()) | |||
| 1119 | return nullptr; | |||
| 1120 | const unsigned Size = Ty.getSizeInBits(); | |||
| 1121 | assert((Size == 32 || Size == 64) && | |||
| 1122 | "Expected 32 bit or 64 bit select only?"); | |||
| 1123 | const bool Is32Bit = Size == 32; | |||
| 1124 | if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { | |||
| 1125 | unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; | |||
| 1126 | auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); | |||
| 1127 | constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); | |||
| 1128 | return &*FCSel; | |||
| 1129 | } | |||
| 1130 | ||||
| 1131 | // By default, we'll try to emit a CSEL. | |||
| 1132 | unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; | |||
| 1133 | bool Optimized = false; | |||
| 1134 | auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, | |||
| 1135 | &Optimized](Register &Reg, Register &OtherReg, | |||
| 1136 | bool Invert) { | |||
| 1137 | if (Optimized) | |||
| 1138 | return false; | |||
| 1139 | ||||
| 1140 | // Attempt to fold: | |||
| 1141 | // | |||
| 1142 | // %sub = G_SUB 0, %x | |||
| 1143 | // %select = G_SELECT cc, %reg, %sub | |||
| 1144 | // | |||
| 1145 | // Into: | |||
| 1146 | // %select = CSNEG %reg, %x, cc | |||
| 1147 | Register MatchReg; | |||
| 1148 | if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { | |||
| 1149 | Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; | |||
| 1150 | Reg = MatchReg; | |||
| 1151 | if (Invert) { | |||
| 1152 | CC = AArch64CC::getInvertedCondCode(CC); | |||
| 1153 | std::swap(Reg, OtherReg); | |||
| 1154 | } | |||
| 1155 | return true; | |||
| 1156 | } | |||
| 1157 | ||||
| 1158 | // Attempt to fold: | |||
| 1159 | // | |||
| 1160 | // %xor = G_XOR %x, -1 | |||
| 1161 | // %select = G_SELECT cc, %reg, %xor | |||
| 1162 | // | |||
| 1163 | // Into: | |||
| 1164 | // %select = CSINV %reg, %x, cc | |||
| 1165 | if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { | |||
| 1166 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | |||
| 1167 | Reg = MatchReg; | |||
| 1168 | if (Invert) { | |||
| 1169 | CC = AArch64CC::getInvertedCondCode(CC); | |||
| 1170 | std::swap(Reg, OtherReg); | |||
| 1171 | } | |||
| 1172 | return true; | |||
| 1173 | } | |||
| 1174 | ||||
| 1175 | // Attempt to fold: | |||
| 1176 | // | |||
| 1177 | // %add = G_ADD %x, 1 | |||
| 1178 | // %select = G_SELECT cc, %reg, %add | |||
| 1179 | // | |||
| 1180 | // Into: | |||
| 1181 | // %select = CSINC %reg, %x, cc | |||
| 1182 | if (mi_match(Reg, MRI, | |||
| 1183 | m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)), | |||
| 1184 | m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) { | |||
| 1185 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | |||
| 1186 | Reg = MatchReg; | |||
| 1187 | if (Invert) { | |||
| 1188 | CC = AArch64CC::getInvertedCondCode(CC); | |||
| 1189 | std::swap(Reg, OtherReg); | |||
| 1190 | } | |||
| 1191 | return true; | |||
| 1192 | } | |||
| 1193 | ||||
| 1194 | return false; | |||
| 1195 | }; | |||
| 1196 | ||||
| 1197 | // Helper lambda which tries to use CSINC/CSINV for the instruction when its | |||
| 1198 | // true/false values are constants. | |||
| 1199 | // FIXME: All of these patterns already exist in tablegen. We should be | |||
| 1200 | // able to import these. | |||
| 1201 | auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, | |||
| 1202 | &Optimized]() { | |||
| 1203 | if (Optimized) | |||
| 1204 | return false; | |||
| 1205 | auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI); | |||
| 1206 | auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI); | |||
| 1207 | if (!TrueCst && !FalseCst) | |||
| 1208 | return false; | |||
| 1209 | ||||
| 1210 | Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; | |||
| 1211 | if (TrueCst && FalseCst) { | |||
| 1212 | int64_t T = TrueCst->Value.getSExtValue(); | |||
| 1213 | int64_t F = FalseCst->Value.getSExtValue(); | |||
| 1214 | ||||
| 1215 | if (T == 0 && F == 1) { | |||
| 1216 | // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc | |||
| 1217 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | |||
| 1218 | True = ZReg; | |||
| 1219 | False = ZReg; | |||
| 1220 | return true; | |||
| 1221 | } | |||
| 1222 | ||||
| 1223 | if (T == 0 && F == -1) { | |||
| 1224 | // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc | |||
| 1225 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | |||
| 1226 | True = ZReg; | |||
| 1227 | False = ZReg; | |||
| 1228 | return true; | |||
| 1229 | } | |||
| 1230 | } | |||
| 1231 | ||||
| 1232 | if (TrueCst) { | |||
| 1233 | int64_t T = TrueCst->Value.getSExtValue(); | |||
| 1234 | if (T == 1) { | |||
| 1235 | // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc | |||
| 1236 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | |||
| 1237 | True = False; | |||
| 1238 | False = ZReg; | |||
| 1239 | CC = AArch64CC::getInvertedCondCode(CC); | |||
| 1240 | return true; | |||
| 1241 | } | |||
| 1242 | ||||
| 1243 | if (T == -1) { | |||
| 1244 | // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc | |||
| 1245 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | |||
| 1246 | True = False; | |||
| 1247 | False = ZReg; | |||
| 1248 | CC = AArch64CC::getInvertedCondCode(CC); | |||
| 1249 | return true; | |||
| 1250 | } | |||
| 1251 | } | |||
| 1252 | ||||
| 1253 | if (FalseCst) { | |||
| 1254 | int64_t F = FalseCst->Value.getSExtValue(); | |||
| 1255 | if (F == 1) { | |||
| 1256 | // G_SELECT cc, t, 1 -> CSINC t, zreg, cc | |||
| 1257 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | |||
| 1258 | False = ZReg; | |||
| 1259 | return true; | |||
| 1260 | } | |||
| 1261 | ||||
| 1262 | if (F == -1) { | |||
| 1263 | // G_SELECT cc, t, -1 -> CSINV t, zreg, cc | |||
| 1264 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | |||
| 1265 | False = ZReg; | |||
| 1266 | return true; | |||
| 1267 | } | |||
| 1268 | } | |||
| 1269 | return false; | |||
| 1270 | }; | |||
| 1271 | ||||
| 1272 | Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); | |||
| 1273 | Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); | |||
| 1274 | Optimized |= TryOptSelectCst(); | |||
| 1275 | auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); | |||
| 1276 | constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); | |||
| 1277 | return &*SelectInst; | |||
| 1278 | } | |||
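| | // Illustrative MIR sketch of the CSNEG fold above (register names are | |||
| | // hypothetical): | |||
| | // %neg:gpr(s32) = G_SUB 0, %x | |||
| | // %sel:gpr(s32) = G_SELECT %cond, %t, %neg | |||
| | // becomes, with CC derived from %cond: | |||
| | // %sel = CSNEGWr %t, %x, CC ; CC ? %t : -%x | |||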
| 1279 | ||||
| 1280 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { | |||
| 1281 | switch (P) { | |||
| 1282 | default: | |||
| 1283 | llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1283); | |||
| 1284 | case CmpInst::ICMP_NE: | |||
| 1285 | return AArch64CC::NE; | |||
| 1286 | case CmpInst::ICMP_EQ: | |||
| 1287 | return AArch64CC::EQ; | |||
| 1288 | case CmpInst::ICMP_SGT: | |||
| 1289 | return AArch64CC::GT; | |||
| 1290 | case CmpInst::ICMP_SGE: | |||
| 1291 | return AArch64CC::GE; | |||
| 1292 | case CmpInst::ICMP_SLT: | |||
| 1293 | return AArch64CC::LT; | |||
| 1294 | case CmpInst::ICMP_SLE: | |||
| 1295 | return AArch64CC::LE; | |||
| 1296 | case CmpInst::ICMP_UGT: | |||
| 1297 | return AArch64CC::HI; | |||
| 1298 | case CmpInst::ICMP_UGE: | |||
| 1299 | return AArch64CC::HS; | |||
| 1300 | case CmpInst::ICMP_ULT: | |||
| 1301 | return AArch64CC::LO; | |||
| 1302 | case CmpInst::ICMP_ULE: | |||
| 1303 | return AArch64CC::LS; | |||
| 1304 | } | |||
| 1305 | } | |||
| 1306 | ||||
| 1307 | /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC. | |||
| 1308 | static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, | |||
| 1309 | AArch64CC::CondCode &CondCode, | |||
| 1310 | AArch64CC::CondCode &CondCode2) { | |||
| 1311 | CondCode2 = AArch64CC::AL; | |||
| 1312 | switch (CC) { | |||
| 1313 | default: | |||
| 1314 | llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1314); | |||
| 1315 | case CmpInst::FCMP_OEQ: | |||
| 1316 | CondCode = AArch64CC::EQ; | |||
| 1317 | break; | |||
| 1318 | case CmpInst::FCMP_OGT: | |||
| 1319 | CondCode = AArch64CC::GT; | |||
| 1320 | break; | |||
| 1321 | case CmpInst::FCMP_OGE: | |||
| 1322 | CondCode = AArch64CC::GE; | |||
| 1323 | break; | |||
| 1324 | case CmpInst::FCMP_OLT: | |||
| 1325 | CondCode = AArch64CC::MI; | |||
| 1326 | break; | |||
| 1327 | case CmpInst::FCMP_OLE: | |||
| 1328 | CondCode = AArch64CC::LS; | |||
| 1329 | break; | |||
| 1330 | case CmpInst::FCMP_ONE: | |||
| 1331 | CondCode = AArch64CC::MI; | |||
| 1332 | CondCode2 = AArch64CC::GT; | |||
| 1333 | break; | |||
| 1334 | case CmpInst::FCMP_ORD: | |||
| 1335 | CondCode = AArch64CC::VC; | |||
| 1336 | break; | |||
| 1337 | case CmpInst::FCMP_UNO: | |||
| 1338 | CondCode = AArch64CC::VS; | |||
| 1339 | break; | |||
| 1340 | case CmpInst::FCMP_UEQ: | |||
| 1341 | CondCode = AArch64CC::EQ; | |||
| 1342 | CondCode2 = AArch64CC::VS; | |||
| 1343 | break; | |||
| 1344 | case CmpInst::FCMP_UGT: | |||
| 1345 | CondCode = AArch64CC::HI; | |||
| 1346 | break; | |||
| 1347 | case CmpInst::FCMP_UGE: | |||
| 1348 | CondCode = AArch64CC::PL; | |||
| 1349 | break; | |||
| 1350 | case CmpInst::FCMP_ULT: | |||
| 1351 | CondCode = AArch64CC::LT; | |||
| 1352 | break; | |||
| 1353 | case CmpInst::FCMP_ULE: | |||
| 1354 | CondCode = AArch64CC::LE; | |||
| 1355 | break; | |||
| 1356 | case CmpInst::FCMP_UNE: | |||
| 1357 | CondCode = AArch64CC::NE; | |||
| 1358 | break; | |||
| 1359 | } | |||
| 1360 | } | |||
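| | // For example, FCMP_ONE (ordered and not equal) has no single AArch64 | |||
| | // condition after an FCMP, so it is decomposed above into MI OR'ed with | |||
| | // GT; callers such as selectCompareBranchFedByFCmp emit a second Bcc | |||
| | // whenever CondCode2 != AL. | |||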
| 1361 | ||||
| 1362 | /// Convert an IR fp condition code to an AArch64 CC. | |||
| 1363 | /// This differs from changeFPCCToAArch64CC in that it returns cond codes that | |||
| 1364 | /// should be AND'ed instead of OR'ed. | |||
| 1365 | static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, | |||
| 1366 | AArch64CC::CondCode &CondCode, | |||
| 1367 | AArch64CC::CondCode &CondCode2) { | |||
| 1368 | CondCode2 = AArch64CC::AL; | |||
| 1369 | switch (CC) { | |||
| 1370 | default: | |||
| 1371 | changeFPCCToORAArch64CC(CC, CondCode, CondCode2); | |||
| 1372 | assert(CondCode2 == AArch64CC::AL); | |||
| 1373 | break; | |||
| 1374 | case CmpInst::FCMP_ONE: | |||
| 1375 | // (a one b) | |||
| 1376 | // == ((a olt b) || (a ogt b)) | |||
| 1377 | // == ((a ord b) && (a une b)) | |||
| 1378 | CondCode = AArch64CC::VC; | |||
| 1379 | CondCode2 = AArch64CC::NE; | |||
| 1380 | break; | |||
| 1381 | case CmpInst::FCMP_UEQ: | |||
| 1382 | // (a ueq b) | |||
| 1383 | // == ((a uno b) || (a oeq b)) | |||
| 1384 | // == ((a ule b) && (a uge b)) | |||
| 1385 | CondCode = AArch64CC::PL; | |||
| 1386 | CondCode2 = AArch64CC::LE; | |||
| 1387 | break; | |||
| 1388 | } | |||
| 1389 | } | |||
| 1390 | ||||
| 1391 | /// Return a register which can be used as a bit to test in a TB(N)Z. | |||
| 1392 | static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, | |||
| 1393 | MachineRegisterInfo &MRI) { | |||
| 1394 | assert(Reg.isValid() && "Expected valid register!"); | |||
| 1395 | bool HasZext = false; | |||
| 1396 | while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { | |||
| 1397 | unsigned Opc = MI->getOpcode(); | |||
| 1398 | ||||
| 1399 | if (!MI->getOperand(0).isReg() || | |||
| 1400 | !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) | |||
| 1401 | break; | |||
| 1402 | ||||
| 1403 | // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. | |||
| 1404 | // | |||
| 1405 | // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number | |||
| 1406 | // on the truncated x is the same as the bit number on x. | |||
| 1407 | if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || | |||
| 1408 | Opc == TargetOpcode::G_TRUNC) { | |||
| 1409 | if (Opc == TargetOpcode::G_ZEXT) | |||
| 1410 | HasZext = true; | |||
| 1411 | ||||
| 1412 | Register NextReg = MI->getOperand(1).getReg(); | |||
| 1413 | // Did we find something worth folding? | |||
| 1414 | if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg)) | |||
| 1415 | break; | |||
| 1416 | ||||
| 1417 | // NextReg is worth folding. Keep looking. | |||
| 1418 | Reg = NextReg; | |||
| 1419 | continue; | |||
| 1420 | } | |||
| 1421 | ||||
| 1422 | // Attempt to find a suitable operation with a constant on one side. | |||
| 1423 | std::optional<uint64_t> C; | |||
| 1424 | Register TestReg; | |||
| 1425 | switch (Opc) { | |||
| 1426 | default: | |||
| 1427 | break; | |||
| 1428 | case TargetOpcode::G_AND: | |||
| 1429 | case TargetOpcode::G_XOR: { | |||
| 1430 | TestReg = MI->getOperand(1).getReg(); | |||
| 1431 | Register ConstantReg = MI->getOperand(2).getReg(); | |||
| 1432 | auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | |||
| 1433 | if (!VRegAndVal) { | |||
| 1434 | // AND commutes, check the other side for a constant. | |||
| 1435 | // FIXME: Can we canonicalize the constant so that it's always on the | |||
| 1436 | // same side at some point earlier? | |||
| 1437 | std::swap(ConstantReg, TestReg); | |||
| 1438 | VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | |||
| 1439 | } | |||
| 1440 | if (VRegAndVal) { | |||
| 1441 | if (HasZext) | |||
| 1442 | C = VRegAndVal->Value.getZExtValue(); | |||
| 1443 | else | |||
| 1444 | C = VRegAndVal->Value.getSExtValue(); | |||
| 1445 | } | |||
| 1446 | break; | |||
| 1447 | } | |||
| 1448 | case TargetOpcode::G_ASHR: | |||
| 1449 | case TargetOpcode::G_LSHR: | |||
| 1450 | case TargetOpcode::G_SHL: { | |||
| 1451 | TestReg = MI->getOperand(1).getReg(); | |||
| 1452 | auto VRegAndVal = | |||
| 1453 | getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); | |||
| 1454 | if (VRegAndVal) | |||
| 1455 | C = VRegAndVal->Value.getSExtValue(); | |||
| 1456 | break; | |||
| 1457 | } | |||
| 1458 | } | |||
| 1459 | ||||
| 1460 | // Didn't find a constant or viable register. Bail out of the loop. | |||
| 1461 | if (!C || !TestReg.isValid()) | |||
| 1462 | break; | |||
| 1463 | ||||
| 1464 | // We found a suitable instruction with a constant. Check to see if we can | |||
| 1465 | // walk through the instruction. | |||
| 1466 | Register NextReg; | |||
| 1467 | unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); | |||
| 1468 | switch (Opc) { | |||
| 1469 | default: | |||
| 1470 | break; | |||
| 1471 | case TargetOpcode::G_AND: | |||
| 1472 | // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. | |||
| 1473 | if ((*C >> Bit) & 1) | |||
| 1474 | NextReg = TestReg; | |||
| 1475 | break; | |||
| 1476 | case TargetOpcode::G_SHL: | |||
| 1477 | // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in | |||
| 1478 | // the type of the register. | |||
| 1479 | if (*C <= Bit && (Bit - *C) < TestRegSize) { | |||
| 1480 | NextReg = TestReg; | |||
| 1481 | Bit = Bit - *C; | |||
| 1482 | } | |||
| 1483 | break; | |||
| 1484 | case TargetOpcode::G_ASHR: | |||
| 1485 | // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits | |||
| 1486 | // in x | |||
| 1487 | NextReg = TestReg; | |||
| 1488 | Bit = Bit + *C; | |||
| 1489 | if (Bit >= TestRegSize) | |||
| 1490 | Bit = TestRegSize - 1; | |||
| 1491 | break; | |||
| 1492 | case TargetOpcode::G_LSHR: | |||
| 1493 | // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x | |||
| 1494 | if ((Bit + *C) < TestRegSize) { | |||
| 1495 | NextReg = TestReg; | |||
| 1496 | Bit = Bit + *C; | |||
| 1497 | } | |||
| 1498 | break; | |||
| 1499 | case TargetOpcode::G_XOR: | |||
| 1500 | // We can walk through a G_XOR by inverting whether we use tbz/tbnz when | |||
| 1501 | // appropriate. | |||
| 1502 | // | |||
| 1503 | // e.g. If x' = xor x, c, and the b-th bit is set in c then | |||
| 1504 | // | |||
| 1505 | // tbz x', b -> tbnz x, b | |||
| 1506 | // | |||
| 1507 | // Because x' only has the b-th bit set if x does not. | |||
| 1508 | if ((*C >> Bit) & 1) | |||
| 1509 | Invert = !Invert; | |||
| 1510 | NextReg = TestReg; | |||
| 1511 | break; | |||
| 1512 | } | |||
| 1513 | ||||
| 1514 | // Check if we found anything worth folding. | |||
| 1515 | if (!NextReg.isValid()) | |||
| 1516 | return Reg; | |||
| 1517 | Reg = NextReg; | |||
| 1518 | } | |||
| 1519 | ||||
| 1520 | return Reg; | |||
| 1521 | } | |||
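| | // A sketch of the walk above (hypothetical vregs): testing bit 5 of | |||
| | // %y = G_SHL %x, 2 becomes testing bit 3 of %x, since bit 5 of (x << 2) | |||
| | // is bit 3 of x: | |||
| | // tbz (%x << 2), #5 -> tbz %x, #3 | |||
| | // and walking a G_XOR whose constant has the tested bit set simply flips | |||
| | // TBZ <-> TBNZ via Invert. | |||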
| 1522 | ||||
| 1523 | MachineInstr *AArch64InstructionSelector::emitTestBit( | |||
| 1524 | Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, | |||
| 1525 | MachineIRBuilder &MIB) const { | |||
| 1526 | assert(TestReg.isValid()); | |||
| 1527 | assert(ProduceNonFlagSettingCondBr && | |||
| 1528 | "Cannot emit TB(N)Z with speculation tracking!"); | |||
| 1529 | MachineRegisterInfo &MRI = *MIB.getMRI(); | |||
| 1530 | ||||
| 1531 | // Attempt to optimize the test bit by walking over instructions. | |||
| 1532 | TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); | |||
| 1533 | LLT Ty = MRI.getType(TestReg); | |||
| 1534 | unsigned Size = Ty.getSizeInBits(); | |||
| 1535 | assert(!Ty.isVector() && "Expected a scalar!"); | |||
| 1536 | assert(Bit < 64 && "Bit is too large!"); | |||
| 1537 | ||||
| 1538 | // Narrow or widen the test register as needed so that its width matches | |||
| 1539 | // the W/X TB(N)Z variant chosen by the bit index. | |||
| 1540 | bool UseWReg = Bit < 32; | |||
| 1541 | unsigned NecessarySize = UseWReg ? 32 : 64; | |||
| 1542 | if (Size != NecessarySize) | |||
| 1543 | TestReg = moveScalarRegClass( | |||
| 1544 | TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, | |||
| 1545 | MIB); | |||
| 1546 | ||||
| 1547 | static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, | |||
| 1548 | {AArch64::TBZW, AArch64::TBNZW}}; | |||
| 1549 | unsigned Opc = OpcTable[UseWReg][IsNegative]; | |||
| 1550 | auto TestBitMI = | |||
| 1551 | MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); | |||
| 1552 | constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); | |||
| 1553 | return &*TestBitMI; | |||
| 1554 | } | |||
| 1555 | ||||
| 1556 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( | |||
| 1557 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, | |||
| 1558 | MachineIRBuilder &MIB) const { | |||
| 1559 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); | |||
| 1560 | // Given something like this: | |||
| 1561 | // | |||
| 1562 | // %x = ...Something... | |||
| 1563 | // %one = G_CONSTANT i64 1 | |||
| 1564 | // %zero = G_CONSTANT i64 0 | |||
| 1565 | // %and = G_AND %x, %one | |||
| 1566 | // %cmp = G_ICMP intpred(ne), %and, %zero | |||
| 1567 | // %cmp_trunc = G_TRUNC %cmp | |||
| 1568 | // G_BRCOND %cmp_trunc, %bb.3 | |||
| 1569 | // | |||
| 1570 | // We want to try and fold the AND into the G_BRCOND and produce either a | |||
| 1571 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). | |||
| 1572 | // | |||
| 1573 | // In this case, we'd get | |||
| 1574 | // | |||
| 1575 | // TBNZ %x %bb.3 | |||
| 1576 | // | |||
| 1577 | ||||
| 1578 | // Check if the AND has a constant on its RHS which we can use as a mask. | |||
| 1579 | // If it's a power of 2, then it's the same as checking a specific bit. | |||
| 1580 | // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) | |||
| 1581 | auto MaybeBit = getIConstantVRegValWithLookThrough( | |||
| 1582 | AndInst.getOperand(2).getReg(), *MIB.getMRI()); | |||
| 1583 | if (!MaybeBit) | |||
| 1584 | return false; | |||
| 1585 | ||||
| 1586 | int32_t Bit = MaybeBit->Value.exactLogBase2(); | |||
| 1587 | if (Bit < 0) | |||
| 1588 | return false; | |||
| 1589 | ||||
| 1590 | Register TestReg = AndInst.getOperand(1).getReg(); | |||
| 1591 | ||||
| 1592 | // Emit a TB(N)Z. | |||
| 1593 | emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); | |||
| 1594 | return true; | |||
| 1595 | } | |||
| 1596 | ||||
| 1597 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, | |||
| 1598 | bool IsNegative, | |||
| 1599 | MachineBasicBlock *DestMBB, | |||
| 1600 | MachineIRBuilder &MIB) const { | |||
| 1601 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); | |||
| 1602 | MachineRegisterInfo &MRI = *MIB.getMRI(); | |||
| 1603 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == | |||
| 1604 | AArch64::GPRRegBankID && | |||
| 1605 | "Expected GPRs only?"); | |||
| 1606 | auto Ty = MRI.getType(CompareReg); | |||
| 1607 | unsigned Width = Ty.getSizeInBits(); | |||
| 1608 | assert(!Ty.isVector() && "Expected scalar only?"); | |||
| 1609 | assert(Width <= 64 && "Expected width to be at most 64?"); | |||
| 1610 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, | |||
| 1611 | {AArch64::CBNZW, AArch64::CBNZX}}; | |||
| 1612 | unsigned Opc = OpcTable[IsNegative][Width == 64]; | |||
| 1613 | auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); | |||
| 1614 | constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); | |||
| 1615 | return &*BranchMI; | |||
| 1616 | } | |||
| 1617 | ||||
| 1618 | bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( | |||
| 1619 | MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { | |||
| 1620 | assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); | |||
| 1621 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); | |||
| 1622 | // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't | |||
| 1623 | // totally clean. Some of them require two branches to implement. | |||
| 1624 | auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); | |||
| 1625 | emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, | |||
| 1626 | Pred); | |||
| 1627 | AArch64CC::CondCode CC1, CC2; | |||
| 1628 | changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); | |||
| 1629 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | |||
| 1630 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); | |||
| 1631 | if (CC2 != AArch64CC::AL) | |||
| 1632 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); | |||
| 1633 | I.eraseFromParent(); | |||
| 1634 | return true; | |||
| 1635 | } | |||
| 1636 | ||||
| 1637 | bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( | |||
| 1638 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { | |||
| 1639 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); | |||
| 1640 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); | |||
| 1641 | // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. | |||
| 1642 | // | |||
| 1643 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z | |||
| 1644 | // instructions will not be produced, as they are conditional branch | |||
| 1645 | // instructions that do not set flags. | |||
| 1646 | if (!ProduceNonFlagSettingCondBr) | |||
| 1647 | return false; | |||
| 1648 | ||||
| 1649 | MachineRegisterInfo &MRI = *MIB.getMRI(); | |||
| 1650 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | |||
| 1651 | auto Pred = | |||
| 1652 | static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); | |||
| 1653 | Register LHS = ICmp.getOperand(2).getReg(); | |||
| 1654 | Register RHS = ICmp.getOperand(3).getReg(); | |||
| 1655 | ||||
| 1656 | // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. | |||
| 1657 | auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); | |||
| 1658 | MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); | |||
| 1659 | ||||
| 1660 | // When we can emit a TB(N)Z, prefer that. | |||
| 1661 | // | |||
| 1662 | // Handle non-commutative condition codes first. | |||
| 1663 | // Note that we don't want to do this when we have a G_AND because it can | |||
| 1664 | // become a tst. The tst will make the test bit in the TB(N)Z redundant. | |||
| 1665 | if (VRegAndVal && !AndInst) { | |||
| 1666 | int64_t C = VRegAndVal->Value.getSExtValue(); | |||
| 1667 | ||||
| 1668 | // When we have a greater-than comparison, we can just test if the msb is | |||
| 1669 | // zero. | |||
| 1670 | if (C == -1 && Pred == CmpInst::ICMP_SGT) { | |||
| 1671 | uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; | |||
| 1672 | emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); | |||
| 1673 | I.eraseFromParent(); | |||
| 1674 | return true; | |||
| 1675 | } | |||
| 1676 | ||||
| 1677 | // When we have a less than comparison, we can just test if the msb is not | |||
| 1678 | // zero. | |||
| 1679 | if (C == 0 && Pred == CmpInst::ICMP_SLT) { | |||
| 1680 | uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; | |||
| 1681 | emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); | |||
| 1682 | I.eraseFromParent(); | |||
| 1683 | return true; | |||
| 1684 | } | |||
| 1685 | ||||
| 1686 | // Inversely, if we have a signed greater-than-or-equal comparison to zero, | |||
| 1687 | // we can test if the msb is zero. | |||
| 1688 | if (C == 0 && Pred == CmpInst::ICMP_SGE) { | |||
| 1689 | uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; | |||
| 1690 | emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); | |||
| 1691 | I.eraseFromParent(); | |||
| 1692 | return true; | |||
| 1693 | } | |||
| 1694 | } | |||
| 1695 | ||||
| 1696 | // Attempt to handle commutative condition codes. Right now, that's only | |||
| 1697 | // eq/ne. | |||
| 1698 | if (ICmpInst::isEquality(Pred)) { | |||
| 1699 | if (!VRegAndVal) { | |||
| 1700 | std::swap(RHS, LHS); | |||
| 1701 | VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); | |||
| 1702 | AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); | |||
| 1703 | } | |||
| 1704 | ||||
| 1705 | if (VRegAndVal && VRegAndVal->Value == 0) { | |||
| 1706 | // If there's a G_AND feeding into this branch, try to fold it away by | |||
| 1707 | // emitting a TB(N)Z instead. | |||
| 1708 | // | |||
| 1709 | // Note: If we have LT, then it *is* possible to fold, but it wouldn't be | |||
| 1710 | // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding | |||
| 1711 | // would be redundant. | |||
| 1712 | if (AndInst && | |||
| 1713 | tryOptAndIntoCompareBranch( | |||
| 1714 | *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { | |||
| 1715 | I.eraseFromParent(); | |||
| 1716 | return true; | |||
| 1717 | } | |||
| 1718 | ||||
| 1719 | // Otherwise, try to emit a CB(N)Z instead. | |||
| 1720 | auto LHSTy = MRI.getType(LHS); | |||
| 1721 | if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { | |||
| 1722 | emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); | |||
| 1723 | I.eraseFromParent(); | |||
| 1724 | return true; | |||
| 1725 | } | |||
| 1726 | } | |||
| 1727 | } | |||
| 1728 | ||||
| 1729 | return false; | |||
| 1730 | } | |||
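| | // Example of the sign-bit trick above: for a 64-bit %x, | |||
| | // %c = G_ICMP intpred(sgt), %x, -1 ; G_BRCOND %c, %bb | |||
| | // means "branch if %x >= 0", i.e. branch if bit 63 is clear: | |||
| | // TBZ %x, #63, %bb | |||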
| 1731 | ||||
| 1732 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( | |||
| 1733 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { | |||
| 1734 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); | |||
| 1735 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); | |||
| 1736 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) | |||
| 1737 | return true; | |||
| 1738 | ||||
| 1739 | // Couldn't optimize. Emit a compare + a Bcc. | |||
| 1740 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | |||
| 1741 | auto PredOp = ICmp.getOperand(1); | |||
| 1742 | emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); | |||
| 1743 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( | |||
| 1744 | static_cast<CmpInst::Predicate>(PredOp.getPredicate())); | |||
| 1745 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); | |||
| 1746 | I.eraseFromParent(); | |||
| 1747 | return true; | |||
| 1748 | } | |||
| 1749 | ||||
| 1750 | bool AArch64InstructionSelector::selectCompareBranch( | |||
| 1751 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { | |||
| 1752 | Register CondReg = I.getOperand(0).getReg(); | |||
| 1753 | MachineInstr *CCMI = MRI.getVRegDef(CondReg); | |||
| 1754 | // Try to select the G_BRCOND using whatever is feeding the condition if | |||
| 1755 | // possible. | |||
| 1756 | unsigned CCMIOpc = CCMI->getOpcode(); | |||
| 1757 | if (CCMIOpc == TargetOpcode::G_FCMP) | |||
| 1758 | return selectCompareBranchFedByFCmp(I, *CCMI, MIB); | |||
| 1759 | if (CCMIOpc == TargetOpcode::G_ICMP) | |||
| 1760 | return selectCompareBranchFedByICmp(I, *CCMI, MIB); | |||
| 1761 | ||||
| 1762 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z | |||
| 1763 | // instructions will not be produced, as they are conditional branch | |||
| 1764 | // instructions that do not set flags. | |||
| 1765 | if (ProduceNonFlagSettingCondBr) { | |||
| 1766 | emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, | |||
| 1767 | I.getOperand(1).getMBB(), MIB); | |||
| 1768 | I.eraseFromParent(); | |||
| 1769 | return true; | |||
| 1770 | } | |||
| 1771 | ||||
| 1772 | // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. | |||
| 1773 | auto TstMI = | |||
| 1774 | MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); | |||
| 1775 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); | |||
| 1776 | auto Bcc = MIB.buildInstr(AArch64::Bcc) | |||
| 1777 | .addImm(AArch64CC::EQ) | |||
| 1778 | .addMBB(I.getOperand(1).getMBB()); | |||
| 1779 | I.eraseFromParent(); | |||
| 1780 | return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); | |||
| 1781 | } | |||
| 1782 | ||||
| 1783 | /// Returns the element immediate value of a vector shift operand if found. | |||
| 1784 | /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. | |||
| 1785 | static std::optional<int64_t> getVectorShiftImm(Register Reg, | |||
| 1786 | MachineRegisterInfo &MRI) { | |||
| 1787 | assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); | |||
| 1788 | MachineInstr *OpMI = MRI.getVRegDef(Reg); | |||
| 1789 | return getAArch64VectorSplatScalar(*OpMI, MRI); | |||
| 1790 | } | |||
| 1791 | ||||
| 1792 | /// Matches and returns the shift immediate value for a SHL instruction given | |||
| 1793 | /// a shift operand. | |||
| 1794 | static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, | |||
| 1795 | MachineRegisterInfo &MRI) { | |||
| 1796 | std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); | |||
| 1797 | if (!ShiftImm) | |||
| 1798 | return std::nullopt; | |||
| 1799 | // Check the immediate is in range for a SHL. | |||
| 1800 | int64_t Imm = *ShiftImm; | |||
| 1801 | if (Imm < 0) | |||
| 1802 | return std::nullopt; | |||
| 1803 | switch (SrcTy.getElementType().getSizeInBits()) { | |||
| 1804 | default: | |||
| 1805 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift" ; } } while (false); | |||
| 1806 | return std::nullopt; | |||
| 1807 | case 8: | |||
| 1808 | if (Imm > 7) | |||
| 1809 | return std::nullopt; | |||
| 1810 | break; | |||
| 1811 | case 16: | |||
| 1812 | if (Imm > 15) | |||
| 1813 | return std::nullopt; | |||
| 1814 | break; | |||
| 1815 | case 32: | |||
| 1816 | if (Imm > 31) | |||
| 1817 | return std::nullopt; | |||
| 1818 | break; | |||
| 1819 | case 64: | |||
| 1820 | if (Imm > 63) | |||
| 1821 | return std::nullopt; | |||
| 1822 | break; | |||
| 1823 | } | |||
| 1824 | return Imm; | |||
| 1825 | } | |||
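| | // e.g. for <4 x s32> the immediate form (SHLv4i32_shift) can only encode | |||
| | // shifts 0..31, so a splat of 35 returns std::nullopt here and the caller | |||
| | // falls back to the register (USHL) form. | |||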
| 1826 | ||||
| 1827 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, | |||
| 1828 | MachineRegisterInfo &MRI) { | |||
| 1829 | assert(I.getOpcode() == TargetOpcode::G_SHL); | |||
| 1830 | Register DstReg = I.getOperand(0).getReg(); | |||
| 1831 | const LLT Ty = MRI.getType(DstReg); | |||
| 1832 | Register Src1Reg = I.getOperand(1).getReg(); | |||
| 1833 | Register Src2Reg = I.getOperand(2).getReg(); | |||
| 1834 | ||||
| 1835 | if (!Ty.isVector()) | |||
| 1836 | return false; | |||
| 1837 | ||||
| 1838 | // Check if we have a vector of constants on RHS that we can select as the | |||
| 1839 | // immediate form. | |||
| 1840 | std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); | |||
| 1841 | ||||
| 1842 | unsigned Opc = 0; | |||
| 1843 | if (Ty == LLT::fixed_vector(2, 64)) { | |||
| 1844 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; | |||
| 1845 | } else if (Ty == LLT::fixed_vector(4, 32)) { | |||
| 1846 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; | |||
| 1847 | } else if (Ty == LLT::fixed_vector(2, 32)) { | |||
| 1848 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; | |||
| 1849 | } else if (Ty == LLT::fixed_vector(4, 16)) { | |||
| 1850 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; | |||
| 1851 | } else if (Ty == LLT::fixed_vector(8, 16)) { | |||
| 1852 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; | |||
| 1853 | } else if (Ty == LLT::fixed_vector(16, 8)) { | |||
| 1854 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; | |||
| 1855 | } else if (Ty == LLT::fixed_vector(8, 8)) { | |||
| 1856 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; | |||
| 1857 | } else { | |||
| 1858 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; } } while (false); | |||
| 1859 | return false; | |||
| 1860 | } | |||
| 1861 | ||||
| 1862 | auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); | |||
| 1863 | if (ImmVal) | |||
| 1864 | Shl.addImm(*ImmVal); | |||
| 1865 | else | |||
| 1866 | Shl.addUse(Src2Reg); | |||
| 1867 | constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); | |||
| 1868 | I.eraseFromParent(); | |||
| 1869 | return true; | |||
| 1870 | } | |||
| 1871 | ||||
| 1872 | bool AArch64InstructionSelector::selectVectorAshrLshr( | |||
| 1873 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 1874 | assert(I.getOpcode() == TargetOpcode::G_ASHR || | |||
| 1875 | I.getOpcode() == TargetOpcode::G_LSHR); | |||
| 1876 | Register DstReg = I.getOperand(0).getReg(); | |||
| 1877 | const LLT Ty = MRI.getType(DstReg); | |||
| 1878 | Register Src1Reg = I.getOperand(1).getReg(); | |||
| 1879 | Register Src2Reg = I.getOperand(2).getReg(); | |||
| 1880 | ||||
| 1881 | if (!Ty.isVector()) | |||
| 1882 | return false; | |||
| 1883 | ||||
| 1884 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; | |||
| 1885 | ||||
| 1886 | // We expect the immediate case to be lowered in the PostLegalCombiner to | |||
| 1887 | // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. | |||
| 1888 | ||||
| 1889 | // There is no shift-right-by-register instruction, but the shift-left- | |||
| 1890 | // by-register instruction takes a signed value, where negative amounts | |||
| 1891 | // specify a right shift. | |||
| 1892 | ||||
| 1893 | unsigned Opc = 0; | |||
| 1894 | unsigned NegOpc = 0; | |||
| 1895 | const TargetRegisterClass *RC = | |||
| 1896 | getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID)); | |||
| 1897 | if (Ty == LLT::fixed_vector(2, 64)) { | |||
| 1898 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; | |||
| 1899 | NegOpc = AArch64::NEGv2i64; | |||
| 1900 | } else if (Ty == LLT::fixed_vector(4, 32)) { | |||
| 1901 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; | |||
| 1902 | NegOpc = AArch64::NEGv4i32; | |||
| 1903 | } else if (Ty == LLT::fixed_vector(2, 32)) { | |||
| 1904 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; | |||
| 1905 | NegOpc = AArch64::NEGv2i32; | |||
| 1906 | } else if (Ty == LLT::fixed_vector(4, 16)) { | |||
| 1907 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; | |||
| 1908 | NegOpc = AArch64::NEGv4i16; | |||
| 1909 | } else if (Ty == LLT::fixed_vector(8, 16)) { | |||
| 1910 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; | |||
| 1911 | NegOpc = AArch64::NEGv8i16; | |||
| 1912 | } else if (Ty == LLT::fixed_vector(16, 8)) { | |||
| 1913 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; | |||
| 1914 | NegOpc = AArch64::NEGv16i8; | |||
| 1915 | } else if (Ty == LLT::fixed_vector(8, 8)) { | |||
| 1916 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; | |||
| 1917 | NegOpc = AArch64::NEGv8i8; | |||
| 1918 | } else { | |||
| 1919 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; } } while (false); | |||
| 1920 | return false; | |||
| 1921 | } | |||
| 1922 | ||||
| 1923 | auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); | |||
| 1924 | constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); | |||
| 1925 | auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); | |||
| 1926 | constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); | |||
| 1927 | I.eraseFromParent(); | |||
| 1928 | return true; | |||
| 1929 | } | |||
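| | // Illustrative selection (hypothetical vregs): a <4 x s32> G_ASHR by a | |||
| | // variable amount becomes a negate feeding a signed shift left, since SSHL | |||
| | // by a negative amount shifts right: | |||
| | // %neg:fpr(<4 x s32>) = NEGv4i32 %amt | |||
| | // %dst:fpr(<4 x s32>) = SSHLv4i32 %src, %neg | |||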
| 1930 | ||||
| 1931 | bool AArch64InstructionSelector::selectVaStartAAPCS( | |||
| 1932 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { | |||
| 1933 | return false; | |||
| 1934 | } | |||
| 1935 | ||||
| 1936 | bool AArch64InstructionSelector::selectVaStartDarwin( | |||
| 1937 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { | |||
| 1938 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); | |||
| 1939 | Register ListReg = I.getOperand(0).getReg(); | |||
| 1940 | ||||
| 1941 | Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | |||
| 1942 | ||||
| 1943 | int FrameIdx = FuncInfo->getVarArgsStackIndex(); | |||
| 1944 | if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64( | |||
| 1945 | MF.getFunction().getCallingConv())) { | |||
| 1946 | FrameIdx = FuncInfo->getVarArgsGPRSize() > 0 | |||
| 1947 | ? FuncInfo->getVarArgsGPRIndex() | |||
| 1948 | : FuncInfo->getVarArgsStackIndex(); | |||
| 1949 | } | |||
| 1950 | ||||
| 1951 | auto MIB = | |||
| 1952 | BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) | |||
| 1953 | .addDef(ArgsAddrReg) | |||
| 1954 | .addFrameIndex(FrameIdx) | |||
| 1955 | .addImm(0) | |||
| 1956 | .addImm(0); | |||
| 1957 | ||||
| 1958 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); | |||
| 1959 | ||||
| 1960 | MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) | |||
| 1961 | .addUse(ArgsAddrReg) | |||
| 1962 | .addUse(ListReg) | |||
| 1963 | .addImm(0) | |||
| 1964 | .addMemOperand(*I.memoperands_begin()); | |||
| 1965 | ||||
| 1966 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); | |||
| 1967 | I.eraseFromParent(); | |||
| 1968 | return true; | |||
| 1969 | } | |||
| 1970 | ||||
| 1971 | void AArch64InstructionSelector::materializeLargeCMVal( | |||
| 1972 | MachineInstr &I, const Value *V, unsigned OpFlags) { | |||
| 1973 | MachineBasicBlock &MBB = *I.getParent(); | |||
| 1974 | MachineFunction &MF = *MBB.getParent(); | |||
| 1975 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 1976 | ||||
| 1977 | auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); | |||
| 1978 | MovZ->addOperand(MF, I.getOperand(1)); | |||
| 1979 | MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | | |||
| 1980 | AArch64II::MO_NC); | |||
| 1981 | MovZ->addOperand(MF, MachineOperand::CreateImm(0)); | |||
| 1982 | constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); | |||
| 1983 | ||||
| 1984 | auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, | |||
| 1985 | Register ForceDstReg) { | |||
| 1986 | Register DstReg = ForceDstReg | |||
| 1987 | ? ForceDstReg | |||
| 1988 | : MRI.createVirtualRegister(&AArch64::GPR64RegClass); | |||
| 1989 | auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); | |||
| 1990 | if (auto *GV = dyn_cast<GlobalValue>(V)) { | |||
| 1991 | MovI->addOperand(MF, MachineOperand::CreateGA( | |||
| 1992 | GV, MovZ->getOperand(1).getOffset(), Flags)); | |||
| 1993 | } else { | |||
| 1994 | MovI->addOperand( | |||
| 1995 | MF, MachineOperand::CreateBA(cast<BlockAddress>(V), | |||
| 1996 | MovZ->getOperand(1).getOffset(), Flags)); | |||
| 1997 | } | |||
| 1998 | MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); | |||
| 1999 | constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); | |||
| 2000 | return DstReg; | |||
| 2001 | }; | |||
| 2002 | Register DstReg = BuildMovK(MovZ.getReg(0), | |||
| 2003 | AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); | |||
| 2004 | DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); | |||
| 2005 | BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); | |||
| 2006 | } | |||
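| | // The net effect is the usual four-instruction wide-immediate sequence | |||
| | // (sketched in assembly; relocation-operand syntax approximate): | |||
| | // movz x0, #:abs_g0_nc:sym | |||
| | // movk x0, #:abs_g1_nc:sym, lsl #16 | |||
| | // movk x0, #:abs_g2_nc:sym, lsl #32 | |||
| | // movk x0, #:abs_g3:sym, lsl #48 | |||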
| 2007 | ||||
| 2008 | bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { | |||
| 2009 | MachineBasicBlock &MBB = *I.getParent(); | |||
| 2010 | MachineFunction &MF = *MBB.getParent(); | |||
| 2011 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 2012 | ||||
| 2013 | switch (I.getOpcode()) { | |||
| 2014 | case TargetOpcode::G_STORE: { | |||
| 2015 | bool Changed = contractCrossBankCopyIntoStore(I, MRI); | |||
| 2016 | MachineOperand &SrcOp = I.getOperand(0); | |||
| 2017 | if (MRI.getType(SrcOp.getReg()).isPointer()) { | |||
| 2018 | // Allow matching with imported patterns for stores of pointers. Unlike | |||
| 2019 | // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy | |||
| 2020 | // and constrain. | |||
| 2021 | auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp); | |||
| 2022 | Register NewSrc = Copy.getReg(0); | |||
| 2023 | SrcOp.setReg(NewSrc); | |||
| 2024 | RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI); | |||
| 2025 | Changed = true; | |||
| 2026 | } | |||
| 2027 | return Changed; | |||
| 2028 | } | |||
| 2029 | case TargetOpcode::G_PTR_ADD: | |||
| 2030 | return convertPtrAddToAdd(I, MRI); | |||
| 2031 | case TargetOpcode::G_LOAD: { | |||
| 2032 | // For scalar loads of pointers, we try to convert the dest type from p0 | |||
| 2033 | // to s64 so that our imported patterns can match. Like with the G_PTR_ADD | |||
| 2034 | // conversion, this should be ok because all users should have been | |||
| 2035 | // selected already, so the type doesn't matter for them. | |||
| 2036 | Register DstReg = I.getOperand(0).getReg(); | |||
| 2037 | const LLT DstTy = MRI.getType(DstReg); | |||
| 2038 | if (!DstTy.isPointer()) | |||
| 2039 | return false; | |||
| 2040 | MRI.setType(DstReg, LLT::scalar(64)); | |||
| 2041 | return true; | |||
| 2042 | } | |||
| 2043 | case AArch64::G_DUP: { | |||
| 2044 | // Convert the type from p0 to s64 to help selection. | |||
| 2045 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 2046 | if (!DstTy.getElementType().isPointer()) | |||
| 2047 | return false; | |||
| 2048 | auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); | |||
| 2049 | MRI.setType(I.getOperand(0).getReg(), | |||
| 2050 | DstTy.changeElementType(LLT::scalar(64))); | |||
| 2051 | MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass); | |||
| 2052 | I.getOperand(1).setReg(NewSrc.getReg(0)); | |||
| 2053 | return true; | |||
| 2054 | } | |||
| 2055 | case TargetOpcode::G_UITOFP: | |||
| 2056 | case TargetOpcode::G_SITOFP: { | |||
| 2057 | // If both source and destination regbanks are FPR, then convert the opcode | |||
| 2058 | // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant. | |||
| 2059 | // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank | |||
| 2060 | // copy. | |||
| 2061 | Register SrcReg = I.getOperand(1).getReg(); | |||
| 2062 | LLT SrcTy = MRI.getType(SrcReg); | |||
| 2063 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 2064 | if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) | |||
| 2065 | return false; | |||
| 2066 | ||||
| 2067 | if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { | |||
| 2068 | if (I.getOpcode() == TargetOpcode::G_SITOFP) | |||
| 2069 | I.setDesc(TII.get(AArch64::G_SITOF)); | |||
| 2070 | else | |||
| 2071 | I.setDesc(TII.get(AArch64::G_UITOF)); | |||
| 2072 | return true; | |||
| 2073 | } | |||
| 2074 | return false; | |||
| 2075 | } | |||
| 2076 | default: | |||
| 2077 | return false; | |||
| 2078 | } | |||
| 2079 | } | |||
| 2080 | ||||
| 2081 | /// This lowering tries to look for G_PTR_ADD instructions and then converts | |||
| 2082 | /// them to a standard G_ADD with a COPY on the source. | |||
| 2083 | /// | |||
| 2084 | /// The motivation behind this is to expose the add semantics to the imported | |||
| 2085 | /// tablegen patterns. We shouldn't need to check for uses being loads/stores, | |||
| 2086 | /// because the selector works bottom up, uses before defs. By the time we | |||
| 2087 | /// end up trying to select a G_PTR_ADD, we should have already attempted to | |||
| 2088 | /// fold this into addressing modes and were therefore unsuccessful. | |||
| 2089 | bool AArch64InstructionSelector::convertPtrAddToAdd( | |||
| 2090 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 2091 | assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); | |||
| 2092 | Register DstReg = I.getOperand(0).getReg(); | |||
| 2093 | Register AddOp1Reg = I.getOperand(1).getReg(); | |||
| 2094 | const LLT PtrTy = MRI.getType(DstReg); | |||
| 2095 | if (PtrTy.getAddressSpace() != 0) | |||
| 2096 | return false; | |||
| 2097 | ||||
| 2098 | const LLT CastPtrTy = | |||
| 2099 | PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64); | |||
| 2100 | auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); | |||
| 2101 | // Set regbanks on the registers. | |||
| 2102 | if (PtrTy.isVector()) | |||
| 2103 | MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); | |||
| 2104 | else | |||
| 2105 | MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); | |||
| 2106 | ||||
| 2107 | // Now turn the %dst(p0) = G_PTR_ADD %base, off into: | |||
| 2108 | // %dst(intty) = G_ADD %intbase, off | |||
| 2109 | I.setDesc(TII.get(TargetOpcode::G_ADD)); | |||
| 2110 | MRI.setType(DstReg, CastPtrTy); | |||
| 2111 | I.getOperand(1).setReg(PtrToInt.getReg(0)); | |||
| 2112 | if (!select(*PtrToInt)) { | |||
| 2113 | LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd" ; } } while (false); | |||
| 2114 | return false; | |||
| 2115 | } | |||
| 2116 | ||||
| 2117 | // Also take the opportunity here to try to do some optimization. | |||
| 2118 | // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. | |||
| 2119 | Register NegatedReg; | |||
| 2120 | if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) | |||
| 2121 | return true; | |||
| 2122 | I.getOperand(2).setReg(NegatedReg); | |||
| 2123 | I.setDesc(TII.get(TargetOpcode::G_SUB)); | |||
| 2124 | return true; | |||
| 2125 | } | |||
| 2126 | ||||
| 2127 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, | |||
| 2128 | MachineRegisterInfo &MRI) { | |||
| 2129 | // We try to match the immediate variant of LSL, which is actually an alias | |||
| 2130 | // for a special case of UBFM. Otherwise, we fall back to the imported | |||
| 2131 | // selector which will match the register variant. | |||
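| | // As an illustrative note (not from the original source): for 64-bit | |||
| | // registers, "lsl Xd, Xn, #imm" is the alias of | |||
| | // "ubfm Xd, Xn, #((64 - imm) % 64), #(63 - imm)". | |||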
| 2132 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); | |||
| 2133 | const auto &MO = I.getOperand(2); | |||
| 2134 | auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI); | |||
| 2135 | if (!VRegAndVal) | |||
| 2136 | return false; | |||
| 2137 | ||||
| 2138 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 2139 | if (DstTy.isVector()) | |||
| 2140 | return false; | |||
| 2141 | bool Is64Bit = DstTy.getSizeInBits() == 64; | |||
| 2142 | auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); | |||
| 2143 | auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); | |||
| 2144 | ||||
| 2145 | if (!Imm1Fn || !Imm2Fn) | |||
| 2146 | return false; | |||
| 2147 | ||||
| 2148 | auto NewI = | |||
| 2149 | MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, | |||
| 2150 | {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); | |||
| 2151 | ||||
| 2152 | for (auto &RenderFn : *Imm1Fn) | |||
| 2153 | RenderFn(NewI); | |||
| 2154 | for (auto &RenderFn : *Imm2Fn) | |||
| 2155 | RenderFn(NewI); | |||
| 2156 | ||||
| 2157 | I.eraseFromParent(); | |||
| 2158 | return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); | |||
| 2159 | } | |||
| 2160 | ||||
| 2161 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( | |||
| 2162 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 2163 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); | |||
| 2164 | // If we're storing a scalar, it doesn't matter what register bank that | |||
| 2165 | // scalar is on. All that matters is the size. | |||
| 2166 | // | |||
| 2167 | // So, if we see something like this (with a 32-bit scalar as an example): | |||
| 2168 | // | |||
| 2169 | // %x:gpr(s32) = ... something ... | |||
| 2170 | // %y:fpr(s32) = COPY %x:gpr(s32) | |||
| 2171 | // G_STORE %y:fpr(s32) | |||
| 2172 | // | |||
| 2173 | // We can fix this up into something like this: | |||
| 2174 | // | |||
| 2175 | // G_STORE %x:gpr(s32) | |||
| 2176 | // | |||
| 2177 | // And then continue the selection process normally. | |||
| 2178 | Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); | |||
| 2179 | if (!DefDstReg.isValid()) | |||
| 2180 | return false; | |||
| 2181 | LLT DefDstTy = MRI.getType(DefDstReg); | |||
| 2182 | Register StoreSrcReg = I.getOperand(0).getReg(); | |||
| 2183 | LLT StoreSrcTy = MRI.getType(StoreSrcReg); | |||
| 2184 | ||||
| 2185 | // If we get something strange like a physical register, then we shouldn't | |||
| 2186 | // go any further. | |||
| 2187 | if (!DefDstTy.isValid()) | |||
| 2188 | return false; | |||
| 2189 | ||||
| 2190 | // Are the source and dst types the same size? | |||
| 2191 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) | |||
| 2192 | return false; | |||
| 2193 | ||||
| 2194 | if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == | |||
| 2195 | RBI.getRegBank(DefDstReg, MRI, TRI)) | |||
| 2196 | return false; | |||
| 2197 | ||||
| 2198 | // We have a cross-bank copy, which is entering a store. Let's fold it. | |||
| 2199 | I.getOperand(0).setReg(DefDstReg); | |||
| 2200 | return true; | |||
| 2201 | } | |||
| 2202 | ||||
| 2203 | bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { | |||
| 2204 | assert(I.getParent() && "Instruction should be in a basic block!"); | |||
| 2205 | assert(I.getParent()->getParent() && "Instruction should be in a function!"); | |||
| 2206 | ||||
| 2207 | MachineBasicBlock &MBB = *I.getParent(); | |||
| 2208 | MachineFunction &MF = *MBB.getParent(); | |||
| 2209 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 2210 | ||||
| 2211 | switch (I.getOpcode()) { | |||
| 2212 | case AArch64::G_DUP: { | |||
| 2213 | // Before selecting a DUP instruction, check if it is better selected as a | |||
| 2214 | // MOV or load from a constant pool. | |||
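| | // Illustrative sketch (not in the original source): for example, | |||
| | // %d:fpr(<4 x s32>) = G_DUP 7 is rebuilt below as the splat vector | |||
| | // <7, 7, 7, 7> and emitted through emitConstantVector. | |||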
| 2215 | Register Src = I.getOperand(1).getReg(); | |||
| 2216 | auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI); | |||
| 2217 | if (!ValAndVReg) | |||
| 2218 | return false; | |||
| 2219 | LLVMContext &Ctx = MF.getFunction().getContext(); | |||
| 2220 | Register Dst = I.getOperand(0).getReg(); | |||
| 2221 | auto *CV = ConstantDataVector::getSplat( | |||
| 2222 | MRI.getType(Dst).getNumElements(), | |||
| 2223 | ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), | |||
| 2224 | ValAndVReg->Value)); | |||
| 2225 | if (!emitConstantVector(Dst, CV, MIB, MRI)) | |||
| 2226 | return false; | |||
| 2227 | I.eraseFromParent(); | |||
| 2228 | return true; | |||
| 2229 | } | |||
| 2230 | case TargetOpcode::G_SEXT: | |||
| 2231 | // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV | |||
| 2232 | // over a normal extend. | |||
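| | // Illustrative note (not from the original source): this lets an s64 | |||
| | // sext of a 32-bit lane extract become a single "smov x0, v0.s[idx]" | |||
| | // instead of a lane move followed by sxtw. | |||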
| 2233 | if (selectUSMovFromExtend(I, MRI)) | |||
| 2234 | return true; | |||
| 2235 | return false; | |||
| 2236 | case TargetOpcode::G_BR: | |||
| 2237 | return false; | |||
| 2238 | case TargetOpcode::G_SHL: | |||
| 2239 | return earlySelectSHL(I, MRI); | |||
| 2240 | case TargetOpcode::G_CONSTANT: { | |||
| 2241 | bool IsZero = false; | |||
| 2242 | if (I.getOperand(1).isCImm()) | |||
| 2243 | IsZero = I.getOperand(1).getCImm()->isZero(); | |||
| 2244 | else if (I.getOperand(1).isImm()) | |||
| 2245 | IsZero = I.getOperand(1).getImm() == 0; | |||
| 2246 | ||||
| 2247 | if (!IsZero) | |||
| 2248 | return false; | |||
| 2249 | ||||
| 2250 | Register DefReg = I.getOperand(0).getReg(); | |||
| 2251 | LLT Ty = MRI.getType(DefReg); | |||
| 2252 | if (Ty.getSizeInBits() == 64) { | |||
| 2253 | I.getOperand(1).ChangeToRegister(AArch64::XZR, false); | |||
| 2254 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); | |||
| 2255 | } else if (Ty.getSizeInBits() == 32) { | |||
| 2256 | I.getOperand(1).ChangeToRegister(AArch64::WZR, false); | |||
| 2257 | RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); | |||
| 2258 | } else | |||
| 2259 | return false; | |||
| 2260 | ||||
| 2261 | I.setDesc(TII.get(TargetOpcode::COPY)); | |||
| 2262 | return true; | |||
| 2263 | } | |||
| 2264 | ||||
| 2265 | case TargetOpcode::G_ADD: { | |||
| 2266 | // Check if this is being fed by a G_ICMP on either side. | |||
| 2267 | // | |||
| 2268 | // (cmp pred, x, y) + z | |||
| 2269 | // | |||
| 2270 | // In the above case, when the cmp is true, we increment z by 1. So, we can | |||
| 2271 | // fold the add into the cset for the cmp by using cinc. | |||
| 2272 | // | |||
| 2273 | // FIXME: This would probably be a lot nicer in PostLegalizerLowering. | |||
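| | // Illustrative sketch (not in the original source): | |||
| | //   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y | |||
| | //   %a:gpr(s32) = G_ADD %z, %c | |||
| | // selects to a compare followed by "cinc wA, wZ, eq". | |||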
| 2274 | Register AddDst = I.getOperand(0).getReg(); | |||
| 2275 | Register AddLHS = I.getOperand(1).getReg(); | |||
| 2276 | Register AddRHS = I.getOperand(2).getReg(); | |||
| 2277 | // Only handle scalars. | |||
| 2278 | LLT Ty = MRI.getType(AddLHS); | |||
| 2279 | if (Ty.isVector()) | |||
| 2280 | return false; | |||
| 2281 | // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 | |||
| 2282 | // bits. | |||
| 2283 | unsigned Size = Ty.getSizeInBits(); | |||
| 2284 | if (Size != 32 && Size != 64) | |||
| 2285 | return false; | |||
| 2286 | auto MatchCmp = [&](Register Reg) -> MachineInstr * { | |||
| 2287 | if (!MRI.hasOneNonDBGUse(Reg)) | |||
| 2288 | return nullptr; | |||
| 2289 | // If the LHS of the add is 32 bits, then we want to fold a 32-bit | |||
| 2290 | // compare. | |||
| 2291 | if (Size == 32) | |||
| 2292 | return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI); | |||
| 2293 | // We model scalar compares using 32-bit destinations right now. | |||
| 2294 | // If it's a 64-bit compare, it'll have 64-bit sources. | |||
| 2295 | Register ZExt; | |||
| 2296 | if (!mi_match(Reg, MRI, | |||
| 2297 | m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt)))))) | |||
| 2298 | return nullptr; | |||
| 2299 | auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI); | |||
| 2300 | if (!Cmp || | |||
| 2301 | MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64) | |||
| 2302 | return nullptr; | |||
| 2303 | return Cmp; | |||
| 2304 | }; | |||
| 2305 | // Try to match | |||
| 2306 | // z + (cmp pred, x, y) | |||
| 2307 | MachineInstr *Cmp = MatchCmp(AddRHS); | |||
| 2308 | if (!Cmp) { | |||
| 2309 | // (cmp pred, x, y) + z | |||
| 2310 | std::swap(AddLHS, AddRHS); | |||
| 2311 | Cmp = MatchCmp(AddRHS); | |||
| 2312 | if (!Cmp) | |||
| 2313 | return false; | |||
| 2314 | } | |||
| 2315 | auto &PredOp = Cmp->getOperand(1); | |||
| 2316 | auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); | |||
| 2317 | const AArch64CC::CondCode InvCC = | |||
| 2318 | changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); | |||
| 2319 | MIB.setInstrAndDebugLoc(I); | |||
| 2320 | emitIntegerCompare(/*LHS=*/Cmp->getOperand(2), | |||
| 2321 | /*RHS=*/Cmp->getOperand(3), PredOp, MIB); | |||
| 2322 | emitCSINC(/*Dst=*/AddDst, /*Src1=*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB); | |||
| 2323 | I.eraseFromParent(); | |||
| 2324 | return true; | |||
| 2325 | } | |||
| 2326 | case TargetOpcode::G_OR: { | |||
| 2327 | // Look for operations that take the lower `Width=Size-ShiftImm` bits of | |||
| 2328 | // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via | |||
| 2329 | // shifting and masking that we can replace with a BFI (encoded as a BFM). | |||
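| | // Illustrative example (not in the original source): with Size = 32 and | |||
| | // ShiftImm = 8, or-ing (ShiftSrc << 8) with (MaskSrc & 0xff) maps to | |||
| | // BFMWri MaskSrc, ShiftSrc, 24, 23, i.e. "bfi Wd, Wn, #8, #24". | |||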
| 2330 | Register Dst = I.getOperand(0).getReg(); | |||
| 2331 | LLT Ty = MRI.getType(Dst); | |||
| 2332 | ||||
| 2333 | if (!Ty.isScalar()) | |||
| 2334 | return false; | |||
| 2335 | ||||
| 2336 | unsigned Size = Ty.getSizeInBits(); | |||
| 2337 | if (Size != 32 && Size != 64) | |||
| 2338 | return false; | |||
| 2339 | ||||
| 2340 | Register ShiftSrc; | |||
| 2341 | int64_t ShiftImm; | |||
| 2342 | Register MaskSrc; | |||
| 2343 | int64_t MaskImm; | |||
| 2344 | if (!mi_match( | |||
| 2345 | Dst, MRI, | |||
| 2346 | m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))), | |||
| 2347 | m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm)))))) | |||
| 2348 | return false; | |||
| 2349 | ||||
| 2350 | if (ShiftImm >= Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) | |||
| 2351 | return false; | |||
| 2352 | ||||
| 2353 | int64_t Immr = Size - ShiftImm; | |||
| 2354 | int64_t Imms = Size - ShiftImm - 1; | |||
| 2355 | unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; | |||
| 2356 | emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB); | |||
| 2357 | I.eraseFromParent(); | |||
| 2358 | return true; | |||
| 2359 | } | |||
| 2360 | case TargetOpcode::G_FENCE: { | |||
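| | // Illustrative note (not from the original source): ordering 4 is | |||
| | // acquire, which maps to "dmb ishld" (0x9); stronger orderings map to | |||
| | // "dmb ish" (0xb). | |||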
| 2361 | if (I.getOperand(1).getImm() == 0) | |||
| 2362 | BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER)); | |||
| 2363 | else | |||
| 2364 | BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB)) | |||
| 2365 | .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb); | |||
| 2366 | I.eraseFromParent(); | |||
| 2367 | return true; | |||
| 2368 | } | |||
| 2369 | default: | |||
| 2370 | return false; | |||
| 2371 | } | |||
| 2372 | } | |||
| 2373 | ||||
| 2374 | bool AArch64InstructionSelector::select(MachineInstr &I) { | |||
| 2375 | assert(I.getParent() && "Instruction should be in a basic block!"); | |||
| 2376 | assert(I.getParent()->getParent() && "Instruction should be in a function!"); | |||
| 2377 | ||||
| 2378 | MachineBasicBlock &MBB = *I.getParent(); | |||
| 2379 | MachineFunction &MF = *MBB.getParent(); | |||
| 2380 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 2381 | ||||
| 2382 | const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>(); | |||
| 2383 | if (Subtarget->requiresStrictAlign()) { | |||
| 2384 | // We don't support this feature yet. | |||
| 2385 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); | |||
| 2386 | return false; | |||
| 2387 | } | |||
| 2388 | ||||
| 2389 | MIB.setInstrAndDebugLoc(I); | |||
| 2390 | ||||
| 2391 | unsigned Opcode = I.getOpcode(); | |||
| 2392 | // G_PHI requires the same handling as PHI | |||
| 2393 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { | |||
| 2394 | // Certain non-generic instructions also need some special handling. | |||
| 2395 | ||||
| 2396 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) | |||
| 2397 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2398 | ||||
| 2399 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { | |||
| 2400 | const Register DefReg = I.getOperand(0).getReg(); | |||
| 2401 | const LLT DefTy = MRI.getType(DefReg); | |||
| 2402 | ||||
| 2403 | const RegClassOrRegBank &RegClassOrBank = | |||
| 2404 | MRI.getRegClassOrRegBank(DefReg); | |||
| 2405 | ||||
| 2406 | const TargetRegisterClass *DefRC | |||
| 2407 | = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); | |||
| 2408 | if (!DefRC) { | |||
| 2409 | if (!DefTy.isValid()) { | |||
| 2410 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); | |||
| 2411 | return false; | |||
| 2412 | } | |||
| 2413 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); | |||
| 2414 | DefRC = getRegClassForTypeOnBank(DefTy, RB); | |||
| 2415 | if (!DefRC) { | |||
| 2416 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); | |||
| 2417 | return false; | |||
| 2418 | } | |||
| 2419 | } | |||
| 2420 | ||||
| 2421 | I.setDesc(TII.get(TargetOpcode::PHI)); | |||
| 2422 | ||||
| 2423 | return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); | |||
| 2424 | } | |||
| 2425 | ||||
| 2426 | if (I.isCopy()) | |||
| 2427 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 2428 | ||||
| 2429 | if (I.isDebugInstr()) | |||
| 2430 | return selectDebugInstr(I, MRI, RBI); | |||
| 2431 | ||||
| 2432 | return true; | |||
| 2433 | } | |||
| 2434 | ||||
| 2435 | ||||
| 2436 | if (I.getNumOperands() != I.getNumExplicitOperands()) { | |||
| 2437 | LLVM_DEBUG( | |||
| 2438 | dbgs() << "Generic instruction has unexpected implicit operands\n"); | |||
| 2439 | return false; | |||
| 2440 | } | |||
| 2441 | ||||
| 2442 | // Try to do some lowering before we start instruction selecting. These | |||
| 2443 | // lowerings are purely transformations on the input G_MIR and so selection | |||
| 2444 | // must continue after any modification of the instruction. | |||
| 2445 | if (preISelLower(I)) { | |||
| 2446 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. | |||
| 2447 | } | |||
| 2448 | ||||
| 2449 | // There may be patterns that the importer cannot handle optimally but | |||
| 2450 | // still selects to a suboptimal sequence, so our custom C++ selection code | |||
| 2451 | // would never get a chance to work on them. Therefore, we have an early | |||
| 2452 | // selection attempt here to give priority to certain selection routines | |||
| 2453 | // over the imported ones. | |||
| 2454 | if (earlySelect(I)) | |||
| 2455 | return true; | |||
| 2456 | ||||
| 2457 | if (selectImpl(I, *CoverageInfo)) | |||
| 2458 | return true; | |||
| 2459 | ||||
| 2460 | LLT Ty = | |||
| 2461 | I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; | |||
| 2462 | ||||
| 2463 | switch (Opcode) { | |||
| 2464 | case TargetOpcode::G_SBFX: | |||
| 2465 | case TargetOpcode::G_UBFX: { | |||
| 2466 | static const unsigned OpcTable[2][2] = { | |||
| 2467 | {AArch64::UBFMWri, AArch64::UBFMXri}, | |||
| 2468 | {AArch64::SBFMWri, AArch64::SBFMXri}}; | |||
| 2469 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; | |||
| 2470 | unsigned Size = Ty.getSizeInBits(); | |||
| 2471 | unsigned Opc = OpcTable[IsSigned][Size == 64]; | |||
| 2472 | auto Cst1 = | |||
| 2473 | getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI); | |||
| 2474 | assert(Cst1 && "Should have gotten a constant for src 1?"); | |||
| 2475 | auto Cst2 = | |||
| 2476 | getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI); | |||
| 2477 | assert(Cst2 && "Should have gotten a constant for src 2?"); | |||
| 2478 | auto LSB = Cst1->Value.getZExtValue(); | |||
| 2479 | auto Width = Cst2->Value.getZExtValue(); | |||
| 2480 | auto BitfieldInst = | |||
| 2481 | MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)}) | |||
| 2482 | .addImm(LSB) | |||
| 2483 | .addImm(LSB + Width - 1); | |||
| 2484 | I.eraseFromParent(); | |||
| 2485 | return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); | |||
| 2486 | } | |||
| 2487 | case TargetOpcode::G_BRCOND: | |||
| 2488 | return selectCompareBranch(I, MF, MRI); | |||
| 2489 | ||||
| 2490 | case TargetOpcode::G_BRINDIRECT: { | |||
| 2491 | I.setDesc(TII.get(AArch64::BR)); | |||
| 2492 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2493 | } | |||
| 2494 | ||||
| 2495 | case TargetOpcode::G_BRJT: | |||
| 2496 | return selectBrJT(I, MRI); | |||
| 2497 | ||||
| 2498 | case AArch64::G_ADD_LOW: { | |||
| 2499 | // This op may have been separated from its ADRP companion by the localizer | |||
| 2500 | // or some other code motion pass. Given that many CPUs will try to | |||
| 2501 | // macro fuse these operations anyway, select this into a MOVaddr pseudo | |||
| 2502 | // which will later be expanded into an ADRP+ADD pair after scheduling. | |||
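| | // Illustrative sketch (not in the original source): | |||
| | //   %g:gpr(p0) = ADRP @var | |||
| | //   %r:gpr(p0) = G_ADD_LOW %g, @var | |||
| | // becomes %r = MOVaddr @var, @var, expanded again after scheduling. | |||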
| 2503 | MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); | |||
| 2504 | if (BaseMI->getOpcode() != AArch64::ADRP) { | |||
| 2505 | I.setDesc(TII.get(AArch64::ADDXri)); | |||
| 2506 | I.addOperand(MachineOperand::CreateImm(0)); | |||
| 2507 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2508 | } | |||
| 2509 | assert(TM.getCodeModel() == CodeModel::Small && | |||
| 2510 | "Expected small code model"); | |||
| 2511 | auto Op1 = BaseMI->getOperand(1); | |||
| 2512 | auto Op2 = I.getOperand(2); | |||
| 2513 | auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) | |||
| 2514 | .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), | |||
| 2515 | Op1.getTargetFlags()) | |||
| 2516 | .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), | |||
| 2517 | Op2.getTargetFlags()); | |||
| 2518 | I.eraseFromParent(); | |||
| 2519 | return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); | |||
| 2520 | } | |||
| 2521 | ||||
| 2522 | case TargetOpcode::G_BSWAP: { | |||
| 2523 | // Handle vector types for G_BSWAP directly. | |||
| 2524 | Register DstReg = I.getOperand(0).getReg(); | |||
| 2525 | LLT DstTy = MRI.getType(DstReg); | |||
| 2526 | ||||
| 2527 | // We should only get vector types here; everything else is handled by the | |||
| 2528 | // importer right now. | |||
| 2529 | if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { | |||
| 2530 | LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); | |||
| 2531 | return false; | |||
| 2532 | } | |||
| 2533 | ||||
| 2534 | // Only handle 4 and 2 element vectors for now. | |||
| 2535 | // TODO: 16-bit elements. | |||
| 2536 | unsigned NumElts = DstTy.getNumElements(); | |||
| 2537 | if (NumElts != 4 && NumElts != 2) { | |||
| 2538 | LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); | |||
| 2539 | return false; | |||
| 2540 | } | |||
| 2541 | ||||
| 2542 | // Choose the correct opcode for the supported types. Right now, that's | |||
| 2543 | // v2s32, v4s32, and v2s64. | |||
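| | // Illustrative note (not from the original source): the swap is done | |||
| | // byte-wise across the whole vector, e.g. a v4s32 G_BSWAP becomes | |||
| | // "rev32 v0.16b, v0.16b". | |||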
| 2544 | unsigned Opc = 0; | |||
| 2545 | unsigned EltSize = DstTy.getElementType().getSizeInBits(); | |||
| 2546 | if (EltSize == 32) | |||
| 2547 | Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8 | |||
| 2548 | : AArch64::REV32v16i8; | |||
| 2549 | else if (EltSize == 64) | |||
| 2550 | Opc = AArch64::REV64v16i8; | |||
| 2551 | ||||
| 2552 | // We should always get something by the time we get here... | |||
| 2553 | assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); | |||
| 2554 | ||||
| 2555 | I.setDesc(TII.get(Opc)); | |||
| 2556 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2557 | } | |||
| 2558 | ||||
| 2559 | case TargetOpcode::G_FCONSTANT: | |||
| 2560 | case TargetOpcode::G_CONSTANT: { | |||
| 2561 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; | |||
| 2562 | ||||
| 2563 | const LLT s8 = LLT::scalar(8); | |||
| 2564 | const LLT s16 = LLT::scalar(16); | |||
| 2565 | const LLT s32 = LLT::scalar(32); | |||
| 2566 | const LLT s64 = LLT::scalar(64); | |||
| 2567 | const LLT s128 = LLT::scalar(128); | |||
| 2568 | const LLT p0 = LLT::pointer(0, 64); | |||
| 2569 | ||||
| 2570 | const Register DefReg = I.getOperand(0).getReg(); | |||
| 2571 | const LLT DefTy = MRI.getType(DefReg); | |||
| 2572 | const unsigned DefSize = DefTy.getSizeInBits(); | |||
| 2573 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | |||
| 2574 | ||||
| 2575 | // FIXME: Redundant check, but even less readable when factored out. | |||
| 2576 | if (isFP) { | |||
| 2577 | if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { | |||
| 2578 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty | |||
| 2579 | << " constant, expected: " << s16 << " or " << s32 | |||
| 2580 | << " or " << s64 << " or " << s128 << '\n'); | |||
| 2581 | return false; | |||
| 2582 | } | |||
| 2583 | ||||
| 2584 | if (RB.getID() != AArch64::FPRRegBankID) { | |||
| 2585 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty | |||
| 2586 | << " constant on bank: " << RB | |||
| 2587 | << ", expected: FPR\n"); | |||
| 2588 | return false; | |||
| 2589 | } | |||
| 2590 | ||||
| 2591 | // The case when we have 0.0 is covered by tablegen. Reject it here so we | |||
| 2592 | // can be sure tablegen works correctly and isn't rescued by this code. | |||
| 2593 | // 0.0 is not covered by tablegen for FP128, however, so we handle that | |||
| 2594 | // case here. | |||
| 2595 | if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) | |||
| 2596 | return false; | |||
| 2597 | } else { | |||
| 2598 | // s32 and s64 are covered by tablegen. | |||
| 2599 | if (Ty != p0 && Ty != s8 && Ty != s16) { | |||
| 2600 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty | |||
| 2601 | << " constant, expected: " << s32 << ", " << s64 | |||
| 2602 | << ", or " << p0 << '\n'); | |||
| 2603 | return false; | |||
| 2604 | } | |||
| 2605 | ||||
| 2606 | if (RB.getID() != AArch64::GPRRegBankID) { | |||
| 2607 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty | |||
| 2608 | << " constant on bank: " << RB | |||
| 2609 | << ", expected: GPR\n"); | |||
| 2610 | return false; | |||
| 2611 | } | |||
| 2612 | } | |||
| 2613 | ||||
| 2614 | if (isFP) { | |||
| 2615 | const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB); | |||
| 2616 | // For 16, 64, and 128b values, emit a constant pool load. | |||
| 2617 | switch (DefSize) { | |||
| 2618 | default: | |||
| 2619 | llvm_unreachable("Unexpected destination size for G_FCONSTANT?"); | |||
| 2620 | case 32: | |||
| 2621 | // For s32, use a cp load if we have optsize/minsize. | |||
| 2622 | if (!shouldOptForSize(&MF)) | |||
| 2623 | break; | |||
| 2624 | [[fallthrough]]; | |||
| 2625 | case 16: | |||
| 2626 | case 64: | |||
| 2627 | case 128: { | |||
| 2628 | auto *FPImm = I.getOperand(1).getFPImm(); | |||
| 2629 | auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); | |||
| 2630 | if (!LoadMI) { | |||
| 2631 | LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); | |||
| 2632 | return false; | |||
| 2633 | } | |||
| 2634 | MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); | |||
| 2635 | I.eraseFromParent(); | |||
| 2636 | return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); | |||
| 2637 | } | |||
| 2638 | } | |||
| 2639 | ||||
| 2640 | // Either emit a FMOV, or emit a copy to emit a normal mov. | |||
| 2641 | assert(DefSize == 32 && | |||
| 2642 | "Expected constant pool loads for all sizes other than 32!"); | |||
| 2643 | const Register DefGPRReg = | |||
| 2644 | MRI.createVirtualRegister(&AArch64::GPR32RegClass); | |||
| 2645 | MachineOperand &RegOp = I.getOperand(0); | |||
| 2646 | RegOp.setReg(DefGPRReg); | |||
| 2647 | MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); | |||
| 2648 | MIB.buildCopy({DefReg}, {DefGPRReg}); | |||
| 2649 | ||||
| 2650 | if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { | |||
| 2651 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); | |||
| 2652 | return false; | |||
| 2653 | } | |||
| 2654 | ||||
| 2655 | MachineOperand &ImmOp = I.getOperand(1); | |||
| 2656 | // FIXME: Is going through int64_t always correct? | |||
| 2657 | ImmOp.ChangeToImmediate( | |||
| 2658 | ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); | |||
| 2659 | } else if (I.getOperand(1).isCImm()) { | |||
| 2660 | uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); | |||
| 2661 | I.getOperand(1).ChangeToImmediate(Val); | |||
| 2662 | } else if (I.getOperand(1).isImm()) { | |||
| 2663 | uint64_t Val = I.getOperand(1).getImm(); | |||
| 2664 | I.getOperand(1).ChangeToImmediate(Val); | |||
| 2665 | } | |||
| 2666 | ||||
| 2667 | const unsigned MovOpc = | |||
| 2668 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; | |||
| 2669 | I.setDesc(TII.get(MovOpc)); | |||
| 2670 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2671 | return true; | |||
| 2672 | } | |||
| 2673 | case TargetOpcode::G_EXTRACT: { | |||
| 2674 | Register DstReg = I.getOperand(0).getReg(); | |||
| 2675 | Register SrcReg = I.getOperand(1).getReg(); | |||
| 2676 | LLT SrcTy = MRI.getType(SrcReg); | |||
| 2677 | LLT DstTy = MRI.getType(DstReg); | |||
| 2678 | (void)DstTy; | |||
| 2679 | unsigned SrcSize = SrcTy.getSizeInBits(); | |||
| 2680 | ||||
| 2681 | if (SrcTy.getSizeInBits() > 64) { | |||
| 2682 | // This should be an extract of an s128, which is like a vector extract. | |||
| 2683 | if (SrcTy.getSizeInBits() != 128) | |||
| 2684 | return false; | |||
| 2685 | // Only support extracting 64 bits from an s128 at the moment. | |||
| 2686 | if (DstTy.getSizeInBits() != 64) | |||
| 2687 | return false; | |||
| 2688 | ||||
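| | // Illustrative note (not from the original source): on FPR this acts | |||
| | // like a lane copy, e.g. extracting bits [127:64] is "mov d0, v0.d[1]"; | |||
| | // on GPR it is a copy from the sube64 or subo64 subregister of the | |||
| | // 128-bit register pair. | |||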
| 2689 | unsigned Offset = I.getOperand(2).getImm(); | |||
| 2690 | if (Offset % 64 != 0) | |||
| 2691 | return false; | |||
| 2692 | ||||
| 2693 | // Check we have the right regbank always. | |||
| 2694 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); | |||
| 2695 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | |||
| 2696 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); | |||
| 2697 | ||||
| 2698 | if (SrcRB.getID() == AArch64::GPRRegBankID) { | |||
| 2699 | auto NewI = | |||
| 2700 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | |||
| 2701 | .addUse(SrcReg, 0, | |||
| 2702 | Offset == 0 ? AArch64::sube64 : AArch64::subo64); | |||
| 2703 | constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI, | |||
| 2704 | AArch64::GPR64RegClass, NewI->getOperand(0)); | |||
| 2705 | I.eraseFromParent(); | |||
| 2706 | return true; | |||
| 2707 | } | |||
| 2708 | ||||
| 2709 | // Emit the same code as a vector extract. | |||
| 2710 | // Offset must be a multiple of 64. | |||
| 2711 | unsigned LaneIdx = Offset / 64; | |||
| 2712 | MachineInstr *Extract = emitExtractVectorElt( | |||
| 2713 | DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); | |||
| 2714 | if (!Extract) | |||
| 2715 | return false; | |||
| 2716 | I.eraseFromParent(); | |||
| 2717 | return true; | |||
| 2718 | } | |||
| 2719 | ||||
| 2720 | I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); | |||
| 2721 | MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + | |||
| 2722 | Ty.getSizeInBits() - 1); | |||
| 2723 | ||||
| 2724 | if (SrcSize < 64) { | |||
| 2725 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && | |||
| 2726 | "unexpected G_EXTRACT types"); | |||
| 2727 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2728 | } | |||
| 2729 | ||||
| 2730 | DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); | |||
| 2731 | MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); | |||
| 2732 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) | |||
| 2733 | .addReg(DstReg, 0, AArch64::sub_32); | |||
| 2734 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), | |||
| 2735 | AArch64::GPR32RegClass, MRI); | |||
| 2736 | I.getOperand(0).setReg(DstReg); | |||
| 2737 | ||||
| 2738 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2739 | } | |||
| 2740 | ||||
| 2741 | case TargetOpcode::G_INSERT: { | |||
| 2742 | LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); | |||
| 2743 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 2744 | unsigned DstSize = DstTy.getSizeInBits(); | |||
| 2745 | // Larger inserts are vectors, same-size ones should be something else by | |||
| 2746 | // now (split up or turned into COPYs). | |||
| 2747 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) | |||
| 2748 | return false; | |||
| 2749 | ||||
| 2750 | I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); | |||
| 2751 | unsigned LSB = I.getOperand(3).getImm(); | |||
| 2752 | unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); | |||
| 2753 | I.getOperand(3).setImm((DstSize - LSB) % DstSize); | |||
| 2754 | MachineInstrBuilder(MF, I).addImm(Width - 1); | |||
| 2755 | ||||
| 2756 | if (DstSize < 64) { | |||
| 2757 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && | |||
| 2758 | "unexpected G_INSERT types"); | |||
| 2759 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2760 | } | |||
| 2761 | ||||
| 2762 | Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); | |||
| 2763 | BuildMI(MBB, I.getIterator(), I.getDebugLoc(), | |||
| 2764 | TII.get(AArch64::SUBREG_TO_REG)) | |||
| 2765 | .addDef(SrcReg) | |||
| 2766 | .addImm(0) | |||
| 2767 | .addUse(I.getOperand(2).getReg()) | |||
| 2768 | .addImm(AArch64::sub_32); | |||
| 2769 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), | |||
| 2770 | AArch64::GPR32RegClass, MRI); | |||
| 2771 | I.getOperand(2).setReg(SrcReg); | |||
| 2772 | ||||
| 2773 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2774 | } | |||
| 2775 | case TargetOpcode::G_FRAME_INDEX: { | |||
| 2776 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). | |||
| 2777 | if (Ty != LLT::pointer(0, 64)) { | |||
| 2778 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty | |||
| 2779 | << ", expected: " << LLT::pointer(0, 64) << '\n'); | |||
| 2780 | return false; | |||
| 2781 | } | |||
| 2782 | I.setDesc(TII.get(AArch64::ADDXri)); | |||
| 2783 | ||||
| 2784 | // MOs for a #0 shifted immediate. | |||
| 2785 | I.addOperand(MachineOperand::CreateImm(0)); | |||
| 2786 | I.addOperand(MachineOperand::CreateImm(0)); | |||
| 2787 | ||||
| 2788 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2789 | } | |||
| 2790 | ||||
| 2791 | case TargetOpcode::G_GLOBAL_VALUE: { | |||
| 2792 | auto GV = I.getOperand(1).getGlobal(); | |||
| 2793 | if (GV->isThreadLocal()) | |||
| 2794 | return selectTLSGlobalValue(I, MRI); | |||
| 2795 | ||||
| 2796 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); | |||
| 2797 | if (OpFlags & AArch64II::MO_GOT) { | |||
| 2798 | I.setDesc(TII.get(AArch64::LOADgot)); | |||
| 2799 | I.getOperand(1).setTargetFlags(OpFlags); | |||
| 2800 | } else if (TM.getCodeModel() == CodeModel::Large) { | |||
| 2801 | // Materialize the global using movz/movk instructions. | |||
| 2802 | materializeLargeCMVal(I, GV, OpFlags); | |||
| 2803 | I.eraseFromParent(); | |||
| 2804 | return true; | |||
| 2805 | } else if (TM.getCodeModel() == CodeModel::Tiny) { | |||
| 2806 | I.setDesc(TII.get(AArch64::ADR)); | |||
| 2807 | I.getOperand(1).setTargetFlags(OpFlags); | |||
| 2808 | } else { | |||
| 2809 | I.setDesc(TII.get(AArch64::MOVaddr)); | |||
| 2810 | I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); | |||
| 2811 | MachineInstrBuilder MIB(MF, I); | |||
| 2812 | MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), | |||
| 2813 | OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); | |||
| 2814 | } | |||
| 2815 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2816 | } | |||
| 2817 | ||||
| 2818 | case TargetOpcode::G_ZEXTLOAD: | |||
| 2819 | case TargetOpcode::G_LOAD: | |||
| 2820 | case TargetOpcode::G_STORE: { | |||
| 2821 | GLoadStore &LdSt = cast<GLoadStore>(I); | |||
| 2822 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; | |||
| 2823 | LLT PtrTy = MRI.getType(LdSt.getPointerReg()); | |||
| 2824 | ||||
| 2825 | if (PtrTy != LLT::pointer(0, 64)) { | |||
| 2826 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy | |||
| 2827 | << ", expected: " << LLT::pointer(0, 64) << '\n'); | |||
| 2828 | return false; | |||
| 2829 | } | |||
| 2830 | ||||
| 2831 | uint64_t MemSizeInBytes = LdSt.getMemSize(); | |||
| 2832 | unsigned MemSizeInBits = LdSt.getMemSizeInBits(); | |||
| 2833 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); | |||
| 2834 | ||||
| 2835 | // Need special instructions for atomics that affect ordering. | |||
| 2836 | if (Order != AtomicOrdering::NotAtomic && | |||
| 2837 | Order != AtomicOrdering::Unordered && | |||
| 2838 | Order != AtomicOrdering::Monotonic) { | |||
| 2839 | assert(!isa<GZExtLoad>(LdSt)); | |||
| 2840 | if (MemSizeInBytes > 64) | |||
| 2841 | return false; | |||
| 2842 | ||||
| 2843 | if (isa<GLoad>(LdSt)) { | |||
| 2844 | static constexpr unsigned LDAPROpcodes[] = { | |||
| 2845 | AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX}; | |||
| 2846 | static constexpr unsigned LDAROpcodes[] = { | |||
| 2847 | AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; | |||
| 2848 | ArrayRef<unsigned> Opcodes = | |||
| 2849 | STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent | |||
| 2850 | ? LDAPROpcodes | |||
| 2851 | : LDAROpcodes; | |||
| 2852 | I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); | |||
| 2853 | } else { | |||
| 2854 | static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, | |||
| 2855 | AArch64::STLRW, AArch64::STLRX}; | |||
| 2856 | Register ValReg = LdSt.getReg(0); | |||
| 2857 | if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { | |||
| 2858 | // Emit a subreg copy of 32 bits. | |||
| 2859 | Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | |||
| 2860 | MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {}) | |||
| 2861 | .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32); | |||
| 2862 | I.getOperand(0).setReg(NewVal); | |||
| 2863 | } | |||
| 2864 | I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); | |||
| 2865 | } | |||
| 2866 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 2867 | return true; | |||
| 2868 | } | |||
| 2869 | ||||
| 2870 | #ifndef NDEBUG | |||
| 2871 | const Register PtrReg = LdSt.getPointerReg(); | |||
| 2872 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); | |||
| 2873 | // Check that the pointer register is valid. | |||
| 2874 | assert(PtrRB.getID() == AArch64::GPRRegBankID && | |||
| 2875 | "Load/Store pointer operand isn't a GPR"); | |||
| 2876 | assert(MRI.getType(PtrReg).isPointer() && | |||
| 2877 | "Load/Store pointer operand isn't a pointer"); | |||
| 2878 | #endif | |||
| 2879 | ||||
| 2880 | const Register ValReg = LdSt.getReg(0); | |||
| 2881 | const LLT ValTy = MRI.getType(ValReg); | |||
| 2882 | const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); | |||
| 2883 | ||||
| 2884 | // The code below doesn't support truncating stores, so we need to split it | |||
| 2885 | // again. | |||
| 2886 | if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { | |||
| 2887 | unsigned SubReg; | |||
| 2888 | LLT MemTy = LdSt.getMMO().getMemoryType(); | |||
| 2889 | auto *RC = getRegClassForTypeOnBank(MemTy, RB); | |||
| 2890 | if (!getSubRegForClass(RC, TRI, SubReg)) | |||
| 2891 | return false; | |||
| 2892 | ||||
| 2893 | // Generate a subreg copy. | |||
| 2894 | auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) | |||
| 2895 | .addReg(ValReg, 0, SubReg) | |||
| 2896 | .getReg(0); | |||
| 2897 | RBI.constrainGenericRegister(Copy, *RC, MRI); | |||
| 2898 | LdSt.getOperand(0).setReg(Copy); | |||
| 2899 | } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { | |||
| 2900 | // If this is an any-extending load from the FPR bank, split it into a regular | |||
| 2901 | // load + extend. | |||
| 2902 | if (RB.getID() == AArch64::FPRRegBankID) { | |||
| 2903 | unsigned SubReg; | |||
| 2904 | LLT MemTy = LdSt.getMMO().getMemoryType(); | |||
| 2905 | auto *RC = getRegClassForTypeOnBank(MemTy, RB); | |||
| 2906 | if (!getSubRegForClass(RC, TRI, SubReg)) | |||
| 2907 | return false; | |||
| 2908 | Register OldDst = LdSt.getReg(0); | |||
| 2909 | Register NewDst = | |||
| 2910 | MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType()); | |||
| 2911 | LdSt.getOperand(0).setReg(NewDst); | |||
| 2912 | MRI.setRegBank(NewDst, RB); | |||
| 2913 | // Generate a SUBREG_TO_REG to extend it. | |||
| 2914 | MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator())); | |||
| 2915 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) | |||
| 2916 | .addImm(0) | |||
| 2917 | .addUse(NewDst) | |||
| 2918 | .addImm(SubReg); | |||
| 2919 | auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB); | |||
| 2920 | RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI); | |||
| 2921 | MIB.setInstr(LdSt); | |||
| 2922 | } | |||
| 2923 | } | |||
| 2924 | ||||
| 2925 | // Helper lambda for partially selecting I. Either returns the original | |||
| 2926 | // instruction with an updated opcode, or a new instruction. | |||
| 2927 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { | |||
| 2928 | bool IsStore = isa<GStore>(I); | |||
| 2929 | const unsigned NewOpc = | |||
| 2930 | selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); | |||
| 2931 | if (NewOpc == I.getOpcode()) | |||
| 2932 | return nullptr; | |||
| 2933 | // Check if we can fold anything into the addressing mode. | |||
| 2934 | auto AddrModeFns = | |||
| 2935 | selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); | |||
| 2936 | if (!AddrModeFns) { | |||
| 2937 | // Can't fold anything. Use the original instruction. | |||
| 2938 | I.setDesc(TII.get(NewOpc)); | |||
| 2939 | I.addOperand(MachineOperand::CreateImm(0)); | |||
| 2940 | return &I; | |||
| 2941 | } | |||
| 2942 | ||||
| 2943 | // Folded something. Create a new instruction and return it. | |||
| 2944 | auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); | |||
| 2945 | Register CurValReg = I.getOperand(0).getReg(); | |||
| 2946 | IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); | |||
| 2947 | NewInst.cloneMemRefs(I); | |||
| 2948 | for (auto &Fn : *AddrModeFns) | |||
| 2949 | Fn(NewInst); | |||
| 2950 | I.eraseFromParent(); | |||
| 2951 | return &*NewInst; | |||
| 2952 | }; | |||
| 2953 | ||||
| 2954 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); | |||
| 2955 | if (!LoadStore) | |||
| 2956 | return false; | |||
| 2957 | ||||
| 2958 | // If we're storing a 0, use WZR/XZR. | |||
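| | // Illustrative note (not from the original source): a G_STORE of | |||
| | // constant 0 then needs no materializing mov; it becomes e.g. | |||
| | // "str wzr, [x0]". | |||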
| 2959 | if (Opcode == TargetOpcode::G_STORE) { | |||
| 2960 | auto CVal = getIConstantVRegValWithLookThrough( | |||
| 2961 | LoadStore->getOperand(0).getReg(), MRI); | |||
| 2962 | if (CVal && CVal->Value == 0) { | |||
| 2963 | switch (LoadStore->getOpcode()) { | |||
| 2964 | case AArch64::STRWui: | |||
| 2965 | case AArch64::STRHHui: | |||
| 2966 | case AArch64::STRBBui: | |||
| 2967 | LoadStore->getOperand(0).setReg(AArch64::WZR); | |||
| 2968 | break; | |||
| 2969 | case AArch64::STRXui: | |||
| 2970 | LoadStore->getOperand(0).setReg(AArch64::XZR); | |||
| 2971 | break; | |||
| 2972 | } | |||
| 2973 | } | |||
| 2974 | } | |||
| 2975 | ||||
| 2976 | if (IsZExtLoad) { | |||
| 2977 | // The zextload from a smaller type to i32 should be handled by the | |||
| 2978 | // importer. | |||
| 2979 | if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) | |||
| 2980 | return false; | |||
| 2981 | // If we have a ZEXTLOAD then change the load's type to be a narrower reg | |||
| 2982 | // and zero_extend with SUBREG_TO_REG. | |||
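| | // Illustrative note (not from the original source): a zextload of s32 | |||
| | // into s64 is just "ldr w8, [x0]", since writing w8 zeroes bits | |||
| | // [63:32]; SUBREG_TO_REG expresses that implicit zero-extension to the | |||
| | // register allocator. | |||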
| 2983 | Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | |||
| 2984 | Register DstReg = LoadStore->getOperand(0).getReg(); | |||
| 2985 | LoadStore->getOperand(0).setReg(LdReg); | |||
| 2986 | ||||
| 2987 | MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); | |||
| 2988 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) | |||
| 2989 | .addImm(0) | |||
| 2990 | .addUse(LdReg) | |||
| 2991 | .addImm(AArch64::sub_32); | |||
| 2992 | constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); | |||
| 2993 | return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, | |||
| 2994 | MRI); | |||
| 2995 | } | |||
| 2996 | return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); | |||
| 2997 | } | |||
| 2998 | ||||
| 2999 | case TargetOpcode::G_SMULH: | |||
| 3000 | case TargetOpcode::G_UMULH: { | |||
| 3001 | // Reject the various things we don't support yet. | |||
| 3002 | if (unsupportedBinOp(I, RBI, MRI, TRI)) | |||
| 3003 | return false; | |||
| 3004 | ||||
| 3005 | const Register DefReg = I.getOperand(0).getReg(); | |||
| 3006 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | |||
| 3007 | ||||
| 3008 | if (RB.getID() != AArch64::GPRRegBankID) { | |||
| 3009 | LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); | |||
| 3010 | return false; | |||
| 3011 | } | |||
| 3012 | ||||
| 3013 | if (Ty != LLT::scalar(64)) { | |||
| 3014 | LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty | |||
| 3015 | << ", expected: " << LLT::scalar(64) << '\n'); | |||
| 3016 | return false; | |||
| 3017 | } | |||
| 3018 | ||||
| 3019 | unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr | |||
| 3020 | : AArch64::UMULHrr; | |||
| 3021 | I.setDesc(TII.get(NewOpc)); | |||
| 3022 | ||||
| 3023 | // Now that we selected an opcode, we need to constrain the register | |||
| 3024 | // operands to use appropriate classes. | |||
| 3025 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3026 | } | |||
| 3027 | case TargetOpcode::G_LSHR: | |||
| 3028 | case TargetOpcode::G_ASHR: | |||
| 3029 | if (MRI.getType(I.getOperand(0).getReg()).isVector()) | |||
| 3030 | return selectVectorAshrLshr(I, MRI); | |||
| 3031 | [[fallthrough]]; | |||
| 3032 | case TargetOpcode::G_SHL: | |||
| 3033 | if (Opcode == TargetOpcode::G_SHL && | |||
| 3034 | MRI.getType(I.getOperand(0).getReg()).isVector()) | |||
| 3035 | return selectVectorSHL(I, MRI); | |||
| 3036 | ||||
| 3037 | // These shifts were legalized to have 64 bit shift amounts because we | |||
| 3038 | // want to take advantage of the selection patterns that assume the | |||
| 3039 | // immediates are s64s. However, selectBinaryOp assumes both operands | |||
| 3040 | // have the same bit size. | |||
| 3041 | { | |||
| 3042 | Register SrcReg = I.getOperand(1).getReg(); | |||
| 3043 | Register ShiftReg = I.getOperand(2).getReg(); | |||
| 3044 | const LLT ShiftTy = MRI.getType(ShiftReg); | |||
| 3045 | const LLT SrcTy = MRI.getType(SrcReg); | |||
| 3046 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && | |||
| 3047 | ShiftTy.getSizeInBits() == 64) { | |||
| 3048 | assert(!ShiftTy.isVector() && "unexpected vector shift ty"); | |||
| 3049 | // Insert a subregister copy to implement a 64->32 trunc | |||
| 3050 | auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) | |||
| 3051 | .addReg(ShiftReg, 0, AArch64::sub_32); | |||
| 3052 | MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); | |||
| 3053 | I.getOperand(2).setReg(Trunc.getReg(0)); | |||
| 3054 | } | |||
| 3055 | } | |||
| 3056 | [[fallthrough]]; | |||
| 3057 | case TargetOpcode::G_OR: { | |||
| 3058 | // Reject the various things we don't support yet. | |||
| 3059 | if (unsupportedBinOp(I, RBI, MRI, TRI)) | |||
| 3060 | return false; | |||
| 3061 | ||||
| 3062 | const unsigned OpSize = Ty.getSizeInBits(); | |||
| 3063 | ||||
| 3064 | const Register DefReg = I.getOperand(0).getReg(); | |||
| 3065 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | |||
| 3066 | ||||
| 3067 | const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); | |||
| 3068 | if (NewOpc == I.getOpcode()) | |||
| 3069 | return false; | |||
| 3070 | ||||
| 3071 | I.setDesc(TII.get(NewOpc)); | |||
| 3072 | // FIXME: Should the type be always reset in setDesc? | |||
| 3073 | ||||
| 3074 | // Now that we selected an opcode, we need to constrain the register | |||
| 3075 | // operands to use appropriate classes. | |||
| 3076 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3077 | } | |||
| 3078 | ||||
| 3079 | case TargetOpcode::G_PTR_ADD: { | |||
| 3080 | emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB); | |||
| 3081 | I.eraseFromParent(); | |||
| 3082 | return true; | |||
| 3083 | } | |||
| 3084 | case TargetOpcode::G_SADDO: | |||
| 3085 | case TargetOpcode::G_UADDO: | |||
| 3086 | case TargetOpcode::G_SSUBO: | |||
| 3087 | case TargetOpcode::G_USUBO: { | |||
| 3088 | // Emit the operation and get the correct condition code. | |||
| 3089 | auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), | |||
| 3090 | I.getOperand(2), I.getOperand(3), MIB); | |||
| 3091 | ||||
| 3092 | // Now, put the overflow result in the register given by the first operand | |||
| 3093 | // to the overflow op. CSINC increments the result when the predicate is | |||
| 3094 | // false, so to get the increment when it's true, we need to use the | |||
| 3095 | // inverse. In this case, we want to increment when carry is set. | |||
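| | // For example (sketch, assuming G_UADDO): the ADDS emitted by | |||
| | // emitOverflowOp sets the carry flag, and | |||
| | //   CSINC Wd, WZR, WZR, lo | |||
| | // yields Wd = 1 when carry is set (HS) and 0 otherwise, since CSINC | |||
| | // returns Rn when the condition holds and Rm + 1 when it does not. | |||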
| 3096 | Register ZReg = AArch64::WZR; | |||
| 3097 | emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg, | |||
| 3098 | getInvertedCondCode(OpAndCC.second), MIB); | |||
| 3099 | I.eraseFromParent(); | |||
| 3100 | return true; | |||
| 3101 | } | |||
| 3102 | ||||
| 3103 | case TargetOpcode::G_PTRMASK: { | |||
| 3104 | Register MaskReg = I.getOperand(2).getReg(); | |||
| 3105 | std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI); | |||
| 3106 | // TODO: Implement arbitrary cases | |||
| 3107 | if (!MaskVal || !isShiftedMask_64(*MaskVal)) | |||
| 3108 | return false; | |||
| 3109 | ||||
| 3110 | uint64_t Mask = *MaskVal; | |||
| 3111 | I.setDesc(TII.get(AArch64::ANDXri)); | |||
| 3112 | I.getOperand(2).ChangeToImmediate( | |||
| 3113 | AArch64_AM::encodeLogicalImmediate(Mask, 64)); | |||
| 3114 | ||||
| 3115 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3116 | } | |||
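| | // Illustrative only: a G_PTRMASK with mask 0xFFFFFFFFFFFFFFF0 (a | |||
| | // shifted mask) becomes ANDXri with that value encoded as a logical | |||
| | // immediate, i.e. a 16-byte pointer alignment in one instruction. | |||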
| 3117 | case TargetOpcode::G_PTRTOINT: | |||
| 3118 | case TargetOpcode::G_TRUNC: { | |||
| 3119 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 3120 | const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); | |||
| 3121 | ||||
| 3122 | const Register DstReg = I.getOperand(0).getReg(); | |||
| 3123 | const Register SrcReg = I.getOperand(1).getReg(); | |||
| 3124 | ||||
| 3125 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | |||
| 3126 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); | |||
| 3127 | ||||
| 3128 | if (DstRB.getID() != SrcRB.getID()) { | |||
| 3129 | LLVM_DEBUG( | |||
| 3130 | dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); | |||
| 3131 | return false; | |||
| 3132 | } | |||
| 3133 | ||||
| 3134 | if (DstRB.getID() == AArch64::GPRRegBankID) { | |||
| 3135 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); | |||
| 3136 | if (!DstRC) | |||
| 3137 | return false; | |||
| 3138 | ||||
| 3139 | const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB); | |||
| 3140 | if (!SrcRC) | |||
| 3141 | return false; | |||
| 3142 | ||||
| 3143 | if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || | |||
| 3144 | !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { | |||
| 3145 | LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); | |||
| 3146 | return false; | |||
| 3147 | } | |||
| 3148 | ||||
| 3149 | if (DstRC == SrcRC) { | |||
| 3150 | // Nothing to be done | |||
| 3151 | } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && | |||
| 3152 | SrcTy == LLT::scalar(64)) { | |||
| 3153 | llvm_unreachable("TableGen can import this case"); | |||
| 3154 | return false; | |||
| 3155 | } else if (DstRC == &AArch64::GPR32RegClass && | |||
| 3156 | SrcRC == &AArch64::GPR64RegClass) { | |||
| 3157 | I.getOperand(1).setSubReg(AArch64::sub_32); | |||
| 3158 | } else { | |||
| 3159 | LLVM_DEBUG( | |||
| 3160 | dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); | |||
| 3161 | return false; | |||
| 3162 | } | |||
| 3163 | ||||
| 3164 | I.setDesc(TII.get(TargetOpcode::COPY)); | |||
| 3165 | return true; | |||
| 3166 | } else if (DstRB.getID() == AArch64::FPRRegBankID) { | |||
| 3167 | if (DstTy == LLT::fixed_vector(4, 16) && | |||
| 3168 | SrcTy == LLT::fixed_vector(4, 32)) { | |||
| 3169 | I.setDesc(TII.get(AArch64::XTNv4i16)); | |||
| 3170 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3171 | return true; | |||
| 3172 | } | |||
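| | // (XTN narrows each 32-bit lane to 16 bits, implementing the vector | |||
| | // truncate directly.) | |||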
| 3173 | ||||
| 3174 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { | |||
| 3175 | MachineInstr *Extract = emitExtractVectorElt( | |||
| 3176 | DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); | |||
| 3177 | if (!Extract) | |||
| 3178 | return false; | |||
| 3179 | I.eraseFromParent(); | |||
| 3180 | return true; | |||
| 3181 | } | |||
| 3182 | ||||
| 3183 | // We might have a vector G_PTRTOINT, in which case just emit a COPY. | |||
| 3184 | if (Opcode == TargetOpcode::G_PTRTOINT) { | |||
| 3185 | assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); | |||
| 3186 | I.setDesc(TII.get(TargetOpcode::COPY)); | |||
| 3187 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 3188 | } | |||
| 3189 | } | |||
| 3190 | ||||
| 3191 | return false; | |||
| 3192 | } | |||
| 3193 | ||||
| 3194 | case TargetOpcode::G_ANYEXT: { | |||
| 3195 | if (selectUSMovFromExtend(I, MRI)) | |||
| 3196 | return true; | |||
| 3197 | ||||
| 3198 | const Register DstReg = I.getOperand(0).getReg(); | |||
| 3199 | const Register SrcReg = I.getOperand(1).getReg(); | |||
| 3200 | ||||
| 3201 | const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); | |||
| 3202 | if (RBDst.getID() != AArch64::GPRRegBankID) { | |||
| 3203 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst | |||
| 3204 | << ", expected: GPR\n"); | |||
| 3205 | return false; | |||
| 3206 | } | |||
| 3207 | ||||
| 3208 | const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); | |||
| 3209 | if (RBSrc.getID() != AArch64::GPRRegBankID) { | |||
| 3210 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc | |||
| 3211 | << ", expected: GPR\n"); | |||
| 3212 | return false; | |||
| 3213 | } | |||
| 3214 | ||||
| 3215 | const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); | |||
| 3216 | ||||
| 3217 | if (DstSize == 0) { | |||
| 3218 | LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); | |||
| 3219 | return false; | |||
| 3220 | } | |||
| 3221 | ||||
| 3222 | if (DstSize != 64 && DstSize > 32) { | |||
| 3223 | LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize | |||
| 3224 | << ", expected: 32 or 64\n"); | |||
| 3225 | return false; | |||
| 3226 | } | |||
| 3227 | // At this point G_ANYEXT is just like a plain COPY, but we need | |||
| 3228 | // to explicitly form the 64-bit value when the destination is 64 bits. | |||
| 3229 | if (DstSize > 32) { | |||
| 3230 | Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); | |||
| 3231 | BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) | |||
| 3232 | .addDef(ExtSrc) | |||
| 3233 | .addImm(0) | |||
| 3234 | .addUse(SrcReg) | |||
| 3235 | .addImm(AArch64::sub_32); | |||
| 3236 | I.getOperand(1).setReg(ExtSrc); | |||
| 3237 | } | |||
| 3238 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 3239 | } | |||
| 3240 | ||||
| 3241 | case TargetOpcode::G_ZEXT: | |||
| 3242 | case TargetOpcode::G_SEXT_INREG: | |||
| 3243 | case TargetOpcode::G_SEXT: { | |||
| 3244 | if (selectUSMovFromExtend(I, MRI)) | |||
| 3245 | return true; | |||
| 3246 | ||||
| 3247 | unsigned Opcode = I.getOpcode(); | |||
| 3248 | const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; | |||
| 3249 | const Register DefReg = I.getOperand(0).getReg(); | |||
| 3250 | Register SrcReg = I.getOperand(1).getReg(); | |||
| 3251 | const LLT DstTy = MRI.getType(DefReg); | |||
| 3252 | const LLT SrcTy = MRI.getType(SrcReg); | |||
| 3253 | unsigned DstSize = DstTy.getSizeInBits(); | |||
| 3254 | unsigned SrcSize = SrcTy.getSizeInBits(); | |||
| 3255 | ||||
| 3256 | // SEXT_INREG has the same src reg size as dst; the size of the value to be | |||
| 3257 | // extended is encoded in the imm. | |||
| 3258 | if (Opcode == TargetOpcode::G_SEXT_INREG) | |||
| 3259 | SrcSize = I.getOperand(2).getImm(); | |||
| 3260 | ||||
| 3261 | if (DstTy.isVector()) | |||
| 3262 | return false; // Should be handled by imported patterns. | |||
| 3263 | ||||
| 3264 | assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == | |||
| 3265 | AArch64::GPRRegBankID && | |||
| 3266 | "Unexpected ext regbank"); | |||
| 3267 | ||||
| 3268 | MachineInstr *ExtI; | |||
| 3269 | ||||
| 3270 | // First, check whether we're extending the result of a load with a dest | |||
| 3271 | // type smaller than 32 bits; if so, this zext is redundant. GPR32 is the | |||
| 3272 | // smallest GPR register on AArch64, and all smaller loads automatically | |||
| 3273 | // zero-extend the upper bits. E.g. | |||
| 3274 | // %v(s8) = G_LOAD %p, :: (load 1) | |||
| 3275 | // %v2(s32) = G_ZEXT %v(s8) | |||
| 3276 | if (!IsSigned) { | |||
| 3277 | auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); | |||
| 3278 | bool IsGPR = | |||
| 3279 | RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; | |||
| 3280 | if (LoadMI && IsGPR) { | |||
| 3281 | const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); | |||
| 3282 | unsigned BytesLoaded = MemOp->getSize(); | |||
| 3283 | if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) | |||
| 3284 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 3285 | } | |||
| 3286 | ||||
| 3287 | // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) | |||
| 3288 | // + SUBREG_TO_REG. | |||
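| | // Sketch of the resulting MIR (illustrative register names): | |||
| | //   %tmp:gpr32 = ORRWrs $wzr, %src, 0      ; a plain 32-bit MOV | |||
| | //   %def:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32 | |||
| | // Writing a W register implicitly zeroes the top 32 bits, so no | |||
| | // explicit mask is needed. | |||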
| 3289 | if (IsGPR && SrcSize == 32 && DstSize == 64) { | |||
| 3290 | Register SubregToRegSrc = | |||
| 3291 | MRI.createVirtualRegister(&AArch64::GPR32RegClass); | |||
| 3292 | const Register ZReg = AArch64::WZR; | |||
| 3293 | MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg}) | |||
| 3294 | .addImm(0); | |||
| 3295 | ||||
| 3296 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) | |||
| 3297 | .addImm(0) | |||
| 3298 | .addUse(SubregToRegSrc) | |||
| 3299 | .addImm(AArch64::sub_32); | |||
| 3300 | ||||
| 3301 | if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, | |||
| 3302 | MRI)) { | |||
| 3303 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); | |||
| 3304 | return false; | |||
| 3305 | } | |||
| 3306 | ||||
| 3307 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, | |||
| 3308 | MRI)) { | |||
| 3309 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); | |||
| 3310 | return false; | |||
| 3311 | } | |||
| 3312 | ||||
| 3313 | I.eraseFromParent(); | |||
| 3314 | return true; | |||
| 3315 | } | |||
| 3316 | } | |||
| 3317 | ||||
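| | // The general case below uses a bitfield move: with immr = 0 and | |||
| | // imms = SrcSize - 1, SBFM sign-extends bits [SrcSize-1:0] of the | |||
| | // source and UBFM zero-extends them, which is exactly G_SEXT/G_ZEXT. | |||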
| 3318 | if (DstSize == 64) { | |||
| 3319 | if (Opcode != TargetOpcode::G_SEXT_INREG) { | |||
| 3320 | // FIXME: Can we avoid manually doing this? | |||
| 3321 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, | |||
| 3322 | MRI)) { | |||
| 3323 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) | |||
| 3324 | << " operand\n"); | |||
| 3325 | return false; | |||
| 3326 | } | |||
| 3327 | SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, | |||
| 3328 | {&AArch64::GPR64RegClass}, {}) | |||
| 3329 | .addImm(0) | |||
| 3330 | .addUse(SrcReg) | |||
| 3331 | .addImm(AArch64::sub_32) | |||
| 3332 | .getReg(0); | |||
| 3333 | } | |||
| 3334 | ||||
| 3335 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, | |||
| 3336 | {DefReg}, {SrcReg}) | |||
| 3337 | .addImm(0) | |||
| 3338 | .addImm(SrcSize - 1); | |||
| 3339 | } else if (DstSize <= 32) { | |||
| 3340 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, | |||
| 3341 | {DefReg}, {SrcReg}) | |||
| 3342 | .addImm(0) | |||
| 3343 | .addImm(SrcSize - 1); | |||
| 3344 | } else { | |||
| 3345 | return false; | |||
| 3346 | } | |||
| 3347 | ||||
| 3348 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); | |||
| 3349 | I.eraseFromParent(); | |||
| 3350 | return true; | |||
| 3351 | } | |||
| 3352 | ||||
| 3353 | case TargetOpcode::G_SITOFP: | |||
| 3354 | case TargetOpcode::G_UITOFP: | |||
| 3355 | case TargetOpcode::G_FPTOSI: | |||
| 3356 | case TargetOpcode::G_FPTOUI: { | |||
| 3357 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), | |||
| 3358 | SrcTy = MRI.getType(I.getOperand(1).getReg()); | |||
| 3359 | const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); | |||
| 3360 | if (NewOpc == Opcode) | |||
| 3361 | return false; | |||
| 3362 | ||||
| 3363 | I.setDesc(TII.get(NewOpc)); | |||
| 3364 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3365 | I.setFlags(MachineInstr::NoFPExcept); | |||
| 3366 | ||||
| 3367 | return true; | |||
| 3368 | } | |||
| 3369 | ||||
| 3370 | case TargetOpcode::G_FREEZE: | |||
| 3371 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 3372 | ||||
| 3373 | case TargetOpcode::G_INTTOPTR: | |||
| 3374 | // The importer is currently unable to import pointer types since they | |||
| 3375 | // didn't exist in SelectionDAG. | |||
| 3376 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 3377 | ||||
| 3378 | case TargetOpcode::G_BITCAST: | |||
| 3379 | // Imported SelectionDAG rules can handle every bitcast except those that | |||
| 3380 | // bitcast from a type to the same type. Ideally, these shouldn't occur | |||
| 3381 | // but we might not run an optimizer that deletes them. The other exception | |||
| 3382 | // is bitcasts involving pointer types, as SelectionDAG has no knowledge | |||
| 3383 | // of them. | |||
| 3384 | return selectCopy(I, TII, MRI, TRI, RBI); | |||
| 3385 | ||||
| 3386 | case TargetOpcode::G_SELECT: { | |||
| 3387 | auto &Sel = cast<GSelect>(I); | |||
| 3388 | const Register CondReg = Sel.getCondReg(); | |||
| 3389 | const Register TReg = Sel.getTrueReg(); | |||
| 3390 | const Register FReg = Sel.getFalseReg(); | |||
| 3391 | ||||
| 3392 | if (tryOptSelect(Sel)) | |||
| 3393 | return true; | |||
| 3394 | ||||
| 3395 | // Make sure to use an unused vreg instead of wzr, so that the peephole | |||
| 3396 | // optimizations will be able to optimize these. | |||
| 3397 | Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | |||
| 3398 | auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) | |||
| 3399 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); | |||
| 3400 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); | |||
| 3401 | if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB)) | |||
| 3402 | return false; | |||
| 3403 | Sel.eraseFromParent(); | |||
| 3404 | return true; | |||
| 3405 | } | |||
| 3406 | case TargetOpcode::G_ICMP: { | |||
| 3407 | if (Ty.isVector()) | |||
| 3408 | return selectVectorICmp(I, MRI); | |||
| 3409 | ||||
| 3410 | if (Ty != LLT::scalar(32)) { | |||
| 3411 | LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty | |||
| 3412 | << ", expected: " << LLT::scalar(32) << '\n'); | |||
| 3413 | return false; | |||
| 3414 | } | |||
| 3415 | ||||
| 3416 | auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); | |||
| 3417 | const AArch64CC::CondCode InvCC = | |||
| 3418 | changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); | |||
| 3419 | emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB); | |||
| 3420 | emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR, | |||
| 3421 | /*Src2=*/AArch64::WZR, InvCC, MIB); | |||
| 3422 | I.eraseFromParent(); | |||
| 3423 | return true; | |||
| 3424 | } | |||
| 3425 | ||||
| 3426 | case TargetOpcode::G_FCMP: { | |||
| 3427 | CmpInst::Predicate Pred = | |||
| 3428 | static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); | |||
| 3429 | if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB, | |||
| 3430 | Pred) || | |||
| 3431 | !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB)) | |||
| 3432 | return false; | |||
| 3433 | I.eraseFromParent(); | |||
| 3434 | return true; | |||
| 3435 | } | |||
| 3436 | case TargetOpcode::G_VASTART: | |||
| 3437 | return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) | |||
| 3438 | : selectVaStartAAPCS(I, MF, MRI); | |||
| 3439 | case TargetOpcode::G_INTRINSIC: | |||
| 3440 | return selectIntrinsic(I, MRI); | |||
| 3441 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: | |||
| 3442 | return selectIntrinsicWithSideEffects(I, MRI); | |||
| 3443 | case TargetOpcode::G_IMPLICIT_DEF: { | |||
| 3444 | I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); | |||
| 3445 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 3446 | const Register DstReg = I.getOperand(0).getReg(); | |||
| 3447 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | |||
| 3448 | const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); | |||
| 3449 | RBI.constrainGenericRegister(DstReg, *DstRC, MRI); | |||
| 3450 | return true; | |||
| 3451 | } | |||
| 3452 | case TargetOpcode::G_BLOCK_ADDR: { | |||
| 3453 | if (TM.getCodeModel() == CodeModel::Large) { | |||
| 3454 | materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); | |||
| 3455 | I.eraseFromParent(); | |||
| 3456 | return true; | |||
| 3457 | } else { | |||
| 3458 | I.setDesc(TII.get(AArch64::MOVaddrBA)); | |||
| 3459 | auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), | |||
| 3460 | I.getOperand(0).getReg()) | |||
| 3461 | .addBlockAddress(I.getOperand(1).getBlockAddress(), | |||
| 3462 | /* Offset */ 0, AArch64II::MO_PAGE) | |||
| 3463 | .addBlockAddress( | |||
| 3464 | I.getOperand(1).getBlockAddress(), /* Offset */ 0, | |||
| 3465 | AArch64II::MO_NC | AArch64II::MO_PAGEOFF); | |||
| 3466 | I.eraseFromParent(); | |||
| 3467 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); | |||
| 3468 | } | |||
| 3469 | } | |||
| 3470 | case AArch64::G_DUP: { | |||
| 3471 | // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by | |||
| 3472 | // imported patterns, so do it manually here. Avoiding generating an s16 | |||
| 3473 | // gpr is difficult because at RBS we may end up pessimizing the fpr case | |||
| 3474 | // if we decide to add an anyextend to fix this. Manual selection is the | |||
| 3475 | // most robust solution for now. | |||
| 3476 | if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != | |||
| 3477 | AArch64::GPRRegBankID) | |||
| 3478 | return false; // We expect the fpr regbank case to be imported. | |||
| 3479 | LLT VecTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 3480 | if (VecTy == LLT::fixed_vector(8, 8)) | |||
| 3481 | I.setDesc(TII.get(AArch64::DUPv8i8gpr)); | |||
| 3482 | else if (VecTy == LLT::fixed_vector(16, 8)) | |||
| 3483 | I.setDesc(TII.get(AArch64::DUPv16i8gpr)); | |||
| 3484 | else if (VecTy == LLT::fixed_vector(4, 16)) | |||
| 3485 | I.setDesc(TII.get(AArch64::DUPv4i16gpr)); | |||
| 3486 | else if (VecTy == LLT::fixed_vector(8, 16)) | |||
| 3487 | I.setDesc(TII.get(AArch64::DUPv8i16gpr)); | |||
| 3488 | else | |||
| 3489 | return false; | |||
| 3490 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3491 | } | |||
| 3492 | case TargetOpcode::G_INTRINSIC_TRUNC: | |||
| 3493 | return selectIntrinsicTrunc(I, MRI); | |||
| 3494 | case TargetOpcode::G_INTRINSIC_ROUND: | |||
| 3495 | return selectIntrinsicRound(I, MRI); | |||
| 3496 | case TargetOpcode::G_BUILD_VECTOR: | |||
| 3497 | return selectBuildVector(I, MRI); | |||
| 3498 | case TargetOpcode::G_MERGE_VALUES: | |||
| 3499 | return selectMergeValues(I, MRI); | |||
| 3500 | case TargetOpcode::G_UNMERGE_VALUES: | |||
| 3501 | return selectUnmergeValues(I, MRI); | |||
| 3502 | case TargetOpcode::G_SHUFFLE_VECTOR: | |||
| 3503 | return selectShuffleVector(I, MRI); | |||
| 3504 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: | |||
| 3505 | return selectExtractElt(I, MRI); | |||
| 3506 | case TargetOpcode::G_INSERT_VECTOR_ELT: | |||
| 3507 | return selectInsertElt(I, MRI); | |||
| 3508 | case TargetOpcode::G_CONCAT_VECTORS: | |||
| 3509 | return selectConcatVectors(I, MRI); | |||
| 3510 | case TargetOpcode::G_JUMP_TABLE: | |||
| 3511 | return selectJumpTable(I, MRI); | |||
| 3512 | case TargetOpcode::G_VECREDUCE_FADD: | |||
| 3513 | case TargetOpcode::G_VECREDUCE_ADD: | |||
| 3514 | return selectReduction(I, MRI); | |||
| 3515 | case TargetOpcode::G_MEMCPY: | |||
| 3516 | case TargetOpcode::G_MEMCPY_INLINE: | |||
| 3517 | case TargetOpcode::G_MEMMOVE: | |||
| 3518 | case TargetOpcode::G_MEMSET: | |||
| 3519 | assert(STI.hasMOPS() && "Shouldn't get here without +mops feature"); | |||
| 3520 | return selectMOPS(I, MRI); | |||
| 3521 | } | |||
| 3522 | ||||
| 3523 | return false; | |||
| 3524 | } | |||
| 3525 | ||||
| 3526 | bool AArch64InstructionSelector::selectReduction(MachineInstr &I, | |||
| 3527 | MachineRegisterInfo &MRI) { | |||
| 3528 | Register VecReg = I.getOperand(1).getReg(); | |||
| 3529 | LLT VecTy = MRI.getType(VecReg); | |||
| 3530 | if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { | |||
| 3531 | // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit | |||
| 3532 | // a subregister copy afterwards. | |||
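| | // Sketch: ADDPv2i32 %v, %v produces <2 x s32> in an FPR64; the scalar | |||
| | // sum is lane 0, extracted via a COPY of the ssub subregister. | |||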
| 3533 | if (VecTy == LLT::fixed_vector(2, 32)) { | |||
| 3534 | Register DstReg = I.getOperand(0).getReg(); | |||
| 3535 | auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, | |||
| 3536 | {VecReg, VecReg}); | |||
| 3537 | auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | |||
| 3538 | .addReg(AddP.getReg(0), 0, AArch64::ssub) | |||
| 3539 | .getReg(0); | |||
| 3540 | RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI); | |||
| 3541 | I.eraseFromParent(); | |||
| 3542 | return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI); | |||
| 3543 | } | |||
| 3544 | ||||
| 3545 | unsigned Opc = 0; | |||
| 3546 | if (VecTy == LLT::fixed_vector(16, 8)) | |||
| 3547 | Opc = AArch64::ADDVv16i8v; | |||
| 3548 | else if (VecTy == LLT::fixed_vector(8, 16)) | |||
| 3549 | Opc = AArch64::ADDVv8i16v; | |||
| 3550 | else if (VecTy == LLT::fixed_vector(4, 32)) | |||
| 3551 | Opc = AArch64::ADDVv4i32v; | |||
| 3552 | else if (VecTy == LLT::fixed_vector(2, 64)) | |||
| 3553 | Opc = AArch64::ADDPv2i64p; | |||
| 3554 | else { | |||
| 3555 | LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); | |||
| 3556 | return false; | |||
| 3557 | } | |||
| 3558 | I.setDesc(TII.get(Opc)); | |||
| 3559 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3560 | } | |||
| 3561 | ||||
| 3562 | if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { | |||
| 3563 | unsigned Opc = 0; | |||
| 3564 | if (VecTy == LLT::fixed_vector(2, 32)) | |||
| 3565 | Opc = AArch64::FADDPv2i32p; | |||
| 3566 | else if (VecTy == LLT::fixed_vector(2, 64)) | |||
| 3567 | Opc = AArch64::FADDPv2i64p; | |||
| 3568 | else { | |||
| 3569 | LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); | |||
| 3570 | return false; | |||
| 3571 | } | |||
| 3572 | I.setDesc(TII.get(Opc)); | |||
| 3573 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3574 | } | |||
| 3575 | return false; | |||
| 3576 | } | |||
| 3577 | ||||
| 3578 | bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, | |||
| 3579 | MachineRegisterInfo &MRI) { | |||
| 3580 | unsigned Mopcode; | |||
| 3581 | switch (GI.getOpcode()) { | |||
| 3582 | case TargetOpcode::G_MEMCPY: | |||
| 3583 | case TargetOpcode::G_MEMCPY_INLINE: | |||
| 3584 | Mopcode = AArch64::MOPSMemoryCopyPseudo; | |||
| 3585 | break; | |||
| 3586 | case TargetOpcode::G_MEMMOVE: | |||
| 3587 | Mopcode = AArch64::MOPSMemoryMovePseudo; | |||
| 3588 | break; | |||
| 3589 | case TargetOpcode::G_MEMSET: | |||
| 3590 | // For tagged memset see llvm.aarch64.mops.memset.tag | |||
| 3591 | Mopcode = AArch64::MOPSMemorySetPseudo; | |||
| 3592 | break; | |||
| 3593 | } | |||
| 3594 | ||||
| 3595 | auto &DstPtr = GI.getOperand(0); | |||
| 3596 | auto &SrcOrVal = GI.getOperand(1); | |||
| 3597 | auto &Size = GI.getOperand(2); | |||
| 3598 | ||||
| 3599 | // Create copies of the registers that can be clobbered. | |||
| 3600 | const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg()); | |||
| 3601 | const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg()); | |||
| 3602 | const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg()); | |||
| 3603 | ||||
| 3604 | const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; | |||
| 3605 | const auto &SrcValRegClass = | |||
| 3606 | IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; | |||
| 3607 | ||||
| 3608 | // Constrain to specific registers | |||
| 3609 | RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); | |||
| 3610 | RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI); | |||
| 3611 | RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); | |||
| 3612 | ||||
| 3613 | MIB.buildCopy(DstPtrCopy, DstPtr); | |||
| 3614 | MIB.buildCopy(SrcValCopy, SrcOrVal); | |||
| 3615 | MIB.buildCopy(SizeCopy, Size); | |||
| 3616 | ||||
| 3617 | // The new instruction uses the copied registers because it must update | |||
| 3618 | // them. The defs are unused since they don't exist in G_MEM*, but they | |||
| 3619 | // are still tied. | |||
| 3620 | // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE | |||
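| | // Concretely (sketch): the set pseudo takes (dst, size, value) while | |||
| | // copy/move take (dst, src, size), as built below. | |||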
| 3621 | Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); | |||
| 3622 | Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | |||
| 3623 | if (IsSet) { | |||
| 3624 | MIB.buildInstr(Mopcode, {DefDstPtr, DefSize}, | |||
| 3625 | {DstPtrCopy, SizeCopy, SrcValCopy}); | |||
| 3626 | } else { | |||
| 3627 | Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); | |||
| 3628 | MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize}, | |||
| 3629 | {DstPtrCopy, SrcValCopy, SizeCopy}); | |||
| 3630 | } | |||
| 3631 | ||||
| 3632 | GI.eraseFromParent(); | |||
| 3633 | return true; | |||
| 3634 | } | |||
| 3635 | ||||
| 3636 | bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, | |||
| 3637 | MachineRegisterInfo &MRI) { | |||
| 3638 | assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); | |||
| 3639 | Register JTAddr = I.getOperand(0).getReg(); | |||
| 3640 | unsigned JTI = I.getOperand(1).getIndex(); | |||
| 3641 | Register Index = I.getOperand(2).getReg(); | |||
| 3642 | ||||
| 3643 | Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | |||
| 3644 | Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); | |||
| 3645 | ||||
| 3646 | MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); | |||
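| | // JumpTableDest32 (sketch, as I understand the later expansion): it | |||
| | // loads a 32-bit entry from the table (entry size 4 above) and adds it | |||
| | // to the table address to form the branch target in TargetReg. | |||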
| 3647 | auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, | |||
| 3648 | {TargetReg, ScratchReg}, {JTAddr, Index}) | |||
| 3649 | .addJumpTableIndex(JTI); | |||
| 3650 | // Build the indirect branch. | |||
| 3651 | MIB.buildInstr(AArch64::BR, {}, {TargetReg}); | |||
| 3652 | I.eraseFromParent(); | |||
| 3653 | return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); | |||
| 3654 | } | |||
| 3655 | ||||
| 3656 | bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, | |||
| 3657 | MachineRegisterInfo &MRI) { | |||
| 3658 | assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); | |||
| 3659 | assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); | |||
| 3660 | ||||
| 3661 | Register DstReg = I.getOperand(0).getReg(); | |||
| 3662 | unsigned JTI = I.getOperand(1).getIndex(); | |||
| 3663 | // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. | |||
| 3664 | auto MovMI = | |||
| 3665 | MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) | |||
| 3666 | .addJumpTableIndex(JTI, AArch64II::MO_PAGE) | |||
| 3667 | .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); | |||
| 3668 | I.eraseFromParent(); | |||
| 3669 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); | |||
| 3670 | } | |||
| 3671 | ||||
| 3672 | bool AArch64InstructionSelector::selectTLSGlobalValue( | |||
| 3673 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 3674 | if (!STI.isTargetMachO()) | |||
| 3675 | return false; | |||
| 3676 | MachineFunction &MF = *I.getParent()->getParent(); | |||
| 3677 | MF.getFrameInfo().setAdjustsStack(true); | |||
| 3678 | ||||
| 3679 | const auto &GlobalOp = I.getOperand(1); | |||
| 3680 | assert(GlobalOp.getOffset() == 0 && | |||
| 3681 | "Shouldn't have an offset on TLS globals!"); | |||
| 3682 | const GlobalValue &GV = *GlobalOp.getGlobal(); | |||
| 3683 | ||||
| 3684 | auto LoadGOT = | |||
| 3685 | MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) | |||
| 3686 | .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); | |||
| 3687 | ||||
| 3688 | auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, | |||
| 3689 | {LoadGOT.getReg(0)}) | |||
| 3690 | .addImm(0); | |||
| 3691 | ||||
| 3692 | MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); | |||
| 3693 | // TLS calls preserve all registers except those that absolutely must be | |||
| 3694 | // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be | |||
| 3695 | // silly). | |||
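| | // Sketch of the emitted sequence for MachO TLS (illustrative): | |||
| | //   x0 = LOADgot @var[TLS]   ; address of the TLV descriptor | |||
| | //   xN = ldr [x0]            ; the descriptor's thunk pointer | |||
| | //   blr xN                   ; returns the variable address in x0 | |||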
| 3696 | MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) | |||
| 3697 | .addUse(AArch64::X0, RegState::Implicit) | |||
| 3698 | .addDef(AArch64::X0, RegState::Implicit) | |||
| 3699 | .addRegMask(TRI.getTLSCallPreservedMask()); | |||
| 3700 | ||||
| 3701 | MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); | |||
| 3702 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, | |||
| 3703 | MRI); | |||
| 3704 | I.eraseFromParent(); | |||
| 3705 | return true; | |||
| 3706 | } | |||
| 3707 | ||||
| 3708 | bool AArch64InstructionSelector::selectIntrinsicTrunc( | |||
| 3709 | MachineInstr &I, MachineRegisterInfo &MRI) const { | |||
| 3710 | const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 3711 | ||||
| 3712 | // Select the correct opcode. | |||
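| | // (FRINTZ rounds toward zero, matching G_INTRINSIC_TRUNC.) | |||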
| 3713 | unsigned Opc = 0; | |||
| 3714 | if (!SrcTy.isVector()) { | |||
| 3715 | switch (SrcTy.getSizeInBits()) { | |||
| 3716 | default: | |||
| 3717 | case 16: | |||
| 3718 | Opc = AArch64::FRINTZHr; | |||
| 3719 | break; | |||
| 3720 | case 32: | |||
| 3721 | Opc = AArch64::FRINTZSr; | |||
| 3722 | break; | |||
| 3723 | case 64: | |||
| 3724 | Opc = AArch64::FRINTZDr; | |||
| 3725 | break; | |||
| 3726 | } | |||
| 3727 | } else { | |||
| 3728 | unsigned NumElts = SrcTy.getNumElements(); | |||
| 3729 | switch (SrcTy.getElementType().getSizeInBits()) { | |||
| 3730 | default: | |||
| 3731 | break; | |||
| 3732 | case 16: | |||
| 3733 | if (NumElts == 4) | |||
| 3734 | Opc = AArch64::FRINTZv4f16; | |||
| 3735 | else if (NumElts == 8) | |||
| 3736 | Opc = AArch64::FRINTZv8f16; | |||
| 3737 | break; | |||
| 3738 | case 32: | |||
| 3739 | if (NumElts == 2) | |||
| 3740 | Opc = AArch64::FRINTZv2f32; | |||
| 3741 | else if (NumElts == 4) | |||
| 3742 | Opc = AArch64::FRINTZv4f32; | |||
| 3743 | break; | |||
| 3744 | case 64: | |||
| 3745 | if (NumElts == 2) | |||
| 3746 | Opc = AArch64::FRINTZv2f64; | |||
| 3747 | break; | |||
| 3748 | } | |||
| 3749 | } | |||
| 3750 | ||||
| 3751 | if (!Opc) { | |||
| 3752 | // Didn't get an opcode above, bail. | |||
| 3753 | LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); | |||
| 3754 | return false; | |||
| 3755 | } | |||
| 3756 | ||||
| 3757 | // Legalization would have set us up perfectly for this; we just need to | |||
| 3758 | // set the opcode and move on. | |||
| 3759 | I.setDesc(TII.get(Opc)); | |||
| 3760 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3761 | } | |||
| 3762 | ||||
| 3763 | bool AArch64InstructionSelector::selectIntrinsicRound( | |||
| 3764 | MachineInstr &I, MachineRegisterInfo &MRI) const { | |||
| 3765 | const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 3766 | ||||
| 3767 | // Select the correct opcode. | |||
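| | // (FRINTA rounds to nearest with ties away from zero, matching the | |||
| | // behavior of llvm.round.) | |||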
| 3768 | unsigned Opc = 0; | |||
| 3769 | if (!SrcTy.isVector()) { | |||
| 3770 | switch (SrcTy.getSizeInBits()) { | |||
| 3771 | default: | |||
| 3772 | case 16: | |||
| 3773 | Opc = AArch64::FRINTAHr; | |||
| 3774 | break; | |||
| 3775 | case 32: | |||
| 3776 | Opc = AArch64::FRINTASr; | |||
| 3777 | break; | |||
| 3778 | case 64: | |||
| 3779 | Opc = AArch64::FRINTADr; | |||
| 3780 | break; | |||
| 3781 | } | |||
| 3782 | } else { | |||
| 3783 | unsigned NumElts = SrcTy.getNumElements(); | |||
| 3784 | switch (SrcTy.getElementType().getSizeInBits()) { | |||
| 3785 | default: | |||
| 3786 | break; | |||
| 3787 | case 16: | |||
| 3788 | if (NumElts == 4) | |||
| 3789 | Opc = AArch64::FRINTAv4f16; | |||
| 3790 | else if (NumElts == 8) | |||
| 3791 | Opc = AArch64::FRINTAv8f16; | |||
| 3792 | break; | |||
| 3793 | case 32: | |||
| 3794 | if (NumElts == 2) | |||
| 3795 | Opc = AArch64::FRINTAv2f32; | |||
| 3796 | else if (NumElts == 4) | |||
| 3797 | Opc = AArch64::FRINTAv4f32; | |||
| 3798 | break; | |||
| 3799 | case 64: | |||
| 3800 | if (NumElts == 2) | |||
| 3801 | Opc = AArch64::FRINTAv2f64; | |||
| 3802 | break; | |||
| 3803 | } | |||
| 3804 | } | |||
| 3805 | ||||
| 3806 | if (!Opc) { | |||
| 3807 | // Didn't get an opcode above, bail. | |||
| 3808 | LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); | |||
| 3809 | return false; | |||
| 3810 | } | |||
| 3811 | ||||
| 3812 | // Legalization would have set us up perfectly for this; we just need to | |||
| 3813 | // set the opcode and move on. | |||
| 3814 | I.setDesc(TII.get(Opc)); | |||
| 3815 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | |||
| 3816 | } | |||
| 3817 | ||||
| 3818 | bool AArch64InstructionSelector::selectVectorICmp( | |||
| 3819 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 3820 | Register DstReg = I.getOperand(0).getReg(); | |||
| 3821 | LLT DstTy = MRI.getType(DstReg); | |||
| 3822 | Register SrcReg = I.getOperand(2).getReg(); | |||
| 3823 | Register Src2Reg = I.getOperand(3).getReg(); | |||
| 3824 | LLT SrcTy = MRI.getType(SrcReg); | |||
| 3825 | ||||
| 3826 | unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); | |||
| 3827 | unsigned NumElts = DstTy.getNumElements(); | |||
| 3828 | ||||
| 3829 | // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b | |||
| 3830 | // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 | |||
| 3831 | // Third index is cc opcode: | |||
| 3832 | // 0 == eq | |||
| 3833 | // 1 == ugt | |||
| 3834 | // 2 == uge | |||
| 3835 | // 3 == ult | |||
| 3836 | // 4 == ule | |||
| 3837 | // 5 == sgt | |||
| 3838 | // 6 == sge | |||
| 3839 | // 7 == slt | |||
| 3840 | // 8 == sle | |||
| 3841 | // ne is done by negating 'eq' result. | |||
| 3842 | ||||
| 3843 | // This table below assumes that for some comparisons the operands will be | |||
| 3844 | // commuted. | |||
| 3845 | // ult op == commute + ugt op | |||
| 3846 | // ule op == commute + uge op | |||
| 3847 | // slt op == commute + sgt op | |||
| 3848 | // sle op == commute + sge op | |||
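| | // Example (sketch): ult on <4 x s32> swaps the operands and selects | |||
| | // CMHIv4i32, since a ult b == b ugt a. | |||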
| 3849 | unsigned PredIdx = 0; | |||
| 3850 | bool SwapOperands = false; | |||
| 3851 | CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); | |||
| 3852 | switch (Pred) { | |||
| 3853 | case CmpInst::ICMP_NE: | |||
| 3854 | case CmpInst::ICMP_EQ: | |||
| 3855 | PredIdx = 0; | |||
| 3856 | break; | |||
| 3857 | case CmpInst::ICMP_UGT: | |||
| 3858 | PredIdx = 1; | |||
| 3859 | break; | |||
| 3860 | case CmpInst::ICMP_UGE: | |||
| 3861 | PredIdx = 2; | |||
| 3862 | break; | |||
| 3863 | case CmpInst::ICMP_ULT: | |||
| 3864 | PredIdx = 3; | |||
| 3865 | SwapOperands = true; | |||
| 3866 | break; | |||
| 3867 | case CmpInst::ICMP_ULE: | |||
| 3868 | PredIdx = 4; | |||
| 3869 | SwapOperands = true; | |||
| 3870 | break; | |||
| 3871 | case CmpInst::ICMP_SGT: | |||
| 3872 | PredIdx = 5; | |||
| 3873 | break; | |||
| 3874 | case CmpInst::ICMP_SGE: | |||
| 3875 | PredIdx = 6; | |||
| 3876 | break; | |||
| 3877 | case CmpInst::ICMP_SLT: | |||
| 3878 | PredIdx = 7; | |||
| 3879 | SwapOperands = true; | |||
| 3880 | break; | |||
| 3881 | case CmpInst::ICMP_SLE: | |||
| 3882 | PredIdx = 8; | |||
| 3883 | SwapOperands = true; | |||
| 3884 | break; | |||
| 3885 | default: | |||
| 3886 | llvm_unreachable("Unhandled icmp predicate"); | |||
| 3887 | return false; | |||
| 3888 | } | |||
| 3889 | ||||
| 3890 | // This table obviously should be tablegen'd when we have our GISel native | |||
| 3891 | // tablegen selector. | |||
| 3892 | ||||
| 3893 | static const unsigned OpcTable[4][4][9] = { | |||
| 3894 | { | |||
| 3895 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3896 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3897 | 0 /* invalid */}, | |||
| 3898 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3899 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3900 | 0 /* invalid */}, | |||
| 3901 | {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, | |||
| 3902 | AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, | |||
| 3903 | AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, | |||
| 3904 | {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, | |||
| 3905 | AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, | |||
| 3906 | AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} | |||
| 3907 | }, | |||
| 3908 | { | |||
| 3909 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3910 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3911 | 0 /* invalid */}, | |||
| 3912 | {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, | |||
| 3913 | AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, | |||
| 3914 | AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, | |||
| 3915 | {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, | |||
| 3916 | AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, | |||
| 3917 | AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, | |||
| 3918 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3919 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3920 | 0 /* invalid */} | |||
| 3921 | }, | |||
| 3922 | { | |||
| 3923 | {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, | |||
| 3924 | AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, | |||
| 3925 | AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, | |||
| 3926 | {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, | |||
| 3927 | AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, | |||
| 3928 | AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, | |||
| 3929 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3930 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3931 | 0 /* invalid */}, | |||
| 3932 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3933 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3934 | 0 /* invalid */} | |||
| 3935 | }, | |||
| 3936 | { | |||
| 3937 | {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, | |||
| 3938 | AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, | |||
| 3939 | AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, | |||
| 3940 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3941 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3942 | 0 /* invalid */}, | |||
| 3943 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3944 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3945 | 0 /* invalid */}, | |||
| 3946 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3947 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | |||
| 3948 | 0 /* invalid */} | |||
| 3949 | }, | |||
| 3950 | }; | |||
| 3951 | unsigned EltIdx = Log2_32(SrcEltSize / 8); | |||
| 3952 | unsigned NumEltsIdx = Log2_32(NumElts / 2); | |||
| 3953 | unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; | |||
| 3954 | if (!Opc) { | |||
| 3955 | LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); | |||
| 3956 | return false; | |||
| 3957 | } | |||
| 3958 | ||||
| 3959 | const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); | |||
| 3960 | const TargetRegisterClass *SrcRC = | |||
| 3961 | getRegClassForTypeOnBank(SrcTy, VecRB, true); | |||
| 3962 | if (!SrcRC) { | |||
| 3963 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); | |||
| 3964 | return false; | |||
| 3965 | } | |||
| 3966 | ||||
| 3967 | unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; | |||
| 3968 | if (SrcTy.getSizeInBits() == 128) | |||
| 3969 | NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; | |||
| 3970 | ||||
| 3971 | if (SwapOperands) | |||
| 3972 | std::swap(SrcReg, Src2Reg); | |||
| 3973 | ||||
| 3974 | auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); | |||
| 3975 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); | |||
| 3976 | ||||
| 3977 | // Invert if we had a 'ne' cc. | |||
| 3978 | if (NotOpc) { | |||
| 3979 | Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); | |||
| 3980 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); | |||
| 3981 | } else { | |||
| 3982 | MIB.buildCopy(DstReg, Cmp.getReg(0)); | |||
| 3983 | } | |||
| 3984 | RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); | |||
| 3985 | I.eraseFromParent(); | |||
| 3986 | return true; | |||
| 3987 | } | |||
| 3988 | ||||
| 3989 | MachineInstr *AArch64InstructionSelector::emitScalarToVector( | |||
| 3990 | unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, | |||
| 3991 | MachineIRBuilder &MIRBuilder) const { | |||
| 3992 | auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); | |||
| 3993 | ||||
| 3994 | auto BuildFn = [&](unsigned SubregIndex) { | |||
| 3995 | auto Ins = | |||
| 3996 | MIRBuilder | |||
| 3997 | .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) | |||
| 3998 | .addImm(SubregIndex); | |||
| 3999 | constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); | |||
| 4000 | constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); | |||
| 4001 | return &*Ins; | |||
| 4002 | }; | |||
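| | // E.g. (sketch) for a 32-bit scalar placed into an FPR128 class: | |||
| | //   %undef:fpr128 = IMPLICIT_DEF | |||
| | //   %vec:fpr128 = INSERT_SUBREG %undef, %scalar, %subreg.ssub | |||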
| 4003 | ||||
| 4004 | switch (EltSize) { | |||
| 4005 | case 8: | |||
| 4006 | return BuildFn(AArch64::bsub); | |||
| 4007 | case 16: | |||
| 4008 | return BuildFn(AArch64::hsub); | |||
| 4009 | case 32: | |||
| 4010 | return BuildFn(AArch64::ssub); | |||
| 4011 | case 64: | |||
| 4012 | return BuildFn(AArch64::dsub); | |||
| 4013 | default: | |||
| 4014 | return nullptr; | |||
| 4015 | } | |||
| 4016 | } | |||
| 4017 | ||||
| 4018 | bool AArch64InstructionSelector::selectMergeValues( | |||
| 4019 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 4020 | assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); | |||
| 4021 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 4022 | const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); | |||
| 4023 | assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); | |||
| 4024 | const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); | |||
| 4025 | ||||
| 4026 | if (I.getNumOperands() != 3) | |||
| 4027 | return false; | |||
| 4028 | ||||
| 4029 | // Merging 2 s64s into an s128. | |||
| 4030 | if (DstTy == LLT::scalar(128)) { | |||
| 4031 | if (SrcTy.getSizeInBits() != 64) | |||
| 4032 | return false; | |||
| 4033 | Register DstReg = I.getOperand(0).getReg(); | |||
| 4034 | Register Src1Reg = I.getOperand(1).getReg(); | |||
| 4035 | Register Src2Reg = I.getOperand(2).getReg(); | |||
| 4036 | auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); | |||
| 4037 | MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg, | |||
| 4038 | /* LaneIdx */ 0, RB, MIB); | |||
| 4039 | if (!InsMI) | |||
| 4040 | return false; | |||
| 4041 | MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), | |||
| 4042 | Src2Reg, /* LaneIdx */ 1, RB, MIB); | |||
| 4043 | if (!Ins2MI) | |||
| 4044 | return false; | |||
| 4045 | constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); | |||
| 4046 | constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); | |||
| 4047 | I.eraseFromParent(); | |||
| 4048 | return true; | |||
| 4049 | } | |||
| 4050 | ||||
| 4051 | if (RB.getID() != AArch64::GPRRegBankID) | |||
| 4052 | return false; | |||
| 4053 | ||||
| 4054 | if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) | |||
| 4055 | return false; | |||
| 4056 | ||||
| 4057 | auto *DstRC = &AArch64::GPR64RegClass; | |||
| 4058 | Register SubToRegDef = MRI.createVirtualRegister(DstRC); | |||
| 4059 | MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), | |||
| 4060 | TII.get(TargetOpcode::SUBREG_TO_REG)) | |||
| 4061 | .addDef(SubToRegDef) | |||
| 4062 | .addImm(0) | |||
| 4063 | .addUse(I.getOperand(1).getReg()) | |||
| 4064 | .addImm(AArch64::sub_32); | |||
| 4065 | Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); | |||
| 4066 | // Need to anyext the second scalar before we can use bfm | |||
| 4067 | MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), | |||
| 4068 | TII.get(TargetOpcode::SUBREG_TO_REG)) | |||
| 4069 | .addDef(SubToRegDef2) | |||
| 4070 | .addImm(0) | |||
| 4071 | .addUse(I.getOperand(2).getReg()) | |||
| 4072 | .addImm(AArch64::sub_32); | |||
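| | // BFMXri here (sketch): with immr = 32 and imms = 31 it inserts the | |||
| | // low 32 bits of the second operand into bits [63:32] of the first, | |||
| | // producing the merged 64-bit value. | |||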
| 4073 | MachineInstr &BFM = | |||
| 4074 | *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) | |||
| 4075 | .addDef(I.getOperand(0).getReg()) | |||
| 4076 | .addUse(SubToRegDef) | |||
| 4077 | .addUse(SubToRegDef2) | |||
| 4078 | .addImm(32) | |||
| 4079 | .addImm(31); | |||
| 4080 | constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); | |||
| 4081 | constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); | |||
| 4082 | constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); | |||
| 4083 | I.eraseFromParent(); | |||
| 4084 | return true; | |||
| 4085 | } | |||
| 4086 | ||||
| 4087 | static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, | |||
| 4088 | const unsigned EltSize) { | |||
| 4089 | // Choose a lane copy opcode and subregister based off of the size of the | |||
| 4090 | // vector's elements. | |||
| 4091 | switch (EltSize) { | |||
| 4092 | case 8: | |||
| 4093 | CopyOpc = AArch64::DUPi8; | |||
| 4094 | ExtractSubReg = AArch64::bsub; | |||
| 4095 | break; | |||
| 4096 | case 16: | |||
| 4097 | CopyOpc = AArch64::DUPi16; | |||
| 4098 | ExtractSubReg = AArch64::hsub; | |||
| 4099 | break; | |||
| 4100 | case 32: | |||
| 4101 | CopyOpc = AArch64::DUPi32; | |||
| 4102 | ExtractSubReg = AArch64::ssub; | |||
| 4103 | break; | |||
| 4104 | case 64: | |||
| 4105 | CopyOpc = AArch64::DUPi64; | |||
| 4106 | ExtractSubReg = AArch64::dsub; | |||
| 4107 | break; | |||
| 4108 | default: | |||
| 4109 | // Unknown size, bail out. | |||
| 4110 | LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); | |||
| 4111 | return false; | |||
| 4112 | } | |||
| 4113 | return true; | |||
| 4114 | } | |||
| 4115 | ||||
| 4116 | MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( | |||
| 4117 | std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, | |||
| 4118 | Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { | |||
| 4119 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
| 4120 | unsigned CopyOpc = 0; | |||
| 4121 | unsigned ExtractSubReg = 0; | |||
| 4122 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { | |||
| 4123 | LLVM_DEBUG( | |||
| 4124 | dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); | |||
| 4125 | return nullptr; | |||
| 4126 | } | |||
| 4127 | ||||
| 4128 | const TargetRegisterClass *DstRC = | |||
| 4129 | getRegClassForTypeOnBank(ScalarTy, DstRB, true); | |||
| 4130 | if (!DstRC) { | |||
| 4131 | LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not determine destination register class.\n" ; } } while (false); | |||
| 4132 | return nullptr; | |||
| 4133 | } | |||
| 4134 | ||||
| 4135 | const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); | |||
| 4136 | const LLT &VecTy = MRI.getType(VecReg); | |||
| 4137 | const TargetRegisterClass *VecRC = | |||
| 4138 | getRegClassForTypeOnBank(VecTy, VecRB, true); | |||
| 4139 | if (!VecRC) { | |||
| 4140 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not determine source register class.\n" ; } } while (false); | |||
| 4141 | return nullptr; | |||
| 4142 | } | |||
| 4143 | ||||
| 4144 | // The register that we're going to copy into. | |||
| 4145 | Register InsertReg = VecReg; | |||
| 4146 | if (!DstReg) | |||
| 4147 | DstReg = MRI.createVirtualRegister(DstRC); | |||
| 4148 | // If the lane index is 0, we just use a subregister COPY. | |||
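| | // E.g., extracting element 0 of a <4 x s32> (register names illustrative): | |||
| | //   %dst:fpr32 = COPY %vec.ssub | |||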
| 4149 | if (LaneIdx == 0) { | |||
| 4150 | auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) | |||
| 4151 | .addReg(VecReg, 0, ExtractSubReg); | |||
| 4152 | RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); | |||
| 4153 | return &*Copy; | |||
| 4154 | } | |||
| 4155 | ||||
| 4156 | // Lane copies require 128-bit wide registers. If we're dealing with an | |||
| 4157 | // unpacked vector, then we need to move up to that width. Insert an implicit | |||
| 4158 | // def and a subregister insert to get us there. | |||
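| | // E.g., a 64-bit source vector is widened roughly as: | |||
| | //   %undef:fpr128 = IMPLICIT_DEF | |||
| | //   %wide:fpr128 = INSERT_SUBREG %undef, %vec, %subreg.dsub | |||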
| 4159 | if (VecTy.getSizeInBits() != 128) { | |||
| 4160 | MachineInstr *ScalarToVector = emitScalarToVector( | |||
| 4161 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); | |||
| 4162 | if (!ScalarToVector) | |||
| 4163 | return nullptr; | |||
| 4164 | InsertReg = ScalarToVector->getOperand(0).getReg(); | |||
| 4165 | } | |||
| 4166 | ||||
| 4167 | MachineInstr *LaneCopyMI = | |||
| 4168 | MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); | |||
| 4169 | constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); | |||
| 4170 | ||||
| 4171 | // Make sure that we actually constrain the initial copy. | |||
| 4172 | RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); | |||
| 4173 | return LaneCopyMI; | |||
| 4174 | } | |||
| 4175 | ||||
| 4176 | bool AArch64InstructionSelector::selectExtractElt( | |||
| 4177 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 4178 | assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && | |||
| 4179 | "unexpected opcode!"); | |||
| 4180 | Register DstReg = I.getOperand(0).getReg(); | |||
| 4181 | const LLT NarrowTy = MRI.getType(DstReg); | |||
| 4182 | const Register SrcReg = I.getOperand(1).getReg(); | |||
| 4183 | const LLT WideTy = MRI.getType(SrcReg); | |||
| 4184 | (void)WideTy; | |||
| 4185 | assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && | |||
| 4186 | "source register size too small!"); | |||
| 4187 | assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); | |||
| 4188 | ||||
| 4189 | // Need the lane index to determine the correct copy opcode. | |||
| 4190 | MachineOperand &LaneIdxOp = I.getOperand(2); | |||
| 4191 | assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); | |||
| 4192 | ||||
| 4193 | if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { | |||
| 4194 | LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Cannot extract into GPR.\n" ; } } while (false); | |||
| 4195 | return false; | |||
| 4196 | } | |||
| 4197 | ||||
| 4198 | // Find the index to extract from. | |||
| 4199 | auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); | |||
| 4200 | if (!VRegAndVal) | |||
| 4201 | return false; | |||
| 4202 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); | |||
| 4203 | ||||
| 4204 | ||||
| 4205 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | |||
| 4206 | MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, | |||
| 4207 | LaneIdx, MIB); | |||
| 4208 | if (!Extract) | |||
| 4209 | return false; | |||
| 4210 | ||||
| 4211 | I.eraseFromParent(); | |||
| 4212 | return true; | |||
| 4213 | } | |||
| 4214 | ||||
| 4215 | bool AArch64InstructionSelector::selectSplitVectorUnmerge( | |||
| 4216 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 4217 | unsigned NumElts = I.getNumOperands() - 1; | |||
| 4218 | Register SrcReg = I.getOperand(NumElts).getReg(); | |||
| 4219 | const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 4220 | const LLT SrcTy = MRI.getType(SrcReg); | |||
| 4221 | ||||
| 4222 | assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); | |||
| 4223 | if (SrcTy.getSizeInBits() > 128) { | |||
| 4224 | LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unexpected vector type for vec split unmerge" ; } } while (false); | |||
| 4225 | return false; | |||
| 4226 | } | |||
| 4227 | ||||
| 4228 | // We implement a split vector operation by treating the sub-vectors as | |||
| 4229 | // scalars and extracting them. | |||
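| | // E.g., unmerging a <4 x s32> into two <2 x s32> halves treats the source | |||
| | // as two 64-bit "elements" and extracts lanes 0 and 1. | |||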
| 4230 | const RegisterBank &DstRB = | |||
| 4231 | *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); | |||
| 4232 | for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { | |||
| 4233 | Register Dst = I.getOperand(OpIdx).getReg(); | |||
| 4234 | MachineInstr *Extract = | |||
| 4235 | emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); | |||
| 4236 | if (!Extract) | |||
| 4237 | return false; | |||
| 4238 | } | |||
| 4239 | I.eraseFromParent(); | |||
| 4240 | return true; | |||
| 4241 | } | |||
| 4242 | ||||
| 4243 | bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, | |||
| 4244 | MachineRegisterInfo &MRI) { | |||
| 4245 | assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && | |||
| 4246 | "unexpected opcode"); | |||
| 4247 | ||||
| 4248 | // TODO: Handle unmerging into GPRs and from scalars to scalars. | |||
| 4249 | if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != | |||
| 4250 | AArch64::FPRRegBankID || | |||
| 4251 | RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != | |||
| 4252 | AArch64::FPRRegBankID) { | |||
| 4253 | LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " "currently unsupported.\n"; } } while (false) | |||
| 4254 | "currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " "currently unsupported.\n"; } } while (false); | |||
| 4255 | return false; | |||
| 4256 | } | |||
| 4257 | ||||
| 4258 | // The last operand is the vector source register, and every other operand is | |||
| 4259 | // a register to unpack into. | |||
| 4260 | unsigned NumElts = I.getNumOperands() - 1; | |||
| 4261 | Register SrcReg = I.getOperand(NumElts).getReg(); | |||
| 4262 | const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 4263 | const LLT WideTy = MRI.getType(SrcReg); | |||
| 4264 | (void)WideTy; | |||
| 4265 | assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && | |||
| 4266 | "can only unmerge from vector or s128 types!"); | |||
| 4267 | assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && | |||
| 4268 | "source register size too small!"); | |||
| 4269 | ||||
| 4270 | if (!NarrowTy.isScalar()) | |||
| 4271 | return selectSplitVectorUnmerge(I, MRI); | |||
| 4272 | ||||
| 4273 | // Choose a lane copy opcode and subregister based off of the size of the | |||
| 4274 | // vector's elements. | |||
| 4275 | unsigned CopyOpc = 0; | |||
| 4276 | unsigned ExtractSubReg = 0; | |||
| 4277 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) | |||
| 4278 | return false; | |||
| 4279 | ||||
| 4280 | // Set up for the lane copies. | |||
| 4281 | MachineBasicBlock &MBB = *I.getParent(); | |||
| 4282 | ||||
| 4283 | // Stores the registers we'll be copying from. | |||
| 4284 | SmallVector<Register, 4> InsertRegs; | |||
| 4285 | ||||
| 4286 | // We'll use the first register twice, so we only need NumElts-1 registers. | |||
| 4287 | unsigned NumInsertRegs = NumElts - 1; | |||
| 4288 | ||||
| 4289 | // If our elements fit into exactly 128 bits, then we can copy from the source | |||
| 4290 | // directly. Otherwise, we need to do a bit of setup with some subregister | |||
| 4291 | // inserts. | |||
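| | // E.g., four s32 elements from a 128-bit source can be lane-copied | |||
| | // directly, whereas four s16 elements from a 64-bit source need the | |||
| | // widening below first. | |||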
| 4292 | if (NarrowTy.getSizeInBits() * NumElts == 128) { | |||
| 4293 | InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); | |||
| 4294 | } else { | |||
| 4295 | // Otherwise, we have to perform subregister inserts. For each insert, create an | |||
| 4296 | // implicit def and a subregister insert, and save the register we create. | |||
| 4297 | const TargetRegisterClass *RC = getRegClassForTypeOnBank( | |||
| 4298 | LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()), | |||
| 4299 | *RBI.getRegBank(SrcReg, MRI, TRI)); | |||
| 4300 | unsigned SubReg = 0; | |||
| 4301 | bool Found = getSubRegForClass(RC, TRI, SubReg); | |||
| 4302 | (void)Found; | |||
| 4303 | assert(Found && "expected to find last operand's subreg idx"); | |||
| 4304 | for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { | |||
| 4305 | Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); | |||
| 4306 | MachineInstr &ImpDefMI = | |||
| 4307 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), | |||
| 4308 | ImpDefReg); | |||
| 4309 | ||||
| 4310 | // Now, create the subregister insert from SrcReg. | |||
| 4311 | Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); | |||
| 4312 | MachineInstr &InsMI = | |||
| 4313 | *BuildMI(MBB, I, I.getDebugLoc(), | |||
| 4314 | TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) | |||
| 4315 | .addUse(ImpDefReg) | |||
| 4316 | .addUse(SrcReg) | |||
| 4317 | .addImm(SubReg); | |||
| 4318 | ||||
| 4319 | constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); | |||
| 4320 | constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); | |||
| 4321 | ||||
| 4322 | // Save the register so that we can copy from it after. | |||
| 4323 | InsertRegs.push_back(InsertReg); | |||
| 4324 | } | |||
| 4325 | } | |||
| 4326 | ||||
| 4327 | // Now that we've created any necessary subregister inserts, we can | |||
| 4328 | // create the copies. | |||
| 4329 | // | |||
| 4330 | // Perform the first copy separately as a subregister copy. | |||
| 4331 | Register CopyTo = I.getOperand(0).getReg(); | |||
| 4332 | auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) | |||
| 4333 | .addReg(InsertRegs[0], 0, ExtractSubReg); | |||
| 4334 | constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); | |||
| 4335 | ||||
| 4336 | // Now, perform the remaining copies as vector lane copies. | |||
| 4337 | unsigned LaneIdx = 1; | |||
| 4338 | for (Register InsReg : InsertRegs) { | |||
| 4339 | Register CopyTo = I.getOperand(LaneIdx).getReg(); | |||
| 4340 | MachineInstr &CopyInst = | |||
| 4341 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) | |||
| 4342 | .addUse(InsReg) | |||
| 4343 | .addImm(LaneIdx); | |||
| 4344 | constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); | |||
| 4345 | ++LaneIdx; | |||
| 4346 | } | |||
| 4347 | ||||
| 4348 | // Separately constrain the first copy's destination. Because of the | |||
| 4349 | // limitation in constrainOperandRegClass, we can't guarantee that this will | |||
| 4350 | // actually be constrained. So, do it ourselves using the second operand. | |||
| 4351 | const TargetRegisterClass *RC = | |||
| 4352 | MRI.getRegClassOrNull(I.getOperand(1).getReg()); | |||
| 4353 | if (!RC) { | |||
| 4354 | LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't constrain copy destination.\n" ; } } while (false); | |||
| 4355 | return false; | |||
| 4356 | } | |||
| 4357 | ||||
| 4358 | RBI.constrainGenericRegister(CopyTo, *RC, MRI); | |||
| 4359 | I.eraseFromParent(); | |||
| 4360 | return true; | |||
| 4361 | } | |||
| 4362 | ||||
| 4363 | bool AArch64InstructionSelector::selectConcatVectors( | |||
| 4364 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 4365 | assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && | |||
| 4366 | "Unexpected opcode"); | |||
| 4367 | Register Dst = I.getOperand(0).getReg(); | |||
| 4368 | Register Op1 = I.getOperand(1).getReg(); | |||
| 4369 | Register Op2 = I.getOperand(2).getReg(); | |||
| 4370 | MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB); | |||
| 4371 | if (!ConcatMI) | |||
| 4372 | return false; | |||
| 4373 | I.eraseFromParent(); | |||
| 4374 | return true; | |||
| 4375 | } | |||
| 4376 | ||||
| 4377 | unsigned | |||
| 4378 | AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, | |||
| 4379 | MachineFunction &MF) const { | |||
| 4380 | Type *CPTy = CPVal->getType(); | |||
| 4381 | Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); | |||
| 4382 | ||||
| 4383 | MachineConstantPool *MCP = MF.getConstantPool(); | |||
| 4384 | return MCP->getConstantPoolIndex(CPVal, Alignment); | |||
| 4385 | } | |||
| 4386 | ||||
| 4387 | MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( | |||
| 4388 | const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { | |||
| 4389 | const TargetRegisterClass *RC; | |||
| 4390 | unsigned Opc; | |||
| 4391 | bool IsTiny = TM.getCodeModel() == CodeModel::Tiny; | |||
| 4392 | unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType()); | |||
| 4393 | switch (Size) { | |||
| 4394 | case 16: | |||
| 4395 | RC = &AArch64::FPR128RegClass; | |||
| 4396 | Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui; | |||
| 4397 | break; | |||
| 4398 | case 8: | |||
| 4399 | RC = &AArch64::FPR64RegClass; | |||
| 4400 | Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui; | |||
| 4401 | break; | |||
| 4402 | case 4: | |||
| 4403 | RC = &AArch64::FPR32RegClass; | |||
| 4404 | Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui; | |||
| 4405 | break; | |||
| 4406 | case 2: | |||
| 4407 | RC = &AArch64::FPR16RegClass; | |||
| 4408 | Opc = AArch64::LDRHui; | |||
| 4409 | break; | |||
| 4410 | default: | |||
| 4411 | LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not load from constant pool of type " << *CPVal->getType(); } } while (false) | |||
| 4412 | << *CPVal->getType())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not load from constant pool of type " << *CPVal->getType(); } } while (false); | |||
| 4413 | return nullptr; | |||
| 4414 | } | |||
| 4415 | ||||
| 4416 | MachineInstr *LoadMI = nullptr; | |||
| 4417 | auto &MF = MIRBuilder.getMF(); | |||
| 4418 | unsigned CPIdx = emitConstantPoolEntry(CPVal, MF); | |||
| 4419 | if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) { | |||
| 4420 | // Use load(literal) for tiny code model. | |||
| 4421 | LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx); | |||
| 4422 | } else { | |||
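| | // Materialize the address as ADRP + page-offset load, roughly (16-byte | |||
| | // case shown; register names illustrative): | |||
| | //   %page:gpr64 = ADRP %const.N (page) | |||
| | //   %val:fpr128 = LDRQui %page, %const.N (pageoff, nc) | |||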
| 4423 | auto Adrp = | |||
| 4424 | MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) | |||
| 4425 | .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); | |||
| 4426 | ||||
| 4427 | LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp}) | |||
| 4428 | .addConstantPoolIndex( | |||
| 4429 | CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); | |||
| 4430 | ||||
| 4431 | constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); | |||
| 4432 | } | |||
| 4433 | ||||
| 4434 | MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF); | |||
| 4435 | LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo, | |||
| 4436 | MachineMemOperand::MOLoad, | |||
| 4437 | Size, Align(Size))); | |||
| 4438 | constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); | |||
| 4439 | return LoadMI; | |||
| 4440 | } | |||
| 4441 | ||||
| 4442 | /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given | |||
| 4443 | /// size and RB. | |||
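| | /// E.g., {GPRRegBank, 32} yields {INSvi32gpr, ssub}, while {FPRRegBank, 64} | |||
| | /// yields {INSvi64lane, dsub}. | |||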
| 4444 | static std::pair<unsigned, unsigned> | |||
| 4445 | getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { | |||
| 4446 | unsigned Opc, SubregIdx; | |||
| 4447 | if (RB.getID() == AArch64::GPRRegBankID) { | |||
| 4448 | if (EltSize == 16) { | |||
| 4449 | Opc = AArch64::INSvi16gpr; | |||
| 4450 | SubregIdx = AArch64::ssub; | |||
| 4451 | } else if (EltSize == 32) { | |||
| 4452 | Opc = AArch64::INSvi32gpr; | |||
| 4453 | SubregIdx = AArch64::ssub; | |||
| 4454 | } else if (EltSize == 64) { | |||
| 4455 | Opc = AArch64::INSvi64gpr; | |||
| 4456 | SubregIdx = AArch64::dsub; | |||
| 4457 | } else { | |||
| 4458 | llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4458); | |||
| 4459 | } | |||
| 4460 | } else { | |||
| 4461 | if (EltSize == 8) { | |||
| 4462 | Opc = AArch64::INSvi8lane; | |||
| 4463 | SubregIdx = AArch64::bsub; | |||
| 4464 | } else if (EltSize == 16) { | |||
| 4465 | Opc = AArch64::INSvi16lane; | |||
| 4466 | SubregIdx = AArch64::hsub; | |||
| 4467 | } else if (EltSize == 32) { | |||
| 4468 | Opc = AArch64::INSvi32lane; | |||
| 4469 | SubregIdx = AArch64::ssub; | |||
| 4470 | } else if (EltSize == 64) { | |||
| 4471 | Opc = AArch64::INSvi64lane; | |||
| 4472 | SubregIdx = AArch64::dsub; | |||
| 4473 | } else { | |||
| 4474 | llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4474); | |||
| 4475 | } | |||
| 4476 | } | |||
| 4477 | return std::make_pair(Opc, SubregIdx); | |||
| 4478 | } | |||
| 4479 | ||||
| 4480 | MachineInstr *AArch64InstructionSelector::emitInstr( | |||
| 4481 | unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, | |||
| 4482 | std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, | |||
| 4483 | const ComplexRendererFns &RenderFns) const { | |||
| 4484 | assert(Opcode && "Expected an opcode?"); | |||
| 4485 | assert(!isPreISelGenericOpcode(Opcode) && | |||
| 4486 | "Function should only be used to produce selected instructions!"); | |||
| 4487 | auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); | |||
| 4488 | if (RenderFns) | |||
| 4489 | for (auto &Fn : *RenderFns) | |||
| 4490 | Fn(MI); | |||
| 4491 | constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); | |||
| 4492 | return &*MI; | |||
| 4493 | } | |||
| 4494 | ||||
| 4495 | MachineInstr *AArch64InstructionSelector::emitAddSub( | |||
| 4496 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, | |||
| 4497 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, | |||
| 4498 | MachineIRBuilder &MIRBuilder) const { | |||
| 4499 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); | |||
| 4500 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); | |||
| 4501 | auto Ty = MRI.getType(LHS.getReg()); | |||
| 4502 | assert(!Ty.isVector() && "Expected a scalar or pointer?"); | |||
| 4503 | unsigned Size = Ty.getSizeInBits(); | |||
| 4504 | assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); | |||
| 4505 | bool Is32Bit = Size == 32; | |||
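| | // Rows of AddrModeAndSizeToOpcode: [0] = ri, [1] = rs (shifted register), | |||
| | // [2] = rr, [3] = ri with the immediate negated, [4] = rx (extended | |||
| | // register); column [0] is the 64-bit opcode, [1] the 32-bit one. | |||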
| 4506 | ||||
| 4507 | // INSTRri form with positive arithmetic immediate. | |||
| 4508 | if (auto Fns = selectArithImmed(RHS)) | |||
| 4509 | return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, | |||
| 4510 | MIRBuilder, Fns); | |||
| 4511 | ||||
| 4512 | // INSTRri form with negative arithmetic immediate. | |||
| 4513 | if (auto Fns = selectNegArithImmed(RHS)) | |||
| 4514 | return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, | |||
| 4515 | MIRBuilder, Fns); | |||
| 4516 | ||||
| 4517 | // INSTRrx form. | |||
| 4518 | if (auto Fns = selectArithExtendedRegister(RHS)) | |||
| 4519 | return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, | |||
| 4520 | MIRBuilder, Fns); | |||
| 4521 | ||||
| 4522 | // INSTRrs form. | |||
| 4523 | if (auto Fns = selectShiftedRegister(RHS)) | |||
| 4524 | return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, | |||
| 4525 | MIRBuilder, Fns); | |||
| 4526 | return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, | |||
| 4527 | MIRBuilder); | |||
| 4528 | } | |||
| 4529 | ||||
| 4530 | MachineInstr * | |||
| 4531 | AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, | |||
| 4532 | MachineOperand &RHS, | |||
| 4533 | MachineIRBuilder &MIRBuilder) const { | |||
| 4534 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ | |||
| 4535 | {{AArch64::ADDXri, AArch64::ADDWri}, | |||
| 4536 | {AArch64::ADDXrs, AArch64::ADDWrs}, | |||
| 4537 | {AArch64::ADDXrr, AArch64::ADDWrr}, | |||
| 4538 | {AArch64::SUBXri, AArch64::SUBWri}, | |||
| 4539 | {AArch64::ADDXrx, AArch64::ADDWrx}}}; | |||
| 4540 | return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); | |||
| 4541 | } | |||
| 4542 | ||||
| 4543 | MachineInstr * | |||
| 4544 | AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, | |||
| 4545 | MachineOperand &RHS, | |||
| 4546 | MachineIRBuilder &MIRBuilder) const { | |||
| 4547 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ | |||
| 4548 | {{AArch64::ADDSXri, AArch64::ADDSWri}, | |||
| 4549 | {AArch64::ADDSXrs, AArch64::ADDSWrs}, | |||
| 4550 | {AArch64::ADDSXrr, AArch64::ADDSWrr}, | |||
| 4551 | {AArch64::SUBSXri, AArch64::SUBSWri}, | |||
| 4552 | {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; | |||
| 4553 | return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); | |||
| 4554 | } | |||
| 4555 | ||||
| 4556 | MachineInstr * | |||
| 4557 | AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, | |||
| 4558 | MachineOperand &RHS, | |||
| 4559 | MachineIRBuilder &MIRBuilder) const { | |||
| 4560 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ | |||
| 4561 | {{AArch64::SUBSXri, AArch64::SUBSWri}, | |||
| 4562 | {AArch64::SUBSXrs, AArch64::SUBSWrs}, | |||
| 4563 | {AArch64::SUBSXrr, AArch64::SUBSWrr}, | |||
| 4564 | {AArch64::ADDSXri, AArch64::ADDSWri}, | |||
| 4565 | {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; | |||
| 4566 | return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); | |||
| 4567 | } | |||
| 4568 | ||||
| 4569 | MachineInstr * | |||
| 4570 | AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, | |||
| 4571 | MachineIRBuilder &MIRBuilder) const { | |||
| 4572 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); | |||
| 4573 | bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); | |||
| 4574 | auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; | |||
| 4575 | return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); | |||
| 4576 | } | |||
| 4577 | ||||
| 4578 | MachineInstr * | |||
| 4579 | AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, | |||
| 4580 | MachineIRBuilder &MIRBuilder) const { | |||
| 4581 | assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); | |||
| 4582 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); | |||
| 4583 | LLT Ty = MRI.getType(LHS.getReg()); | |||
| 4584 | unsigned RegSize = Ty.getSizeInBits(); | |||
| 4585 | bool Is32Bit = (RegSize == 32); | |||
| 4586 | const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, | |||
| 4587 | {AArch64::ANDSXrs, AArch64::ANDSWrs}, | |||
| 4588 | {AArch64::ANDSXrr, AArch64::ANDSWrr}}; | |||
| 4589 | // ANDS needs a logical immediate for its immediate form. Check if we can | |||
| 4590 | // fold one in. | |||
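| | // E.g., 0xff is encodable as a logical immediate, so a TST against it can | |||
| | // select to ANDSWri rather than materializing the constant. | |||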
| 4591 | if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { | |||
| 4592 | int64_t Imm = ValAndVReg->Value.getSExtValue(); | |||
| 4593 | ||||
| 4594 | if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { | |||
| 4595 | auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); | |||
| 4596 | TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); | |||
| 4597 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); | |||
| 4598 | return &*TstMI; | |||
| 4599 | } | |||
| 4600 | } | |||
| 4601 | ||||
| 4602 | if (auto Fns = selectLogicalShiftedRegister(RHS)) | |||
| 4603 | return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); | |||
| 4604 | return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); | |||
| 4605 | } | |||
| 4606 | ||||
| 4607 | MachineInstr *AArch64InstructionSelector::emitIntegerCompare( | |||
| 4608 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, | |||
| 4609 | MachineIRBuilder &MIRBuilder) const { | |||
| 4610 | assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!")(static_cast <bool> (LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!") ? void ( 0) : __assert_fail ("LHS.isReg() && RHS.isReg() && \"Expected LHS and RHS to be registers!\"" , "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4610, __extension__ __PRETTY_FUNCTION__)); | |||
| 4611 | assert(Predicate.isPredicate() && "Expected predicate?")(static_cast <bool> (Predicate.isPredicate() && "Expected predicate?") ? void (0) : __assert_fail ("Predicate.isPredicate() && \"Expected predicate?\"" , "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4611, __extension__ __PRETTY_FUNCTION__)); | |||
| 4612 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); | |||
| 4613 | LLT CmpTy = MRI.getType(LHS.getReg()); | |||
| 4614 | assert(!CmpTy.isVector() && "Expected scalar or pointer"); | |||
| 4615 | unsigned Size = CmpTy.getSizeInBits(); | |||
| 4616 | (void)Size; | |||
| 4617 | assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); | |||
| 4618 | // Fold the compare into a cmn or tst if possible. | |||
| 4619 | if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) | |||
| 4620 | return FoldCmp; | |||
| 4621 | auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); | |||
| 4622 | return emitSUBS(Dst, LHS, RHS, MIRBuilder); | |||
| 4623 | } | |||
| 4624 | ||||
| 4625 | MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( | |||
| 4626 | Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { | |||
| 4627 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
| 4628 | #ifndef NDEBUG | |||
| 4629 | LLT Ty = MRI.getType(Dst); | |||
| 4630 | assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && | |||
| 4631 | "Expected a 32-bit scalar register?"); | |||
| 4632 | #endif | |||
| 4633 | const Register ZReg = AArch64::WZR; | |||
| 4634 | AArch64CC::CondCode CC1, CC2; | |||
| 4635 | changeFCMPPredToAArch64CC(Pred, CC1, CC2); | |||
| 4636 | auto InvCC1 = AArch64CC::getInvertedCondCode(CC1); | |||
| 4637 | if (CC2 == AArch64CC::AL) | |||
| 4638 | return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, | |||
| 4639 | MIRBuilder); | |||
| 4640 | const TargetRegisterClass *RC = &AArch64::GPR32RegClass; | |||
| 4641 | Register Def1Reg = MRI.createVirtualRegister(RC); | |||
| 4642 | Register Def2Reg = MRI.createVirtualRegister(RC); | |||
| 4643 | auto InvCC2 = AArch64CC::getInvertedCondCode(CC2); | |||
| 4644 | emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder); | |||
| 4645 | emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder); | |||
| 4646 | auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); | |||
| 4647 | constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); | |||
| 4648 | return &*OrMI; | |||
| 4649 | } | |||
| 4650 | ||||
| 4651 | MachineInstr *AArch64InstructionSelector::emitFPCompare( | |||
| 4652 | Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, | |||
| 4653 | std::optional<CmpInst::Predicate> Pred) const { | |||
| 4654 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
| 4655 | LLT Ty = MRI.getType(LHS); | |||
| 4656 | if (Ty.isVector()) | |||
| 4657 | return nullptr; | |||
| 4658 | unsigned OpSize = Ty.getSizeInBits(); | |||
| 4659 | if (OpSize != 32 && OpSize != 64) | |||
| 4660 | return nullptr; | |||
| 4661 | ||||
| 4662 | // If this is a compare against +0.0, then we don't have | |||
| 4663 | // to explicitly materialize a constant. | |||
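| | // E.g., G_FCMP oeq, %x, 0.0 can use FCMPSri (FCMPDri for 64-bit), which | |||
| | // compares %x against #0.0 directly. | |||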
| 4664 | const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); | |||
| 4665 | bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); | |||
| 4666 | ||||
| 4667 | auto IsEqualityPred = [](CmpInst::Predicate P) { | |||
| 4668 | return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || | |||
| 4669 | P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; | |||
| 4670 | }; | |||
| 4671 | if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { | |||
| 4672 | // Try commutating the operands. | |||
| 4673 | const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); | |||
| 4674 | if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { | |||
| 4675 | ShouldUseImm = true; | |||
| 4676 | std::swap(LHS, RHS); | |||
| 4677 | } | |||
| 4678 | } | |||
| 4679 | unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, | |||
| 4680 | {AArch64::FCMPSri, AArch64::FCMPDri}}; | |||
| 4681 | unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; | |||
| 4682 | ||||
| 4683 | // Partially build the compare. Decide if we need to add a use for the | |||
| 4684 | // third operand based off whether or not we're comparing against 0.0. | |||
| 4685 | auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); | |||
| 4686 | CmpMI.setMIFlags(MachineInstr::NoFPExcept); | |||
| 4687 | if (!ShouldUseImm) | |||
| 4688 | CmpMI.addUse(RHS); | |||
| 4689 | constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); | |||
| 4690 | return &*CmpMI; | |||
| 4691 | } | |||
| 4692 | ||||
| 4693 | MachineInstr *AArch64InstructionSelector::emitVectorConcat( | |||
| 4694 | std::optional<Register> Dst, Register Op1, Register Op2, | |||
| 4695 | MachineIRBuilder &MIRBuilder) const { | |||
| 4696 | // We implement a vector concat by: | |||
| 4697 | // 1. Use scalar_to_vector to insert the lower vector into the larger dest | |||
| 4698 | // 2. Insert the upper vector into the destination's upper element | |||
| 4699 | // TODO: some of this code is common with G_BUILD_VECTOR handling. | |||
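| | // E.g., concatenating two <2 x s32> values (register names illustrative): | |||
| | //   %w1:fpr128 = <op1 widened via scalar_to_vector> | |||
| | //   %w2:fpr128 = <op2 widened via scalar_to_vector> | |||
| | //   %dst:fpr128 = INSvi64lane %w1, 1, %w2, 0 | |||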
| 4700 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); | |||
| 4701 | ||||
| 4702 | const LLT Op1Ty = MRI.getType(Op1); | |||
| 4703 | const LLT Op2Ty = MRI.getType(Op2); | |||
| 4704 | ||||
| 4705 | if (Op1Ty != Op2Ty) { | |||
| 4706 | LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not do vector concat of differing vector tys" ; } } while (false); | |||
| 4707 | return nullptr; | |||
| 4708 | } | |||
| 4709 | assert(Op1Ty.isVector() && "Expected a vector for vector concat"); | |||
| 4710 | ||||
| 4711 | if (Op1Ty.getSizeInBits() >= 128) { | |||
| 4712 | LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Vector concat not supported for full size vectors" ; } } while (false); | |||
| 4713 | return nullptr; | |||
| 4714 | } | |||
| 4715 | ||||
| 4716 | // At the moment we just support 64 bit vector concats. | |||
| 4717 | if (Op1Ty.getSizeInBits() != 64) { | |||
| 4718 | LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Vector concat supported for 64b vectors" ; } } while (false); | |||
| 4719 | return nullptr; | |||
| 4720 | } | |||
| 4721 | ||||
| 4722 | const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits()); | |||
| 4723 | const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI); | |||
| 4724 | const TargetRegisterClass *DstRC = | |||
| 4725 | getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank); | |||
| 4726 | ||||
| 4727 | MachineInstr *WidenedOp1 = | |||
| 4728 | emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder); | |||
| 4729 | MachineInstr *WidenedOp2 = | |||
| 4730 | emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder); | |||
| 4731 | if (!WidenedOp1 || !WidenedOp2) { | |||
| 4732 | LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not emit a vector from scalar value" ; } } while (false); | |||
| 4733 | return nullptr; | |||
| 4734 | } | |||
| 4735 | ||||
| 4736 | // Now do the insert of the upper element. | |||
| 4737 | unsigned InsertOpc, InsSubRegIdx; | |||
| 4738 | std::tie(InsertOpc, InsSubRegIdx) = | |||
| 4739 | getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); | |||
| 4740 | ||||
| 4741 | if (!Dst) | |||
| 4742 | Dst = MRI.createVirtualRegister(DstRC); | |||
| 4743 | auto InsElt = | |||
| 4744 | MIRBuilder | |||
| 4745 | .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) | |||
| 4746 | .addImm(1) /* Lane index */ | |||
| 4747 | .addUse(WidenedOp2->getOperand(0).getReg()) | |||
| 4748 | .addImm(0); | |||
| 4749 | constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); | |||
| 4750 | return &*InsElt; | |||
| 4751 | } | |||
| 4752 | ||||
| 4753 | MachineInstr * | |||
| 4754 | AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1, | |||
| 4755 | Register Src2, AArch64CC::CondCode Pred, | |||
| 4756 | MachineIRBuilder &MIRBuilder) const { | |||
| 4757 | auto &MRI = *MIRBuilder.getMRI(); | |||
| 4758 | const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst); | |||
| 4759 | // If we used a register class, then this won't necessarily have an LLT. | |||
| 4760 | // Compute the size based off whether or not we have a class or bank. | |||
| 4761 | unsigned Size; | |||
| 4762 | if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) | |||
| 4763 | Size = TRI.getRegSizeInBits(*RC); | |||
| 4764 | else | |||
| 4765 | Size = MRI.getType(Dst).getSizeInBits(); | |||
| 4766 | // Some opcodes use s1. | |||
| 4767 | assert(Size <= 64 && "Expected 64 bits or less only!"); | |||
| 4768 | static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr}; | |||
| 4769 | unsigned Opc = OpcTable[Size == 64]; | |||
| 4770 | auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred); | |||
| 4771 | constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI); | |||
| 4772 | return &*CSINC; | |||
| 4773 | } | |||
| 4774 | ||||
| 4775 | std::pair<MachineInstr *, AArch64CC::CondCode> | |||
| 4776 | AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, | |||
| 4777 | MachineOperand &LHS, | |||
| 4778 | MachineOperand &RHS, | |||
| 4779 | MachineIRBuilder &MIRBuilder) const { | |||
| 4780 | switch (Opcode) { | |||
| 4781 | default: | |||
| 4782 | llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4782); | |||
| 4783 | case TargetOpcode::G_SADDO: | |||
| 4784 | return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); | |||
| 4785 | case TargetOpcode::G_UADDO: | |||
| 4786 | return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); | |||
| 4787 | case TargetOpcode::G_SSUBO: | |||
| 4788 | return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); | |||
| 4789 | case TargetOpcode::G_USUBO: | |||
| 4790 | return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); | |||
| 4791 | } | |||
| 4792 | } | |||
| 4793 | ||||
| 4794 | /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be | |||
| 4795 | /// expressed as a conjunction. | |||
| 4796 | /// \param CanNegate Set to true if we can negate the whole sub-tree just by | |||
| 4797 | /// changing the conditions on the CMP tests. | |||
| 4798 | /// (this means we can call emitConjunctionRec() with | |||
| 4799 | /// Negate==true on this sub-tree) | |||
| 4800 | /// \param MustBeFirst Set to true if this subtree needs to be negated and we | |||
| 4801 | /// cannot do the negation naturally. We are required to | |||
| 4802 | /// emit the subtree first in this case. | |||
| 4803 | /// \param WillNegate Is true if we are called when the result of this | |||
| 4804 | /// subexpression must be negated. This happens when the | |||
| 4805 | /// outer expression is an OR. We can use this fact to know | |||
| 4806 | /// that we have a double negation (or (or ...) ...) that | |||
| 4807 | /// can be implemented for free. | |||
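| | /// E.g., (or (icmp a, b), (and (icmp c, d), (icmp e, f))) can be emitted as | |||
| | /// one ordinary compare followed by a chain of conditional compares (CCMP). | |||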
| 4808 | static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, | |||
| 4809 | bool WillNegate, MachineRegisterInfo &MRI, | |||
| 4810 | unsigned Depth = 0) { | |||
| 4811 | if (!MRI.hasOneNonDBGUse(Val)) | |||
| 4812 | return false; | |||
| 4813 | MachineInstr *ValDef = MRI.getVRegDef(Val); | |||
| 4814 | unsigned Opcode = ValDef->getOpcode(); | |||
| 4815 | if (isa<GAnyCmp>(ValDef)) { | |||
| 4816 | CanNegate = true; | |||
| 4817 | MustBeFirst = false; | |||
| 4818 | return true; | |||
| 4819 | } | |||
| 4820 | // Protect against exponential runtime and stack overflow. | |||
| 4821 | if (Depth > 6) | |||
| 4822 | return false; | |||
| 4823 | if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) { | |||
| 4824 | bool IsOR = Opcode == TargetOpcode::G_OR; | |||
| 4825 | Register O0 = ValDef->getOperand(1).getReg(); | |||
| 4826 | Register O1 = ValDef->getOperand(2).getReg(); | |||
| 4827 | bool CanNegateL; | |||
| 4828 | bool MustBeFirstL; | |||
| 4829 | if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1)) | |||
| 4830 | return false; | |||
| 4831 | bool CanNegateR; | |||
| 4832 | bool MustBeFirstR; | |||
| 4833 | if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1)) | |||
| 4834 | return false; | |||
| 4835 | ||||
| 4836 | if (MustBeFirstL && MustBeFirstR) | |||
| 4837 | return false; | |||
| 4838 | ||||
| 4839 | if (IsOR) { | |||
| 4840 | // For an OR expression we need to be able to naturally negate at least | |||
| 4841 | // one side or we cannot do the transformation at all. | |||
| 4842 | if (!CanNegateL && !CanNegateR) | |||
| 4843 | return false; | |||
| 4844 | // If the result of the OR will be negated and we can naturally negate | |||
| 4845 | // the leaves, then this sub-tree as a whole negates naturally. | |||
| 4846 | CanNegate = WillNegate && CanNegateL && CanNegateR; | |||
| 4847 | // If we cannot naturally negate the whole sub-tree, then this must be | |||
| 4848 | // emitted first. | |||
| 4849 | MustBeFirst = !CanNegate; | |||
| 4850 | } else { | |||
| 4851 | assert(Opcode == TargetOpcode::G_AND && "Must be G_AND")(static_cast <bool> (Opcode == TargetOpcode::G_AND && "Must be G_AND") ? void (0) : __assert_fail ("Opcode == TargetOpcode::G_AND && \"Must be G_AND\"" , "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4851, __extension__ __PRETTY_FUNCTION__)); | |||
| 4852 | // We cannot naturally negate an AND operation. | |||
| 4853 | CanNegate = false; | |||
| 4854 | MustBeFirst = MustBeFirstL || MustBeFirstR; | |||
| 4855 | } | |||
| 4856 | return true; | |||
| 4857 | } | |||
| 4858 | return false; | |||
| 4859 | } | |||
| 4860 | ||||
| 4861 | MachineInstr *AArch64InstructionSelector::emitConditionalComparison( | |||
| 4862 | Register LHS, Register RHS, CmpInst::Predicate CC, | |||
| 4863 | AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, | |||
| 4864 | MachineIRBuilder &MIB) const { | |||
| 4865 | // TODO: emit CMN as an optimization. | |||
| 4866 | auto &MRI = *MIB.getMRI(); | |||
| 4867 | LLT OpTy = MRI.getType(LHS); | |||
| 4868 | assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64); | |||
| 4869 | unsigned CCmpOpc; | |||
| 4870 | std::optional<ValueAndVReg> C; | |||
| 4871 | if (CmpInst::isIntPredicate(CC)) { | |||
| 4872 | C = getIConstantVRegValWithLookThrough(RHS, MRI); | |||
| 4873 | if (C && C->Value.ult(32)) | |||
| 4874 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi; | |||
| 4875 | else | |||
| 4876 | CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr; | |||
| 4877 | } else { | |||
| 4878 | switch (OpTy.getSizeInBits()) { | |||
| 4879 | case 16: | |||
| 4880 | CCmpOpc = AArch64::FCCMPHrr; | |||
| 4881 | break; | |||
| 4882 | case 32: | |||
| 4883 | CCmpOpc = AArch64::FCCMPSrr; | |||
| 4884 | break; | |||
| 4885 | case 64: | |||
| 4886 | CCmpOpc = AArch64::FCCMPDrr; | |||
| 4887 | break; | |||
| 4888 | default: | |||
| 4889 | return nullptr; | |||
| 4890 | } | |||
| 4891 | } | |||
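| | // If the CCMP's predicate fails, its flags must make the overall condition | |||
| | // read as false, so encode the NZCV that satisfies the inverted OutCC. | |||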
| 4892 | AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); | |||
| 4893 | unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); | |||
| 4894 | auto CCmp = | |||
| 4895 | MIB.buildInstr(CCmpOpc, {}, {LHS}); | |||
| 4896 | if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi) | |||
| 4897 | CCmp.addImm(C->Value.getZExtValue()); | |||
| 4898 | else | |||
| 4899 | CCmp.addReg(RHS); | |||
| 4900 | CCmp.addImm(NZCV).addImm(Predicate); | |||
| 4901 | constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI); | |||
| 4902 | return &*CCmp; | |||
| 4903 | } | |||
| 4904 | ||||
| 4905 | MachineInstr *AArch64InstructionSelector::emitConjunctionRec( | |||
| 4906 | Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, | |||
| 4907 | AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { | |||
| 4908 | // We're at a tree leaf, produce a conditional comparison operation. | |||
| 4909 | auto &MRI = *MIB.getMRI(); | |||
| 4910 | MachineInstr *ValDef = MRI.getVRegDef(Val); | |||
| 4911 | unsigned Opcode = ValDef->getOpcode(); | |||
| 4912 | if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) { | |||
| 4913 | Register LHS = Cmp->getLHSReg(); | |||
| 4914 | Register RHS = Cmp->getRHSReg(); | |||
| 4915 | CmpInst::Predicate CC = Cmp->getCond(); | |||
| 4916 | if (Negate) | |||
| 4917 | CC = CmpInst::getInversePredicate(CC); | |||
| 4918 | if (isa<GICmp>(Cmp)) { | |||
| 4919 | OutCC = changeICMPPredToAArch64CC(CC); | |||
| 4920 | } else { | |||
| 4921 | // Handle special FP cases. | |||
| 4922 | AArch64CC::CondCode ExtraCC; | |||
| 4923 | changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); | |||
| 4924 | // Some floating point conditions can't be tested with a single condition | |||
| 4925 | // code. Construct an additional comparison in this case. | |||
| 4926 | if (ExtraCC != AArch64CC::AL) { | |||
| 4927 | MachineInstr *ExtraCmp; | |||
| 4928 | if (!CCOp) | |||
| 4929 | ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC); | |||
| 4930 | else | |||
| 4931 | ExtraCmp = | |||
| 4932 | emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB); | |||
| 4933 | CCOp = ExtraCmp->getOperand(0).getReg(); | |||
| 4934 | Predicate = ExtraCC; | |||
| 4935 | } | |||
| 4936 | } | |||
| 4937 | ||||
| 4938 | // Produce a normal comparison if we are first in the chain | |||
| 4939 | if (!CCOp) { | |||
| 4940 | auto Dst = MRI.cloneVirtualRegister(LHS); | |||
| 4941 | if (isa<GICmp>(Cmp)) | |||
| 4942 | return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB); | |||
| 4943 | return emitFPCompare(Cmp->getOperand(2).getReg(), | |||
| 4944 | Cmp->getOperand(3).getReg(), MIB); | |||
| 4945 | } | |||
| 4946 | // Otherwise produce a ccmp. | |||
| 4947 | return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); | |||
| 4948 | } | |||
| 4949 | assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree"); | |||
| 4950 | ||||
| 4951 | bool IsOR = Opcode == TargetOpcode::G_OR; | |||
| 4952 | ||||
| 4953 | Register LHS = ValDef->getOperand(1).getReg(); | |||
| 4954 | bool CanNegateL; | |||
| 4955 | bool MustBeFirstL; | |||
| 4956 | bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI); | |||
| 4957 | assert(ValidL && "Valid conjunction/disjunction tree"); | |||
| 4958 | (void)ValidL; | |||
| 4959 | ||||
| 4960 | Register RHS = ValDef->getOperand(2).getReg(); | |||
| 4961 | bool CanNegateR; | |||
| 4962 | bool MustBeFirstR; | |||
| 4963 | bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI); | |||
| 4964 | assert(ValidR && "Valid conjunction/disjunction tree"); | |||
| 4965 | (void)ValidR; | |||
| 4966 | ||||
| 4967 | // Swap sub-tree that must come first to the right side. | |||
| 4968 | if (MustBeFirstL) { | |||
| 4969 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); | |||
| 4970 | std::swap(LHS, RHS); | |||
| 4971 | std::swap(CanNegateL, CanNegateR); | |||
| 4972 | std::swap(MustBeFirstL, MustBeFirstR); | |||
| 4973 | } | |||
| 4974 | ||||
| 4975 | bool NegateR; | |||
| 4976 | bool NegateAfterR; | |||
| 4977 | bool NegateL; | |||
| 4978 | bool NegateAfterAll; | |||
| 4979 | if (Opcode == TargetOpcode::G_OR) { | |||
| 4980 | // Swap the sub-tree that we can negate naturally to the left. | |||
| 4981 | if (!CanNegateL) { | |||
| 4982 | assert(CanNegateR && "at least one side must be negatable"); | |||
| 4983 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); | |||
| 4984 | assert(!Negate); | |||
| 4985 | std::swap(LHS, RHS); | |||
| 4986 | NegateR = false; | |||
| 4987 | NegateAfterR = true; | |||
| 4988 | } else { | |||
| 4989 | // Negate the left sub-tree if possible, otherwise negate the result. | |||
| 4990 | NegateR = CanNegateR; | |||
| 4991 | NegateAfterR = !CanNegateR; | |||
| 4992 | } | |||
| 4993 | NegateL = true; | |||
| 4994 | NegateAfterAll = !Negate; | |||
| 4995 | } else { | |||
| 4996 | assert(Opcode == TargetOpcode::G_AND && | |||
| 4997 | "Valid conjunction/disjunction tree"); | |||
| 4998 | assert(!Negate && "Valid conjunction/disjunction tree"); | |||
| 4999 | ||||
| 5000 | NegateL = false; | |||
| 5001 | NegateR = false; | |||
| 5002 | NegateAfterR = false; | |||
| 5003 | NegateAfterAll = false; | |||
| 5004 | } | |||
| 5005 | ||||
| 5006 | // Emit sub-trees. | |||
| 5007 | AArch64CC::CondCode RHSCC; | |||
| 5008 | MachineInstr *CmpR = | |||
| 5009 | emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB); | |||
| 5010 | if (NegateAfterR) | |||
| 5011 | RHSCC = AArch64CC::getInvertedCondCode(RHSCC); | |||
| 5012 | MachineInstr *CmpL = emitConjunctionRec( | |||
| 5013 | LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB); | |||
| 5014 | if (NegateAfterAll) | |||
| 5015 | OutCC = AArch64CC::getInvertedCondCode(OutCC); | |||
| 5016 | return CmpL; | |||
| 5017 | } | |||
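| | // Note (illustrative): the G_OR path above leans on De Morgan's law, since | |||
| | // CCMP chains natively express conjunctions: | |||
| | //   or(a, b) == not(and(not(a), not(b))) | |||
| | // so both sub-trees are negated on the way in (NegateL/NegateR or | |||
| | // NegateAfterR) and the final condition is inverted on the way out | |||
| | // (NegateAfterAll), unless an outer Negate already asked for the inverted | |||
| | // result and the two negations cancel. | |||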
| 5018 | ||||
| 5019 | MachineInstr *AArch64InstructionSelector::emitConjunction( | |||
| 5020 | Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { | |||
| 5021 | bool DummyCanNegate; | |||
| 5022 | bool DummyMustBeFirst; | |||
| 5023 | if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false, | |||
| 5024 | *MIB.getMRI())) | |||
| 5025 | return nullptr; | |||
| 5026 | return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB); | |||
| 5027 | } | |||
| 5028 | ||||
| 5029 | bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, | |||
| 5030 | MachineInstr &CondMI) { | |||
| 5031 | AArch64CC::CondCode AArch64CC; | |||
| 5032 | MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB); | |||
| 5033 | if (!ConjMI) | |||
| 5034 | return false; | |||
| 5035 | ||||
| 5036 | emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB); | |||
| 5037 | SelI.eraseFromParent(); | |||
| 5038 | return true; | |||
| 5039 | } | |||
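| | // Sketch (illustrative; register names hypothetical): a select on a | |||
| | // conjunction of compares, e.g. | |||
| | //   %c1 = G_ICMP eq, %a, %b | |||
| | //   %c2 = G_ICMP slt, %x, %y | |||
| | //   %c = G_AND %c1, %c2 | |||
| | //   %w = G_SELECT %c, %t, %f | |||
| | // can then be emitted as a compare, a conditional compare, and a CSEL: | |||
| | //   cmp  x, y                ; flags from the second compare | |||
| | //   ccmp a, b, #0, lt        ; flags forced to "ne" if x < y fails | |||
| | //   csel w, t, f, eq | |||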
| 5040 | ||||
| 5041 | bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { | |||
| 5042 | MachineRegisterInfo &MRI = *MIB.getMRI(); | |||
| 5043 | // We want to recognize this pattern: | |||
| 5044 | // | |||
| 5045 | // $z = G_FCMP pred, $x, $y | |||
| 5046 | // ... | |||
| 5047 | // $w = G_SELECT $z, $a, $b | |||
| 5048 | // | |||
| 5049 | // Where the value of $z is *only* ever used by the G_SELECT (possibly with | |||
| 5050 | // some copies/truncs in between). | |||
| 5051 | // | |||
| 5052 | // If we see this, then we can emit something like this: | |||
| 5053 | // | |||
| 5054 | // fcmp $x, $y | |||
| 5055 | // fcsel $w, $a, $b, pred | |||
| 5056 | // | |||
| 5057 | // Rather than emitting both of the rather long sequences in the standard | |||
| 5058 | // G_FCMP/G_SELECT select methods. | |||
| 5059 | ||||
| 5060 | // First, check if the condition is defined by a compare. | |||
| 5061 | MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); | |||
| 5062 | ||||
| 5063 | // We can only fold if all of the defs have one use. | |||
| 5064 | Register CondDefReg = CondDef->getOperand(0).getReg(); | |||
| 5065 | if (!MRI.hasOneNonDBGUse(CondDefReg)) { | |||
| 5066 | // Unless it's another select. | |||
| 5067 | for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { | |||
| 5068 | if (CondDef == &UI) | |||
| 5069 | continue; | |||
| 5070 | if (UI.getOpcode() != TargetOpcode::G_SELECT) | |||
| 5071 | return false; | |||
| 5072 | } | |||
| 5073 | } | |||
| 5074 | ||||
| 5075 | // Is the condition defined by a compare? | |||
| 5076 | unsigned CondOpc = CondDef->getOpcode(); | |||
| 5077 | if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) { | |||
| 5078 | if (tryOptSelectConjunction(I, *CondDef)) | |||
| 5079 | return true; | |||
| 5080 | return false; | |||
| 5081 | } | |||
| 5082 | ||||
| 5083 | AArch64CC::CondCode CondCode; | |||
| 5084 | if (CondOpc == TargetOpcode::G_ICMP) { | |||
| 5085 | auto Pred = | |||
| 5086 | static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); | |||
| 5087 | CondCode = changeICMPPredToAArch64CC(Pred); | |||
| 5088 | emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), | |||
| 5089 | CondDef->getOperand(1), MIB); | |||
| 5090 | } else { | |||
| 5091 | // Get the condition code for the select. | |||
| 5092 | auto Pred = | |||
| 5093 | static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); | |||
| 5094 | AArch64CC::CondCode CondCode2; | |||
| 5095 | changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); | |||
| 5096 | ||||
| 5097 | // changeFCMPPredToAArch64CC sets CondCode2 to something other than AL when | |||
| 5098 | // we require two instructions to emit the comparison. | |||
| 5099 | // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be | |||
| 5100 | // unnecessary. | |||
| 5101 | if (CondCode2 != AArch64CC::AL) | |||
| 5102 | return false; | |||
| 5103 | ||||
| 5104 | if (!emitFPCompare(CondDef->getOperand(2).getReg(), | |||
| 5105 | CondDef->getOperand(3).getReg(), MIB)) { | |||
| 5106 | LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); | |||
| 5107 | return false; | |||
| 5108 | } | |||
| 5109 | } | |||
| 5110 | ||||
| 5111 | // Emit the select. | |||
| 5112 | emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), | |||
| 5113 | I.getOperand(3).getReg(), CondCode, MIB); | |||
| 5114 | I.eraseFromParent(); | |||
| 5115 | return true; | |||
| 5116 | } | |||
| 5117 | ||||
| 5118 | MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( | |||
| 5119 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, | |||
| 5120 | MachineIRBuilder &MIRBuilder) const { | |||
| 5121 | assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && | |||
| 5122 | "Unexpected MachineOperand"); | |||
| 5123 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
| 5124 | // We want to find this sort of thing: | |||
| 5125 | // x = G_SUB 0, y | |||
| 5126 | // G_ICMP z, x | |||
| 5127 | // | |||
| 5128 | // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. | |||
| 5129 | // e.g: | |||
| 5130 | // | |||
| 5131 | // cmn z, y | |||
| 5132 | ||||
| 5133 | // Check if the RHS or LHS of the G_ICMP is defined by a SUB | |||
| 5134 | MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); | |||
| 5135 | MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); | |||
| 5136 | auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); | |||
| 5137 | // Given this: | |||
| 5138 | // | |||
| 5139 | // x = G_SUB 0, y | |||
| 5140 | // G_ICMP x, z | |||
| 5141 | // | |||
| 5142 | // Produce this: | |||
| 5143 | // | |||
| 5144 | // cmn y, z | |||
| 5145 | if (isCMN(LHSDef, P, MRI)) | |||
| 5146 | return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); | |||
| 5147 | ||||
| 5148 | // Same idea here, but with the RHS of the compare instead: | |||
| 5149 | // | |||
| 5150 | // Given this: | |||
| 5151 | // | |||
| 5152 | // x = G_SUB 0, y | |||
| 5153 | // G_ICMP z, x | |||
| 5154 | // | |||
| 5155 | // Produce this: | |||
| 5156 | // | |||
| 5157 | // cmn z, y | |||
| 5158 | if (isCMN(RHSDef, P, MRI)) | |||
| 5159 | return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); | |||
| 5160 | ||||
| 5161 | // Given this: | |||
| 5162 | // | |||
| 5163 | // z = G_AND x, y | |||
| 5164 | // G_ICMP z, 0 | |||
| 5165 | // | |||
| 5166 | // Produce this if the compare is signed: | |||
| 5167 | // | |||
| 5168 | // tst x, y | |||
| 5169 | if (!CmpInst::isUnsigned(P) && LHSDef && | |||
| 5170 | LHSDef->getOpcode() == TargetOpcode::G_AND) { | |||
| 5171 | // Make sure that the RHS is 0. | |||
| 5172 | auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI); | |||
| 5173 | if (!ValAndVReg || ValAndVReg->Value != 0) | |||
| 5174 | return nullptr; | |||
| 5175 | ||||
| 5176 | return emitTST(LHSDef->getOperand(1), | |||
| 5177 | LHSDef->getOperand(2), MIRBuilder); | |||
| 5178 | } | |||
| 5179 | ||||
| 5180 | return nullptr; | |||
| 5181 | } | |||
| 5182 | ||||
| 5183 | bool AArch64InstructionSelector::selectShuffleVector( | |||
| 5184 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 5185 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 5186 | Register Src1Reg = I.getOperand(1).getReg(); | |||
| 5187 | const LLT Src1Ty = MRI.getType(Src1Reg); | |||
| 5188 | Register Src2Reg = I.getOperand(2).getReg(); | |||
| 5189 | const LLT Src2Ty = MRI.getType(Src2Reg); | |||
| 5190 | ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); | |||
| 5191 | ||||
| 5192 | MachineBasicBlock &MBB = *I.getParent(); | |||
| 5193 | MachineFunction &MF = *MBB.getParent(); | |||
| 5194 | LLVMContext &Ctx = MF.getFunction().getContext(); | |||
| 5195 | ||||
| 5196 | // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars if | |||
| 5197 | // it originated from a <1 x T> type. Those should have been lowered into | |||
| 5198 | // G_BUILD_VECTOR earlier. | |||
| 5199 | if (!Src1Ty.isVector() || !Src2Ty.isVector()) { | |||
| 5200 | LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); | |||
| 5201 | return false; | |||
| 5202 | } | |||
| 5203 | ||||
| 5204 | unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; | |||
| 5205 | ||||
| 5206 | SmallVector<Constant *, 64> CstIdxs; | |||
| 5207 | for (int Val : Mask) { | |||
| 5208 | // For now, we just treat any undef index as 0. This should be optimized | |||
| 5209 | // in the future, e.g. to select DUP etc. | |||
| 5210 | Val = Val < 0 ? 0 : Val; | |||
| 5211 | for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { | |||
| 5212 | unsigned Offset = Byte + Val * BytesPerElt; | |||
| 5213 | CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); | |||
| 5214 | } | |||
| 5215 | } | |||
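| | // Worked example (illustrative): for a <2 x s32> shuffle with Mask = [3, 0], | |||
| | // BytesPerElt is 4, so the TBL index vector is [12, 13, 14, 15, 0, 1, 2, 3]: | |||
| | // element 3 of the concatenated sources contributes bytes 12..15 and element | |||
| | // 0 contributes bytes 0..3. | |||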
| 5216 | ||||
| 5217 | // Use a constant pool to load the index vector for TBL. | |||
| 5218 | Constant *CPVal = ConstantVector::get(CstIdxs); | |||
| 5219 | MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB); | |||
| 5220 | if (!IndexLoad) { | |||
| 5221 | LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); | |||
| 5222 | return false; | |||
| 5223 | } | |||
| 5224 | ||||
| 5225 | if (DstTy.getSizeInBits() != 128) { | |||
| 5226 | assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); | |||
| 5227 | // This case can be done with TBL1. | |||
| 5228 | MachineInstr *Concat = | |||
| 5229 | emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB); | |||
| 5230 | if (!Concat) { | |||
| 5231 | LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); | |||
| 5232 | return false; | |||
| 5233 | } | |||
| 5234 | ||||
| 5235 | // The constant pool load will be 64 bits, so we need to convert it to an FPR128 register. | |||
| 5236 | IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, | |||
| 5237 | IndexLoad->getOperand(0).getReg(), MIB); | |||
| 5238 | ||||
| 5239 | auto TBL1 = MIB.buildInstr( | |||
| 5240 | AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, | |||
| 5241 | {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); | |||
| 5242 | constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); | |||
| 5243 | ||||
| 5244 | auto Copy = | |||
| 5245 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) | |||
| 5246 | .addReg(TBL1.getReg(0), 0, AArch64::dsub); | |||
| 5247 | RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); | |||
| 5248 | I.eraseFromParent(); | |||
| 5249 | return true; | |||
| 5250 | } | |||
| 5251 | ||||
| 5252 | // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive | |||
| 5253 | // Q registers for regalloc. | |||
| 5254 | SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; | |||
| 5255 | auto RegSeq = createQTuple(Regs, MIB); | |||
| 5256 | auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, | |||
| 5257 | {RegSeq, IndexLoad->getOperand(0)}); | |||
| 5258 | constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); | |||
| 5259 | I.eraseFromParent(); | |||
| 5260 | return true; | |||
| 5261 | } | |||
| 5262 | ||||
| 5263 | MachineInstr *AArch64InstructionSelector::emitLaneInsert( | |||
| 5264 | std::optional<Register> DstReg, Register SrcReg, Register EltReg, | |||
| 5265 | unsigned LaneIdx, const RegisterBank &RB, | |||
| 5266 | MachineIRBuilder &MIRBuilder) const { | |||
| 5267 | MachineInstr *InsElt = nullptr; | |||
| 5268 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; | |||
| 5269 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | |||
| 5270 | ||||
| 5271 | // Create a register to define with the insert if one wasn't passed in. | |||
| 5272 | if (!DstReg) | |||
| 5273 | DstReg = MRI.createVirtualRegister(DstRC); | |||
| 5274 | ||||
| 5275 | unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); | |||
| 5276 | unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; | |||
| 5277 | ||||
| 5278 | if (RB.getID() == AArch64::FPRRegBankID) { | |||
| 5279 | auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); | |||
| 5280 | InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) | |||
| 5281 | .addImm(LaneIdx) | |||
| 5282 | .addUse(InsSub->getOperand(0).getReg()) | |||
| 5283 | .addImm(0); | |||
| 5284 | } else { | |||
| 5285 | InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) | |||
| 5286 | .addImm(LaneIdx) | |||
| 5287 | .addUse(EltReg); | |||
| 5288 | } | |||
| 5289 | ||||
| 5290 | constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); | |||
| 5291 | return InsElt; | |||
| 5292 | } | |||
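| | // Usage note (illustrative): for a 32-bit element, getInsertVecEltOpInfo | |||
| | // yields INSvi32lane on the FPR bank (a vector-to-vector insert, hence the | |||
| | // emitScalarToVector widening above) and INSvi32gpr on the GPR bank. | |||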
| 5293 | ||||
| 5294 | bool AArch64InstructionSelector::selectUSMovFromExtend( | |||
| 5295 | MachineInstr &MI, MachineRegisterInfo &MRI) { | |||
| 5296 | if (MI.getOpcode() != TargetOpcode::G_SEXT && | |||
| 5297 | MI.getOpcode() != TargetOpcode::G_ZEXT && | |||
| 5298 | MI.getOpcode() != TargetOpcode::G_ANYEXT) | |||
| 5299 | return false; | |||
| 5300 | bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; | |||
| 5301 | const Register DefReg = MI.getOperand(0).getReg(); | |||
| 5302 | const LLT DstTy = MRI.getType(DefReg); | |||
| 5303 | unsigned DstSize = DstTy.getSizeInBits(); | |||
| 5304 | ||||
| 5305 | if (DstSize != 32 && DstSize != 64) | |||
| 5306 | return false; | |||
| 5307 | ||||
| 5308 | MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT, | |||
| 5309 | MI.getOperand(1).getReg(), MRI); | |||
| 5310 | int64_t Lane; | |||
| 5311 | if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane))) | |||
| 5312 | return false; | |||
| 5313 | Register Src0 = Extract->getOperand(1).getReg(); | |||
| 5314 | ||||
| 5315 | const LLT &VecTy = MRI.getType(Src0); | |||
| 5316 | ||||
| 5317 | if (VecTy.getSizeInBits() != 128) { | |||
| 5318 | const MachineInstr *ScalarToVector = emitScalarToVector( | |||
| 5319 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB); | |||
| 5320 | assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!"); | |||
| 5321 | Src0 = ScalarToVector->getOperand(0).getReg(); | |||
| 5322 | } | |||
| 5323 | ||||
| 5324 | unsigned Opcode; | |||
| 5325 | if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) | |||
| 5326 | Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; | |||
| 5327 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) | |||
| 5328 | Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; | |||
| 5329 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) | |||
| 5330 | Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; | |||
| 5331 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) | |||
| 5332 | Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; | |||
| 5333 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) | |||
| 5334 | Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8; | |||
| 5335 | else | |||
| 5336 | llvm_unreachable("Unexpected type combo for S/UMov!")::llvm::llvm_unreachable_internal("Unexpected type combo for S/UMov!" , "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5336); | |||
| 5337 | ||||
| 5338 | // We may need to generate one of these, depending on the type and sign of the | |||
| 5339 | // input: | |||
| 5340 | // DstReg = SMOV Src0, Lane; | |||
| 5341 | // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; | |||
| 5342 | MachineInstr *ExtI = nullptr; | |||
| 5343 | if (DstSize == 64 && !IsSigned) { | |||
| 5344 | Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | |||
| 5345 | MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane); | |||
| 5346 | ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) | |||
| 5347 | .addImm(0) | |||
| 5348 | .addUse(NewReg) | |||
| 5349 | .addImm(AArch64::sub_32); | |||
| 5350 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); | |||
| 5351 | } else | |||
| 5352 | ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane); | |||
| 5353 | ||||
| 5354 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); | |||
| 5355 | MI.eraseFromParent(); | |||
| 5356 | return true; | |||
| 5357 | } | |||
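| | // Example (illustrative): | |||
| | //   %e:fpr(s16) = G_EXTRACT_VECTOR_ELT %v:fpr(<8 x s16>), 2 | |||
| | //   %d:gpr(s32) = G_SEXT %e | |||
| | // selects to SMOVvi16to32 ("smov w0, v0.h[2]"); the unsigned variant uses | |||
| | // UMOVvi16 ("umov w0, v0.h[2]"), and an unsigned 64-bit result wraps the | |||
| | // 32-bit UMOV in the SUBREG_TO_REG emitted above. | |||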
| 5358 | ||||
| 5359 | bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, | |||
| 5360 | MachineRegisterInfo &MRI) { | |||
| 5361 | assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); | |||
| 5362 | ||||
| 5363 | // Get information on the destination. | |||
| 5364 | Register DstReg = I.getOperand(0).getReg(); | |||
| 5365 | const LLT DstTy = MRI.getType(DstReg); | |||
| 5366 | unsigned VecSize = DstTy.getSizeInBits(); | |||
| 5367 | ||||
| 5368 | // Get information on the element we want to insert into the destination. | |||
| 5369 | Register EltReg = I.getOperand(2).getReg(); | |||
| 5370 | const LLT EltTy = MRI.getType(EltReg); | |||
| 5371 | unsigned EltSize = EltTy.getSizeInBits(); | |||
| 5372 | if (EltSize < 16 || EltSize > 64) | |||
| 5373 | return false; // Don't support all element types yet. | |||
| 5374 | ||||
| 5375 | // Find the definition of the index. Bail out if it's not defined by a | |||
| 5376 | // G_CONSTANT. | |||
| 5377 | Register IdxReg = I.getOperand(3).getReg(); | |||
| 5378 | auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI); | |||
| 5379 | if (!VRegAndVal) | |||
| 5380 | return false; | |||
| 5381 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); | |||
| 5382 | ||||
| 5383 | // Perform the lane insert. | |||
| 5384 | Register SrcReg = I.getOperand(1).getReg(); | |||
| 5385 | const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); | |||
| 5386 | ||||
| 5387 | if (VecSize < 128) { | |||
| 5388 | // If the vector we're inserting into is smaller than 128 bits, widen it | |||
| 5389 | // to 128 to do the insert. | |||
| 5390 | MachineInstr *ScalarToVec = | |||
| 5391 | emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); | |||
| 5392 | if (!ScalarToVec) | |||
| 5393 | return false; | |||
| 5394 | SrcReg = ScalarToVec->getOperand(0).getReg(); | |||
| 5395 | } | |||
| 5396 | ||||
| 5397 | // Create an insert into a new FPR128 register. | |||
| 5398 | // Note that if our vector is already 128 bits, we end up emitting an extra | |||
| 5399 | // register. | |||
| 5400 | MachineInstr *InsMI = | |||
| 5401 | emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB); | |||
| 5402 | ||||
| 5403 | if (VecSize < 128) { | |||
| 5404 | // If we had to widen to perform the insert, then we have to demote back to | |||
| 5405 | // the original size to get the result we want. | |||
| 5406 | Register DemoteVec = InsMI->getOperand(0).getReg(); | |||
| 5407 | const TargetRegisterClass *RC = | |||
| 5408 | getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI)); | |||
| 5409 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { | |||
| 5410 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); | |||
| 5411 | return false; | |||
| 5412 | } | |||
| 5413 | unsigned SubReg = 0; | |||
| 5414 | if (!getSubRegForClass(RC, TRI, SubReg)) | |||
| 5415 | return false; | |||
| 5416 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { | |||
| 5417 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << VecSize << "\n"; } } while (false) | |||
| 5418 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << VecSize << "\n"; } } while (false); | |||
| 5419 | return false; | |||
| 5420 | } | |||
| 5421 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | |||
| 5422 | .addReg(DemoteVec, 0, SubReg); | |||
| 5423 | RBI.constrainGenericRegister(DstReg, *RC, MRI); | |||
| 5424 | } else { | |||
| 5425 | // No widening needed. | |||
| 5426 | InsMI->getOperand(0).setReg(DstReg); | |||
| 5427 | constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); | |||
| 5428 | } | |||
| 5429 | ||||
| 5430 | I.eraseFromParent(); | |||
| 5431 | return true; | |||
| 5432 | } | |||
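| | // Example (illustrative): inserting an s32 into a <2 x s32> widens the | |||
| | // source vector to an FPR128, performs the lane insert on the 128-bit | |||
| | // register, and then demotes by copying the low 64 bits back out through | |||
| | // the dsub subregister. | |||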
| 5433 | ||||
| 5434 | MachineInstr * | |||
| 5435 | AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, | |||
| 5436 | MachineIRBuilder &MIRBuilder, | |||
| 5437 | MachineRegisterInfo &MRI) { | |||
| 5438 | LLT DstTy = MRI.getType(Dst); | |||
| 5439 | unsigned DstSize = DstTy.getSizeInBits(); | |||
| 5440 | if (CV->isNullValue()) { | |||
| 5441 | if (DstSize == 128) { | |||
| 5442 | auto Mov = | |||
| 5443 | MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); | |||
| 5444 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); | |||
| 5445 | return &*Mov; | |||
| 5446 | } | |||
| 5447 | ||||
| 5448 | if (DstSize == 64) { | |||
| 5449 | auto Mov = | |||
| 5450 | MIRBuilder | |||
| 5451 | .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) | |||
| 5452 | .addImm(0); | |||
| 5453 | auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) | |||
| 5454 | .addReg(Mov.getReg(0), 0, AArch64::dsub); | |||
| 5455 | RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); | |||
| 5456 | return &*Copy; | |||
| 5457 | } | |||
| 5458 | } | |||
| 5459 | ||||
| 5460 | auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); | |||
| 5461 | if (!CPLoad) { | |||
| 5462 | LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!"); | |||
| 5463 | return nullptr; | |||
| 5464 | } | |||
| 5465 | ||||
| 5466 | auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); | |||
| 5467 | RBI.constrainGenericRegister( | |||
| 5468 | Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); | |||
| 5469 | return &*Copy; | |||
| 5470 | } | |||
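| | // Example (illustrative): an all-zero <4 x s32> constant is emitted | |||
| | // directly as "movi v0.2d, #0" (MOVIv2d_ns); a 64-bit zero additionally | |||
| | // copies out the dsub subregister. Any other constant vector falls back to | |||
| | // a literal-pool load via emitLoadFromConstantPool. | |||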
| 5471 | ||||
| 5472 | bool AArch64InstructionSelector::tryOptConstantBuildVec( | |||
| 5473 | MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { | |||
| 5474 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); | |||
| 5475 | unsigned DstSize = DstTy.getSizeInBits(); | |||
| 5476 | assert(DstSize <= 128 && "Unexpected build_vec type!"); | |||
| 5477 | if (DstSize < 32) | |||
| 5478 | return false; | |||
| 5479 | // Check if we're building a constant vector, in which case we want to | |||
| 5480 | // generate a constant pool load instead of a vector insert sequence. | |||
| 5481 | SmallVector<Constant *, 16> Csts; | |||
| 5482 | for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { | |||
| 5483 | // Try to find G_CONSTANT or G_FCONSTANT | |||
| 5484 | auto *OpMI = | |||
| 5485 | getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); | |||
| 5486 | if (OpMI) | |||
| 5487 | Csts.emplace_back( | |||
| 5488 | const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); | |||
| 5489 | else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, | |||
| 5490 | I.getOperand(Idx).getReg(), MRI))) | |||
| 5491 | Csts.emplace_back( | |||
| 5492 | const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); | |||
| 5493 | else | |||
| 5494 | return false; | |||
| 5495 | } | |||
| 5496 | Constant *CV = ConstantVector::get(Csts); | |||
| 5497 | if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) | |||
| 5498 | return false; | |||
| 5499 | I.eraseFromParent(); | |||
| 5500 | return true; | |||
| 5501 | } | |||
| 5502 | ||||
| 5503 | bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( | |||
| 5504 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 5505 | // Given: | |||
| 5506 | // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef | |||
| 5507 | // | |||
| 5508 | // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. | |||
| 5509 | Register Dst = I.getOperand(0).getReg(); | |||
| 5510 | Register EltReg = I.getOperand(1).getReg(); | |||
| 5511 | LLT EltTy = MRI.getType(EltReg); | |||
| 5512 | // If the destination vector isn't on the same register bank as its element, | |||
| 5513 | // then this can't be a SUBREG_TO_REG. | |||
| 5514 | const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); | |||
| 5515 | const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI); | |||
| 5516 | if (EltRB != DstRB) | |||
| 5517 | return false; | |||
| 5518 | if (any_of(make_range(I.operands_begin() + 2, I.operands_end()), | |||
| 5519 | [&MRI](const MachineOperand &Op) { | |||
| 5520 | return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), | |||
| 5521 | MRI); | |||
| 5522 | })) | |||
| 5523 | return false; | |||
| 5524 | unsigned SubReg; | |||
| 5525 | const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB); | |||
| 5526 | if (!EltRC) | |||
| 5527 | return false; | |||
| 5528 | const TargetRegisterClass *DstRC = | |||
| 5529 | getRegClassForTypeOnBank(MRI.getType(Dst), DstRB); | |||
| 5530 | if (!DstRC) | |||
| 5531 | return false; | |||
| 5532 | if (!getSubRegForClass(EltRC, TRI, SubReg)) | |||
| 5533 | return false; | |||
| 5534 | auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {}) | |||
| 5535 | .addImm(0) | |||
| 5536 | .addUse(EltReg) | |||
| 5537 | .addImm(SubReg); | |||
| 5538 | I.eraseFromParent(); | |||
| 5539 | constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI); | |||
| 5540 | return RBI.constrainGenericRegister(Dst, *DstRC, MRI); | |||
| 5541 | } | |||
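| | // Example (illustrative): | |||
| | //   %vec:fpr(<2 x s64>) = G_BUILD_VECTOR %elt:fpr(s64), %undef | |||
| | // becomes | |||
| | //   %vec = SUBREG_TO_REG 0, %elt, %subreg.dsub | |||
| | // which costs nothing at register allocation time, instead of a lane | |||
| | // insert sequence. | |||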
| 5542 | ||||
| 5543 | bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, | |||
| 5544 | MachineRegisterInfo &MRI) { | |||
| 5545 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); | |||
| 5546 | // Until we port more of the optimized selections, just use a vector | |||
| 5547 | // insert sequence. | |||
| 5548 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | |||
| 5549 | const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); | |||
| 5550 | unsigned EltSize = EltTy.getSizeInBits(); | |||
| 5551 | ||||
| 5552 | if (tryOptConstantBuildVec(I, DstTy, MRI)) | |||
| 5553 | return true; | |||
| 5554 | if (tryOptBuildVecToSubregToReg(I, MRI)) | |||
| 5555 | return true; | |||
| 5556 | ||||
| 5557 | if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64) | |||
| 5558 | return false; // Don't support all element types yet. | |||
| 5559 | const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); | |||
| 5560 | ||||
| 5561 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; | |||
| 5562 | MachineInstr *ScalarToVec = | |||
| 5563 | emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, | |||
| 5564 | I.getOperand(1).getReg(), MIB); | |||
| 5565 | if (!ScalarToVec) | |||
| 5566 | return false; | |||
| 5567 | ||||
| 5568 | Register DstVec = ScalarToVec->getOperand(0).getReg(); | |||
| 5569 | unsigned DstSize = DstTy.getSizeInBits(); | |||
| 5570 | ||||
| 5571 | // Keep track of the last MI we inserted. Later on, we might be able to save | |||
| 5572 | // a copy using it. | |||
| 5573 | MachineInstr *PrevMI = nullptr; | |||
| 5574 | for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { | |||
| 5575 | // Note that if we don't do a subregister copy, we can end up making an | |||
| 5576 | // extra register. | |||
| 5577 | PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(), | |||
| 5578 | i - 1, RB, MIB); | |||
| 5579 | DstVec = PrevMI->getOperand(0).getReg(); | |||
| 5580 | } | |||
| 5581 | ||||
| 5582 | // If DstTy's size in bits is less than 128, then emit a subregister copy | |||
| 5583 | // from DstVec to the last register we've defined. | |||
| 5584 | if (DstSize < 128) { | |||
| 5585 | // Force this to be FPR using the destination vector. | |||
| 5586 | const TargetRegisterClass *RC = | |||
| 5587 | getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI)); | |||
| 5588 | if (!RC) | |||
| 5589 | return false; | |||
| 5590 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { | |||
| 5591 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); | |||
| 5592 | return false; | |||
| 5593 | } | |||
| 5594 | ||||
| 5595 | unsigned SubReg = 0; | |||
| 5596 | if (!getSubRegForClass(RC, TRI, SubReg)) | |||
| 5597 | return false; | |||
| 5598 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { | |||
| 5599 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << DstSize << "\n"; } } while (false) | |||
| 5600 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << DstSize << "\n"; } } while (false); | |||
| 5601 | return false; | |||
| 5602 | } | |||
| 5603 | ||||
| 5604 | Register Reg = MRI.createVirtualRegister(RC); | |||
| 5605 | Register DstReg = I.getOperand(0).getReg(); | |||
| 5606 | ||||
| 5607 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg); | |||
| 5608 | MachineOperand &RegOp = I.getOperand(1); | |||
| 5609 | RegOp.setReg(Reg); | |||
| 5610 | RBI.constrainGenericRegister(DstReg, *RC, MRI); | |||
| 5611 | } else { | |||
| 5612 | // We don't need a subregister copy. Save a copy by re-using the | |||
| 5613 | // destination register on the final insert. | |||
| 5614 | assert(PrevMI && "PrevMI was null?"); | |||
| 5615 | PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); | |||
| 5616 | constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); | |||
| 5617 | } | |||
| 5618 | ||||
| 5619 | I.eraseFromParent(); | |||
| 5620 | return true; | |||
| 5621 | } | |||
| 5622 | ||||
| 5623 | bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, | |||
| 5624 | unsigned NumVecs, | |||
| 5625 | MachineInstr &I) { | |||
| 5626 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); | |||
| 5627 | assert(Opc && "Expected an opcode?"); | |||
| 5628 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors"); | |||
| 5629 | auto &MRI = *MIB.getMRI(); | |||
| 5630 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | |||
| 5631 | unsigned Size = Ty.getSizeInBits(); | |||
| 5632 | assert((Size == 64 || Size == 128) && | |||
| 5633 | "Destination must be 64 bits or 128 bits?"); | |||
| 5634 | unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; | |||
| 5635 | auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg(); | |||
| 5636 | assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?"); | |||
| 5637 | auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr}); | |||
| 5638 | Load.cloneMemRefs(I); | |||
| 5639 | constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); | |||
| 5640 | Register SelectedLoadDst = Load->getOperand(0).getReg(); | |||
| 5641 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { | |||
| 5642 | auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {}) | |||
| 5643 | .addReg(SelectedLoadDst, 0, SubReg + Idx); | |||
| 5644 | // Emit the subreg copies and immediately select them. | |||
| 5645 | // FIXME: We should refactor our copy code into an emitCopy helper and | |||
| 5646 | // clean up uses of this pattern elsewhere in the selector. | |||
| 5647 | selectCopy(*Vec, TII, MRI, TRI, RBI); | |||
| 5648 | } | |||
| 5649 | return true; | |||
| 5650 | } | |||
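| | // Example (illustrative): an @llvm.aarch64.neon.ld2 of two <4 x s32> | |||
| | // values selects LD2Twov4s, whose single def is a QQ register tuple; the | |||
| | // two destination vectors are then peeled off as COPYs from qsub0 and | |||
| | // qsub1 (dsub0/dsub1 for 64-bit vectors). | |||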
| 5651 | ||||
| 5652 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( | |||
| 5653 | MachineInstr &I, MachineRegisterInfo &MRI) { | |||
| 5654 | // Find the intrinsic ID. | |||
| 5655 | unsigned IntrinID = I.getIntrinsicID(); | |||
| 5656 | ||||
| 5657 | const LLT S8 = LLT::scalar(8); | |||
| 5658 | const LLT S16 = LLT::scalar(16); | |||
| 5659 | const LLT S32 = LLT::scalar(32); | |||
| 5660 | const LLT S64 = LLT::scalar(64); | |||
| 5661 | const LLT P0 = LLT::pointer(0, 64); | |||
| 5662 | // Select the instruction. | |||
| 5663 | switch (IntrinID) { | |||
| 5664 | default: | |||
| 5665 | return false; | |||
| 5666 | case Intrinsic::aarch64_ldxp: | |||
| 5667 | case Intrinsic::aarch64_ldaxp: { | |||
| 5668 | auto NewI = MIB.buildInstr( | |||
| 5669 | IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, | |||
| 5670 | {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, | |||
| 5671 | {I.getOperand(3)}); | |||
| 5672 | NewI.cloneMemRefs(I); | |||
| 5673 | constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); | |||
| 5674 | break; | |||
| 5675 | } | |||
| 5676 | case Intrinsic::trap: | |||
| 5677 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); | |||
| 5678 | break; | |||
| 5679 | case Intrinsic::debugtrap: | |||
| 5680 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); | |||
| 5681 | break; | |||
| 5682 | case Intrinsic::ubsantrap: | |||
| 5683 | MIB.buildInstr(AArch64::BRK, {}, {}) | |||
| 5684 | .addImm(I.getOperand(1).getImm() | ('U' << 8)); | |||
| 5685 | break; | |||
| 5686 | case Intrinsic::aarch64_neon_ld2: { | |||
| 5687 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | |||
| 5688 | unsigned Opc = 0; | |||
| 5689 | if (Ty == LLT::fixed_vector(8, S8)) | |||
| 5690 | Opc = AArch64::LD2Twov8b; | |||
| 5691 | else if (Ty == LLT::fixed_vector(16, S8)) | |||
| 5692 | Opc = AArch64::LD2Twov16b; | |||
| 5693 | else if (Ty == LLT::fixed_vector(4, S16)) | |||
| 5694 | Opc = AArch64::LD2Twov4h; | |||
| 5695 | else if (Ty == LLT::fixed_vector(8, S16)) | |||
| 5696 | Opc = AArch64::LD2Twov8h; | |||
| 5697 | else if (Ty == LLT::fixed_vector(2, S32)) | |||
| 5698 | Opc = AArch64::LD2Twov2s; | |||
| 5699 | else if (Ty == LLT::fixed_vector(4, S32)) | |||
| 5700 | Opc = AArch64::LD2Twov4s; | |||
| 5701 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | |||
| 5702 | Opc = AArch64::LD2Twov2d; | |||
| 5703 | else if (Ty == S64 || Ty == P0) | |||
| 5704 | Opc = AArch64::LD1Twov1d; | |||
| 5705 | else | |||
| 5706 | llvm_unreachable("Unexpected type for ld2!")::llvm::llvm_unreachable_internal("Unexpected type for ld2!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5706); | |||
| 5707 | selectVectorLoadIntrinsic(Opc, 2, I); | |||
| 5708 | break; | |||
| 5709 | } | |||
| 5710 | case Intrinsic::aarch64_neon_ld4: { | |||
| 5711 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | |||
| 5712 | unsigned Opc = 0; | |||
| 5713 | if (Ty == LLT::fixed_vector(8, S8)) | |||
| 5714 | Opc = AArch64::LD4Fourv8b; | |||
| 5715 | else if (Ty == LLT::fixed_vector(16, S8)) | |||
| 5716 | Opc = AArch64::LD4Fourv16b; | |||
| 5717 | else if (Ty == LLT::fixed_vector(4, S16)) | |||
| 5718 | Opc = AArch64::LD4Fourv4h; | |||
| 5719 | else if (Ty == LLT::fixed_vector(8, S16)) | |||
| 5720 | Opc = AArch64::LD4Fourv8h; | |||
| 5721 | else if (Ty == LLT::fixed_vector(2, S32)) | |||
| 5722 | Opc = AArch64::LD4Fourv2s; | |||
| 5723 | else if (Ty == LLT::fixed_vector(4, S32)) | |||
| 5724 | Opc = AArch64::LD4Fourv4s; | |||
| 5725 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | |||
| 5726 | Opc = AArch64::LD4Fourv2d; | |||
| 5727 | else if (Ty == S64 || Ty == P0) | |||
| 5728 | Opc = AArch64::LD1Fourv1d; | |||
| 5729 | else | |||
| 5730 | llvm_unreachable("Unexpected type for ld4!")::llvm::llvm_unreachable_internal("Unexpected type for ld4!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5730); | |||
| 5731 | selectVectorLoadIntrinsic(Opc, 4, I); | |||
| 5732 | break; | |||
| 5733 | } | |||
| 5734 | case Intrinsic::aarch64_neon_st2: { | |||
| 5735 | Register Src1 = I.getOperand(1).getReg(); | |||
| 5736 | Register Src2 = I.getOperand(2).getReg(); | |||
| 5737 | Register Ptr = I.getOperand(3).getReg(); | |||
| 5738 | LLT Ty = MRI.getType(Src1); | |||
| 5739 | unsigned Opc; | |||
| 5740 | if (Ty == LLT::fixed_vector(8, S8)) | |||
| 5741 | Opc = AArch64::ST2Twov8b; | |||
| 5742 | else if (Ty == LLT::fixed_vector(16, S8)) | |||
| 5743 | Opc = AArch64::ST2Twov16b; | |||
| 5744 | else if (Ty == LLT::fixed_vector(4, S16)) | |||
| 5745 | Opc = AArch64::ST2Twov4h; | |||
| 5746 | else if (Ty == LLT::fixed_vector(8, S16)) | |||
| 5747 | Opc = AArch64::ST2Twov8h; | |||
| 5748 | else if (Ty == LLT::fixed_vector(2, S32)) | |||
| 5749 | Opc = AArch64::ST2Twov2s; | |||
| 5750 | else if (Ty == LLT::fixed_vector(4, S32)) | |||
| 5751 | Opc = AArch64::ST2Twov4s; | |||
| 5752 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | |||
| 5753 | Opc = AArch64::ST2Twov2d; | |||
| 5754 | else if (Ty == S64 || Ty == P0) | |||
| 5755 | Opc = AArch64::ST1Twov1d; | |||
| 5756 | else | |||
| 5757 | llvm_unreachable("Unexpected type for st2!")::llvm::llvm_unreachable_internal("Unexpected type for st2!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5757); | |||
| 5758 | SmallVector<Register, 2> Regs = {Src1, Src2}; | |||
| 5759 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) | |||
| 5760 | : createDTuple(Regs, MIB); | |||
| 5761 | auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr}); | |||
| 5762 | Store.cloneMemRefs(I); | |||
| 5763 | constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); | |||
| 5764 | break; | |||
| 5765 | } | |||
| 5766 | case Intrinsic::aarch64_mops_memset_tag: { | |||
| 5767 | // Transform | |||
| 5768 | //   %dst:gpr(p0) = \ | |||
| 5769 | //       G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), \ | |||
| 5770 | //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) | |||
| 5771 | // where %dst is updated, into | |||
| 5772 | //   %Rd:GPR64common, %Rn:GPR64 = \ | |||
| 5773 | //       MOPSMemorySetTaggingPseudo \ | |||
| 5774 | //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 | |||
| 5775 | // where Rd and Rn are tied. | |||
| 5776 | // It is expected that %val has been extended to s64 in legalization. | |||
| 5777 | // Note that the order of the size/value operands is swapped. | |||
| 5778 | ||||
| 5779 | Register DstDef = I.getOperand(0).getReg(); | |||
| 5780 | // I.getOperand(1) is the intrinsic function | |||
| 5781 | Register DstUse = I.getOperand(2).getReg(); | |||
| 5782 | Register ValUse = I.getOperand(3).getReg(); | |||
| 5783 | Register SizeUse = I.getOperand(4).getReg(); | |||
| 5784 | ||||
| 5785 | // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. | |||
| 5786 | // Therefore an additional virtual register is required for the updated size | |||
| 5787 | // operand. This value is not accessible via the semantics of the intrinsic. | |||
| 5788 | Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64)); | |||
| 5789 | ||||
| 5790 | auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo, | |||
| 5791 | {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); | |||
| 5792 | Memset.cloneMemRefs(I); | |||
| 5793 | constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); | |||
| 5794 | break; | |||
| 5795 | } | |||
| 5796 | } | |||
| 5797 | ||||
| 5798 | I.eraseFromParent(); | |||
| 5799 | return true; | |||
| 5800 | } | |||
| 5801 | ||||
| 5802 | bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, | |||
| 5803 | MachineRegisterInfo &MRI) { | |||
| 5804 | unsigned IntrinID = I.getIntrinsicID(); | |||
| 5805 | ||||
| 5806 | switch (IntrinID) { | |||
| 5807 | default: | |||
| 5808 | break; | |||
| 5809 | case Intrinsic::aarch64_crypto_sha1h: { | |||
| 5810 | Register DstReg = I.getOperand(0).getReg(); | |||
| 5811 | Register SrcReg = I.getOperand(2).getReg(); | |||
| 5812 | ||||
| 5813 | // FIXME: Should this be an assert? | |||
| 5814 | if (MRI.getType(DstReg).getSizeInBits() != 32 || | |||
| 5815 | MRI.getType(SrcReg).getSizeInBits() != 32) | |||
| 5816 | return false; | |||
| 5817 | ||||
| 5818 | // The operation has to happen on FPRs. Set up some new FPR registers for | |||
| 5819 | // the source and destination if they are on GPRs. | |||
| 5820 | if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { | |||
| 5821 | SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); | |||
| 5822 | MIB.buildCopy({SrcReg}, {I.getOperand(2)}); | |||
| 5823 | ||||
| 5824 | // Make sure the copy ends up getting constrained properly. | |||
| 5825 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), | |||
| 5826 | AArch64::GPR32RegClass, MRI); | |||
| 5827 | } | |||
| 5828 | ||||
| 5829 | if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) | |||
| 5830 | DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); | |||
| 5831 | ||||
| 5832 | // Actually insert the instruction. | |||
| 5833 | auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); | |||
| 5834 | constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); | |||
| 5835 | ||||
| 5836 | // Did we create a new register for the destination? | |||
| 5837 | if (DstReg != I.getOperand(0).getReg()) { | |||
| 5838 | // Yep. Copy the result of the instruction back into the original | |||
| 5839 | // destination. | |||
| 5840 | MIB.buildCopy({I.getOperand(0)}, {DstReg}); | |||
| 5841 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), | |||
| 5842 | AArch64::GPR32RegClass, MRI); | |||
| 5843 | } | |||
| 5844 | ||||
| 5845 | I.eraseFromParent(); | |||
| 5846 | return true; | |||
| 5847 | } | |||
| 5848 | case Intrinsic::ptrauth_sign: { | |||
| 5849 | Register DstReg = I.getOperand(0).getReg(); | |||
| 5850 | Register ValReg = I.getOperand(2).getReg(); | |||
| 5851 | uint64_t Key = I.getOperand(3).getImm(); | |||
| 5852 | Register DiscReg = I.getOperand(4).getReg(); | |||
| 5853 | auto DiscVal = getIConstantVRegVal(DiscReg, MRI); | |||
| 5854 | bool IsDiscZero = DiscVal && DiscVal->isZero(); | |||
| 5855 | ||||
| 5856 | if (Key > AArch64PACKey::LAST) | |||
| 5857 | return false; | |||
| 5858 | ||||
| 5859 | unsigned Opcodes[][4] = { | |||
| 5860 | {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB}, | |||
| 5861 | {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}}; | |||
| 5862 | unsigned Opcode = Opcodes[IsDiscZero][Key]; | |||
| 5863 | ||||
| 5864 | auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg}); | |||
| 5865 | ||||
| 5866 | if (!IsDiscZero) { | |||
| 5867 | PAC.addUse(DiscReg); | |||
| 5868 | RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI); | |||
| 5869 | } | |||
| 5870 | ||||
| 5871 | RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); | |||
| 5872 | I.eraseFromParent(); | |||
| 5873 | return true; | |||
| 5874 | } | |||
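| | // Usage note (illustrative): Key indexes AArch64PACKey (IA, IB, DA, DB), | |||
| | // and a provably zero discriminator selects the Z-form row, e.g. | |||
| | // @llvm.ptrauth.sign with the IA key and a zero discriminator emits PACIZA | |||
| | // rather than PACIA with an extra discriminator register operand. | |||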
| 5875 | case Intrinsic::ptrauth_strip: { | |||
| 5876 | Register DstReg = I.getOperand(0).getReg(); | |||
| 5877 | Register ValReg = I.getOperand(2).getReg(); | |||
| 5878 | uint64_t Key = I.getOperand(3).getImm(); | |||
| 5879 | ||||
| 5880 | if (Key > AArch64PACKey::LAST) | |||
| 5881 | return false; | |||
| 5882 | unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key); | |||
| 5883 | ||||
| 5884 | MIB.buildInstr(Opcode, {DstReg}, {ValReg}); | |||
| 5885 | ||||
| 5886 | RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); | |||
| 5887 | RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI); | |||
| 5888 | I.eraseFromParent(); | |||
| 5889 | return true; | |||
| 5890 | } | |||
| 5891 | case Intrinsic::frameaddress: | |||
| 5892 | case Intrinsic::returnaddress: { | |||
| 5893 | MachineFunction &MF = *I.getParent()->getParent(); | |||
| 5894 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 5895 | ||||
| 5896 | unsigned Depth = I.getOperand(2).getImm(); | |||
| 5897 | Register DstReg = I.getOperand(0).getReg(); | |||
| 5898 | RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); | |||
| 5899 | ||||
| 5900 | if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { | |||
| 5901 | if (!MFReturnAddr) { | |||
| 5902 | // Insert the copy from LR/X30 into the entry block, before it can be | |||
| 5903 | // clobbered by anything. | |||
| 5904 | MFI.setReturnAddressIsTaken(true); | |||
| 5905 | MFReturnAddr = getFunctionLiveInPhysReg( | |||
| 5906 | MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc()); | |||
| 5907 | } | |||
| 5908 | ||||
| 5909 | if (STI.hasPAuth()) { | |||
| 5910 | MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); | |||
| 5911 | } else { | |||
| 5912 | MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); | |||
| 5913 | MIB.buildInstr(AArch64::XPACLRI); | |||
| 5914 | MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); | |||
| 5915 | } | |||
| 5916 | ||||
| 5917 | I.eraseFromParent(); | |||
| 5918 | return true; | |||
| 5919 | } | |||
| 5920 | ||||
| 5921 | MFI.setFrameAddressIsTaken(true); | |||
| 5922 | Register FrameAddr(AArch64::FP); | |||
| 5923 | while (Depth--) { | |||
| 5924 | Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); | |||
| 5925 | auto Ldr = | |||
| 5926 | MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0); | |||
| 5927 | constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); | |||
| 5928 | FrameAddr = NextFrame; | |||
| 5929 | } | |||
| 5930 | ||||
| 5931 | if (IntrinID == Intrinsic::frameaddress) | |||
| 5932 | MIB.buildCopy({DstReg}, {FrameAddr}); | |||
| 5933 | else { | |||
| 5934 | MFI.setReturnAddressIsTaken(true); | |||
| 5935 | ||||
| 5936 | if (STI.hasPAuth()) { | |||
| 5937 | Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | |||
| 5938 | MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); | |||
| 5939 | MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); | |||
| 5940 | } else { | |||
| 5941 | MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) | |||
| 5942 | .addImm(1); | |||
| 5943 | MIB.buildInstr(AArch64::XPACLRI); | |||
| 5944 | MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); | |||
| 5945 | } | |||
| 5946 | } | |||
| 5947 | ||||
| 5948 | I.eraseFromParent(); | |||
| 5949 | return true; | |||
| 5950 | } | |||
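| | // Sketch (illustrative): @llvm.returnaddress(i32 1) walks one frame link | |||
| | // ("ldr x8, [fp]"), loads the saved return address at byte offset 8 of | |||
| | // that frame ("ldr x9, [x8, #8]", the scaled #1 above), and strips any | |||
| | // pointer-authentication signature with XPACI, or via XPACLRI through LR | |||
| | // when PAuth instructions are unavailable. | |||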
| 5951 | case Intrinsic::swift_async_context_addr: | |||
| 5952 | auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()}, | |||
| 5953 | {Register(AArch64::FP)}) | |||
| 5954 | .addImm(8) | |||
| 5955 | .addImm(0); | |||
| 5956 | constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI); | |||
| 5957 | ||||
| 5958 | MF->getFrameInfo().setFrameAddressIsTaken(true); | |||
| 5959 | MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); | |||
| 5960 | I.eraseFromParent(); | |||
| 5961 | return true; | |||
| 5962 | } | |||
| 5963 | return false; | |||
| 5964 | } | |||
| 5965 | ||||
| 5966 | InstructionSelector::ComplexRendererFns | |||
| 5967 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { | |||
| 5968 | auto MaybeImmed = getImmedFromMO(Root); | |||
| 5969 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) | |||
| 5970 | return std::nullopt; | |||
| 5971 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; | |||
| 5972 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | |||
| 5973 | } | |||
| 5974 | ||||
| 5975 | InstructionSelector::ComplexRendererFns | |||
| 5976 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { | |||
| 5977 | auto MaybeImmed = getImmedFromMO(Root); | |||
| 5978 | if (MaybeImmed == std::nullopt || *MaybeImmed > 31) | |||
| 5979 | return std::nullopt; | |||
| 5980 | uint64_t Enc = 31 - *MaybeImmed; | |||
| 5981 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | |||
| 5982 | } | |||
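| | // Note (illustrative, assuming the usual shift-to-bitfield-move lowering | |||
| | // "lsl Wd, Wn, #sh" == "ubfm Wd, Wn, #((32 - sh) & 31), #(31 - sh)"): | |||
| | // ShiftA renders the immr operand and ShiftB the imms operand, so a shift | |||
| | // by 3 yields immr = 29 and imms = 28. | |||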
| 5983 | ||||
| 5984 | InstructionSelector::ComplexRendererFns | |||
| 5985 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { | |||
| 5986 | auto MaybeImmed = getImmedFromMO(Root); | |||
| 5987 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) | |||
| 5988 | return std::nullopt; | |||
| 5989 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; | |||
| 5990 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | |||
| 5991 | } | |||
| 5992 | ||||
| 5993 | InstructionSelector::ComplexRendererFns | |||
| 5994 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { | |||
| 5995 | auto MaybeImmed = getImmedFromMO(Root); | |||
| 5996 | if (MaybeImmed == std::nullopt || *MaybeImmed > 63) | |||
| 5997 | return std::nullopt; | |||
| 5998 | uint64_t Enc = 63 - *MaybeImmed; | |||
| 5999 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | |||
| 6000 | } | |||
| 6001 | ||||
| 6002 | /// Helper to select an immediate value that can be represented as a 12-bit | |||
| 6003 | /// value shifted left by either 0 or 12. If it is possible to do so, return | |||
| 6004 | /// the immediate and shift value. If not, return std::nullopt. | |||
| 6005 | /// | |||
| 6006 | /// Used by selectArithImmed and selectNegArithImmed. | |||
| 6007 | InstructionSelector::ComplexRendererFns | |||
| 6008 | AArch64InstructionSelector::select12BitValueWithLeftShift( | |||
| 6009 | uint64_t Immed) const { | |||
| 6010 | unsigned ShiftAmt; | |||
| 6011 | if (Immed >> 12 == 0) { | |||
| 6012 | ShiftAmt = 0; | |||
| 6013 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { | |||
| 6014 | ShiftAmt = 12; | |||
| 6015 | Immed = Immed >> 12; | |||
| 6016 | } else | |||
| 6017 | return std::nullopt; | |||
| 6018 | ||||
| 6019 | unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); | |||
| 6020 | return {{ | |||
| 6021 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, | |||
| 6022 | [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, | |||
| 6023 | }}; | |||
| 6024 | } | |||
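| | // Worked example (illustrative): Immed = 0x5000 fails the unshifted test | |||
| | // (0x5000 >> 12 == 5) but has clear low 12 bits and fits in 24 bits, so it | |||
| | // renders as immediate 0x5 with an LSL #12 shifter, as in | |||
| | // "add x0, x1, #0x5, lsl #12"; Immed = 0x1001 fits neither form and the | |||
| | // function returns std::nullopt. | |||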
| 6025 | ||||
| 6026 | /// SelectArithImmed - Select an immediate value that can be represented as | |||
| 6027 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with | |||
| 6028 | /// Val set to the 12-bit value and Shift set to the shifter operand. | |||
| 6029 | InstructionSelector::ComplexRendererFns | |||
| 6030 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { | |||
| 6031 | // This function is called from the addsub_shifted_imm ComplexPattern, | |||
| 6032 | // which lists [imm] as the list of opcodes it's interested in; however, | |||
| 6033 | // we still need to check whether the operand is actually an immediate | |||
| 6034 | // here because the ComplexPattern opcode list is only used in | |||
| 6035 | // root-level opcode matching. | |||
| 6036 | auto MaybeImmed = getImmedFromMO(Root); | |||
| 6037 | if (MaybeImmed == std::nullopt) | |||
| 6038 | return std::nullopt; | |||
| 6039 | return select12BitValueWithLeftShift(*MaybeImmed); | |||
| 6040 | } | |||
| 6041 | ||||
| 6042 | /// SelectNegArithImmed - As above, but negates the value before trying to | |||
| 6043 | /// select it. | |||
| 6044 | InstructionSelector::ComplexRendererFns | |||
| 6045 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { | |||
| 6046 | // We need a register here, because we need to know if we have a 64 or 32 | |||
| 6047 | // bit immediate. | |||
| 6048 | if (!Root.isReg()) | |||
| 6049 | return std::nullopt; | |||
| 6050 | auto MaybeImmed = getImmedFromMO(Root); | |||
| 6051 | if (MaybeImmed == std::nullopt) | |||
| 6052 | return std::nullopt; | |||
| 6053 | uint64_t Immed = *MaybeImmed; | |||
| 6054 | ||||
| 6055 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" | |||
| 6056 | // have the opposite effect on the C flag, so this pattern mustn't match under | |||
| 6057 | // those circumstances. | |||
| 6058 | if (Immed == 0) | |||
| 6059 | return std::nullopt; | |||
| 6060 | ||||
| 6061 | // Check if we're dealing with a 32-bit type on the root or a 64-bit type on | |||
| 6062 | // the root. | |||
| 6063 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | |||
| 6064 | if (MRI.getType(Root.getReg()).getSizeInBits() == 32) | |||
| 6065 | Immed = ~((uint32_t)Immed) + 1; | |||
| 6066 | else | |||
| 6067 | Immed = ~Immed + 1ULL; | |||
| 6068 | ||||
| 6069 | if (Immed & 0xFFFFFFFFFF000000ULL) | |||
| 6070 | return std::nullopt; | |||
| 6071 | ||||
| 6072 | Immed &= 0xFFFFFFULL; | |||
| 6073 | return select12BitValueWithLeftShift(Immed); | |||
| 6074 | } | |||
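
The negation happens at the operand's width, so a 32-bit compare wraps modulo 2^32 before the 24-bit range check; that is how "cmp w0, #-1" becomes "cmn w0, #1". A hedged sketch of just the width handling (illustrative, not this file's API):

#include <cstdint>
#include <optional>

// Negate at the operand width, then require the result to fit the
// imm12/imm12<<12 range (no bits above bit 23). Imm == 0 is rejected by
// the caller because cmp/cmn #0 set the C flag differently.
static std::optional<uint64_t> negatedArithImmed(uint64_t Imm, bool Is32Bit) {
  Imm = Is32Bit ? (uint64_t)(~(uint32_t)Imm + 1u) : ~Imm + 1ULL;
  if (Imm & 0xFFFFFFFFFF000000ULL)
    return std::nullopt;
  return Imm & 0xFFFFFFULL;
}
// negatedArithImmed(0xFFFFFFFF, true) == 1: "cmp w0, #-1" -> "cmn w0, #1".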
| 6075 | ||||
| 6076 | /// Return true if it is worth folding MI into an extended register. That is, | |||
| 6077 | /// if it's safe to pull it into the addressing mode of a load or store as a | |||
| 6078 | /// shift. | |||
| 6079 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( | |||
| 6080 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { | |||
| 6081 | // Always fold if there is one use, or if we're optimizing for size. | |||
| 6082 | Register DefReg = MI.getOperand(0).getReg(); | |||
| 6083 | if (MRI.hasOneNonDBGUse(DefReg) || | |||
| 6084 | MI.getParent()->getParent()->getFunction().hasOptSize()) | |||
| 6085 | return true; | |||
| 6086 | ||||
| 6087 | // It's better to avoid folding and recomputing shifts when we don't have a | |||
| 6088 | // fastpath. | |||
| 6089 | if (!STI.hasLSLFast()) | |||
| 6090 | return false; | |||
| 6091 | ||||
| 6092 | // We have a fastpath, so folding a shift in and potentially computing it | |||
| 6093 | // many times may be beneficial. Check if this is only used in memory ops. | |||
| 6094 | // If it is, then we should fold. | |||
| 6095 | return all_of(MRI.use_nodbg_instructions(DefReg), | |||
| 6096 | [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); | |||
| 6097 | } | |||
| 6098 | ||||
| 6099 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { | |||
| 6100 | switch (Type) { | |||
| 6101 | case AArch64_AM::SXTB: | |||
| 6102 | case AArch64_AM::SXTH: | |||
| 6103 | case AArch64_AM::SXTW: | |||
| 6104 | return true; | |||
| 6105 | default: | |||
| 6106 | return false; | |||
| 6107 | } | |||
| 6108 | } | |||
| 6109 | ||||
| 6110 | InstructionSelector::ComplexRendererFns | |||
| 6111 | AArch64InstructionSelector::selectExtendedSHL( | |||
| 6112 | MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, | |||
| 6113 | unsigned SizeInBytes, bool WantsExt) const { | |||
| 6114 | assert(Base.isReg() && "Expected base to be a register operand"); | |||
| 6115 | assert(Offset.isReg() && "Expected offset to be a register operand"); | |||
| 6116 | ||||
| 6117 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | |||
| 6118 | MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); | |||
| 6119 | ||||
| 6120 | unsigned OffsetOpc = OffsetInst->getOpcode(); | |||
| 6121 | bool LookedThroughZExt = false; | |||
| 6122 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { | |||
| 6123 | // Try to look through a ZEXT. | |||
| 6124 | if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) | |||
| 6125 | return std::nullopt; | |||
| 6126 | ||||
| 6127 | OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); | |||
| 6128 | OffsetOpc = OffsetInst->getOpcode(); | |||
| 6129 | LookedThroughZExt = true; | |||
| 6130 | ||||
| 6131 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) | |||
| 6132 | return std::nullopt; | |||
| 6133 | } | |||
| 6134 | // Make sure that the memory op is a valid size. | |||
| 6135 | int64_t LegalShiftVal = Log2_32(SizeInBytes); | |||
| 6136 | if (LegalShiftVal == 0) | |||
| 6137 | return std::nullopt; | |||
| 6138 | if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) | |||
| 6139 | return std::nullopt; | |||
| 6140 | ||||
| 6141 | // Now, try to find the specific G_CONSTANT. Start by assuming that the | |||
| 6142 | // register we will offset is the LHS, and the register containing the | |||
| 6143 | // constant is the RHS. | |||
| 6144 | Register OffsetReg = OffsetInst->getOperand(1).getReg(); | |||
| 6145 | Register ConstantReg = OffsetInst->getOperand(2).getReg(); | |||
| 6146 | auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | |||
| 6147 | if (!ValAndVReg) { | |||
| 6148 | // We didn't get a constant on the RHS. If the opcode is a shift, then | |||
| 6149 | // we're done. | |||
| 6150 | if (OffsetOpc == TargetOpcode::G_SHL) | |||
| 6151 | return std::nullopt; | |||
| 6152 | ||||
| 6153 | // If we have a G_MUL, we can use either register. Try looking at the RHS. | |||
| 6154 | std::swap(OffsetReg, ConstantReg); | |||
| 6155 | ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | |||
| 6156 | if (!ValAndVReg) | |||
| 6157 | return std::nullopt; | |||
| 6158 | } | |||
| 6159 | ||||
| 6160 | // The value must fit into 3 bits, and must be positive. Make sure that is | |||
| 6161 | // true. | |||
| 6162 | int64_t ImmVal = ValAndVReg->Value.getSExtValue(); | |||
| 6163 | ||||
| 6164 | // Since we're going to pull this into a shift, the constant value must be | |||
| 6165 | // a power of 2. If we got a multiply, then we need to check this. | |||
| 6166 | if (OffsetOpc == TargetOpcode::G_MUL) { | |||
| 6167 | if (!llvm::has_single_bit<uint32_t>(ImmVal)) | |||
| 6168 | return std::nullopt; | |||
| 6169 | ||||
| 6170 | // Got a power of 2. So, the amount we'll shift is the log base-2 of that. | |||
| 6171 | ImmVal = Log2_32(ImmVal); | |||
| 6172 | } | |||
| 6173 | ||||
| 6174 | if ((ImmVal & 0x7) != ImmVal) | |||
| 6175 | return std::nullopt; | |||
| 6176 | ||||
| 6177 | // We are only allowed to shift by LegalShiftVal. This shift value is built | |||
| 6178 | // into the instruction, so we can't just use whatever we want. | |||
| 6179 | if (ImmVal != LegalShiftVal) | |||
| 6180 | return std::nullopt; | |||
| 6181 | ||||
| 6182 | unsigned SignExtend = 0; | |||
| 6183 | if (WantsExt) { | |||
| 6184 | // Check if the offset is defined by an extend, unless we looked through a | |||
| 6185 | // G_ZEXT earlier. | |||
| 6186 | if (!LookedThroughZExt) { | |||
| 6187 | MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); | |||
| 6188 | auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); | |||
| 6189 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
| 6190 | return std::nullopt; | |||
| 6191 | ||||
| 6192 | SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; | |||
| 6193 | // We only support SXTW for signed extension here. | |||
| 6194 | if (SignExtend && Ext != AArch64_AM::SXTW) | |||
| 6195 | return std::nullopt; | |||
| 6196 | OffsetReg = ExtInst->getOperand(1).getReg(); | |||
| 6197 | } | |||
| 6198 | ||||
| 6199 | // Need a 32-bit wide register here. | |||
| 6200 | MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); | |||
| 6201 | OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB); | |||
| 6202 | } | |||
| 6203 | ||||
| 6204 | // We can use the LHS of the GEP as the base, and the LHS of the shift as an | |||
| 6205 | // offset. Signify that we are shifting by setting the shift flag to 1. | |||
| 6206 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, | |||
| 6207 | [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, | |||
| 6208 | [=](MachineInstrBuilder &MIB) { | |||
| 6209 | // Need to add both immediates here to make sure that they are both | |||
| 6210 | // added to the instruction. | |||
| 6211 | MIB.addImm(SignExtend); | |||
| 6212 | MIB.addImm(1); | |||
| 6213 | }}}; | |||
| 6214 | } | |||
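
The only non-obvious step above is turning a G_MUL by a power of two into the equivalent shift amount before checking it against the one legal value for the access size. A small C++20 sketch of that conversion (illustrative names):

#include <bit>
#include <cstdint>
#include <optional>

// A multiply by a power of two folds into the addressing mode as a shift
// by log2 of the constant; the shift must then equal the legal amount
// (log2 of the access size in bytes).
static std::optional<int64_t> mulConstToShift(int64_t C, int64_t LegalShiftVal) {
  if (C <= 0 || !std::has_single_bit((uint64_t)C))
    return std::nullopt;                        // not a power of two
  int64_t Shift = std::countr_zero((uint64_t)C);
  if ((Shift & 0x7) != Shift || Shift != LegalShiftVal)
    return std::nullopt;                        // must fit 3 bits and match
  return Shift;                                 // e.g. C == 8 -> lsl #3
}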
| 6215 | ||||
| 6216 | /// This is used for computing addresses like this: | |||
| 6217 | /// | |||
| 6218 | /// ldr x1, [x2, x3, lsl #3] | |||
| 6219 | /// | |||
| 6220 | /// Where x2 is the base register, and x3 is an offset register. The shift-left | |||
| 6221 | /// is a constant value specific to this load instruction. That is, we'll never | |||
| 6222 | /// see anything other than a 3 here (which corresponds to the size of the | |||
| 6223 | /// element being loaded.) | |||
| 6224 | InstructionSelector::ComplexRendererFns | |||
| 6225 | AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( | |||
| 6226 | MachineOperand &Root, unsigned SizeInBytes) const { | |||
| 6227 | if (!Root.isReg()) | |||
| 6228 | return std::nullopt; | |||
| 6229 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | |||
| 6230 | ||||
| 6231 | // We want to find something like this: | |||
| 6232 | // | |||
| 6233 | // val = G_CONSTANT LegalShiftVal | |||
| 6234 | // shift = G_SHL off_reg val | |||
| 6235 | // ptr = G_PTR_ADD base_reg shift | |||
| 6236 | // x = G_LOAD ptr | |||
| 6237 | // | |||
| 6238 | // And fold it into this addressing mode: | |||
| 6239 | // | |||
| 6240 | // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] | |||
| 6241 | ||||
| 6242 | // Check if we can find the G_PTR_ADD. | |||
| 6243 | MachineInstr *PtrAdd = | |||
| 6244 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | |||
| 6245 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) | |||
| 6246 | return std::nullopt; | |||
| 6247 | ||||
| 6248 | // Now, try to match an opcode which will match our specific offset. | |||
| 6249 | // We want a G_SHL or a G_MUL. | |||
| 6250 | MachineInstr *OffsetInst = | |||
| 6251 | getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); | |||
| 6252 | return selectExtendedSHL(Root, PtrAdd->getOperand(1), | |||
| 6253 | OffsetInst->getOperand(0), SizeInBytes, | |||
| 6254 | /*WantsExt=*/false); | |||
| 6255 | } | |||
| 6256 | ||||
| 6257 | /// This is used for computing addresses like this: | |||
| 6258 | /// | |||
| 6259 | /// ldr x1, [x2, x3] | |||
| 6260 | /// | |||
| 6261 | /// Where x2 is the base register, and x3 is an offset register. | |||
| 6262 | /// | |||
| 6263 | /// When possible (or profitable) to fold a G_PTR_ADD into the address | |||
| 6264 | /// calculation, this will do so. Otherwise, it will return std::nullopt. | |||
| 6265 | InstructionSelector::ComplexRendererFns | |||
| 6266 | AArch64InstructionSelector::selectAddrModeRegisterOffset( | |||
| 6267 | MachineOperand &Root) const { | |||
| 6268 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | |||
| 6269 | ||||
| 6270 | // We need a GEP. | |||
| 6271 | MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); | |||
| 6272 | if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD) | |||
| 6273 | return std::nullopt; | |||
| 6274 | ||||
| 6275 | // If this is used more than once, let's not bother folding. | |||
| 6276 | // TODO: Check if they are memory ops. If they are, then we can still fold | |||
| 6277 | // without having to recompute anything. | |||
| 6278 | if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) | |||
| 6279 | return std::nullopt; | |||
| 6280 | ||||
| 6281 | // Base is the GEP's LHS, offset is its RHS. | |||
| 6282 | return {{[=](MachineInstrBuilder &MIB) { | |||
| 6283 | MIB.addUse(Gep->getOperand(1).getReg()); | |||
| 6284 | }, | |||
| 6285 | [=](MachineInstrBuilder &MIB) { | |||
| 6286 | MIB.addUse(Gep->getOperand(2).getReg()); | |||
| 6287 | }, | |||
| 6288 | [=](MachineInstrBuilder &MIB) { | |||
| 6289 | // Need to add both immediates here to make sure that they are both | |||
| 6290 | // added to the instruction. | |||
| 6291 | MIB.addImm(0); | |||
| 6292 | MIB.addImm(0); | |||
| 6293 | }}}; | |||
| 6294 | } | |||
| 6295 | ||||
| 6296 | /// This is intended to be equivalent to selectAddrModeXRO in | |||
| 6297 | AArch64ISelDAGToDAG. It's used for selecting X register offset loads. | |||
| 6298 | InstructionSelector::ComplexRendererFns | |||
| 6299 | AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, | |||
| 6300 | unsigned SizeInBytes) const { | |||
| 6301 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | |||
| 6302 | if (!Root.isReg()) | |||
| 6303 | return std::nullopt; | |||
| 6304 | MachineInstr *PtrAdd = | |||
| 6305 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | |||
| 6306 | if (!PtrAdd) | |||
| 6307 | return std::nullopt; | |||
| 6308 | ||||
| 6309 | // Check for immediates which cannot be encoded in the [base + imm] | |||
| 6310 | // addressing mode, and can't be encoded in an add/sub. If this happens, we'll | |||
| 6311 | // end up with code like: | |||
| 6312 | // | |||
| 6313 | // mov x0, wide | |||
| 6314 | // add x1 base, x0 | |||
| 6315 | // ldr x2, [x1, x0] | |||
| 6316 | // | |||
| 6317 | // In this situation, we can use the [base, xreg] addressing mode to save an | |||
| 6318 | // add/sub: | |||
| 6319 | // | |||
| 6320 | // mov x0, wide | |||
| 6321 | // ldr x2, [base, x0] | |||
| 6322 | auto ValAndVReg = | |||
| 6323 | getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); | |||
| 6324 | if (ValAndVReg) { | |||
| 6325 | unsigned Scale = Log2_32(SizeInBytes); | |||
| 6326 | int64_t ImmOff = ValAndVReg->Value.getSExtValue(); | |||
| 6327 | ||||
| 6328 | // Skip immediates that can be selected in the load/store addressing | |||
| 6329 | // mode. | |||
| 6330 | if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && | |||
| 6331 | ImmOff < (0x1000 << Scale)) | |||
| 6332 | return std::nullopt; | |||
| 6333 | ||||
| 6334 | // Helper lambda to decide whether or not it is preferable to emit an add. | |||
| 6335 | auto isPreferredADD = [](int64_t ImmOff) { | |||
| 6336 | // Constants in [0x0, 0xfff] can be encoded in an add. | |||
| 6337 | if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) | |||
| 6338 | return true; | |||
| 6339 | ||||
| 6340 | // Can it be encoded in an add lsl #12? | |||
| 6341 | if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) | |||
| 6342 | return false; | |||
| 6343 | ||||
| 6344 | // It can be encoded in an add lsl #12, but we may not want to. If it is | |||
| 6345 | // possible to select this as a single movz, then prefer that. A single | |||
| 6346 | // movz is faster than an add with a shift. | |||
| 6347 | return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && | |||
| 6348 | (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; | |||
| 6349 | }; | |||
| 6350 | ||||
| 6351 | // If the immediate can be encoded in a single add/sub, then bail out. | |||
| 6352 | if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) | |||
| 6353 | return std::nullopt; | |||
| 6354 | } | |||
| 6355 | ||||
| 6356 | // Try to fold shifts into the addressing mode. | |||
| 6357 | auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); | |||
| 6358 | if (AddrModeFns) | |||
| 6359 | return AddrModeFns; | |||
| 6360 | ||||
| 6361 | // If that doesn't work, see if it's possible to fold in registers from | |||
| 6362 | // a GEP. | |||
| 6363 | return selectAddrModeRegisterOffset(Root); | |||
| 6364 | } | |||
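
The isPreferredADD lambda above deserves unpacking: an offset encodable as a plain "add #imm12" or "add #imm12, lsl #12" should stay an add/sub, unless the value also fits a single movz (one 16-bit chunk), in which case materializing it once and reusing the register is cheaper. A standalone sketch of the same classification (same masks, illustrative name):

#include <cstdint>

static bool preferADD(int64_t ImmOff) {
  if ((ImmOff & 0xfffffffffffff000LL) == 0)
    return true;   // plain add/sub #imm12
  if ((ImmOff & 0xffffffffff000fffLL) != 0)
    return false;  // not add/sub #imm12, lsl #12 either
  // Encodable as add lsl #12; still prefer a single movz when all bits
  // fit one 16-bit chunk (at shift 0 or shift 16).
  return (ImmOff & 0xffffffffff00ffffLL) != 0 &&
         (ImmOff & 0xffffffffffff0fffLL) != 0;
}
// preferADD(0x123)    == true  (add #0x123)
// preferADD(0x123000) == true  (add #0x123, lsl #12; spans two movz chunks)
// preferADD(0x120000) == false (movz #0x12, lsl #16 wins)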
| 6365 | ||||
| 6366 | /// This is used for computing addresses like this: | |||
| 6367 | /// | |||
| 6368 | /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] | |||
| 6369 | /// | |||
| 6370 | /// Where we have a 64-bit base register, a 32-bit offset register, and an | |||
| 6371 | /// extend (which may or may not be signed). | |||
| 6372 | InstructionSelector::ComplexRendererFns | |||
| 6373 | AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, | |||
| 6374 | unsigned SizeInBytes) const { | |||
| 6375 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | |||
| 6376 | ||||
| 6377 | MachineInstr *PtrAdd = | |||
| 6378 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | |||
| 6379 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) | |||
| 6380 | return std::nullopt; | |||
| 6381 | ||||
| 6382 | MachineOperand &LHS = PtrAdd->getOperand(1); | |||
| 6383 | MachineOperand &RHS = PtrAdd->getOperand(2); | |||
| 6384 | MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); | |||
| 6385 | ||||
| 6386 | // The first case is the same as selectAddrModeXRO, except we need an extend. | |||
| 6387 | // In this case, we try to find a shift and extend, and fold them into the | |||
| 6388 | // addressing mode. | |||
| 6389 | // | |||
| 6390 | // E.g. | |||
| 6391 | // | |||
| 6392 | // off_reg = G_Z/S/ANYEXT ext_reg | |||
| 6393 | // val = G_CONSTANT LegalShiftVal | |||
| 6394 | // shift = G_SHL off_reg val | |||
| 6395 | // ptr = G_PTR_ADD base_reg shift | |||
| 6396 | // x = G_LOAD ptr | |||
| 6397 | // | |||
| 6398 | // In this case we can get a load like this: | |||
| 6399 | // | |||
| 6400 | // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] | |||
| 6401 | auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), | |||
| 6402 | SizeInBytes, /*WantsExt=*/true); | |||
| 6403 | if (ExtendedShl) | |||
| 6404 | return ExtendedShl; | |||
| 6405 | ||||
| 6406 | // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. | |||
| 6407 | // | |||
| 6408 | // e.g. | |||
| 6409 | // ldr something, [base_reg, ext_reg, sxtw] | |||
| 6410 | if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) | |||
| 6411 | return std::nullopt; | |||
| 6412 | ||||
| 6413 | // Check if this is an extend. We'll get an extend type if it is. | |||
| 6414 | AArch64_AM::ShiftExtendType Ext = | |||
| 6415 | getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); | |||
| 6416 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
| 6417 | return std::nullopt; | |||
| 6418 | ||||
| 6419 | // Need a 32-bit wide register. | |||
| 6420 | MachineIRBuilder MIB(*PtrAdd); | |||
| 6421 | Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), | |||
| 6422 | AArch64::GPR32RegClass, MIB); | |||
| 6423 | unsigned SignExtend = Ext == AArch64_AM::SXTW; | |||
| 6424 | ||||
| 6425 | // Base is LHS, offset is ExtReg. | |||
| 6426 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, | |||
| 6427 | [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, | |||
| 6428 | [=](MachineInstrBuilder &MIB) { | |||
| 6429 | MIB.addImm(SignExtend); | |||
| 6430 | MIB.addImm(0); | |||
| 6431 | }}}; | |||
| 6432 | } | |||
| 6433 | ||||
| 6434 | /// Select a "register plus unscaled signed 9-bit immediate" address. This | |||
| 6435 | /// should only match when there is an offset that is not valid for a scaled | |||
| 6436 | /// immediate addressing mode. The "Size" argument is the size in bytes of the | |||
| 6437 | /// memory reference, which is needed here to know what is valid for a scaled | |||
| 6438 | /// immediate. | |||
| 6439 | InstructionSelector::ComplexRendererFns | |||
| 6440 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, | |||
| 6441 | unsigned Size) const { | |||
| 6442 | MachineRegisterInfo &MRI = | |||
| 6443 | Root.getParent()->getParent()->getParent()->getRegInfo(); | |||
| 6444 | ||||
| 6445 | if (!Root.isReg()) | |||
| 6446 | return std::nullopt; | |||
| 6447 | ||||
| 6448 | if (!isBaseWithConstantOffset(Root, MRI)) | |||
| 6449 | return std::nullopt; | |||
| 6450 | ||||
| 6451 | MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); | |||
| 6452 | ||||
| 6453 | MachineOperand &OffImm = RootDef->getOperand(2); | |||
| 6454 | if (!OffImm.isReg()) | |||
| 6455 | return std::nullopt; | |||
| 6456 | MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); | |||
| 6457 | if (RHS->getOpcode() != TargetOpcode::G_CONSTANT) | |||
| 6458 | return std::nullopt; | |||
| 6459 | int64_t RHSC; | |||
| 6460 | MachineOperand &RHSOp1 = RHS->getOperand(1); | |||
| 6461 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) | |||
| 6462 | return std::nullopt; | |||
| 6463 | RHSC = RHSOp1.getCImm()->getSExtValue(); | |||
| 6464 | ||||
| 6465 | // If the offset is valid as a scaled immediate, don't match here. | |||
| 6466 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) | |||
| 6467 | return std::nullopt; | |||
| 6468 | if (RHSC >= -256 && RHSC < 256) { | |||
| 6469 | MachineOperand &Base = RootDef->getOperand(1); | |||
| 6470 | return {{ | |||
| 6471 | [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, | |||
| 6472 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, | |||
| 6473 | }}; | |||
| 6474 | } | |||
| 6475 | return std::nullopt; | |||
| 6476 | } | |||
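
In other words, an offset is routed here only when the scaled LDR/STR uimm12 form cannot take it and it fits the LDUR/STUR signed 9-bit range. A C++20 sketch of the offset test, assuming Size is a power of two (illustrative name):

#include <bit>
#include <cassert>
#include <cstdint>
#include <optional>

// Size is the access size in bytes (1, 2, 4, 8 or 16).
static std::optional<int64_t> unscaledOffset(int64_t Off, unsigned Size) {
  assert(Size != 0 && std::has_single_bit(Size));
  unsigned Scale = std::countr_zero(Size);    // log2(Size)
  bool FitsScaled = (Off & (int64_t)(Size - 1)) == 0 && Off >= 0 &&
                    Off < (int64_t)(0x1000u << Scale);
  if (FitsScaled)
    return std::nullopt;   // let the scaled addressing mode handle it
  if (Off >= -256 && Off < 256)
    return Off;            // ldur/stur simm9
  return std::nullopt;
}
// For an 8-byte access: Off == 16 -> scaled "ldr x, [base, #16]" wins;
// Off == 12 (misaligned) and Off == -8 (negative) both select ldur.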
| 6477 | ||||
| 6478 | InstructionSelector::ComplexRendererFns | |||
| 6479 | AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, | |||
| 6480 | unsigned Size, | |||
| 6481 | MachineRegisterInfo &MRI) const { | |||
| 6482 | if (RootDef.getOpcode() != AArch64::G_ADD_LOW) | |||
| 6483 | return std::nullopt; | |||
| 6484 | MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); | |||
| 6485 | if (Adrp.getOpcode() != AArch64::ADRP) | |||
| 6486 | return std::nullopt; | |||
| 6487 | ||||
| 6488 | // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. | |||
| 6489 | auto Offset = Adrp.getOperand(1).getOffset(); | |||
| 6490 | if (Offset % Size != 0) | |||
| 6491 | return std::nullopt; | |||
| 6492 | ||||
| 6493 | auto GV = Adrp.getOperand(1).getGlobal(); | |||
| 6494 | if (GV->isThreadLocal()) | |||
| 6495 | return std::nullopt; | |||
| 6496 | ||||
| 6497 | auto &MF = *RootDef.getParent()->getParent(); | |||
| 6498 | if (GV->getPointerAlignment(MF.getDataLayout()) < Size) | |||
| 6499 | return std::nullopt; | |||
| 6500 | ||||
| 6501 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); | |||
| 6502 | MachineIRBuilder MIRBuilder(RootDef); | |||
| 6503 | Register AdrpReg = Adrp.getOperand(0).getReg(); | |||
| 6504 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, | |||
| 6505 | [=](MachineInstrBuilder &MIB) { | |||
| 6506 | MIB.addGlobalAddress(GV, Offset, | |||
| 6507 | OpFlags | AArch64II::MO_PAGEOFF | | |||
| 6508 | AArch64II::MO_NC); | |||
| 6509 | }}}; | |||
| 6510 | } | |||
| 6511 | ||||
| 6512 | /// Select a "register plus scaled unsigned 12-bit immediate" address. The | |||
| 6513 | /// "Size" argument is the size in bytes of the memory reference, which | |||
| 6514 | /// determines the scale. | |||
| 6515 | InstructionSelector::ComplexRendererFns | |||
| 6516 | AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, | |||
| 6517 | unsigned Size) const { | |||
| 6518 | MachineFunction &MF = *Root.getParent()->getParent()->getParent(); | |||
| 6519 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 6520 | ||||
| 6521 | if (!Root.isReg()) | |||
| 6522 | return std::nullopt; | |||
| 6523 | ||||
| 6524 | MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); | |||
| 6525 | if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { | |||
| 6526 | return {{ | |||
| 6527 | [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, | |||
| 6528 | [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, | |||
| 6529 | }}; | |||
| 6530 | } | |||
| 6531 | ||||
| 6532 | CodeModel::Model CM = MF.getTarget().getCodeModel(); | |||
| 6533 | // Check if we can fold in the ADD of small code model ADRP + ADD address. | |||
| 6534 | if (CM == CodeModel::Small) { | |||
| 6535 | auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); | |||
| 6536 | if (OpFns) | |||
| 6537 | return OpFns; | |||
| 6538 | } | |||
| 6539 | ||||
| 6540 | if (isBaseWithConstantOffset(Root, MRI)) { | |||
| 6541 | MachineOperand &LHS = RootDef->getOperand(1); | |||
| 6542 | MachineOperand &RHS = RootDef->getOperand(2); | |||
| 6543 | MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); | |||
| 6544 | MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); | |||
| 6545 | ||||
| 6546 | int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); | |||
| 6547 | unsigned Scale = Log2_32(Size); | |||
| 6548 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { | |||
| 6549 | if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) | |||
| 6550 | return {{ | |||
| 6551 | [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, | |||
| 6552 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, | |||
| 6553 | }}; | |||
| 6554 | ||||
| 6555 | return {{ | |||
| 6556 | [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, | |||
| 6557 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, | |||
| 6558 | }}; | |||
| 6559 | } | |||
| 6560 | } | |||
| 6561 | ||||
| 6562 | // Before falling back to our general case, check if the unscaled | |||
| 6563 | // instructions can handle this. If so, that's preferable. | |||
| 6564 | if (selectAddrModeUnscaled(Root, Size)) | |||
| 6565 | return std::nullopt; | |||
| 6566 | ||||
| 6567 | return {{ | |||
| 6568 | [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, | |||
| 6569 | [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, | |||
| 6570 | }}; | |||
| 6571 | } | |||
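
Note that the "RHSC < (0x1000 << Scale)" test above (source line 6548) is the expression this report flags: Log2_32, reached through MathExtras.h below, returns UINT_MAX on an input of 0, so a zero Size would make the shift amount 4294967295 and the left shift undefined. A guarded sketch of the same range check, assuming one wants to tolerate an unexpected Size (C++20, illustrative name):

#include <bit>
#include <cstdint>

// Guarded form of the scaled-uimm12 range test: reject Size == 0 (where
// Log2_32 would return UINT_MAX, making "0x1000 << Scale" undefined) and
// non-power-of-two sizes before computing the scale.
static bool fitsScaledUImm12(int64_t RHSC, unsigned Size) {
  if (Size == 0 || !std::has_single_bit(Size))
    return false;
  unsigned Scale = std::countr_zero(Size);   // always < 32 here
  return (RHSC & (int64_t)(Size - 1)) == 0 && RHSC >= 0 &&
         RHSC < (0x1000LL << Scale);
}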
| 6572 | ||||
| 6573 | /// Given a shift instruction, return the correct shift type for that | |||
| 6574 | /// instruction. | |||
| 6575 | static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { | |||
| 6576 | switch (MI.getOpcode()) { | |||
| 6577 | default: | |||
| 6578 | return AArch64_AM::InvalidShiftExtend; | |||
| 6579 | case TargetOpcode::G_SHL: | |||
| 6580 | return AArch64_AM::LSL; | |||
| 6581 | case TargetOpcode::G_LSHR: | |||
| 6582 | return AArch64_AM::LSR; | |||
| 6583 | case TargetOpcode::G_ASHR: | |||
| 6584 | return AArch64_AM::ASR; | |||
| 6585 | case TargetOpcode::G_ROTR: | |||
| 6586 | return AArch64_AM::ROR; | |||
| 6587 | } | |||
| 6588 | } | |||
| 6589 | ||||
| 6590 | /// Select a "shifted register" operand. If the value is not shifted, set the | |||
| 6591 | /// shift operand to a default value of "lsl 0". | |||
| 6592 | InstructionSelector::ComplexRendererFns | |||
| 6593 | AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root, | |||
| 6594 | bool AllowROR) const { | |||
| 6595 | if (!Root.isReg()) | |||
| 6596 | return std::nullopt; | |||
| 6597 | MachineRegisterInfo &MRI = | |||
| 6598 | Root.getParent()->getParent()->getParent()->getRegInfo(); | |||
| 6599 | ||||
| 6600 | // Check if the operand is defined by an instruction which corresponds to | |||
| 6601 | // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. | |||
| 6602 | MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); | |||
| 6603 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); | |||
| 6604 | if (ShType == AArch64_AM::InvalidShiftExtend) | |||
| 6605 | return std::nullopt; | |||
| 6606 | if (ShType == AArch64_AM::ROR && !AllowROR) | |||
| 6607 | return std::nullopt; | |||
| 6608 | if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) | |||
| 6609 | return std::nullopt; | |||
| 6610 | ||||
| 6611 | // Need an immediate on the RHS. | |||
| 6612 | MachineOperand &ShiftRHS = ShiftInst->getOperand(2); | |||
| 6613 | auto Immed = getImmedFromMO(ShiftRHS); | |||
| 6614 | if (!Immed) | |||
| 6615 | return std::nullopt; | |||
| 6616 | ||||
| 6617 | // We have something that we can fold. Fold in the shift's LHS and RHS into | |||
| 6618 | // the instruction. | |||
| 6619 | MachineOperand &ShiftLHS = ShiftInst->getOperand(1); | |||
| 6620 | Register ShiftReg = ShiftLHS.getReg(); | |||
| 6621 | ||||
| 6622 | unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); | |||
| 6623 | unsigned Val = *Immed & (NumBits - 1); | |||
| 6624 | unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); | |||
| 6625 | ||||
| 6626 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, | |||
| 6627 | [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; | |||
| 6628 | } | |||
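
The "*Immed & (NumBits - 1)" reduction matters: the shifted-register operand only encodes amounts in [0, NumBits - 1], so the immediate is wrapped to the register width before getShifterImm packs it with the shift type. A one-line sketch (illustrative name):

#include <cstdint>

// NumBits is the register width (32 or 64, both powers of two).
static unsigned foldedShiftAmount(uint64_t Imm, unsigned NumBits) {
  return (unsigned)(Imm & (NumBits - 1)); // e.g. Imm == 70, NumBits == 64 -> 6
}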
| 6629 | ||||
| 6630 | AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( | |||
| 6631 | MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { | |||
| 6632 | unsigned Opc = MI.getOpcode(); | |||
| 6633 | ||||
| 6634 | // Handle explicit extend instructions first. | |||
| 6635 | if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { | |||
| 6636 | unsigned Size; | |||
| 6637 | if (Opc == TargetOpcode::G_SEXT) | |||
| 6638 | Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); | |||
| 6639 | else | |||
| 6640 | Size = MI.getOperand(2).getImm(); | |||
| 6641 | assert(Size != 64 && "Extend from 64 bits?"); | |||
| 6642 | switch (Size) { | |||
| 6643 | case 8: | |||
| 6644 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; | |||
| 6645 | case 16: | |||
| 6646 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; | |||
| 6647 | case 32: | |||
| 6648 | return AArch64_AM::SXTW; | |||
| 6649 | default: | |||
| 6650 | return AArch64_AM::InvalidShiftExtend; | |||
| 6651 | } | |||
| 6652 | } | |||
| 6653 | ||||
| 6654 | if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { | |||
| 6655 | unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); | |||
| 6656 | assert(Size != 64 && "Extend from 64 bits?"); | |||
| 6657 | switch (Size) { | |||
| 6658 | case 8: | |||
| 6659 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; | |||
| 6660 | case 16: | |||
| 6661 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; | |||
| 6662 | case 32: | |||
| 6663 | return AArch64_AM::UXTW; | |||
| 6664 | default: | |||
| 6665 | return AArch64_AM::InvalidShiftExtend; | |||
| 6666 | } | |||
| 6667 | } | |||
| 6668 | ||||
| 6669 | // Don't have an explicit extend. Try to handle a G_AND with a constant mask | |||
| 6670 | // on the RHS. | |||
| 6671 | if (Opc != TargetOpcode::G_AND) | |||
| 6672 | return AArch64_AM::InvalidShiftExtend; | |||
| 6673 | ||||
| 6674 | std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); | |||
| 6675 | if (!MaybeAndMask) | |||
| 6676 | return AArch64_AM::InvalidShiftExtend; | |||
| 6677 | uint64_t AndMask = *MaybeAndMask; | |||
| 6678 | switch (AndMask) { | |||
| 6679 | default: | |||
| 6680 | return AArch64_AM::InvalidShiftExtend; | |||
| 6681 | case 0xFF: | |||
| 6682 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; | |||
| 6683 | case 0xFFFF: | |||
| 6684 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; | |||
| 6685 | case 0xFFFFFFFF: | |||
| 6686 | return AArch64_AM::UXTW; | |||
| 6687 | } | |||
| 6688 | } | |||
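
The G_AND fallback works because a zero-extension that has already been lowered looks like an AND with one of three canonical masks; only those exact masks qualify, and for load/store addressing only the 32-bit form is usable. A compact sketch of the mapping (illustrative enum and name):

#include <cstdint>
#include <optional>

enum class ZExtKind { UXTB, UXTH, UXTW };

static std::optional<ZExtKind> extendForMask(uint64_t Mask, bool IsLoadStore) {
  if (IsLoadStore && Mask != 0xFFFFFFFF)
    return std::nullopt;        // only uxtw is valid in addressing modes
  switch (Mask) {
  case 0xFF:       return ZExtKind::UXTB;
  case 0xFFFF:     return ZExtKind::UXTH;
  case 0xFFFFFFFF: return ZExtKind::UXTW;
  default:         return std::nullopt;  // not a zero-extension mask
  }
}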
| 6689 | ||||
| 6690 | Register AArch64InstructionSelector::moveScalarRegClass( | |||
| 6691 | Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { | |||
| 6692 | MachineRegisterInfo &MRI = *MIB.getMRI(); | |||
| 6693 | auto Ty = MRI.getType(Reg); | |||
| 6694 | assert(!Ty.isVector() && "Expected scalars only!"); | |||
| 6695 | if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) | |||
| 6696 | return Reg; | |||
| 6697 | ||||
| 6698 | // Create a copy and immediately select it. | |||
| 6699 | // FIXME: We should have an emitCopy function? | |||
| 6700 | auto Copy = MIB.buildCopy({&RC}, {Reg}); | |||
| 6701 | selectCopy(*Copy, TII, MRI, TRI, RBI); | |||
| 6702 | return Copy.getReg(0); | |||
| 6703 | } | |||
| 6704 | ||||
| 6705 | /// Select an "extended register" operand. This operand folds in an extend | |||
| 6706 | /// followed by an optional left shift. | |||
| 6707 | InstructionSelector::ComplexRendererFns | |||
| 6708 | AArch64InstructionSelector::selectArithExtendedRegister( | |||
| 6709 | MachineOperand &Root) const { | |||
| 6710 | if (!Root.isReg()) | |||
| 6711 | return std::nullopt; | |||
| 6712 | MachineRegisterInfo &MRI = | |||
| 6713 | Root.getParent()->getParent()->getParent()->getRegInfo(); | |||
| 6714 | ||||
| 6715 | uint64_t ShiftVal = 0; | |||
| 6716 | Register ExtReg; | |||
| 6717 | AArch64_AM::ShiftExtendType Ext; | |||
| 6718 | MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); | |||
| 6719 | if (!RootDef) | |||
| 6720 | return std::nullopt; | |||
| 6721 | ||||
| 6722 | if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) | |||
| 6723 | return std::nullopt; | |||
| 6724 | ||||
| 6725 | // Check if we can fold a shift and an extend. | |||
| 6726 | if (RootDef->getOpcode() == TargetOpcode::G_SHL) { | |||
| 6727 | // Look for a constant on the RHS of the shift. | |||
| 6728 | MachineOperand &RHS = RootDef->getOperand(2); | |||
| 6729 | std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); | |||
| 6730 | if (!MaybeShiftVal) | |||
| 6731 | return std::nullopt; | |||
| 6732 | ShiftVal = *MaybeShiftVal; | |||
| 6733 | if (ShiftVal > 4) | |||
| 6734 | return std::nullopt; | |||
| 6735 | // Look for a valid extend instruction on the LHS of the shift. | |||
| 6736 | MachineOperand &LHS = RootDef->getOperand(1); | |||
| 6737 | MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); | |||
| 6738 | if (!ExtDef) | |||
| 6739 | return std::nullopt; | |||
| 6740 | Ext = getExtendTypeForInst(*ExtDef, MRI); | |||
| 6741 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
| 6742 | return std::nullopt; | |||
| 6743 | ExtReg = ExtDef->getOperand(1).getReg(); | |||
| 6744 | } else { | |||
| 6745 | // Didn't get a shift. Try just folding an extend. | |||
| 6746 | Ext = getExtendTypeForInst(*RootDef, MRI); | |||
| 6747 | if (Ext == AArch64_AM::InvalidShiftExtend) | |||
| 6748 | return std::nullopt; | |||
| 6749 | ExtReg = RootDef->getOperand(1).getReg(); | |||
| 6750 | ||||
| 6751 | // If we have a 32 bit instruction which zeroes out the high half of a | |||
| 6752 | // register, we get an implicit zero extend for free. Check if we have one. | |||
| 6753 | // FIXME: We actually emit the extend right now even though we don't have | |||
| 6754 | // to. | |||
| 6755 | if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { | |||
| 6756 | MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); | |||
| 6757 | if (isDef32(*ExtInst)) | |||
| 6758 | return std::nullopt; | |||
| 6759 | } | |||
| 6760 | } | |||
| 6761 | ||||
| 6762 | // We require a GPR32 here. Narrow the ExtReg if needed using a subregister | |||
| 6763 | // copy. | |||
| 6764 | MachineIRBuilder MIB(*RootDef); | |||
| 6765 | ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); | |||
| 6766 | ||||
| 6767 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, | |||
| 6768 | [=](MachineInstrBuilder &MIB) { | |||
| 6769 | MIB.addImm(getArithExtendImm(Ext, ShiftVal)); | |||
| 6770 | }}}; | |||
| 6771 | } | |||
| 6772 | ||||
| 6773 | void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, | |||
| 6774 | const MachineInstr &MI, | |||
| 6775 | int OpIdx) const { | |||
| 6776 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |||
| 6777 | assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && | |||
| 6778 | "Expected G_CONSTANT"); | |||
| 6779 | std::optional<int64_t> CstVal = | |||
| 6780 | getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); | |||
| 6781 | assert(CstVal && "Expected constant value"); | |||
| 6782 | MIB.addImm(*CstVal); | |||
| 6783 | } | |||
| 6784 | ||||
| 6785 | void AArch64InstructionSelector::renderLogicalImm32( | |||
| 6786 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { | |||
| 6787 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && | |||
| 6788 | "Expected G_CONSTANT"); | |||
| 6789 | uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); | |||
| 6790 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); | |||
| 6791 | MIB.addImm(Enc); | |||
| 6792 | } | |||
| 6793 | ||||
| 6794 | void AArch64InstructionSelector::renderLogicalImm64( | |||
| 6795 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { | |||
| 6796 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && | |||
| 6797 | "Expected G_CONSTANT"); | |||
| 6798 | uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); | |||
| 6799 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); | |||
| 6800 | MIB.addImm(Enc); | |||
| 6801 | } | |||
| 6802 | ||||
| 6803 | void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB, | |||
| 6804 | const MachineInstr &MI, | |||
| 6805 | int OpIdx) const { | |||
| 6806 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | |||
| 6807 | "Expected G_FCONSTANT"); | |||
| 6808 | MIB.addImm( | |||
| 6809 | AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | |||
| 6810 | } | |||
| 6811 | ||||
| 6812 | void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB, | |||
| 6813 | const MachineInstr &MI, | |||
| 6814 | int OpIdx) const { | |||
| 6815 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | |||
| 6816 | "Expected G_FCONSTANT"); | |||
| 6817 | MIB.addImm( | |||
| 6818 | AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | |||
| 6819 | } | |||
| 6820 | ||||
| 6821 | void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB, | |||
| 6822 | const MachineInstr &MI, | |||
| 6823 | int OpIdx) const { | |||
| 6824 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | |||
| 6825 | "Expected G_FCONSTANT"); | |||
| 6826 | MIB.addImm( | |||
| 6827 | AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | |||
| 6828 | } | |||
| 6829 | ||||
| 6830 | void AArch64InstructionSelector::renderFPImm32SIMDModImmType4( | |||
| 6831 | MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { | |||
| 6832 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | |||
| 6833 | "Expected G_FCONSTANT"); | |||
| 6834 | MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1) | |||
| 6835 | .getFPImm() | |||
| 6836 | ->getValueAPF() | |||
| 6837 | .bitcastToAPInt() | |||
| 6838 | .getZExtValue())); | |||
| 6839 | } | |||
| 6840 | ||||
| 6841 | bool AArch64InstructionSelector::isLoadStoreOfNumBytes( | |||
| 6842 | const MachineInstr &MI, unsigned NumBytes) const { | |||
| 6843 | if (!MI.mayLoadOrStore()) | |||
| 6844 | return false; | |||
| 6845 | assert(MI.hasOneMemOperand() && | |||
| 6846 | "Expected load/store to have only one mem op!"); | |||
| 6847 | return (*MI.memoperands_begin())->getSize() == NumBytes; | |||
| 6848 | } | |||
| 6849 | ||||
| 6850 | bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { | |||
| 6851 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | |||
| 6852 | if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) | |||
| 6853 | return false; | |||
| 6854 | ||||
| 6855 | // Only return true if we know the operation will zero-out the high half of | |||
| 6856 | // the 64-bit register. Truncates can be subregister copies, which don't | |||
| 6857 | // zero out the high bits. Copies and other copy-like instructions can be | |||
| 6858 | // fed by truncates, or could be lowered as subregister copies. | |||
| 6859 | switch (MI.getOpcode()) { | |||
| 6860 | default: | |||
| 6861 | return true; | |||
| 6862 | case TargetOpcode::COPY: | |||
| 6863 | case TargetOpcode::G_BITCAST: | |||
| 6864 | case TargetOpcode::G_TRUNC: | |||
| 6865 | case TargetOpcode::G_PHI: | |||
| 6866 | return false; | |||
| 6867 | } | |||
| 6868 | } | |||
| 6869 | ||||
| 6870 | ||||
| 6871 | // Perform fixups on the given PHI instruction's operands to force them all | |||
| 6872 | // to be the same as the destination regbank. | |||
| 6873 | static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, | |||
| 6874 | const AArch64RegisterBankInfo &RBI) { | |||
| 6875 | assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI"); | |||
| 6876 | Register DstReg = MI.getOperand(0).getReg(); | |||
| 6877 | const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg); | |||
| 6878 | assert(DstRB && "Expected PHI dst to have regbank assigned"); | |||
| 6879 | MachineIRBuilder MIB(MI); | |||
| 6880 | ||||
| 6881 | // Go through each operand and ensure it has the same regbank. | |||
| 6882 | for (MachineOperand &MO : llvm::drop_begin(MI.operands())) { | |||
| 6883 | if (!MO.isReg()) | |||
| 6884 | continue; | |||
| 6885 | Register OpReg = MO.getReg(); | |||
| 6886 | const RegisterBank *RB = MRI.getRegBankOrNull(OpReg); | |||
| 6887 | if (RB != DstRB) { | |||
| 6888 | // Insert a cross-bank copy. | |||
| 6889 | auto *OpDef = MRI.getVRegDef(OpReg); | |||
| 6890 | const LLT &Ty = MRI.getType(OpReg); | |||
| 6891 | MachineBasicBlock &OpDefBB = *OpDef->getParent(); | |||
| 6892 | ||||
| 6893 | // Any instruction we insert must appear after all PHIs in the block | |||
| 6894 | // for the block to be valid MIR. | |||
| 6895 | MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator()); | |||
| 6896 | if (InsertPt != OpDefBB.end() && InsertPt->isPHI()) | |||
| 6897 | InsertPt = OpDefBB.getFirstNonPHI(); | |||
| 6898 | MIB.setInsertPt(*OpDef->getParent(), InsertPt); | |||
| 6899 | auto Copy = MIB.buildCopy(Ty, OpReg); | |||
| 6900 | MRI.setRegBank(Copy.getReg(0), *DstRB); | |||
| 6901 | MO.setReg(Copy.getReg(0)); | |||
| 6902 | } | |||
| 6903 | } | |||
| 6904 | } | |||
| 6905 | ||||
| 6906 | void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { | |||
| 6907 | // We're looking for PHIs, build a list so we don't invalidate iterators. | |||
| 6908 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 6909 | SmallVector<MachineInstr *, 32> Phis; | |||
| 6910 | for (auto &BB : MF) { | |||
| 6911 | for (auto &MI : BB) { | |||
| 6912 | if (MI.getOpcode() == TargetOpcode::G_PHI) | |||
| 6913 | Phis.emplace_back(&MI); | |||
| 6914 | } | |||
| 6915 | } | |||
| 6916 | ||||
| 6917 | for (auto *MI : Phis) { | |||
| 6918 | // We need to do some work here if the operand types are < 16 bit and they | |||
| 6919 | // are split across fpr/gpr banks. Since all types <32b on gpr | |||
| 6920 | // end up being assigned gpr32 regclasses, we can end up with PHIs here | |||
| 6921 | // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't | |||
| 6922 | // be selecting heterogeneous regbanks for operands if possible, but we | |||
| 6923 | // still need to be able to deal with it here. | |||
| 6924 | // | |||
| 6925 | // To fix this, if we have a gpr-bank operand < 32b in size and at least | |||
| 6926 | // one other operand is on the fpr bank, then we add cross-bank copies | |||
| 6927 | // to homogenize the operand banks. For simplicity the bank that we choose | |||
| 6928 | // to settle on is whatever bank the def operand has. For example: | |||
| 6929 | // | |||
| 6930 | // %endbb: | |||
| 6931 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 | |||
| 6932 | // => | |||
| 6933 | // %bb2: | |||
| 6934 | // ... | |||
| 6935 | // %in2_copy:gpr(s16) = COPY %in2:fpr(s16) | |||
| 6936 | // ... | |||
| 6937 | // %endbb: | |||
| 6938 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 | |||
| 6939 | bool HasGPROp = false, HasFPROp = false; | |||
| 6940 | for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) { | |||
| 6941 | if (!MO.isReg()) | |||
| 6942 | continue; | |||
| 6943 | const LLT &Ty = MRI.getType(MO.getReg()); | |||
| 6944 | if (!Ty.isValid() || !Ty.isScalar()) | |||
| 6945 | break; | |||
| 6946 | if (Ty.getSizeInBits() >= 32) | |||
| 6947 | break; | |||
| 6948 | const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()); | |||
| 6949 | // If for some reason we don't have a regbank yet, don't try anything. | |||
| 6950 | if (!RB) | |||
| 6951 | break; | |||
| 6952 | ||||
| 6953 | if (RB->getID() == AArch64::GPRRegBankID) | |||
| 6954 | HasGPROp = true; | |||
| 6955 | else | |||
| 6956 | HasFPROp = true; | |||
| 6957 | } | |||
| 6958 | // We have heterogeneous regbanks, so fix them up. | |||
| 6959 | if (HasGPROp && HasFPROp) | |||
| 6960 | fixupPHIOpBanks(*MI, MRI, RBI); | |||
| 6961 | } | |||
| 6962 | } | |||
| 6963 | ||||
| 6964 | namespace llvm { | |||
| 6965 | InstructionSelector * | |||
| 6966 | createAArch64InstructionSelector(const AArch64TargetMachine &TM, | |||
| 6967 | AArch64Subtarget &Subtarget, | |||
| 6968 | AArch64RegisterBankInfo &RBI) { | |||
| 6969 | return new AArch64InstructionSelector(TM, Subtarget, RBI); | |||
| 6970 | } | |||
| 6971 | } |
| 1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains some functions that are useful for math stuff. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H |
| 14 | #define LLVM_SUPPORT_MATHEXTRAS_H |
| 15 | |
| 16 | #include "llvm/ADT/bit.h" |
| 17 | #include "llvm/Support/Compiler.h" |
| 18 | #include <cassert> |
| 19 | #include <climits> |
| 20 | #include <cstdint> |
| 21 | #include <cstring> |
| 22 | #include <limits> |
| 23 | #include <type_traits> |
| 24 | |
| 25 | namespace llvm { |
| 26 | |
| 27 | /// Mathematical constants. |
| 28 | namespace numbers { |
| 29 | // TODO: Track C++20 std::numbers. |
| 30 | // TODO: Favor using the hexadecimal FP constants (requires C++17). |
| 31 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 |
| 32 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 |
| 33 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 |
| 34 | ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 |
| 35 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) |
| 36 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) |
| 37 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 |
| 38 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 |
| 39 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 |
| 40 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 |
| 41 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
| 42 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) |
| 43 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 |
| 44 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) |
| 45 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 |
| 46 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 |
| 47 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 |
| 48 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 |
| 49 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 |
| 50 | log2ef = 1.44269504F, // (0x1.715476P+0) |
| 51 | log10ef = .434294482F, // (0x1.bcb7b2P-2) |
| 52 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 |
| 53 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 |
| 54 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 |
| 55 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 |
| 56 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 |
| 57 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) |
| 58 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 |
| 59 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) |
| 60 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 |
| 61 | } // namespace numbers |
| 62 | |
| 63 | /// Count number of 0's from the least significant bit to the most |
| 64 | /// stopping at the first 1. |
| 65 | /// |
| 66 | /// Only unsigned integral types are allowed. |
| 67 | /// |
| 68 | /// Returns std::numeric_limits<T>::digits on an input of 0. |
| 69 | template <typename T> |
| 70 | LLVM_DEPRECATED("Use llvm::countr_zero instead.", "llvm::countr_zero")
| 71 | unsigned countTrailingZeros(T Val) { |
| 72 | static_assert(std::is_unsigned_v<T>, |
| 73 | "Only unsigned integral types are allowed."); |
| 74 | return llvm::countr_zero(Val); |
| 75 | } |
| 76 | |
| 77 | /// Count number of 0's from the most significant bit to the least |
| 78 | /// stopping at the first 1. |
| 79 | /// |
| 80 | /// Only unsigned integral types are allowed. |
| 81 | /// |
| 82 | /// Returns std::numeric_limits<T>::digits on an input of 0. |
| 83 | template <typename T> |
| 84 | LLVM_DEPRECATED("Use llvm::countl_zero instead.", "llvm::countl_zero") |
| 85 | unsigned countLeadingZeros(T Val) { |
| 86 | static_assert(std::is_unsigned_v<T>, |
| 87 | "Only unsigned integral types are allowed."); |
| 88 | return llvm::countl_zero(Val); |
| 89 | } |
| 90 | |
| 91 | /// Create a bitmask with the N right-most bits set to 1, and all other |
| 92 | /// bits set to 0. Only unsigned types are allowed. |
| 93 | template <typename T> T maskTrailingOnes(unsigned N) { |
| 94 | static_assert(std::is_unsigned_v<T>, "Invalid type!"); |
| 95 | const unsigned Bits = CHAR_BIT * sizeof(T); |
| 96 | assert(N <= Bits && "Invalid bit index"); |
| 97 | return N == 0 ? 0 : (T(-1) >> (Bits - N)); |
| 98 | } |
| 99 | |
| 100 | /// Create a bitmask with the N left-most bits set to 1, and all other |
| 101 | /// bits set to 0. Only unsigned types are allowed. |
| 102 | template <typename T> T maskLeadingOnes(unsigned N) { |
| 103 | return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |
| 104 | } |
| 105 | |
| 106 | /// Create a bitmask with the N right-most bits set to 0, and all other |
| 107 | /// bits set to 1. Only unsigned types are allowed. |
| 108 | template <typename T> T maskTrailingZeros(unsigned N) { |
| 109 | return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N); |
| 110 | } |
| 111 | |
| 112 | /// Create a bitmask with the N left-most bits set to 0, and all other |
| 113 | /// bits set to 1. Only unsigned types are allowed. |
| 114 | template <typename T> T maskLeadingZeros(unsigned N) { |
| 115 | return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |
| 116 | } |
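| | // Editorial usage sketch, not part of the upstream header: a hypothetical |
| | // checker exercising the four mask helpers above on uint32_t. |
| | inline void checkMaskHelpers() { |
| | assert(maskTrailingOnes<uint32_t>(4) == 0x0000000Fu); |
| | assert(maskLeadingOnes<uint32_t>(4) == 0xF0000000u); |
| | assert(maskTrailingZeros<uint32_t>(4) == 0xFFFFFFF0u); |
| | assert(maskLeadingZeros<uint32_t>(4) == 0x0FFFFFFFu); |
| | } |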
| 117 | |
| 118 | /// Macro compressed bit reversal table for 256 bits. |
| 119 | /// |
| 120 | /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable |
| 121 | static const unsigned char BitReverseTable256[256] = { |
| 122 | #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 |
| 123 | #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) |
| 124 | #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) |
| 125 | R6(0), R6(2), R6(1), R6(3) |
| 126 | #undef R2 |
| 127 | #undef R4 |
| 128 | #undef R6 |
| 129 | }; |
| 130 | |
| 131 | /// Reverse the bits in \p Val. |
| 132 | template <typename T> T reverseBits(T Val) { |
| 133 | #if __has_builtin(__builtin_bitreverse8) |
| 134 | if constexpr (std::is_same_v<T, uint8_t>) |
| 135 | return __builtin_bitreverse8(Val); |
| 136 | #endif |
| 137 | #if __has_builtin(__builtin_bitreverse16) |
| 138 | if constexpr (std::is_same_v<T, uint16_t>) |
| 139 | return __builtin_bitreverse16(Val); |
| 140 | #endif |
| 141 | #if __has_builtin(__builtin_bitreverse32) |
| 142 | if constexpr (std::is_same_v<T, uint32_t>) |
| 143 | return __builtin_bitreverse32(Val); |
| 144 | #endif |
| 145 | #if __has_builtin(__builtin_bitreverse64) |
| 146 | if constexpr (std::is_same_v<T, uint64_t>) |
| 147 | return __builtin_bitreverse64(Val); |
| 148 | #endif |
| 149 | |
| 150 | unsigned char in[sizeof(Val)]; |
| 151 | unsigned char out[sizeof(Val)]; |
| 152 | std::memcpy(in, &Val, sizeof(Val)); |
| 153 | for (unsigned i = 0; i < sizeof(Val); ++i) |
| 154 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; |
| 155 | std::memcpy(&Val, out, sizeof(Val)); |
| 156 | return Val; |
| 157 | } |
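| | // Editorial usage sketch, not part of the upstream header: whether the |
| | // builtin or the table fallback is taken, the low bit moves to the high |
| | // bit, as a hypothetical checker can verify. |
| | inline void checkReverseBits() { |
| | assert(reverseBits<uint8_t>(0x01) == 0x80); |
| | assert(reverseBits<uint32_t>(0x0000FFFFu) == 0xFFFF0000u); |
| | } |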
| 158 | |
| 159 | // NOTE: The following support functions use the _32/_64 extensions instead of |
| 160 | // type overloading so that signed and unsigned integers can be used without |
| 161 | // ambiguity. |
| 162 | |
| 163 | /// Return the high 32 bits of a 64 bit value. |
| 164 | constexpr inline uint32_t Hi_32(uint64_t Value) { |
| 165 | return static_cast<uint32_t>(Value >> 32); |
| 166 | } |
| 167 | |
| 168 | /// Return the low 32 bits of a 64 bit value. |
| 169 | constexpr inline uint32_t Lo_32(uint64_t Value) { |
| 170 | return static_cast<uint32_t>(Value); |
| 171 | } |
| 172 | |
| 173 | /// Make a 64-bit integer from a high / low pair of 32-bit integers. |
| 174 | constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { |
| 175 | return ((uint64_t)High << 32) | (uint64_t)Low; |
| 176 | } |
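| | // Editorial sketch, not part of the upstream header: all three helpers |
| | // above are constexpr, so the split/recombine round-trip can be checked |
| | // at compile time. |
| | static_assert(Hi_32(0x123456789ABCDEF0ULL) == 0x12345678u, "high half"); |
| | static_assert(Lo_32(0x123456789ABCDEF0ULL) == 0x9ABCDEF0u, "low half"); |
| | static_assert(Make_64(0x12345678u, 0x9ABCDEF0u) == 0x123456789ABCDEF0ULL, "round-trip"); |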
| 177 | |
| 178 | /// Checks if an integer fits into the given bit width. |
| 179 | template <unsigned N> constexpr inline bool isInt(int64_t x) { |
| 180 | if constexpr (N == 8) |
| 181 | return static_cast<int8_t>(x) == x; |
| 182 | if constexpr (N == 16) |
| 183 | return static_cast<int16_t>(x) == x; |
| 184 | if constexpr (N == 32) |
| 185 | return static_cast<int32_t>(x) == x; |
| 186 | if constexpr (N < 64) |
| 187 | return -(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1)); |
| 188 | (void)x; // MSVC v19.25 warns that x is unused. |
| 189 | return true; |
| 190 | } |
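| | // Editorial sketch, not part of the upstream header: isInt<N> at the |
| | // boundaries of the signed 8-bit range, checked at compile time. |
| | static_assert(isInt<8>(127) && !isInt<8>(128), "int8 upper bound"); |
| | static_assert(isInt<8>(-128) && !isInt<8>(-129), "int8 lower bound"); |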
| 191 | |
| 192 | /// Checks if a signed integer is an N bit number shifted left by S. |
| 193 | template <unsigned N, unsigned S> |
| 194 | constexpr inline bool isShiftedInt(int64_t x) { |
| 195 | static_assert( |
| 196 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); |
| 197 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); |
| 198 | return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |
| 199 | } |
| 200 | |
| 201 | /// Checks if an unsigned integer fits into the given bit width. |
| 202 | template <unsigned N> constexpr inline bool isUInt(uint64_t x) { |
| 203 | static_assert(N > 0, "isUInt<0> doesn't make sense"); |
| 204 | if constexpr (N == 8) |
| 205 | return static_cast<uint8_t>(x) == x; |
| 206 | if constexpr (N == 16) |
| 207 | return static_cast<uint16_t>(x) == x; |
| 208 | if constexpr (N == 32) |
| 209 | return static_cast<uint32_t>(x) == x; |
| 210 | if constexpr (N < 64) |
| 211 | return x < (UINT64_C(1) << (N)); |
| 212 | (void)x; // MSVC v19.25 warns that x is unused. |
| 213 | return true; |
| 214 | } |
| 215 | |
| 216 | /// Checks if an unsigned integer is an N bit number shifted left by S. |
| 217 | template <unsigned N, unsigned S> |
| 218 | constexpr inline bool isShiftedUInt(uint64_t x) { |
| 219 | static_assert( |
| 220 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); |
| 221 | static_assert(N + S <= 64, |
| 222 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); |
| 223 | // Per the two static_asserts above, S must be strictly less than 64. So |
| 224 | // 1 << S is not undefined behavior. |
| 225 | return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |
| 226 | } |
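| | // Editorial sketch, not part of the upstream header: a shifted-int check |
| | // is a width check plus a divisibility check; e.g. for 8-bit values |
| | // shifted left by 2, 1020 qualifies but 1021 is not a multiple of 4. |
| | static_assert(isShiftedUInt<8, 2>(0x3FC) && !isShiftedUInt<8, 2>(0x3FD), ""); |
| | static_assert(isShiftedInt<8, 2>(-512) && !isShiftedInt<8, 2>(-514), ""); |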
| 227 | |
| 228 | /// Gets the maximum value for a N-bit unsigned integer. |
| 229 | inline uint64_t maxUIntN(uint64_t N) { |
| 230 | assert(N > 0 && N <= 64 && "integer width out of range"); |
| 231 | |
| 232 | // uint64_t(1) << 64 is undefined behavior, so we can't do |
| 233 | // (uint64_t(1) << N) - 1 |
| 234 | // without checking first that N != 64. But this works and doesn't have a |
| 235 | // branch. |
| 236 | return UINT64_MAX >> (64 - N); |
| 237 | } |
| 238 | |
| 239 | /// Gets the minimum value for a N-bit signed integer. |
| 240 | inline int64_t minIntN(int64_t N) { |
| 241 | assert(N > 0 && N <= 64 && "integer width out of range"); |
| 242 | |
| 243 | return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); |
| 244 | } |
| 245 | |
| 246 | /// Gets the maximum value for a N-bit signed integer. |
| 247 | inline int64_t maxIntN(int64_t N) { |
| 248 | assert(N > 0 && N <= 64 && "integer width out of range"); |
| 249 | |
| 250 | // This relies on two's complement wraparound when N == 64, so we convert to |
| 251 | // int64_t only at the very end to avoid UB. |
| 252 | return (UINT64_C(1) << (N - 1)) - 1; |
| 253 | } |
| 254 | |
| 255 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. |
| 256 | inline bool isUIntN(unsigned N, uint64_t x) { |
| 257 | return N >= 64 || x <= maxUIntN(N); |
| 258 | } |
| 259 | |
| 260 | /// Checks if a signed integer fits into the given (dynamic) bit width. |
| 261 | inline bool isIntN(unsigned N, int64_t x) { |
| 262 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); |
| 263 | } |
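| | // Editorial usage sketch, not part of the upstream header: a hypothetical |
| | // checker for the dynamic-width helpers at the 8-bit boundaries. |
| | inline void checkDynamicWidths() { |
| | assert(maxUIntN(8) == 255u && minIntN(8) == -128 && maxIntN(8) == 127); |
| | assert(isUIntN(8, 255) && !isUIntN(8, 256)); |
| | assert(isIntN(8, -128) && !isIntN(8, 128)); |
| | } |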
| 264 | |
| 265 | /// Return true if the argument is a non-empty sequence of ones starting at the |
| 266 | /// least significant bit with the remainder zero (32 bit version). |
| 267 | /// Ex. isMask_32(0x0000FFFFU) == true. |
| 268 | constexpr inline bool isMask_32(uint32_t Value) { |
| 269 | return Value && ((Value + 1) & Value) == 0; |
| 270 | } |
| 271 | |
| 272 | /// Return true if the argument is a non-empty sequence of ones starting at the |
| 273 | /// least significant bit with the remainder zero (64 bit version). |
| 274 | constexpr inline bool isMask_64(uint64_t Value) { |
| 275 | return Value && ((Value + 1) & Value) == 0; |
| 276 | } |
| 277 | |
| 278 | /// Return true if the argument contains a non-empty sequence of ones with the |
| 279 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |
| 280 | constexpr inline bool isShiftedMask_32(uint32_t Value) { |
| 281 | return Value && isMask_32((Value - 1) | Value); |
| 282 | } |
| 283 | |
| 284 | /// Return true if the argument contains a non-empty sequence of ones with the |
| 285 | /// remainder zero (64 bit version.) |
| 286 | constexpr inline bool isShiftedMask_64(uint64_t Value) { |
| 287 | return Value && isMask_64((Value - 1) | Value); |
| 288 | } |
| 289 | |
| 290 | /// Return true if the argument is a power of two > 0. |
| 291 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) |
| 292 | constexpr inline bool isPowerOf2_32(uint32_t Value) { |
| 293 | return llvm::has_single_bit(Value); |
| 294 | } |
| 295 | |
| 296 | /// Return true if the argument is a power of two > 0 (64 bit edition.) |
| 297 | constexpr inline bool isPowerOf2_64(uint64_t Value) { |
| 298 | return llvm::has_single_bit(Value); |
| 299 | } |
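| | // Editorial sketch, not part of the upstream header: all four predicates |
| | // above are constexpr, so the documented examples check statically. |
| | static_assert(isMask_32(0x0000FFFFu) && !isMask_32(0x0000FF00u), ""); |
| | static_assert(isShiftedMask_32(0x0000FF00u) && !isShiftedMask_32(0x0000FF0Fu), ""); |
| | static_assert(isPowerOf2_32(0x00100000u) && !isPowerOf2_32(0x00300000u), ""); |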
| 300 | |
| 301 | /// Count the number of ones from the most significant bit to the first |
| 302 | /// zero bit. |
| 303 | /// |
| 304 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |
| 305 | /// Only unsigned integral types are allowed. |
| 306 | /// |
| 307 | /// Returns std::numeric_limits<T>::digits on an input of all ones. |
| 308 | template <typename T> |
| 309 | LLVM_DEPRECATED("Use llvm::countl_one instead.", "llvm::countl_one") |
| 310 | unsigned countLeadingOnes(T Value) { |
| 311 | static_assert(std::is_unsigned_v<T>, |
| 312 | "Only unsigned integral types are allowed."); |
| 313 | return llvm::countl_one<T>(Value); |
| 314 | } |
| 315 | |
| 316 | /// Count the number of ones from the least significant bit to the first |
| 317 | /// zero bit. |
| 318 | /// |
| 319 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |
| 320 | /// Only unsigned integral types are allowed. |
| 321 | /// |
| 322 | /// Returns std::numeric_limits<T>::digits on an input of all ones. |
| 323 | template <typename T> |
| 324 | LLVM_DEPRECATED("Use llvm::countr_one instead.", "llvm::countr_one") |
| 325 | unsigned countTrailingOnes(T Value) { |
| 326 | static_assert(std::is_unsigned_v<T>, |
| 327 | "Only unsigned integral types are allowed."); |
| 328 | return llvm::countr_one<T>(Value); |
| 329 | } |
| 330 | |
| 331 | /// Count the number of set bits in a value. |
| 332 | /// Ex. countPopulation(0xF000F000) = 8 |
| 333 | /// Returns 0 if the word is zero. |
| 334 | template <typename T> |
| 335 | LLVM_DEPRECATED("Use llvm::popcount instead.", "llvm::popcount") |
| 336 | inline unsigned countPopulation(T Value) { |
| 337 | static_assert(std::is_unsigned_v<T>, |
| 338 | "Only unsigned integral types are allowed."); |
| 339 | return (unsigned)llvm::popcount(Value); |
| 340 | } |
| 341 | |
| 342 | /// Return true if the argument contains a non-empty sequence of ones with the |
| 343 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |
| 344 | /// If true, \p MaskIdx will specify the index of the lowest set bit and \p |
| 345 | /// MaskLen is updated to specify the length of the mask, else neither are |
| 346 | /// updated. |
| 347 | inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx, |
| 348 | unsigned &MaskLen) { |
| 349 | if (!isShiftedMask_32(Value)) |
| 350 | return false; |
| 351 | MaskIdx = llvm::countr_zero(Value); |
| 352 | MaskLen = llvm::popcount(Value); |
| 353 | return true; |
| 354 | } |
| 355 | |
| 356 | /// Return true if the argument contains a non-empty sequence of ones with the |
| 357 | /// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index |
| 358 | /// of the lowest set bit and \p MaskLen is updated to specify the length of the |
| 359 | /// mask, else neither are updated. |
| 360 | inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx, |
| 361 | unsigned &MaskLen) { |
| 362 | if (!isShiftedMask_64(Value)) |
| 363 | return false; |
| 364 | MaskIdx = llvm::countr_zero(Value); |
| 365 | MaskLen = llvm::popcount(Value); |
| 366 | return true; |
| 367 | } |
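| | // Editorial usage sketch, not part of the upstream header: the |
| | // out-parameter overloads report where the mask starts and its length, |
| | // as a hypothetical checker shows. |
| | inline void checkShiftedMaskParts() { |
| | unsigned Idx = 0, Len = 0; |
| | assert(isShiftedMask_32(0x0000FF00u, Idx, Len) && Idx == 8 && Len == 8); |
| | } |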
| 368 | |
| 369 | /// Compile time Log2. |
| 370 | /// Valid only for positive powers of two. |
| 371 | template <size_t kValue> constexpr inline size_t CTLog2() { |
| 372 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), |
| 373 | "Value is not a valid power of 2"); |
| 374 | return 1 + CTLog2<kValue / 2>(); |
| 375 | } |
| 376 | |
| 377 | template <> constexpr inline size_t CTLog2<1>() { return 0; } |
| 378 | |
| 379 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
| 380 | /// (32 bit edition.) |
| 381 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 |
| 382 | inline unsigned Log2_32(uint32_t Value) { |
| 383 | return 31 - llvm::countl_zero(Value); |
| 384 | } |
| 385 | |
| 386 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
| 387 | /// (64 bit edition.) |
| 388 | inline unsigned Log2_64(uint64_t Value) { |
| 389 | return 63 - llvm::countl_zero(Value); |
| 390 | } |
| 391 | |
| 392 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. |
| 393 | /// (32 bit edition). |
| 394 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 |
| 395 | inline unsigned Log2_32_Ceil(uint32_t Value) { |
| 396 | return 32 - llvm::countl_zero(Value - 1); |
| 397 | } |
| 398 | |
| 399 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. |
| 400 | /// (64 bit edition.) |
| 401 | inline unsigned Log2_64_Ceil(uint64_t Value) { |
| 402 | return 64 - llvm::countl_zero(Value - 1); |
| 403 | } |
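| | // Editorial usage sketch, not part of the upstream header: floor vs. ceil |
| | // log2, matching the examples in the comments above. |
| | inline void checkLog2() { |
| | assert(Log2_32(32) == 5 && Log2_32(6) == 2); |
| | assert(Log2_32_Ceil(32) == 5 && Log2_32_Ceil(6) == 3); |
| | assert(CTLog2<64>() == 6); |
| | } |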
| 404 | |
| 405 | /// This function takes a 64-bit integer and returns the bit equivalent double. |
| 406 | LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<double>") |
| 407 | inline double BitsToDouble(uint64_t Bits) { |
| 408 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |
| 409 | return llvm::bit_cast<double>(Bits); |
| 410 | } |
| 411 | |
| 412 | /// This function takes a 32-bit integer and returns the bit equivalent float. |
| 413 | LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<float>") |
| 414 | inline float BitsToFloat(uint32_t Bits) { |
| 415 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |
| 416 | return llvm::bit_cast<float>(Bits); |
| 417 | } |
| 418 | |
| 419 | /// This function takes a double and returns the bit equivalent 64-bit integer. |
| 420 | /// Note that copying doubles around changes the bits of NaNs on some hosts, |
| 421 | /// notably x86, so this routine cannot be used if these bits are needed. |
| 422 | LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<uint64_t>") |
| 423 | inline uint64_t DoubleToBits(double Double) { |
| 424 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |
| 425 | return llvm::bit_cast<uint64_t>(Double); |
| 426 | } |
| 427 | |
| 428 | /// This function takes a float and returns the bit equivalent 32-bit integer. |
| 429 | /// Note that copying floats around changes the bits of NaNs on some hosts, |
| 430 | /// notably x86, so this routine cannot be used if these bits are needed. |
| 431 | LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<uint32_t>") |
| 432 | inline uint32_t FloatToBits(float Float) { |
| 433 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |
| 434 | return llvm::bit_cast<uint32_t>(Float); |
| 435 | } |
| 436 | |
| 437 | /// A and B are either alignments or offsets. Return the minimum alignment that |
| 438 | /// may be assumed after adding the two together. |
| 439 | constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { |
| 440 | // The largest power of 2 that divides both A and B. |
| 441 | // |
| 442 | // Replace "-Value" by "1+~Value" in the following commented code to avoid |
| 443 | // MSVC warning C4146 |
| 444 | // return (A | B) & -(A | B); |
| 445 | return (A | B) & (1 + ~(A | B)); |
| 446 | } |
| 447 | |
| 448 | /// Returns the next power of two (in 64-bits) that is strictly greater than A. |
| 449 | /// Returns zero on overflow. |
| 450 | constexpr inline uint64_t NextPowerOf2(uint64_t A) { |
| 451 | A |= (A >> 1); |
| 452 | A |= (A >> 2); |
| 453 | A |= (A >> 4); |
| 454 | A |= (A >> 8); |
| 455 | A |= (A >> 16); |
| 456 | A |= (A >> 32); |
| 457 | return A + 1; |
| 458 | } |
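| | // Editorial sketch, not part of the upstream header: both helpers above |
| | // are constexpr. MinAlign keeps the largest power of 2 dividing both |
| | // inputs; NextPowerOf2 is strictly greater than its argument. |
| | static_assert(MinAlign(8, 12) == 4, "largest common power of 2"); |
| | static_assert(NextPowerOf2(4) == 8 && NextPowerOf2(5) == 8, "strictly greater"); |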
| 459 | |
| 460 | /// Returns the power of two which is less than or equal to the given value. |
| 461 | /// Essentially, it is a floor operation across the domain of powers of two. |
| 462 | LLVM_DEPRECATED("use llvm::bit_floor instead", "llvm::bit_floor") |
| 463 | inline uint64_t PowerOf2Floor(uint64_t A) { |
| 464 | return llvm::bit_floor(A); |
| 465 | } |
| 466 | |
| 467 | /// Returns the power of two which is greater than or equal to the given value. |
| 468 | /// Essentially, it is a ceil operation across the domain of powers of two. |
| 469 | inline uint64_t PowerOf2Ceil(uint64_t A) { |
| 470 | if (!A) |
| 471 | return 0; |
| 472 | return NextPowerOf2(A - 1); |
| 473 | } |
| 474 | |
| 475 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
| 476 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. |
| 477 | /// |
| 478 | /// Examples: |
| 479 | /// \code |
| 480 | /// alignTo(5, 8) = 8 |
| 481 | /// alignTo(17, 8) = 24 |
| 482 | /// alignTo(~0LL, 8) = 0 |
| 483 | /// alignTo(321, 255) = 510 |
| 484 | /// \endcode |
| 485 | inline uint64_t alignTo(uint64_t Value, uint64_t Align) { |
| 486 | assert(Align != 0u && "Align can't be 0."); |
| 487 | return (Value + Align - 1) / Align * Align; |
| 488 | } |
| 489 | |
| 490 | inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) { |
| 491 | assert(Align != 0 && (Align & (Align - 1)) == 0 && |
| 492 | "Align must be a power of 2"); |
| 493 | return (Value + Align - 1) & -Align; |
| 494 | } |
| 495 | |
| 496 | /// If non-zero \p Skew is specified, the return value will be a minimal integer |
| 497 | /// that is greater than or equal to \p Value and equal to \p Align * N + \p Skew |
| 498 | /// for some integer N. If \p Skew is larger than \p Align, its value is adjusted |
| 499 | /// to '\p Skew mod \p Align'. \p Align must be non-zero. |
| 500 | /// |
| 501 | /// Examples: |
| 502 | /// \code |
| 503 | /// alignTo(5, 8, 7) = 7 |
| 504 | /// alignTo(17, 8, 1) = 17 |
| 505 | /// alignTo(~0LL, 8, 3) = 3 |
| 506 | /// alignTo(321, 255, 42) = 552 |
| 507 | /// \endcode |
| 508 | inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) { |
| 509 | assert(Align != 0u && "Align can't be 0."); |
| 510 | Skew %= Align; |
| 511 | return alignTo(Value - Skew, Align) + Skew; |
| 512 | } |
| 513 | |
| 514 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
| 515 | /// \p Value and is a multiple of \c Align. \c Align must be non-zero. |
| 516 | template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { |
| 517 | static_assert(Align != 0u, "Align must be non-zero"); |
| 518 | return (Value + Align - 1) / Align * Align; |
| 519 | } |
| 520 | |
| 521 | /// Returns the integer ceil(Numerator / Denominator). |
| 522 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { |
| 523 | return alignTo(Numerator, Denominator) / Denominator; |
| 524 | } |
| 525 | |
| 526 | /// Returns the integer nearest(Numerator / Denominator). |
| 527 | inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { |
| 528 | return (Numerator + (Denominator / 2)) / Denominator; |
| 529 | } |
| 530 | |
| 531 | /// Returns the largest uint64_t that is less than or equal to \p Value and |
| 532 | /// is \p Skew mod \p Align. \p Align must be non-zero. |
| 533 | inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |
| 534 | assert(Align != 0u && "Align can't be 0."); |
| 535 | Skew %= Align; |
| 536 | return (Value - Skew) / Align * Align + Skew; |
| 537 | } |
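| | // Editorial usage sketch, not part of the upstream header: rounding up, |
| | // down, and with a skew, matching the documented examples above. |
| | inline void checkAlignment() { |
| | assert(alignTo(5, 8) == 8 && alignDown(5, 8) == 0); |
| | assert(alignTo(5, 8, 7) == 7 && alignTo(17, 8, 1) == 17); |
| | assert(divideCeil(7, 2) == 4 && divideNearest(7, 2) == 4); |
| | } |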
| 538 | |
| 539 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
| 540 | /// Requires 0 < B <= 32. |
| 541 | template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { |
| 542 | static_assert(B > 0, "Bit width can't be 0."); |
| 543 | static_assert(B <= 32, "Bit width out of range."); |
| 544 | return int32_t(X << (32 - B)) >> (32 - B); |
| 545 | } |
| 546 | |
| 547 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
| 548 | /// Requires 0 < B <= 32. |
| 549 | inline int32_t SignExtend32(uint32_t X, unsigned B) { |
| 550 | assert(B > 0 && "Bit width can't be 0."); |
| 551 | assert(B <= 32 && "Bit width out of range."); |
| 552 | return int32_t(X << (32 - B)) >> (32 - B); |
| 553 | } |
| 554 | |
| 555 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
| 556 | /// Requires 0 < B <= 64. |
| 557 | template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { |
| 558 | static_assert(B > 0, "Bit width can't be 0."); |
| 559 | static_assert(B <= 64, "Bit width out of range."); |
| 560 | return int64_t(x << (64 - B)) >> (64 - B); |
| 561 | } |
| 562 | |
| 563 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
| 564 | /// Requires 0 < B <= 64. |
| 565 | inline int64_t SignExtend64(uint64_t X, unsigned B) { |
| 566 | assert(B > 0 && "Bit width can't be 0."); |
| 567 | assert(B <= 64 && "Bit width out of range."); |
| 568 | return int64_t(X << (64 - B)) >> (64 - B); |
| 569 | } |
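| | // Editorial sketch, not part of the upstream header: the shift-up/ |
| | // arithmetic-shift-down trick replicates the sign bit; the template |
| | // variants are constexpr and can be checked statically. |
| | static_assert(SignExtend32<8>(0xFFu) == -1, "all-ones byte is -1"); |
| | static_assert(SignExtend32<8>(0x7Fu) == 127, "sign bit clear"); |
| | static_assert(SignExtend64<32>(0x80000000u) == -2147483648LL, ""); |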
| 570 | |
| 571 | /// Subtract two unsigned integers, X and Y, of type T and return the absolute |
| 572 | /// value of the result. |
| 573 | template <typename T> |
| 574 | std::enable_if_t<std::is_unsigned_v<T>, T> AbsoluteDifference(T X, T Y) { |
| 575 | return X > Y ? (X - Y) : (Y - X); |
| 576 | } |
| 577 | |
| 578 | /// Add two unsigned integers, X and Y, of type T. Clamp the result to the |
| 579 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
| 580 | /// the result is larger than the maximum representable value of type T. |
| 581 | template <typename T> |
| 582 | std::enable_if_t<std::is_unsigned_v<T>, T> |
| 583 | SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { |
| 584 | bool Dummy; |
| 585 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
| 586 | // Hacker's Delight, p. 29 |
| 587 | T Z = X + Y; |
| 588 | Overflowed = (Z < X || Z < Y); |
| 589 | if (Overflowed) |
| 590 | return std::numeric_limits<T>::max(); |
| 591 | else |
| 592 | return Z; |
| 593 | } |
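| | // Editorial usage sketch, not part of the upstream header: saturation |
| | // clamps to the type's maximum and reports overflow via the out-parameter. |
| | inline void checkSaturatingAdd() { |
| | bool Overflow = false; |
| | assert(SaturatingAdd<uint8_t>(200, 100, &Overflow) == 255 && Overflow); |
| | assert(SaturatingAdd<uint8_t>(1, 2, &Overflow) == 3 && !Overflow); |
| | } |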
| 594 | |
| 595 | /// Add multiple unsigned integers of type T. Clamp the result to the |
| 596 | /// maximum representable value of T on overflow. |
| 597 | template <class T, class... Ts> |
| 598 | std::enable_if_t<std::is_unsigned_v<T>, T> SaturatingAdd(T X, T Y, T Z, |
| 599 | Ts... Args) { |
| 600 | bool Overflowed = false; |
| 601 | T XY = SaturatingAdd(X, Y, &Overflowed); |
| 602 | if (Overflowed) |
| 603 | return SaturatingAdd(std::numeric_limits<T>::max(), T(1), Args...); |
| 604 | return SaturatingAdd(XY, Z, Args...); |
| 605 | } |
| 606 | |
| 607 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the |
| 608 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
| 609 | /// the result is larger than the maximum representable value of type T. |
| 610 | template <typename T> |
| 611 | std::enable_if_t<std::is_unsigned_v<T>, T> |
| 612 | SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { |
| 613 | bool Dummy; |
| 614 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
| 615 | |
| 616 | // Hacker's Delight, p. 30 has a different algorithm, but we don't use that |
| 617 | // because it fails for uint16_t (where multiplication can have undefined |
| 618 | // behavior due to promotion to int), and requires a division in addition |
| 619 | // to the multiplication. |
| 620 | |
| 621 | Overflowed = false; |
| 622 | |
| 623 | // Log2(Z) would be either Log2Z or Log2Z + 1. |
| 624 | // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z |
| 625 | // will necessarily be less than Log2Max as desired. |
| 626 | int Log2Z = Log2_64(X) + Log2_64(Y); |
| 627 | const T Max = std::numeric_limits<T>::max(); |
| 628 | int Log2Max = Log2_64(Max); |
| 629 | if (Log2Z < Log2Max) { |
| 630 | return X * Y; |
| 631 | } |
| 632 | if (Log2Z > Log2Max) { |
| 633 | Overflowed = true; |
| 634 | return Max; |
| 635 | } |
| 636 | |
| 637 | // We're going to use the top bit, and maybe overflow one |
| 638 | // bit past it. Multiply all but the bottom bit then add |
| 639 | // that on at the end. |
| 640 | T Z = (X >> 1) * Y; |
| 641 | if (Z & ~(Max >> 1)) { |
| 642 | Overflowed = true; |
| 643 | return Max; |
| 644 | } |
| 645 | Z <<= 1; |
| 646 | if (X & 1) |
| 647 | return SaturatingAdd(Z, Y, ResultOverflowed); |
| 648 | |
| 649 | return Z; |
| 650 | } |
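| | // Editorial usage sketch, not part of the upstream header: the log2 |
| | // pre-check resolves the clear cases; 255*255 fits a uint16_t while |
| | // 256*256 saturates. |
| | inline void checkSaturatingMultiply() { |
| | bool Overflow = false; |
| | assert(SaturatingMultiply<uint16_t>(256, 256, &Overflow) == 65535 && Overflow); |
| | assert(SaturatingMultiply<uint16_t>(255, 255, &Overflow) == 65025 && !Overflow); |
| | } |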
| 651 | |
| 652 | /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to |
| 653 | /// the product. Clamp the result to the maximum representable value of T on |
| 654 | /// overflow. ResultOverflowed indicates if the result is larger than the |
| 655 | /// maximum representable value of type T. |
| 656 | template <typename T> |
| 657 | std::enable_if_t<std::is_unsigned_v<T>, T> |
| 658 | SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { |
| 659 | bool Dummy; |
| 660 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
| 661 | |
| 662 | T Product = SaturatingMultiply(X, Y, &Overflowed); |
| 663 | if (Overflowed) |
| 664 | return Product; |
| 665 | |
| 666 | return SaturatingAdd(A, Product, &Overflowed); |
| 667 | } |
| 668 | |
| 669 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. |
| 670 | extern const float huge_valf; |
| 671 | |
| 672 | |
| 673 | /// Add two signed integers, computing the two's complement truncated result, |
| 674 | /// returning true if overflow occurred. |
| 675 | template <typename T> |
| 676 | std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) { |
| 677 | #if __has_builtin(__builtin_add_overflow) |
| 678 | return __builtin_add_overflow(X, Y, &Result); |
| 679 | #else |
| 680 | // Perform the unsigned addition. |
| 681 | using U = std::make_unsigned_t<T>; |
| 682 | const U UX = static_cast<U>(X); |
| 683 | const U UY = static_cast<U>(Y); |
| 684 | const U UResult = UX + UY; |
| 685 | |
| 686 | // Convert to signed. |
| 687 | Result = static_cast<T>(UResult); |
| 688 | |
| 689 | // Adding two positive numbers should result in a positive number. |
| 690 | if (X > 0 && Y > 0) |
| 691 | return Result <= 0; |
| 692 | // Adding two negatives should result in a negative number. |
| 693 | if (X < 0 && Y < 0) |
| 694 | return Result >= 0; |
| 695 | return false; |
| 696 | #endif |
| 697 | } |
| 698 | |
| 699 | /// Subtract two signed integers, computing the two's complement truncated |
| 700 | /// result, returning true if an overflow occurred. |
| 701 | template <typename T> |
| 702 | std::enable_if_t<std::is_signed_v<T>, T> SubOverflow(T X, T Y, T &Result) { |
| 703 | #if __has_builtin(__builtin_sub_overflow) |
| 704 | return __builtin_sub_overflow(X, Y, &Result); |
| 705 | #else |
| 706 | // Perform the unsigned subtraction. |
| 707 | using U = std::make_unsigned_t<T>; |
| 708 | const U UX = static_cast<U>(X); |
| 709 | const U UY = static_cast<U>(Y); |
| 710 | const U UResult = UX - UY; |
| 711 | |
| 712 | // Convert to signed. |
| 713 | Result = static_cast<T>(UResult); |
| 714 | |
| 715 | // Subtracting a positive number from a negative results in a negative number. |
| 716 | if (X <= 0 && Y > 0) |
| 717 | return Result >= 0; |
| 718 | // Subtracting a negative number from a positive results in a positive number. |
| 719 | if (X >= 0 && Y < 0) |
| 720 | return Result <= 0; |
| 721 | return false; |
| 722 | #endif |
| 723 | } |
| 724 | |
| 725 | /// Multiply two signed integers, computing the two's complement truncated |
| 726 | /// result, returning true if an overflow occurred. |
| 727 | template <typename T> |
| 728 | std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) { |
| 729 | // Perform the unsigned multiplication on absolute values. |
| 730 | using U = std::make_unsigned_t<T>; |
| 731 | const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); |
| 732 | const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); |
| 733 | const U UResult = UX * UY; |
| 734 | |
| 735 | // Convert to signed. |
| 736 | const bool IsNegative = (X < 0) ^ (Y < 0); |
| 737 | Result = IsNegative ? (0 - UResult) : UResult; |
| 738 | |
| 739 | // If any of the args was 0, result is 0 and no overflow occurs. |
| 740 | if (UX == 0 || UY == 0) |
| 741 | return false; |
| 742 | |
| 743 | // UX and UY are in [1, 2^n], where n is the number of digits. |
| 744 | // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for |
| 745 | // positive) divided by an argument compares to the other. |
| 746 | if (IsNegative) |
| 747 | return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; |
| 748 | else |
| 749 | return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; |
| 750 | } |
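| | // Editorial usage sketch, not part of the upstream header: the wrapped |
| | // two's complement result is still written on overflow, so callers get |
| | // both the truncated value and the flag. |
| | inline void checkOverflowHelpers() { |
| | int8_t R; |
| | assert(AddOverflow<int8_t>(100, 100, R) && R == -56); |
| | assert(!SubOverflow<int8_t>(-100, 27, R) && R == -127); |
| | assert(MulOverflow<int8_t>(16, 8, R) && R == -128); |
| | } |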
| 751 | |
| 752 | } // End llvm namespace |
| 753 | |
| 754 | #endif |