File: build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 1022, column 7: 1st function call argument is an uninitialized value
1 | //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | /// \file | ||||
9 | /// This file implements the targeting of the InstructionSelector class for | ||||
10 | /// AArch64. | ||||
11 | /// \todo This should be generated by TableGen. | ||||
12 | //===----------------------------------------------------------------------===// | ||||
13 | |||||
14 | #include "AArch64GlobalISelUtils.h" | ||||
15 | #include "AArch64InstrInfo.h" | ||||
16 | #include "AArch64MachineFunctionInfo.h" | ||||
17 | #include "AArch64RegisterBankInfo.h" | ||||
18 | #include "AArch64RegisterInfo.h" | ||||
19 | #include "AArch64Subtarget.h" | ||||
20 | #include "AArch64TargetMachine.h" | ||||
21 | #include "MCTargetDesc/AArch64AddressingModes.h" | ||||
22 | #include "MCTargetDesc/AArch64MCTargetDesc.h" | ||||
23 | #include "llvm/ADT/Optional.h" | ||||
24 | #include "llvm/BinaryFormat/Dwarf.h" | ||||
25 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" | ||||
26 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" | ||||
27 | #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" | ||||
28 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" | ||||
29 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" | ||||
30 | #include "llvm/CodeGen/GlobalISel/Utils.h" | ||||
31 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||||
32 | #include "llvm/CodeGen/MachineConstantPool.h" | ||||
33 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||||
34 | #include "llvm/CodeGen/MachineFunction.h" | ||||
35 | #include "llvm/CodeGen/MachineInstr.h" | ||||
36 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||
37 | #include "llvm/CodeGen/MachineMemOperand.h" | ||||
38 | #include "llvm/CodeGen/MachineOperand.h" | ||||
39 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
40 | #include "llvm/CodeGen/TargetOpcodes.h" | ||||
41 | #include "llvm/IR/Constants.h" | ||||
42 | #include "llvm/IR/DerivedTypes.h" | ||||
43 | #include "llvm/IR/Instructions.h" | ||||
44 | #include "llvm/IR/IntrinsicsAArch64.h" | ||||
45 | #include "llvm/IR/PatternMatch.h" | ||||
46 | #include "llvm/IR/Type.h" | ||||
47 | #include "llvm/Pass.h" | ||||
48 | #include "llvm/Support/Debug.h" | ||||
49 | #include "llvm/Support/raw_ostream.h" | ||||
50 | |||||
51 | #define DEBUG_TYPE "aarch64-isel" | ||||
52 | |||||
53 | using namespace llvm; | ||||
54 | using namespace MIPatternMatch; | ||||
55 | using namespace AArch64GISelUtils; | ||||
56 | |||||
57 | namespace llvm { | ||||
58 | class BlockFrequencyInfo; | ||||
59 | class ProfileSummaryInfo; | ||||
60 | } | ||||
61 | |||||
62 | namespace { | ||||
63 | |||||
64 | #define GET_GLOBALISEL_PREDICATE_BITSET | ||||
65 | #include "AArch64GenGlobalISel.inc" | ||||
66 | #undef GET_GLOBALISEL_PREDICATE_BITSET | ||||
67 | |||||
68 | |||||
69 | class AArch64InstructionSelector : public InstructionSelector { | ||||
70 | public: | ||||
71 | AArch64InstructionSelector(const AArch64TargetMachine &TM, | ||||
72 | const AArch64Subtarget &STI, | ||||
73 | const AArch64RegisterBankInfo &RBI); | ||||
74 | |||||
75 | bool select(MachineInstr &I) override; | ||||
76 | static const char *getName() { return DEBUG_TYPE; } | ||||
77 | |||||
78 | void setupMF(MachineFunction &MF, GISelKnownBits *KB, | ||||
79 | CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI, | ||||
80 | BlockFrequencyInfo *BFI) override { | ||||
81 | InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); | ||||
82 | MIB.setMF(MF); | ||||
83 | |||||
84 | // hasFnAttribute() is expensive to call on every BRCOND selection, so | ||||
85 | // cache it here for each run of the selector. | ||||
86 | ProduceNonFlagSettingCondBr = | ||||
87 | !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); | ||||
88 | MFReturnAddr = Register(); | ||||
89 | |||||
90 | processPHIs(MF); | ||||
91 | } | ||||
92 | |||||
93 | private: | ||||
94 | /// tblgen-erated 'select' implementation, used as the initial selector for | ||||
95 | /// the patterns that don't require complex C++. | ||||
96 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; | ||||
97 | |||||
98 | // A lowering phase that runs before any selection attempts. | ||||
99 | // Returns true if the instruction was modified. | ||||
100 | bool preISelLower(MachineInstr &I); | ||||
101 | |||||
102 | // An early selection function that runs before the selectImpl() call. | ||||
103 | bool earlySelect(MachineInstr &I); | ||||
104 | |||||
105 | // Do some preprocessing of G_PHIs before we begin selection. | ||||
106 | void processPHIs(MachineFunction &MF); | ||||
107 | |||||
108 | bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
109 | |||||
110 | /// Eliminate same-sized cross-bank copies into stores before selectImpl(). | ||||
111 | bool contractCrossBankCopyIntoStore(MachineInstr &I, | ||||
112 | MachineRegisterInfo &MRI); | ||||
113 | |||||
114 | bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
115 | |||||
116 | bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, | ||||
117 | MachineRegisterInfo &MRI) const; | ||||
118 | bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, | ||||
119 | MachineRegisterInfo &MRI) const; | ||||
120 | |||||
121 | ///@{ | ||||
122 | /// Helper functions for selectCompareBranch. | ||||
123 | bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, | ||||
124 | MachineIRBuilder &MIB) const; | ||||
125 | bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, | ||||
126 | MachineIRBuilder &MIB) const; | ||||
127 | bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, | ||||
128 | MachineIRBuilder &MIB) const; | ||||
129 | bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, | ||||
130 | MachineBasicBlock *DstMBB, | ||||
131 | MachineIRBuilder &MIB) const; | ||||
132 | ///@} | ||||
133 | |||||
134 | bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, | ||||
135 | MachineRegisterInfo &MRI); | ||||
136 | |||||
137 | bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
138 | bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
139 | |||||
140 | // Helper to generate an equivalent of scalar_to_vector into a new register, | ||||
141 | // returned via 'Dst'. | ||||
142 | MachineInstr *emitScalarToVector(unsigned EltSize, | ||||
143 | const TargetRegisterClass *DstRC, | ||||
144 | Register Scalar, | ||||
145 | MachineIRBuilder &MIRBuilder) const; | ||||
146 | |||||
147 | /// Emit a lane insert into \p DstReg, or a new vector register if None is | ||||
148 | /// provided. | ||||
149 | /// | ||||
150 | /// The lane inserted into is defined by \p LaneIdx. The vector source | ||||
151 | /// register is given by \p SrcReg. The register containing the element is | ||||
152 | /// given by \p EltReg. | ||||
153 | MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg, | ||||
154 | Register EltReg, unsigned LaneIdx, | ||||
155 | const RegisterBank &RB, | ||||
156 | MachineIRBuilder &MIRBuilder) const; | ||||
157 | |||||
158 | /// Emit a sequence of instructions representing a constant \p CV for a | ||||
159 | /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) | ||||
160 | /// | ||||
161 | /// \returns the last instruction in the sequence on success, and nullptr | ||||
162 | /// otherwise. | ||||
163 | MachineInstr *emitConstantVector(Register Dst, Constant *CV, | ||||
164 | MachineIRBuilder &MIRBuilder, | ||||
165 | MachineRegisterInfo &MRI); | ||||
166 | |||||
167 | bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
168 | bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, | ||||
169 | MachineRegisterInfo &MRI); | ||||
170 | /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a | ||||
171 | /// SUBREG_TO_REG. | ||||
172 | bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); | ||||
173 | bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
174 | bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
175 | bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
176 | |||||
177 | bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
178 | bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
179 | bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
180 | bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
181 | |||||
182 | /// Helper function to select vector load intrinsics like | ||||
183 | /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. | ||||
184 | /// \p Opc is the opcode that the selected instruction should use. | ||||
185 | /// \p NumVecs is the number of vector destinations for the instruction. | ||||
186 | /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. | ||||
187 | bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, | ||||
188 | MachineInstr &I); | ||||
189 | bool selectIntrinsicWithSideEffects(MachineInstr &I, | ||||
190 | MachineRegisterInfo &MRI); | ||||
191 | bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
192 | bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
193 | bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const; | ||||
194 | bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; | ||||
195 | bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
196 | bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
197 | bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
198 | bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
199 | bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
200 | bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); | ||||
201 | |||||
202 | unsigned emitConstantPoolEntry(const Constant *CPVal, | ||||
203 | MachineFunction &MF) const; | ||||
204 | MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, | ||||
205 | MachineIRBuilder &MIRBuilder) const; | ||||
206 | |||||
207 | // Emit a vector concat operation. | ||||
208 | MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1, | ||||
209 | Register Op2, | ||||
210 | MachineIRBuilder &MIRBuilder) const; | ||||
211 | |||||
212 | // Emit an integer compare between LHS and RHS, which checks for Predicate. | ||||
213 | MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, | ||||
214 | MachineOperand &Predicate, | ||||
215 | MachineIRBuilder &MIRBuilder) const; | ||||
216 | |||||
217 | /// Emit a floating point comparison between \p LHS and \p RHS. | ||||
218 | /// \p Pred, if given, is the intended predicate to use. | ||||
219 | MachineInstr *emitFPCompare(Register LHS, Register RHS, | ||||
220 | MachineIRBuilder &MIRBuilder, | ||||
221 | Optional<CmpInst::Predicate> = None) const; | ||||
222 | |||||
223 | MachineInstr *emitInstr(unsigned Opcode, | ||||
224 | std::initializer_list<llvm::DstOp> DstOps, | ||||
225 | std::initializer_list<llvm::SrcOp> SrcOps, | ||||
226 | MachineIRBuilder &MIRBuilder, | ||||
227 | const ComplexRendererFns &RenderFns = None) const; | ||||
228 | /// Helper function to emit an add or sub instruction. | ||||
229 | /// | ||||
230 | /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants below | ||||
231 | /// in a specific order. | ||||
232 | /// | ||||
233 | /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. | ||||
234 | /// | ||||
235 | /// \code | ||||
236 | /// const std::array<std::array<unsigned, 2>, 5> Table { | ||||
237 | /// {{AArch64::ADDXri, AArch64::ADDWri}, | ||||
238 | /// {AArch64::ADDXrs, AArch64::ADDWrs}, | ||||
239 | /// {AArch64::ADDXrr, AArch64::ADDWrr}, | ||||
240 | /// {AArch64::SUBXri, AArch64::SUBWri}, | ||||
241 | /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; | ||||
242 | /// \endcode | ||||
243 | /// | ||||
244 | /// Each row in the table corresponds to a different addressing mode. Each | ||||
245 | /// column corresponds to a different register size. | ||||
246 | /// | ||||
247 | /// \attention Rows must be structured as follows: | ||||
248 | /// - Row 0: The ri opcode variants | ||||
249 | /// - Row 1: The rs opcode variants | ||||
250 | /// - Row 2: The rr opcode variants | ||||
251 | /// - Row 3: The ri opcode variants for negative immediates | ||||
252 | /// - Row 4: The rx opcode variants | ||||
253 | /// | ||||
254 | /// \attention Columns must be structured as follows: | ||||
255 | /// - Column 0: The 64-bit opcode variants | ||||
256 | /// - Column 1: The 32-bit opcode variants | ||||
257 | /// | ||||
258 | /// \p Dst is the destination register of the binop to emit. | ||||
259 | /// \p LHS is the left-hand operand of the binop to emit. | ||||
260 | /// \p RHS is the right-hand operand of the binop to emit. | ||||
261 | MachineInstr *emitAddSub( | ||||
262 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, | ||||
263 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, | ||||
264 | MachineIRBuilder &MIRBuilder) const; | ||||
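// [Editor example] A sketch of how the wrappers below are expected to use
// emitAddSub; the table contents mirror the doc comment above, but this
// body is illustrative, not the original definition:
//
//   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
//                         MachineOperand &RHS, MachineIRBuilder &MIB) const {
//     const std::array<std::array<unsigned, 2>, 5> OpcTable{
//         {{AArch64::ADDXri, AArch64::ADDWri},
//          {AArch64::ADDXrs, AArch64::ADDWrs},
//          {AArch64::ADDXrr, AArch64::ADDWrr},
//          {AArch64::SUBXri, AArch64::SUBWri},
//          {AArch64::ADDXrx, AArch64::ADDWrx}}};
//     return emitAddSub(OpcTable, DefReg, LHS, RHS, MIB);
//   }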
265 | MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, | ||||
266 | MachineOperand &RHS, | ||||
267 | MachineIRBuilder &MIRBuilder) const; | ||||
268 | MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, | ||||
269 | MachineIRBuilder &MIRBuilder) const; | ||||
270 | MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, | ||||
271 | MachineIRBuilder &MIRBuilder) const; | ||||
272 | MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, | ||||
273 | MachineIRBuilder &MIRBuilder) const; | ||||
274 | MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, | ||||
275 | MachineIRBuilder &MIRBuilder) const; | ||||
276 | MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, | ||||
277 | AArch64CC::CondCode CC, | ||||
278 | MachineIRBuilder &MIRBuilder) const; | ||||
279 | MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, | ||||
280 | const RegisterBank &DstRB, LLT ScalarTy, | ||||
281 | Register VecReg, unsigned LaneIdx, | ||||
282 | MachineIRBuilder &MIRBuilder) const; | ||||
283 | MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2, | ||||
284 | AArch64CC::CondCode Pred, | ||||
285 | MachineIRBuilder &MIRBuilder) const; | ||||
286 | /// Emit a CSet for a FP compare. | ||||
287 | /// | ||||
288 | /// \p Dst is expected to be a 32-bit scalar register. | ||||
289 | MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, | ||||
290 | MachineIRBuilder &MIRBuilder) const; | ||||
291 | |||||
292 | /// Emit the overflow op for \p Opcode. | ||||
293 | /// | ||||
294 | /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, | ||||
295 | /// G_USUBO, etc. | ||||
296 | std::pair<MachineInstr *, AArch64CC::CondCode> | ||||
297 | emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, | ||||
298 | MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; | ||||
299 | |||||
300 | /// Emit an expression as a conjunction (a series of CCMP/CFCMP ops). | ||||
301 | /// In some cases this is even possible with OR operations in the expression. | ||||
302 | MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, | ||||
303 | MachineIRBuilder &MIB) const; | ||||
304 | MachineInstr *emitConditionalComparison(Register LHS, Register RHS, | ||||
305 | CmpInst::Predicate CC, | ||||
306 | AArch64CC::CondCode Predicate, | ||||
307 | AArch64CC::CondCode OutCC, | ||||
308 | MachineIRBuilder &MIB) const; | ||||
309 | MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, | ||||
310 | bool Negate, Register CCOp, | ||||
311 | AArch64CC::CondCode Predicate, | ||||
312 | MachineIRBuilder &MIB) const; | ||||
313 | |||||
314 | /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. | ||||
315 | /// \p IsNegative is true if the test should be "not zero". | ||||
316 | /// This will also optimize the test bit instruction when possible. | ||||
317 | MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, | ||||
318 | MachineBasicBlock *DstMBB, | ||||
319 | MachineIRBuilder &MIB) const; | ||||
320 | |||||
321 | /// Emit a CB(N)Z instruction which branches to \p DestMBB. | ||||
322 | MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, | ||||
323 | MachineBasicBlock *DestMBB, | ||||
324 | MachineIRBuilder &MIB) const; | ||||
325 | |||||
326 | // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. | ||||
327 | // We use these manually instead of using the importer since it doesn't | ||||
328 | // support SDNodeXForm. | ||||
329 | ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; | ||||
330 | ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; | ||||
331 | ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; | ||||
332 | ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; | ||||
333 | |||||
334 | ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; | ||||
335 | ComplexRendererFns selectArithImmed(MachineOperand &Root) const; | ||||
336 | ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; | ||||
337 | |||||
338 | ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, | ||||
339 | unsigned Size) const; | ||||
340 | |||||
341 | ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { | ||||
342 | return selectAddrModeUnscaled(Root, 1); | ||||
343 | } | ||||
344 | ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { | ||||
345 | return selectAddrModeUnscaled(Root, 2); | ||||
346 | } | ||||
347 | ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { | ||||
348 | return selectAddrModeUnscaled(Root, 4); | ||||
349 | } | ||||
350 | ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { | ||||
351 | return selectAddrModeUnscaled(Root, 8); | ||||
352 | } | ||||
353 | ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { | ||||
354 | return selectAddrModeUnscaled(Root, 16); | ||||
355 | } | ||||
356 | |||||
357 | /// Helper to try to fold a GISEL_ADD_LOW into an immediate, to be used | ||||
358 | /// from complex pattern matchers like selectAddrModeIndexed(). | ||||
359 | ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, | ||||
360 | MachineRegisterInfo &MRI) const; | ||||
361 | |||||
362 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, | ||||
363 | unsigned Size) const; | ||||
364 | template <int Width> | ||||
365 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { | ||||
366 | return selectAddrModeIndexed(Root, Width / 8); | ||||
367 | } | ||||
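// [Editor note] The tblgen-erated patterns instantiate this template by
// access width, so a 64-bit scaled-immediate load would go through
// selectAddrModeIndexed<64>(Root), i.e. selectAddrModeIndexed(Root, /*Size=*/8),
// matching an LDRXui-style [base, #imm] addressing mode.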
368 | |||||
369 | bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, | ||||
370 | const MachineRegisterInfo &MRI) const; | ||||
371 | ComplexRendererFns | ||||
372 | selectAddrModeShiftedExtendXReg(MachineOperand &Root, | ||||
373 | unsigned SizeInBytes) const; | ||||
374 | |||||
375 | /// Returns a \p ComplexRendererFns which contains a base, offset, and whether | ||||
376 | /// or not a shift + extend should be folded into an addressing mode. Returns | ||||
377 | /// None when this is not profitable or possible. | ||||
378 | ComplexRendererFns | ||||
379 | selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, | ||||
380 | MachineOperand &Offset, unsigned SizeInBytes, | ||||
381 | bool WantsExt) const; | ||||
382 | ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; | ||||
383 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, | ||||
384 | unsigned SizeInBytes) const; | ||||
385 | template <int Width> | ||||
386 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { | ||||
387 | return selectAddrModeXRO(Root, Width / 8); | ||||
388 | } | ||||
389 | |||||
390 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, | ||||
391 | unsigned SizeInBytes) const; | ||||
392 | template <int Width> | ||||
393 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { | ||||
394 | return selectAddrModeWRO(Root, Width / 8); | ||||
395 | } | ||||
396 | |||||
397 | ComplexRendererFns selectShiftedRegister(MachineOperand &Root, | ||||
398 | bool AllowROR = false) const; | ||||
399 | |||||
400 | ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { | ||||
401 | return selectShiftedRegister(Root); | ||||
402 | } | ||||
403 | |||||
404 | ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { | ||||
405 | return selectShiftedRegister(Root, true); | ||||
406 | } | ||||
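// [Editor note] AllowROR reflects the ISA: arithmetic instructions
// (ADD/SUB) accept only LSL/LSR/ASR shifted operands, e.g.
// "add x0, x1, x2, lsl #3", while logical instructions (AND/ORR/EOR)
// additionally accept ROR, hence AllowROR = true in the logical variant.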
407 | |||||
408 | /// Given an extend instruction, determine the correct shift-extend type for | ||||
409 | /// that instruction. | ||||
410 | /// | ||||
411 | /// If the instruction is going to be used in a load or store, pass | ||||
412 | /// \p IsLoadStore = true. | ||||
413 | AArch64_AM::ShiftExtendType | ||||
414 | getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, | ||||
415 | bool IsLoadStore = false) const; | ||||
416 | |||||
417 | /// Move \p Reg to \p RC if \p Reg is not already on \p RC. | ||||
418 | /// | ||||
419 | /// \returns Either \p Reg if no change was necessary, or the new register | ||||
420 | /// created by moving \p Reg. | ||||
421 | /// | ||||
422 | /// Note: This uses emitCopy right now. | ||||
423 | Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, | ||||
424 | MachineIRBuilder &MIB) const; | ||||
425 | |||||
426 | ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; | ||||
427 | |||||
428 | void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||
429 | int OpIdx = -1) const; | ||||
430 | void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, | ||||
431 | int OpIdx = -1) const; | ||||
432 | void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, | ||||
433 | int OpIdx = -1) const; | ||||
434 | void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||
435 | int OpIdx = -1) const; | ||||
436 | void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||
437 | int OpIdx = -1) const; | ||||
438 | void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||
439 | int OpIdx = -1) const; | ||||
440 | void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB, | ||||
441 | const MachineInstr &MI, | ||||
442 | int OpIdx = -1) const; | ||||
443 | |||||
444 | // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. | ||||
445 | void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); | ||||
446 | |||||
447 | // Optimization methods. | ||||
448 | bool tryOptSelect(GSelect &Sel); | ||||
449 | bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); | ||||
450 | MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, | ||||
451 | MachineOperand &Predicate, | ||||
452 | MachineIRBuilder &MIRBuilder) const; | ||||
453 | |||||
454 | /// Return true if \p MI is a load or store of \p NumBytes bytes. | ||||
455 | bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; | ||||
456 | |||||
457 | /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit | ||||
458 | /// register zeroed out. In other words, the result of MI has been explicitly | ||||
459 | /// zero extended. | ||||
460 | bool isDef32(const MachineInstr &MI) const; | ||||
461 | |||||
462 | const AArch64TargetMachine &TM; | ||||
463 | const AArch64Subtarget &STI; | ||||
464 | const AArch64InstrInfo &TII; | ||||
465 | const AArch64RegisterInfo &TRI; | ||||
466 | const AArch64RegisterBankInfo &RBI; | ||||
467 | |||||
468 | bool ProduceNonFlagSettingCondBr = false; | ||||
469 | |||||
470 | // Some cached values used during selection. | ||||
471 | // We use LR as a live-in register, and we keep track of it here as it can be | ||||
472 | // clobbered by calls. | ||||
473 | Register MFReturnAddr; | ||||
474 | |||||
475 | MachineIRBuilder MIB; | ||||
476 | |||||
477 | #define GET_GLOBALISEL_PREDICATES_DECL | ||||
478 | #include "AArch64GenGlobalISel.inc" | ||||
479 | #undef GET_GLOBALISEL_PREDICATES_DECL | ||||
480 | |||||
481 | // We declare the temporaries used by selectImpl() in the class to minimize the | ||||
482 | // cost of constructing placeholder values. | ||||
483 | #define GET_GLOBALISEL_TEMPORARIES_DECL | ||||
484 | #include "AArch64GenGlobalISel.inc" | ||||
485 | #undef GET_GLOBALISEL_TEMPORARIES_DECL | ||||
486 | }; | ||||
487 | |||||
488 | } // end anonymous namespace | ||||
489 | |||||
490 | #define GET_GLOBALISEL_IMPL | ||||
491 | #include "AArch64GenGlobalISel.inc" | ||||
492 | #undef GET_GLOBALISEL_IMPL | ||||
493 | |||||
494 | AArch64InstructionSelector::AArch64InstructionSelector( | ||||
495 | const AArch64TargetMachine &TM, const AArch64Subtarget &STI, | ||||
496 | const AArch64RegisterBankInfo &RBI) | ||||
497 | : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), | ||||
498 | RBI(RBI), | ||||
499 | #define GET_GLOBALISEL_PREDICATES_INIT | ||||
500 | #include "AArch64GenGlobalISel.inc" | ||||
501 | #undef GET_GLOBALISEL_PREDICATES_INIT | ||||
502 | #define GET_GLOBALISEL_TEMPORARIES_INIT | ||||
503 | #include "AArch64GenGlobalISel.inc" | ||||
504 | #undef GET_GLOBALISEL_TEMPORARIES_INIT | ||||
505 | { | ||||
506 | } | ||||
507 | |||||
508 | // FIXME: This should be target-independent, inferred from the types declared | ||||
509 | // for each class in the bank. | ||||
510 | static const TargetRegisterClass * | ||||
511 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, | ||||
512 | const RegisterBankInfo &RBI, | ||||
513 | bool GetAllRegSet = false) { | ||||
514 | if (RB.getID() == AArch64::GPRRegBankID) { | ||||
515 | if (Ty.getSizeInBits() <= 32) | ||||
516 | return GetAllRegSet ? &AArch64::GPR32allRegClass | ||||
517 | : &AArch64::GPR32RegClass; | ||||
518 | if (Ty.getSizeInBits() == 64) | ||||
519 | return GetAllRegSet ? &AArch64::GPR64allRegClass | ||||
520 | : &AArch64::GPR64RegClass; | ||||
521 | if (Ty.getSizeInBits() == 128) | ||||
522 | return &AArch64::XSeqPairsClassRegClass; | ||||
523 | return nullptr; | ||||
524 | } | ||||
525 | |||||
526 | if (RB.getID() == AArch64::FPRRegBankID) { | ||||
527 | switch (Ty.getSizeInBits()) { | ||||
528 | case 8: | ||||
529 | return &AArch64::FPR8RegClass; | ||||
530 | case 16: | ||||
531 | return &AArch64::FPR16RegClass; | ||||
532 | case 32: | ||||
533 | return &AArch64::FPR32RegClass; | ||||
534 | case 64: | ||||
535 | return &AArch64::FPR64RegClass; | ||||
536 | case 128: | ||||
537 | return &AArch64::FPR128RegClass; | ||||
538 | } | ||||
539 | return nullptr; | ||||
540 | } | ||||
541 | |||||
542 | return nullptr; | ||||
543 | } | ||||
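// [Editor example] A worked call, assuming FPRBank refers to the FPR
// register bank: a 128-bit vector maps to the FPR128 class, while an s64
// on the GPR bank maps to GPR64 (or GPR64all when GetAllRegSet is true):
//
//   const TargetRegisterClass *RC =
//       getRegClassForTypeOnBank(LLT::fixed_vector(2, 64), FPRBank, RBI);
//   // RC == &AArch64::FPR128RegClass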
544 | |||||
545 | /// Given a register bank and a size in bits, return the smallest register class | ||||
546 | /// that can represent that combination. | ||||
547 | static const TargetRegisterClass * | ||||
548 | getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, | ||||
549 | bool GetAllRegSet = false) { | ||||
550 | unsigned RegBankID = RB.getID(); | ||||
551 | |||||
552 | if (RegBankID == AArch64::GPRRegBankID) { | ||||
553 | if (SizeInBits <= 32) | ||||
554 | return GetAllRegSet ? &AArch64::GPR32allRegClass | ||||
555 | : &AArch64::GPR32RegClass; | ||||
556 | if (SizeInBits == 64) | ||||
557 | return GetAllRegSet ? &AArch64::GPR64allRegClass | ||||
558 | : &AArch64::GPR64RegClass; | ||||
559 | if (SizeInBits == 128) | ||||
560 | return &AArch64::XSeqPairsClassRegClass; | ||||
561 | } | ||||
562 | |||||
563 | if (RegBankID == AArch64::FPRRegBankID) { | ||||
564 | switch (SizeInBits) { | ||||
565 | default: | ||||
566 | return nullptr; | ||||
567 | case 8: | ||||
568 | return &AArch64::FPR8RegClass; | ||||
569 | case 16: | ||||
570 | return &AArch64::FPR16RegClass; | ||||
571 | case 32: | ||||
572 | return &AArch64::FPR32RegClass; | ||||
573 | case 64: | ||||
574 | return &AArch64::FPR64RegClass; | ||||
575 | case 128: | ||||
576 | return &AArch64::FPR128RegClass; | ||||
577 | } | ||||
578 | } | ||||
579 | |||||
580 | return nullptr; | ||||
581 | } | ||||
582 | |||||
583 | /// Returns the correct subregister to use for a given register class. | ||||
584 | static bool getSubRegForClass(const TargetRegisterClass *RC, | ||||
585 | const TargetRegisterInfo &TRI, unsigned &SubReg) { | ||||
586 | switch (TRI.getRegSizeInBits(*RC)) { | ||||
587 | case 8: | ||||
588 | SubReg = AArch64::bsub; | ||||
589 | break; | ||||
590 | case 16: | ||||
591 | SubReg = AArch64::hsub; | ||||
592 | break; | ||||
593 | case 32: | ||||
594 | if (RC != &AArch64::FPR32RegClass) | ||||
595 | SubReg = AArch64::sub_32; | ||||
596 | else | ||||
597 | SubReg = AArch64::ssub; | ||||
598 | break; | ||||
599 | case 64: | ||||
600 | SubReg = AArch64::dsub; | ||||
601 | break; | ||||
602 | default: | ||||
603 | LLVM_DEBUG( | ||||
604 | dbgs() << "Couldn't find appropriate subregister for register class."); | ||||
605 | return false; | ||||
606 | } | ||||
607 | |||||
608 | return true; | ||||
609 | } | ||||
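// [Editor example] getSubRegForClass(&AArch64::FPR32RegClass, TRI, SubReg)
// sets SubReg = AArch64::ssub, while a 32-bit GPR class yields
// AArch64::sub_32: each names how the small class nests in its superclass.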
610 | |||||
611 | /// Returns the minimum size the given register bank can hold. | ||||
612 | static unsigned getMinSizeForRegBank(const RegisterBank &RB) { | ||||
613 | switch (RB.getID()) { | ||||
614 | case AArch64::GPRRegBankID: | ||||
615 | return 32; | ||||
616 | case AArch64::FPRRegBankID: | ||||
617 | return 8; | ||||
618 | default: | ||||
619 | llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank." , "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 619); | ||||
620 | } | ||||
621 | } | ||||
622 | |||||
623 | /// Create a REG_SEQUENCE instruction using the registers in \p Regs. | ||||
624 | /// Helper function for functions like createDTuple and createQTuple. | ||||
625 | /// | ||||
626 | /// \p RegClassIDs - The list of register class IDs available for some tuple of | ||||
627 | /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is | ||||
628 | /// expected to contain between 2 and 4 tuple classes. | ||||
629 | /// | ||||
630 | /// \p SubRegs - The list of subregister classes associated with each register | ||||
631 | /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 | ||||
632 | /// subregister class. The index of each subregister class is expected to | ||||
633 | /// correspond with the index of each register class. | ||||
634 | /// | ||||
635 | /// \returns Either the destination register of the REG_SEQUENCE instruction | ||||
636 | /// that was created, or the 0th element of \p Regs if \p Regs contains a single | ||||
637 | /// element. | ||||
638 | static Register createTuple(ArrayRef<Register> Regs, | ||||
639 | const unsigned RegClassIDs[], | ||||
640 | const unsigned SubRegs[], MachineIRBuilder &MIB) { | ||||
641 | unsigned NumRegs = Regs.size(); | ||||
642 | if (NumRegs == 1) | ||||
643 | return Regs[0]; | ||||
644 | assert(NumRegs >= 2 && NumRegs <= 4 && | ||||
645 | "Only support between two and 4 registers in a tuple!"); | ||||
646 | const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); | ||||
647 | auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]); | ||||
648 | auto RegSequence = | ||||
649 | MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {}); | ||||
650 | for (unsigned I = 0, E = Regs.size(); I < E; ++I) { | ||||
651 | RegSequence.addUse(Regs[I]); | ||||
652 | RegSequence.addImm(SubRegs[I]); | ||||
653 | } | ||||
654 | return RegSequence.getReg(0); | ||||
655 | } | ||||
656 | |||||
657 | /// Create a tuple of D-registers using the registers in \p Regs. | ||||
658 | static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { | ||||
659 | static const unsigned RegClassIDs[] = { | ||||
660 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; | ||||
661 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, | ||||
662 | AArch64::dsub2, AArch64::dsub3}; | ||||
663 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); | ||||
664 | } | ||||
665 | |||||
666 | /// Create a tuple of Q-registers using the registers in \p Regs. | ||||
667 | static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { | ||||
668 | static const unsigned RegClassIDs[] = { | ||||
669 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; | ||||
670 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, | ||||
671 | AArch64::qsub2, AArch64::qsub3}; | ||||
672 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); | ||||
673 | } | ||||
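// [Editor example] Packing two Q registers into a single QQ tuple operand
// (register names assumed for illustration):
//
//   Register Tuple = createQTuple({Q0, Q1}, MIB);
//
// emits "REG_SEQUENCE %Q0, %subreg.qsub0, %Q1, %subreg.qsub1" into a
// QQRegClass vreg, as used by the NEON multi-vector load selection below.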
674 | |||||
675 | static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { | ||||
676 | auto &MI = *Root.getParent(); | ||||
677 | auto &MBB = *MI.getParent(); | ||||
678 | auto &MF = *MBB.getParent(); | ||||
679 | auto &MRI = MF.getRegInfo(); | ||||
680 | uint64_t Immed; | ||||
681 | if (Root.isImm()) | ||||
682 | Immed = Root.getImm(); | ||||
683 | else if (Root.isCImm()) | ||||
684 | Immed = Root.getCImm()->getZExtValue(); | ||||
685 | else if (Root.isReg()) { | ||||
686 | auto ValAndVReg = | ||||
687 | getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true); | ||||
688 | if (!ValAndVReg) | ||||
689 | return None; | ||||
690 | Immed = ValAndVReg->Value.getSExtValue(); | ||||
691 | } else | ||||
692 | return None; | ||||
693 | return Immed; | ||||
694 | } | ||||
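// [Editor example] This looks through vreg def chains, so given the
// (illustrative) generic MIR
//
//   %c:gpr(s64) = G_CONSTANT i64 42
//   %sum:gpr(s64) = G_ADD %x, %c
//
// getImmedFromMO on the second G_ADD operand yields 42, while a
// non-constant register operand yields None.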
695 | |||||
696 | /// Check whether \p I is a currently unsupported binary operation: | ||||
697 | /// - it has an unsized type | ||||
698 | /// - an operand is not a vreg | ||||
699 | /// - not all operands are in the same bank | ||||
700 | /// These are checks that should someday live in the verifier, but right now, | ||||
701 | /// these are mostly limitations of the aarch64 selector. | ||||
702 | static bool unsupportedBinOp(const MachineInstr &I, | ||||
703 | const AArch64RegisterBankInfo &RBI, | ||||
704 | const MachineRegisterInfo &MRI, | ||||
705 | const AArch64RegisterInfo &TRI) { | ||||
706 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||
707 | if (!Ty.isValid()) { | ||||
708 | LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n" ; } } while (false); | ||||
709 | return true; | ||||
710 | } | ||||
711 | |||||
712 | const RegisterBank *PrevOpBank = nullptr; | ||||
713 | for (auto &MO : I.operands()) { | ||||
714 | // FIXME: Support non-register operands. | ||||
715 | if (!MO.isReg()) { | ||||
716 | LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); | ||||
717 | return true; | ||||
718 | } | ||||
719 | |||||
720 | // FIXME: Can generic operations have physical registers operands? If | ||||
721 | // so, this will need to be taught about that, and we'll need to get the | ||||
722 | // bank out of the minimal class for the register. | ||||
723 | // Either way, this needs to be documented (and possibly verified). | ||||
724 | if (!Register::isVirtualRegister(MO.getReg())) { | ||||
725 | LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); | ||||
726 | return true; | ||||
727 | } | ||||
728 | |||||
729 | const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); | ||||
730 | if (!OpBank) { | ||||
731 | LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n"); | ||||
732 | return true; | ||||
733 | } | ||||
734 | |||||
735 | if (PrevOpBank && OpBank != PrevOpBank) { | ||||
736 | LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n"); | ||||
737 | return true; | ||||
738 | } | ||||
739 | PrevOpBank = OpBank; | ||||
740 | } | ||||
741 | return false; | ||||
742 | } | ||||
743 | |||||
744 | /// Select the AArch64 opcode for the basic binary operation \p GenericOpc | ||||
745 | /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID | ||||
746 | /// and of size \p OpSize. | ||||
747 | /// \returns \p GenericOpc if the combination is unsupported. | ||||
748 | static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, | ||||
749 | unsigned OpSize) { | ||||
750 | switch (RegBankID) { | ||||
751 | case AArch64::GPRRegBankID: | ||||
752 | if (OpSize == 32) { | ||||
753 | switch (GenericOpc) { | ||||
754 | case TargetOpcode::G_SHL: | ||||
755 | return AArch64::LSLVWr; | ||||
756 | case TargetOpcode::G_LSHR: | ||||
757 | return AArch64::LSRVWr; | ||||
758 | case TargetOpcode::G_ASHR: | ||||
759 | return AArch64::ASRVWr; | ||||
760 | default: | ||||
761 | return GenericOpc; | ||||
762 | } | ||||
763 | } else if (OpSize == 64) { | ||||
764 | switch (GenericOpc) { | ||||
765 | case TargetOpcode::G_PTR_ADD: | ||||
766 | return AArch64::ADDXrr; | ||||
767 | case TargetOpcode::G_SHL: | ||||
768 | return AArch64::LSLVXr; | ||||
769 | case TargetOpcode::G_LSHR: | ||||
770 | return AArch64::LSRVXr; | ||||
771 | case TargetOpcode::G_ASHR: | ||||
772 | return AArch64::ASRVXr; | ||||
773 | default: | ||||
774 | return GenericOpc; | ||||
775 | } | ||||
776 | } | ||||
777 | break; | ||||
778 | case AArch64::FPRRegBankID: | ||||
779 | switch (OpSize) { | ||||
780 | case 32: | ||||
781 | switch (GenericOpc) { | ||||
782 | case TargetOpcode::G_FADD: | ||||
783 | return AArch64::FADDSrr; | ||||
784 | case TargetOpcode::G_FSUB: | ||||
785 | return AArch64::FSUBSrr; | ||||
786 | case TargetOpcode::G_FMUL: | ||||
787 | return AArch64::FMULSrr; | ||||
788 | case TargetOpcode::G_FDIV: | ||||
789 | return AArch64::FDIVSrr; | ||||
790 | default: | ||||
791 | return GenericOpc; | ||||
792 | } | ||||
793 | case 64: | ||||
794 | switch (GenericOpc) { | ||||
795 | case TargetOpcode::G_FADD: | ||||
796 | return AArch64::FADDDrr; | ||||
797 | case TargetOpcode::G_FSUB: | ||||
798 | return AArch64::FSUBDrr; | ||||
799 | case TargetOpcode::G_FMUL: | ||||
800 | return AArch64::FMULDrr; | ||||
801 | case TargetOpcode::G_FDIV: | ||||
802 | return AArch64::FDIVDrr; | ||||
803 | case TargetOpcode::G_OR: | ||||
804 | return AArch64::ORRv8i8; | ||||
805 | default: | ||||
806 | return GenericOpc; | ||||
807 | } | ||||
808 | } | ||||
809 | break; | ||||
810 | } | ||||
811 | return GenericOpc; | ||||
812 | } | ||||
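// [Editor example] selectBinaryOp(TargetOpcode::G_SHL,
// AArch64::GPRRegBankID, 64) returns AArch64::LSLVXr; an unsupported
// combination (say G_FADD on the GPR bank) is echoed back unchanged,
// which callers treat as the "no match" signal.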
813 | |||||
814 | /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, | ||||
815 | /// appropriate for the (value) register bank \p RegBankID and of memory access | ||||
816 | /// size \p OpSize. This returns the variant with the base+unsigned-immediate | ||||
817 | /// addressing mode (e.g., LDRXui). | ||||
818 | /// \returns \p GenericOpc if the combination is unsupported. | ||||
819 | static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, | ||||
820 | unsigned OpSize) { | ||||
821 | const bool isStore = GenericOpc == TargetOpcode::G_STORE; | ||||
822 | switch (RegBankID) { | ||||
823 | case AArch64::GPRRegBankID: | ||||
824 | switch (OpSize) { | ||||
825 | case 8: | ||||
826 | return isStore ? AArch64::STRBBui : AArch64::LDRBBui; | ||||
827 | case 16: | ||||
828 | return isStore ? AArch64::STRHHui : AArch64::LDRHHui; | ||||
829 | case 32: | ||||
830 | return isStore ? AArch64::STRWui : AArch64::LDRWui; | ||||
831 | case 64: | ||||
832 | return isStore ? AArch64::STRXui : AArch64::LDRXui; | ||||
833 | } | ||||
834 | break; | ||||
835 | case AArch64::FPRRegBankID: | ||||
836 | switch (OpSize) { | ||||
837 | case 8: | ||||
838 | return isStore ? AArch64::STRBui : AArch64::LDRBui; | ||||
839 | case 16: | ||||
840 | return isStore ? AArch64::STRHui : AArch64::LDRHui; | ||||
841 | case 32: | ||||
842 | return isStore ? AArch64::STRSui : AArch64::LDRSui; | ||||
843 | case 64: | ||||
844 | return isStore ? AArch64::STRDui : AArch64::LDRDui; | ||||
845 | case 128: | ||||
846 | return isStore ? AArch64::STRQui : AArch64::LDRQui; | ||||
847 | } | ||||
848 | break; | ||||
849 | } | ||||
850 | return GenericOpc; | ||||
851 | } | ||||
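// [Editor example] A 32-bit GPR G_LOAD maps to AArch64::LDRWui and a
// 128-bit FPR G_STORE maps to AArch64::STRQui; the "ui" suffix denotes
// the base + scaled unsigned-12-bit-immediate addressing mode.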
852 | |||||
853 | #ifndef NDEBUG | ||||
854 | /// Helper function that verifies that we have a valid copy at the end of | ||||
855 | /// selectCopy. Verifies that the source and dest have the expected sizes and | ||||
856 | /// then returns true. | ||||
857 | static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, | ||||
858 | const MachineRegisterInfo &MRI, | ||||
859 | const TargetRegisterInfo &TRI, | ||||
860 | const RegisterBankInfo &RBI) { | ||||
861 | const Register DstReg = I.getOperand(0).getReg(); | ||||
862 | const Register SrcReg = I.getOperand(1).getReg(); | ||||
863 | const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); | ||||
864 | const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); | ||||
865 | |||||
866 | // Make sure the size of the source and dest line up. | ||||
867 | assert( | ||||
868 | (DstSize == SrcSize || | ||||
869 | // Copies are a means to set up initial types, the number of | ||||
870 | // bits may not exactly match. | ||||
871 | (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || | ||||
872 | // Copies are a means to copy bits around, and as long as we are | ||||
873 | // on the same register class, that's fine. Otherwise, that | ||||
874 | // means we need some SUBREG_TO_REG or AND & co. | ||||
875 | (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && | ||||
876 | "Copy with different width?!"); | ||||
877 | |||||
878 | // Check the size of the destination. | ||||
879 | assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && | ||||
880 | "GPRs cannot get more than 64-bit width values"); | ||||
881 | |||||
882 | return true; | ||||
883 | } | ||||
884 | #endif | ||||
885 | |||||
886 | /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg | ||||
887 | /// to \p *To. | ||||
888 | /// | ||||
889 | /// E.g "To = COPY SrcReg:SubReg" | ||||
890 | static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, | ||||
891 | const RegisterBankInfo &RBI, Register SrcReg, | ||||
892 | const TargetRegisterClass *To, unsigned SubReg) { | ||||
893 | assert(SrcReg.isValid() && "Expected a valid source register?"); | ||||
894 | assert(To && "Destination register class cannot be null"); | ||||
895 | assert(SubReg && "Expected a valid subregister"); | ||||
896 | |||||
897 | MachineIRBuilder MIB(I); | ||||
898 | auto SubRegCopy = | ||||
899 | MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg); | ||||
900 | MachineOperand &RegOp = I.getOperand(1); | ||||
901 | RegOp.setReg(SubRegCopy.getReg(0)); | ||||
902 | |||||
903 | // It's possible that the destination register won't be constrained. Make | ||||
904 | // sure that happens. | ||||
905 | if (!Register::isPhysicalRegister(I.getOperand(0).getReg())) | ||||
906 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); | ||||
907 | |||||
908 | return true; | ||||
909 | } | ||||
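// [Editor sketch] The rewrite copySubReg performs, with illustrative MIR
// (register classes assumed):
//
//   %dst:gpr32 = COPY %src:gpr64            ; before
//
// becomes
//
//   %tmp:gpr32 = COPY %src.sub_32:gpr64
//   %dst:gpr32 = COPY %tmp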
910 | |||||
911 | /// Helper function to get the source and destination register classes for a | ||||
912 | /// copy. Returns a std::pair containing the source register class for the | ||||
913 | /// copy, and the destination register class for the copy. If a register class | ||||
914 | /// cannot be determined, then it will be nullptr. | ||||
915 | static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> | ||||
916 | getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, | ||||
917 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, | ||||
918 | const RegisterBankInfo &RBI) { | ||||
919 | Register DstReg = I.getOperand(0).getReg(); | ||||
920 | Register SrcReg = I.getOperand(1).getReg(); | ||||
921 | const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); | ||||
922 | const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||
923 | unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); | ||||
924 | unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); | ||||
925 | |||||
926 | // Special casing for cross-bank copies of s1s. We can technically represent | ||||
927 | // a 1-bit value with any size of register. The minimum size for a GPR is 32 | ||||
928 | // bits. So, we need to put the FPR on 32 bits as well. | ||||
929 | // | ||||
930 | // FIXME: I'm not sure if this case holds true outside of copies. If it does, | ||||
931 | // then we can pull it into the helpers that get the appropriate class for a | ||||
932 | // register bank. Or make a new helper that carries along some constraint | ||||
933 | // information. | ||||
934 | if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) | ||||
935 | SrcSize = DstSize = 32; | ||||
936 | |||||
937 | return {getMinClassForRegBank(SrcRegBank, SrcSize, true), | ||||
938 | getMinClassForRegBank(DstRegBank, DstSize, true)}; | ||||
939 | } | ||||
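// [Editor example] Following getMinClassForRegBank above: a cross-bank
// copy of an s1 from FPR to GPR widens both sizes to 32 here, so the
// returned pair is {&AArch64::FPR32RegClass, &AArch64::GPR32allRegClass}.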
940 | |||||
941 | static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, | ||||
942 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, | ||||
943 | const RegisterBankInfo &RBI) { | ||||
944 | Register DstReg = I.getOperand(0).getReg(); | ||||
945 | Register SrcReg = I.getOperand(1).getReg(); | ||||
946 | const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); | ||||
947 | const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||
948 | |||||
949 | // Find the correct register classes for the source and destination registers. | ||||
950 | const TargetRegisterClass *SrcRC; | ||||
951 | const TargetRegisterClass *DstRC; | ||||
952 | std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); | ||||
953 | |||||
954 | if (!DstRC) { | ||||
955 | LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unexpected dest size " << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while (false) | ||||
956 | << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unexpected dest size " << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while (false); | ||||
957 | return false; | ||||
958 | } | ||||
959 | |||||
960 | // A couple of helpers below for making sure that the copy we produce is valid. | ||||
961 | |||||
962 | // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want | ||||
963 | // to verify that the src and dst are the same size, since that's handled by | ||||
964 | // the SUBREG_TO_REG. | ||||
965 | bool KnownValid = false; | ||||
966 | |||||
967 | // Returns true, or asserts if something we don't expect happens. Instead of | ||||
968 | // returning true, we return isValidCopy() to ensure that we verify the | ||||
969 | // result. | ||||
970 | auto CheckCopy = [&]() { | ||||
971 | // If we have a bitcast or something, we can't have physical registers. | ||||
972 | assert((I.isCopy() || | ||||
973 | (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && | ||||
974 | !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && | ||||
975 | "No phys reg on generic operator!"); | ||||
976 | bool ValidCopy = true; | ||||
977 | #ifndef NDEBUG | ||||
978 | ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); | ||||
979 | assert(ValidCopy && "Invalid copy."); | ||||
980 | #endif | ||||
981 | (void)KnownValid; | ||||
982 | return ValidCopy; | ||||
983 | }; | ||||
984 | |||||
985 | // Is this a copy? If so, then we may need to insert a subregister copy. | ||||
986 | if (I.isCopy()) { | ||||
987 | // Yes. Check if there's anything to fix up. | ||||
988 | if (!SrcRC) { | ||||
989 | LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n"); | ||||
990 | return false; | ||||
991 | } | ||||
992 | |||||
993 | unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); | ||||
994 | unsigned DstSize = TRI.getRegSizeInBits(*DstRC); | ||||
995 | unsigned SubReg; | ||||
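| // [Editor note] SubReg is declared without an initializer, and the
| // getSubRegForClass() calls below do not check their result; if the helper
| // can fail without writing SubReg, the later uses read an indeterminate
| // value. A minimal hardening sketch, assuming the helper returns false on
| // failure without setting SubReg:
| //   unsigned SubReg = 0;
| //   if (!getSubRegForClass(SrcRC, TRI, SubReg))
| //     return false;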
996 | |||||
997 | // If the source bank doesn't support a subregister copy small enough, | ||||
998 | // then we first need to copy to the destination bank. | ||||
999 | if (getMinSizeForRegBank(SrcRegBank) > DstSize) { | ||||
1000 | const TargetRegisterClass *DstTempRC = | ||||
1001 | getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true); | ||||
1002 | getSubRegForClass(DstRC, TRI, SubReg); | ||||
1003 | |||||
1004 | MachineIRBuilder MIB(I); | ||||
1005 | auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg}); | ||||
1006 | copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); | ||||
1007 | } else if (SrcSize > DstSize) { | ||||
1008 | // If the source register is bigger than the destination we need to | ||||
1009 | // perform a subregister copy. | ||||
1010 | const TargetRegisterClass *SubRegRC = | ||||
1011 | getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); | ||||
1012 | getSubRegForClass(SubRegRC, TRI, SubReg); | ||||
1013 | copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); | ||||
1014 | } else if (DstSize > SrcSize) { | ||||
1015 | // If the destination register is bigger than the source we need to do | ||||
1016 | // a promotion using SUBREG_TO_REG. | ||||
1017 | const TargetRegisterClass *PromotionRC = | ||||
1018 | getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); | ||||
1019 | getSubRegForClass(SrcRC, TRI, SubReg); | ||||
1020 | |||||
1021 | Register PromoteReg = MRI.createVirtualRegister(PromotionRC); | ||||
1022 | BuildMI(*I.getParent(), I, I.getDebugLoc(), | ||||
1023 | TII.get(AArch64::SUBREG_TO_REG), PromoteReg) | ||||
1024 | .addImm(0) | ||||
1025 | .addUse(SrcReg) | ||||
1026 | .addImm(SubReg); | ||||
1027 | MachineOperand &RegOp = I.getOperand(1); | ||||
1028 | RegOp.setReg(PromoteReg); | ||||
1029 | |||||
1030 | // Promise that the copy is implicitly validated by the SUBREG_TO_REG. | ||||
1031 | KnownValid = true; | ||||
1032 | } | ||||
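| // [Editor note, illustrative] The three fixup shapes above, in MIR-like
| // form (subregister index names are examples, e.g. sub_32 for GPR):
| //   s32 = COPY s64  -> rewritten as a subregister copy through sub_32
| //   s64 = COPY s32  -> promoted first:
| //     %p:gpr64 = SUBREG_TO_REG 0, %src:gpr32, %subreg.sub_32
| //     %dst = COPY %p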
1033 | |||||
1034 | // If the destination is a physical register, then there's nothing to | ||||
1035 | // change, so we're done. | ||||
1036 | if (Register::isPhysicalRegister(DstReg)) | ||||
1037 | return CheckCopy(); | ||||
1038 | } | ||||
1039 | |||||
1040 | // No need to constrain SrcReg. It will get constrained when we hit another | ||||
1041 | // of its use or its defs. Copies do not have constraints. | ||||
1042 | if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { | ||||
1043 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) | ||||
1044 | << " operand\n"); | ||||
1045 | return false; | ||||
1046 | } | ||||
1047 | |||||
1048 | // If this is a GPR ZEXT, we want to just reduce it down into a copy. | ||||
1049 | // The sizes will be mismatched with the source < 32b but that's ok. | ||||
1050 | if (I.getOpcode() == TargetOpcode::G_ZEXT) { | ||||
1051 | I.setDesc(TII.get(AArch64::COPY)); | ||||
1052 | assert(SrcRegBank.getID() == AArch64::GPRRegBankID); | ||||
1053 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||
1054 | } | ||||
1055 | |||||
1056 | I.setDesc(TII.get(AArch64::COPY)); | ||||
1057 | return CheckCopy(); | ||||
1058 | } | ||||
1059 | |||||
1060 | static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { | ||||
1061 | if (!DstTy.isScalar() || !SrcTy.isScalar()) | ||||
1062 | return GenericOpc; | ||||
1063 | |||||
1064 | const unsigned DstSize = DstTy.getSizeInBits(); | ||||
1065 | const unsigned SrcSize = SrcTy.getSizeInBits(); | ||||
1066 | |||||
1067 | switch (DstSize) { | ||||
1068 | case 32: | ||||
1069 | switch (SrcSize) { | ||||
1070 | case 32: | ||||
1071 | switch (GenericOpc) { | ||||
1072 | case TargetOpcode::G_SITOFP: | ||||
1073 | return AArch64::SCVTFUWSri; | ||||
1074 | case TargetOpcode::G_UITOFP: | ||||
1075 | return AArch64::UCVTFUWSri; | ||||
1076 | case TargetOpcode::G_FPTOSI: | ||||
1077 | return AArch64::FCVTZSUWSr; | ||||
1078 | case TargetOpcode::G_FPTOUI: | ||||
1079 | return AArch64::FCVTZUUWSr; | ||||
1080 | default: | ||||
1081 | return GenericOpc; | ||||
1082 | } | ||||
1083 | case 64: | ||||
1084 | switch (GenericOpc) { | ||||
1085 | case TargetOpcode::G_SITOFP: | ||||
1086 | return AArch64::SCVTFUXSri; | ||||
1087 | case TargetOpcode::G_UITOFP: | ||||
1088 | return AArch64::UCVTFUXSri; | ||||
1089 | case TargetOpcode::G_FPTOSI: | ||||
1090 | return AArch64::FCVTZSUWDr; | ||||
1091 | case TargetOpcode::G_FPTOUI: | ||||
1092 | return AArch64::FCVTZUUWDr; | ||||
1093 | default: | ||||
1094 | return GenericOpc; | ||||
1095 | } | ||||
1096 | default: | ||||
1097 | return GenericOpc; | ||||
1098 | } | ||||
1099 | case 64: | ||||
1100 | switch (SrcSize) { | ||||
1101 | case 32: | ||||
1102 | switch (GenericOpc) { | ||||
1103 | case TargetOpcode::G_SITOFP: | ||||
1104 | return AArch64::SCVTFUWDri; | ||||
1105 | case TargetOpcode::G_UITOFP: | ||||
1106 | return AArch64::UCVTFUWDri; | ||||
1107 | case TargetOpcode::G_FPTOSI: | ||||
1108 | return AArch64::FCVTZSUXSr; | ||||
1109 | case TargetOpcode::G_FPTOUI: | ||||
1110 | return AArch64::FCVTZUUXSr; | ||||
1111 | default: | ||||
1112 | return GenericOpc; | ||||
1113 | } | ||||
1114 | case 64: | ||||
1115 | switch (GenericOpc) { | ||||
1116 | case TargetOpcode::G_SITOFP: | ||||
1117 | return AArch64::SCVTFUXDri; | ||||
1118 | case TargetOpcode::G_UITOFP: | ||||
1119 | return AArch64::UCVTFUXDri; | ||||
1120 | case TargetOpcode::G_FPTOSI: | ||||
1121 | return AArch64::FCVTZSUXDr; | ||||
1122 | case TargetOpcode::G_FPTOUI: | ||||
1123 | return AArch64::FCVTZUUXDr; | ||||
1124 | default: | ||||
1125 | return GenericOpc; | ||||
1126 | } | ||||
1127 | default: | ||||
1128 | return GenericOpc; | ||||
1129 | } | ||||
1130 | default: | ||||
1131 | return GenericOpc; | ||||
1132 | }; | ||||
1133 | return GenericOpc; | ||||
1134 | } | ||||
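| // [Editor note] The opcode names above encode the operands: e.g.
| // SCVTFUWSri converts a W (32-bit GPR) source to an S (32-bit FPR) result,
| // and FCVTZSUXDr a D (64-bit FPR) source to an X (64-bit GPR) result,
| // which is how each (DstSize, SrcSize) pair selects its instruction.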
1135 | |||||
1136 | MachineInstr * | ||||
1137 | AArch64InstructionSelector::emitSelect(Register Dst, Register True, | ||||
1138 | Register False, AArch64CC::CondCode CC, | ||||
1139 | MachineIRBuilder &MIB) const { | ||||
1140 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||
1141 | assert(RBI.getRegBank(False, MRI, TRI)->getID() == | ||||
1142 | RBI.getRegBank(True, MRI, TRI)->getID() && | ||||
1143 | "Expected both select operands to have the same regbank?"); | ||||
1144 | LLT Ty = MRI.getType(True); | ||||
1145 | if (Ty.isVector()) | ||||
1146 | return nullptr; | ||||
1147 | const unsigned Size = Ty.getSizeInBits(); | ||||
1148 | assert((Size == 32 || Size == 64) && | ||||
1149 | "Expected 32 bit or 64 bit select only?"); | ||||
1150 | const bool Is32Bit = Size == 32; | ||||
1151 | if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { | ||||
1152 | unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; | ||||
1153 | auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); | ||||
1154 | constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); | ||||
1155 | return &*FCSel; | ||||
1156 | } | ||||
1157 | |||||
1158 | // By default, we'll try and emit a CSEL. | ||||
1159 | unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; | ||||
1160 | bool Optimized = false; | ||||
1161 | auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, | ||||
1162 | &Optimized](Register &Reg, Register &OtherReg, | ||||
1163 | bool Invert) { | ||||
1164 | if (Optimized) | ||||
1165 | return false; | ||||
1166 | |||||
1167 | // Attempt to fold: | ||||
1168 | // | ||||
1169 | // %sub = G_SUB 0, %x | ||||
1170 | // %select = G_SELECT cc, %reg, %sub | ||||
1171 | // | ||||
1172 | // Into: | ||||
1173 | // %select = CSNEG %reg, %x, cc | ||||
1174 | Register MatchReg; | ||||
1175 | if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { | ||||
1176 | Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; | ||||
1177 | Reg = MatchReg; | ||||
1178 | if (Invert) { | ||||
1179 | CC = AArch64CC::getInvertedCondCode(CC); | ||||
1180 | std::swap(Reg, OtherReg); | ||||
1181 | } | ||||
1182 | return true; | ||||
1183 | } | ||||
1184 | |||||
1185 | // Attempt to fold: | ||||
1186 | // | ||||
1187 | // %xor = G_XOR %x, -1 | ||||
1188 | // %select = G_SELECT cc, %reg, %xor | ||||
1189 | // | ||||
1190 | // Into: | ||||
1191 | // %select = CSINV %reg, %x, cc | ||||
1192 | if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { | ||||
1193 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||
1194 | Reg = MatchReg; | ||||
1195 | if (Invert) { | ||||
1196 | CC = AArch64CC::getInvertedCondCode(CC); | ||||
1197 | std::swap(Reg, OtherReg); | ||||
1198 | } | ||||
1199 | return true; | ||||
1200 | } | ||||
1201 | |||||
1202 | // Attempt to fold: | ||||
1203 | // | ||||
1204 | // %add = G_ADD %x, 1 | ||||
1205 | // %select = G_SELECT cc, %reg, %add | ||||
1206 | // | ||||
1207 | // Into: | ||||
1208 | // %select = CSINC %reg, %x, cc | ||||
1209 | if (mi_match(Reg, MRI, | ||||
1210 | m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)), | ||||
1211 | m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) { | ||||
1212 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||
1213 | Reg = MatchReg; | ||||
1214 | if (Invert) { | ||||
1215 | CC = AArch64CC::getInvertedCondCode(CC); | ||||
1216 | std::swap(Reg, OtherReg); | ||||
1217 | } | ||||
1218 | return true; | ||||
1219 | } | ||||
1220 | |||||
1221 | return false; | ||||
1222 | }; | ||||
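| // [Editor note, illustrative] When Invert is set the fold applies to the
| // true operand, e.g.:
| //   %t = G_SUB 0, %x
| //   %sel = G_SELECT cc, %t, %f
| // becomes CSNEG %f, %x, inv(cc), which is why the condition code is
| // inverted and the operands swapped in the lambda above.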
1223 | |||||
1224 | // Helper lambda which tries to use CSINC/CSINV for the instruction when its | ||||
1225 | // true/false values are constants. | ||||
1226 | // FIXME: All of these patterns already exist in tablegen. We should be | ||||
1227 | // able to import these. | ||||
1228 | auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, | ||||
1229 | &Optimized]() { | ||||
1230 | if (Optimized) | ||||
1231 | return false; | ||||
1232 | auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI); | ||||
1233 | auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI); | ||||
1234 | if (!TrueCst && !FalseCst) | ||||
1235 | return false; | ||||
1236 | |||||
1237 | Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; | ||||
1238 | if (TrueCst && FalseCst) { | ||||
1239 | int64_t T = TrueCst->Value.getSExtValue(); | ||||
1240 | int64_t F = FalseCst->Value.getSExtValue(); | ||||
1241 | |||||
1242 | if (T == 0 && F == 1) { | ||||
1243 | // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc | ||||
1244 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||
1245 | True = ZReg; | ||||
1246 | False = ZReg; | ||||
1247 | return true; | ||||
1248 | } | ||||
1249 | |||||
1250 | if (T == 0 && F == -1) { | ||||
1251 | // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc | ||||
1252 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||
1253 | True = ZReg; | ||||
1254 | False = ZReg; | ||||
1255 | return true; | ||||
1256 | } | ||||
1257 | } | ||||
1258 | |||||
1259 | if (TrueCst) { | ||||
1260 | int64_t T = TrueCst->Value.getSExtValue(); | ||||
1261 | if (T == 1) { | ||||
1262 | // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc | ||||
1263 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||
1264 | True = False; | ||||
1265 | False = ZReg; | ||||
1266 | CC = AArch64CC::getInvertedCondCode(CC); | ||||
1267 | return true; | ||||
1268 | } | ||||
1269 | |||||
1270 | if (T == -1) { | ||||
1271 | // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc | ||||
1272 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||
1273 | True = False; | ||||
1274 | False = ZReg; | ||||
1275 | CC = AArch64CC::getInvertedCondCode(CC); | ||||
1276 | return true; | ||||
1277 | } | ||||
1278 | } | ||||
1279 | |||||
1280 | if (FalseCst) { | ||||
1281 | int64_t F = FalseCst->Value.getSExtValue(); | ||||
1282 | if (F == 1) { | ||||
1283 | // G_SELECT cc, t, 1 -> CSINC t, zreg, cc | ||||
1284 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||
1285 | False = ZReg; | ||||
1286 | return true; | ||||
1287 | } | ||||
1288 | |||||
1289 | if (F == -1) { | ||||
1290 | // G_SELECT cc, t, -1 -> CSINV t, zreg, cc | ||||
1291 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||
1292 | False = ZReg; | ||||
1293 | return true; | ||||
1294 | } | ||||
1295 | } | ||||
1296 | return false; | ||||
1297 | }; | ||||
1298 | |||||
1299 | Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); | ||||
1300 | Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); | ||||
1301 | Optimized |= TryOptSelectCst(); | ||||
1302 | auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); | ||||
1303 | constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); | ||||
1304 | return &*SelectInst; | ||||
1305 | } | ||||
1306 | |||||
1307 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { | ||||
1308 | switch (P) { | ||||
1309 | default: | ||||
1310 | llvm_unreachable("Unknown condition code!"); | ||||
1311 | case CmpInst::ICMP_NE: | ||||
1312 | return AArch64CC::NE; | ||||
1313 | case CmpInst::ICMP_EQ: | ||||
1314 | return AArch64CC::EQ; | ||||
1315 | case CmpInst::ICMP_SGT: | ||||
1316 | return AArch64CC::GT; | ||||
1317 | case CmpInst::ICMP_SGE: | ||||
1318 | return AArch64CC::GE; | ||||
1319 | case CmpInst::ICMP_SLT: | ||||
1320 | return AArch64CC::LT; | ||||
1321 | case CmpInst::ICMP_SLE: | ||||
1322 | return AArch64CC::LE; | ||||
1323 | case CmpInst::ICMP_UGT: | ||||
1324 | return AArch64CC::HI; | ||||
1325 | case CmpInst::ICMP_UGE: | ||||
1326 | return AArch64CC::HS; | ||||
1327 | case CmpInst::ICMP_ULT: | ||||
1328 | return AArch64CC::LO; | ||||
1329 | case CmpInst::ICMP_ULE: | ||||
1330 | return AArch64CC::LS; | ||||
1331 | } | ||||
1332 | } | ||||
1333 | |||||
1334 | /// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC. | ||||
1335 | static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, | ||||
1336 | AArch64CC::CondCode &CondCode, | ||||
1337 | AArch64CC::CondCode &CondCode2) { | ||||
1338 | CondCode2 = AArch64CC::AL; | ||||
1339 | switch (CC) { | ||||
1340 | default: | ||||
1341 | llvm_unreachable("Unknown FP condition!"); | ||||
1342 | case CmpInst::FCMP_OEQ: | ||||
1343 | CondCode = AArch64CC::EQ; | ||||
1344 | break; | ||||
1345 | case CmpInst::FCMP_OGT: | ||||
1346 | CondCode = AArch64CC::GT; | ||||
1347 | break; | ||||
1348 | case CmpInst::FCMP_OGE: | ||||
1349 | CondCode = AArch64CC::GE; | ||||
1350 | break; | ||||
1351 | case CmpInst::FCMP_OLT: | ||||
1352 | CondCode = AArch64CC::MI; | ||||
1353 | break; | ||||
1354 | case CmpInst::FCMP_OLE: | ||||
1355 | CondCode = AArch64CC::LS; | ||||
1356 | break; | ||||
1357 | case CmpInst::FCMP_ONE: | ||||
1358 | CondCode = AArch64CC::MI; | ||||
1359 | CondCode2 = AArch64CC::GT; | ||||
1360 | break; | ||||
1361 | case CmpInst::FCMP_ORD: | ||||
1362 | CondCode = AArch64CC::VC; | ||||
1363 | break; | ||||
1364 | case CmpInst::FCMP_UNO: | ||||
1365 | CondCode = AArch64CC::VS; | ||||
1366 | break; | ||||
1367 | case CmpInst::FCMP_UEQ: | ||||
1368 | CondCode = AArch64CC::EQ; | ||||
1369 | CondCode2 = AArch64CC::VS; | ||||
1370 | break; | ||||
1371 | case CmpInst::FCMP_UGT: | ||||
1372 | CondCode = AArch64CC::HI; | ||||
1373 | break; | ||||
1374 | case CmpInst::FCMP_UGE: | ||||
1375 | CondCode = AArch64CC::PL; | ||||
1376 | break; | ||||
1377 | case CmpInst::FCMP_ULT: | ||||
1378 | CondCode = AArch64CC::LT; | ||||
1379 | break; | ||||
1380 | case CmpInst::FCMP_ULE: | ||||
1381 | CondCode = AArch64CC::LE; | ||||
1382 | break; | ||||
1383 | case CmpInst::FCMP_UNE: | ||||
1384 | CondCode = AArch64CC::NE; | ||||
1385 | break; | ||||
1386 | } | ||||
1387 | } | ||||
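| // [Editor note] The two-code cases above are OR'ed: e.g. FCMP_ONE yields
| // {MI, GT}, i.e. "less than" or "greater than", and the FCMP branch
| // lowering later in this file emits one Bcc per condition code to the
| // same destination block.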
1388 | |||||
1389 | /// Convert an IR fp condition code to an AArch64 CC. | ||||
1390 | /// This differs from changeFPCCToAArch64CC in that it returns cond codes that | ||||
1391 | /// should be AND'ed instead of OR'ed. | ||||
1392 | static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, | ||||
1393 | AArch64CC::CondCode &CondCode, | ||||
1394 | AArch64CC::CondCode &CondCode2) { | ||||
1395 | CondCode2 = AArch64CC::AL; | ||||
1396 | switch (CC) { | ||||
1397 | default: | ||||
1398 | changeFPCCToORAArch64CC(CC, CondCode, CondCode2); | ||||
1399 | assert(CondCode2 == AArch64CC::AL); | ||||
1400 | break; | ||||
1401 | case CmpInst::FCMP_ONE: | ||||
1402 | // (a one b) | ||||
1403 | // == ((a olt b) || (a ogt b)) | ||||
1404 | // == ((a ord b) && (a une b)) | ||||
1405 | CondCode = AArch64CC::VC; | ||||
1406 | CondCode2 = AArch64CC::NE; | ||||
1407 | break; | ||||
1408 | case CmpInst::FCMP_UEQ: | ||||
1409 | // (a ueq b) | ||||
1410 | // == ((a uno b) || (a oeq b)) | ||||
1411 | // == ((a ule b) && (a uge b)) | ||||
1412 | CondCode = AArch64CC::PL; | ||||
1413 | CondCode2 = AArch64CC::LE; | ||||
1414 | break; | ||||
1415 | } | ||||
1416 | } | ||||
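| // [Editor note] These AND'ed forms are for users that require both codes
| // to hold on one value, e.g. FCMP_ONE as {VC, NE} reads "ordered and not
| // equal", matching the derivation in the comments above.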
1417 | |||||
1418 | /// Return a register which can be used as a bit to test in a TB(N)Z. | ||||
1419 | static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, | ||||
1420 | MachineRegisterInfo &MRI) { | ||||
1421 | assert(Reg.isValid() && "Expected valid register!"); | ||||
1422 | bool HasZext = false; | ||||
1423 | while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { | ||||
1424 | unsigned Opc = MI->getOpcode(); | ||||
1425 | |||||
1426 | if (!MI->getOperand(0).isReg() || | ||||
1427 | !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) | ||||
1428 | break; | ||||
1429 | |||||
1430 | // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. | ||||
1431 | // | ||||
1432 | // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number | ||||
1433 | // on the truncated x is the same as the bit number on x. | ||||
1434 | if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || | ||||
1435 | Opc == TargetOpcode::G_TRUNC) { | ||||
1436 | if (Opc == TargetOpcode::G_ZEXT) | ||||
1437 | HasZext = true; | ||||
1438 | |||||
1439 | Register NextReg = MI->getOperand(1).getReg(); | ||||
1440 | // Did we find something worth folding? | ||||
1441 | if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg)) | ||||
1442 | break; | ||||
1443 | |||||
1444 | // NextReg is worth folding. Keep looking. | ||||
1445 | Reg = NextReg; | ||||
1446 | continue; | ||||
1447 | } | ||||
1448 | |||||
1449 | // Attempt to find a suitable operation with a constant on one side. | ||||
1450 | Optional<uint64_t> C; | ||||
1451 | Register TestReg; | ||||
1452 | switch (Opc) { | ||||
1453 | default: | ||||
1454 | break; | ||||
1455 | case TargetOpcode::G_AND: | ||||
1456 | case TargetOpcode::G_XOR: { | ||||
1457 | TestReg = MI->getOperand(1).getReg(); | ||||
1458 | Register ConstantReg = MI->getOperand(2).getReg(); | ||||
1459 | auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | ||||
1460 | if (!VRegAndVal) { | ||||
1461 | // AND commutes, check the other side for a constant. | ||||
1462 | // FIXME: Can we canonicalize the constant so that it's always on the | ||||
1463 | // same side at some point earlier? | ||||
1464 | std::swap(ConstantReg, TestReg); | ||||
1465 | VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | ||||
1466 | } | ||||
1467 | if (VRegAndVal) { | ||||
1468 | if (HasZext) | ||||
1469 | C = VRegAndVal->Value.getZExtValue(); | ||||
1470 | else | ||||
1471 | C = VRegAndVal->Value.getSExtValue(); | ||||
1472 | } | ||||
1473 | break; | ||||
1474 | } | ||||
1475 | case TargetOpcode::G_ASHR: | ||||
1476 | case TargetOpcode::G_LSHR: | ||||
1477 | case TargetOpcode::G_SHL: { | ||||
1478 | TestReg = MI->getOperand(1).getReg(); | ||||
1479 | auto VRegAndVal = | ||||
1480 | getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); | ||||
1481 | if (VRegAndVal) | ||||
1482 | C = VRegAndVal->Value.getSExtValue(); | ||||
1483 | break; | ||||
1484 | } | ||||
1485 | } | ||||
1486 | |||||
1487 | // Didn't find a constant or viable register. Bail out of the loop. | ||||
1488 | if (!C || !TestReg.isValid()) | ||||
1489 | break; | ||||
1490 | |||||
1491 | // We found a suitable instruction with a constant. Check to see if we can | ||||
1492 | // walk through the instruction. | ||||
1493 | Register NextReg; | ||||
1494 | unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); | ||||
1495 | switch (Opc) { | ||||
1496 | default: | ||||
1497 | break; | ||||
1498 | case TargetOpcode::G_AND: | ||||
1499 | // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. | ||||
1500 | if ((*C >> Bit) & 1) | ||||
1501 | NextReg = TestReg; | ||||
1502 | break; | ||||
1503 | case TargetOpcode::G_SHL: | ||||
1504 | // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in | ||||
1505 | // the type of the register. | ||||
1506 | if (*C <= Bit && (Bit - *C) < TestRegSize) { | ||||
1507 | NextReg = TestReg; | ||||
1508 | Bit = Bit - *C; | ||||
1509 | } | ||||
1510 | break; | ||||
1511 | case TargetOpcode::G_ASHR: | ||||
1512 | // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits | ||||
1513 | // in x | ||||
1514 | NextReg = TestReg; | ||||
1515 | Bit = Bit + *C; | ||||
1516 | if (Bit >= TestRegSize) | ||||
1517 | Bit = TestRegSize - 1; | ||||
1518 | break; | ||||
1519 | case TargetOpcode::G_LSHR: | ||||
1520 | // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x | ||||
1521 | if ((Bit + *C) < TestRegSize) { | ||||
1522 | NextReg = TestReg; | ||||
1523 | Bit = Bit + *C; | ||||
1524 | } | ||||
1525 | break; | ||||
1526 | case TargetOpcode::G_XOR: | ||||
1527 | // We can walk through a G_XOR by inverting whether we use tbz/tbnz when | ||||
1528 | // appropriate. | ||||
1529 | // | ||||
1530 | // e.g. If x' = xor x, c, and the b-th bit is set in c then | ||||
1531 | // | ||||
1532 | // tbz x', b -> tbnz x, b | ||||
1533 | // | ||||
1534 | // Because x' only has the b-th bit set if x does not. | ||||
1535 | if ((*C >> Bit) & 1) | ||||
1536 | Invert = !Invert; | ||||
1537 | NextReg = TestReg; | ||||
1538 | break; | ||||
1539 | } | ||||
1540 | |||||
1541 | // Check if we found anything worth folding. | ||||
1542 | if (!NextReg.isValid()) | ||||
1543 | return Reg; | ||||
1544 | Reg = NextReg; | ||||
1545 | } | ||||
1546 | |||||
1547 | return Reg; | ||||
1548 | } | ||||
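| // [Editor note, illustrative] A worked example of the walk above: testing
| // bit 3 of %y where
| //   %y = G_SHL %x, 2
| // steps through the shift, leaving Reg = %x and Bit = 1, since bit 3 of
| // (x << 2) is bit 1 of x. An intervening G_XOR with bit 3 of its constant
| // set would additionally flip Invert, turning a TBZ into a TBNZ.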
1549 | |||||
1550 | MachineInstr *AArch64InstructionSelector::emitTestBit( | ||||
1551 | Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, | ||||
1552 | MachineIRBuilder &MIB) const { | ||||
1553 | assert(TestReg.isValid()); | ||||
1554 | assert(ProduceNonFlagSettingCondBr && | ||||
1555 | "Cannot emit TB(N)Z with speculation tracking!"); | ||||
1556 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||
1557 | |||||
1558 | // Attempt to optimize the test bit by walking over instructions. | ||||
1559 | TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); | ||||
1560 | LLT Ty = MRI.getType(TestReg); | ||||
1561 | unsigned Size = Ty.getSizeInBits(); | ||||
1562 | assert(!Ty.isVector() && "Expected a scalar!"); | ||||
1563 | assert(Bit < 64 && "Bit is too large!"); | ||||
1564 | |||||
1565 | // When the test register is a 64-bit register, we have to narrow to make | ||||
1566 | // TBNZW work. | ||||
1567 | bool UseWReg = Bit < 32; | ||||
1568 | unsigned NecessarySize = UseWReg ? 32 : 64; | ||||
1569 | if (Size != NecessarySize) | ||||
1570 | TestReg = moveScalarRegClass( | ||||
1571 | TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, | ||||
1572 | MIB); | ||||
1573 | |||||
1574 | static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, | ||||
1575 | {AArch64::TBZW, AArch64::TBNZW}}; | ||||
1576 | unsigned Opc = OpcTable[UseWReg][IsNegative]; | ||||
1577 | auto TestBitMI = | ||||
1578 | MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); | ||||
1579 | constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); | ||||
1580 | return &*TestBitMI; | ||||
1581 | } | ||||
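| // [Editor note] Bit numbers below 32 use the W-register forms: testing
| // bit 0 of an s64 value first moves it into GPR32 and emits TBZW/TBNZW,
| // while bit 63 stays on the 64-bit TBZX/TBNZX forms.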
1582 | |||||
1583 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( | ||||
1584 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, | ||||
1585 | MachineIRBuilder &MIB) const { | ||||
1586 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); | ||||
1587 | // Given something like this: | ||||
1588 | // | ||||
1589 | // %x = ...Something... | ||||
1590 | // %one = G_CONSTANT i64 1 | ||||
1591 | // %zero = G_CONSTANT i64 0 | ||||
1592 | // %and = G_AND %x, %one | ||||
1593 | // %cmp = G_ICMP intpred(ne), %and, %zero | ||||
1594 | // %cmp_trunc = G_TRUNC %cmp | ||||
1595 | // G_BRCOND %cmp_trunc, %bb.3 | ||||
1596 | // | ||||
1597 | // We want to try and fold the AND into the G_BRCOND and produce either a | ||||
1598 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). | ||||
1599 | // | ||||
1600 | // In this case, we'd get | ||||
1601 | // | ||||
1602 | // TBNZ %x %bb.3 | ||||
1603 | // | ||||
1604 | |||||
1605 | // Check if the AND has a constant on its RHS which we can use as a mask. | ||||
1606 | // If it's a power of 2, then it's the same as checking a specific bit. | ||||
1607 | // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) | ||||
1608 | auto MaybeBit = getIConstantVRegValWithLookThrough( | ||||
1609 | AndInst.getOperand(2).getReg(), *MIB.getMRI()); | ||||
1610 | if (!MaybeBit) | ||||
1611 | return false; | ||||
1612 | |||||
1613 | int32_t Bit = MaybeBit->Value.exactLogBase2(); | ||||
1614 | if (Bit < 0) | ||||
1615 | return false; | ||||
1616 | |||||
1617 | Register TestReg = AndInst.getOperand(1).getReg(); | ||||
1618 | |||||
1619 | // Emit a TB(N)Z. | ||||
1620 | emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); | ||||
1621 | return true; | ||||
1622 | } | ||||
1623 | |||||
1624 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, | ||||
1625 | bool IsNegative, | ||||
1626 | MachineBasicBlock *DestMBB, | ||||
1627 | MachineIRBuilder &MIB) const { | ||||
1628 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); | ||||
1629 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||
1630 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == | ||||
1631 | AArch64::GPRRegBankID && | ||||
1632 | "Expected GPRs only?"); | ||||
1633 | auto Ty = MRI.getType(CompareReg); | ||||
1634 | unsigned Width = Ty.getSizeInBits(); | ||||
1635 | assert(!Ty.isVector() && "Expected scalar only?"); | ||||
1636 | assert(Width <= 64 && "Expected width to be at most 64?"); | ||||
1637 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, | ||||
1638 | {AArch64::CBNZW, AArch64::CBNZX}}; | ||||
1639 | unsigned Opc = OpcTable[IsNegative][Width == 64]; | ||||
1640 | auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); | ||||
1641 | constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); | ||||
1642 | return &*BranchMI; | ||||
1643 | } | ||||
1644 | |||||
1645 | bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( | ||||
1646 | MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { | ||||
1647 | assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); | ||||
1648 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); | ||||
1649 | // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't | ||||
1650 | // totally clean. Some of them require two branches to implement. | ||||
1651 | auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); | ||||
1652 | emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, | ||||
1653 | Pred); | ||||
1654 | AArch64CC::CondCode CC1, CC2; | ||||
1655 | changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); | ||||
1656 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | ||||
1657 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); | ||||
1658 | if (CC2 != AArch64CC::AL) | ||||
1659 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); | ||||
1660 | I.eraseFromParent(); | ||||
1661 | return true; | ||||
1662 | } | ||||
1663 | |||||
1664 | bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( | ||||
1665 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { | ||||
1666 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); | ||||
1667 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); | ||||
1668 | // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. | ||||
1669 | // | ||||
1670 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z | ||||
1671 | // instructions will not be produced, as they are conditional branch | ||||
1672 | // instructions that do not set flags. | ||||
1673 | if (!ProduceNonFlagSettingCondBr) | ||||
1674 | return false; | ||||
1675 | |||||
1676 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||
1677 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | ||||
1678 | auto Pred = | ||||
1679 | static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); | ||||
1680 | Register LHS = ICmp.getOperand(2).getReg(); | ||||
1681 | Register RHS = ICmp.getOperand(3).getReg(); | ||||
1682 | |||||
1683 | // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. | ||||
1684 | auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); | ||||
1685 | MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); | ||||
1686 | |||||
1687 | // When we can emit a TB(N)Z, prefer that. | ||||
1688 | // | ||||
1689 | // Handle non-commutative condition codes first. | ||||
1690 | // Note that we don't want to do this when we have a G_AND because it can | ||||
1691 | // become a tst. The tst will make the test bit in the TB(N)Z redundant. | ||||
1692 | if (VRegAndVal && !AndInst) { | ||||
1693 | int64_t C = VRegAndVal->Value.getSExtValue(); | ||||
1694 | |||||
1695 | // When we have a greater-than comparison, we can just test if the msb is | ||||
1696 | // zero. | ||||
1697 | if (C == -1 && Pred == CmpInst::ICMP_SGT) { | ||||
1698 | uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; | ||||
1699 | emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); | ||||
1700 | I.eraseFromParent(); | ||||
1701 | return true; | ||||
1702 | } | ||||
1703 | |||||
1704 | // When we have a less than comparison, we can just test if the msb is not | ||||
1705 | // zero. | ||||
1706 | if (C == 0 && Pred == CmpInst::ICMP_SLT) { | ||||
1707 | uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; | ||||
1708 | emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); | ||||
1709 | I.eraseFromParent(); | ||||
1710 | return true; | ||||
1711 | } | ||||
1712 | } | ||||
1713 | |||||
1714 | // Attempt to handle commutative condition codes. Right now, that's only | ||||
1715 | // eq/ne. | ||||
1716 | if (ICmpInst::isEquality(Pred)) { | ||||
1717 | if (!VRegAndVal) { | ||||
1718 | std::swap(RHS, LHS); | ||||
1719 | VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); | ||||
1720 | AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); | ||||
1721 | } | ||||
1722 | |||||
1723 | if (VRegAndVal && VRegAndVal->Value == 0) { | ||||
1724 | // If there's a G_AND feeding into this branch, try to fold it away by | ||||
1725 | // emitting a TB(N)Z instead. | ||||
1726 | // | ||||
1727 | // Note: If we have LT, then it *is* possible to fold, but it wouldn't be | ||||
1728 | // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding | ||||
1729 | // would be redundant. | ||||
1730 | if (AndInst && | ||||
1731 | tryOptAndIntoCompareBranch( | ||||
1732 | *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { | ||||
1733 | I.eraseFromParent(); | ||||
1734 | return true; | ||||
1735 | } | ||||
1736 | |||||
1737 | // Otherwise, try to emit a CB(N)Z instead. | ||||
1738 | auto LHSTy = MRI.getType(LHS); | ||||
1739 | if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { | ||||
1740 | emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); | ||||
1741 | I.eraseFromParent(); | ||||
1742 | return true; | ||||
1743 | } | ||||
1744 | } | ||||
1745 | } | ||||
1746 | |||||
1747 | return false; | ||||
1748 | } | ||||
1749 | |||||
1750 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( | ||||
1751 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { | ||||
1752 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); | ||||
1753 | assert(I.getOpcode() == TargetOpcode::G_BRCOND); | ||||
1754 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) | ||||
1755 | return true; | ||||
1756 | |||||
1757 | // Couldn't optimize. Emit a compare + a Bcc. | ||||
1758 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | ||||
1759 | auto PredOp = ICmp.getOperand(1); | ||||
1760 | emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); | ||||
1761 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( | ||||
1762 | static_cast<CmpInst::Predicate>(PredOp.getPredicate())); | ||||
1763 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); | ||||
1764 | I.eraseFromParent(); | ||||
1765 | return true; | ||||
1766 | } | ||||
1767 | |||||
1768 | bool AArch64InstructionSelector::selectCompareBranch( | ||||
1769 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { | ||||
1770 | Register CondReg = I.getOperand(0).getReg(); | ||||
1771 | MachineInstr *CCMI = MRI.getVRegDef(CondReg); | ||||
1772 | if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { | ||||
1773 | CondReg = CCMI->getOperand(1).getReg(); | ||||
1774 | CCMI = MRI.getVRegDef(CondReg); | ||||
1775 | } | ||||
1776 | |||||
1777 | // Try to select the G_BRCOND using whatever is feeding the condition if | ||||
1778 | // possible. | ||||
1779 | unsigned CCMIOpc = CCMI->getOpcode(); | ||||
1780 | if (CCMIOpc == TargetOpcode::G_FCMP) | ||||
1781 | return selectCompareBranchFedByFCmp(I, *CCMI, MIB); | ||||
1782 | if (CCMIOpc == TargetOpcode::G_ICMP) | ||||
1783 | return selectCompareBranchFedByICmp(I, *CCMI, MIB); | ||||
1784 | |||||
1785 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z | ||||
1786 | // instructions will not be produced, as they are conditional branch | ||||
1787 | // instructions that do not set flags. | ||||
1788 | if (ProduceNonFlagSettingCondBr) { | ||||
1789 | emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, | ||||
1790 | I.getOperand(1).getMBB(), MIB); | ||||
1791 | I.eraseFromParent(); | ||||
1792 | return true; | ||||
1793 | } | ||||
1794 | |||||
1795 | // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. | ||||
1796 | auto TstMI = | ||||
1797 | MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); | ||||
1798 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); | ||||
1799 | auto Bcc = MIB.buildInstr(AArch64::Bcc) | ||||
1800 | .addImm(AArch64CC::EQ) | ||||
1801 | .addMBB(I.getOperand(1).getMBB()); | ||||
1802 | I.eraseFromParent(); | ||||
1803 | return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); | ||||
1804 | } | ||||
1805 | |||||
1806 | /// Returns the element immediate value of a vector shift operand if found. | ||||
1807 | /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. | ||||
1808 | static Optional<int64_t> getVectorShiftImm(Register Reg, | ||||
1809 | MachineRegisterInfo &MRI) { | ||||
1810 | assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); | ||||
1811 | MachineInstr *OpMI = MRI.getVRegDef(Reg); | ||||
1812 | assert(OpMI && "Expected to find a vreg def for vector shift operand"); | ||||
1813 | return getAArch64VectorSplatScalar(*OpMI, MRI); | ||||
1814 | } | ||||
1815 | |||||
1816 | /// Matches and returns the shift immediate value for a SHL instruction given | ||||
1817 | /// a shift operand. | ||||
1818 | static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { | ||||
1819 | Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); | ||||
1820 | if (!ShiftImm) | ||||
1821 | return None; | ||||
1822 | // Check the immediate is in range for a SHL. | ||||
1823 | int64_t Imm = *ShiftImm; | ||||
1824 | if (Imm < 0) | ||||
1825 | return None; | ||||
1826 | switch (SrcTy.getElementType().getSizeInBits()) { | ||||
1827 | default: | ||||
1828 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); | ||||
1829 | return None; | ||||
1830 | case 8: | ||||
1831 | if (Imm > 7) | ||||
1832 | return None; | ||||
1833 | break; | ||||
1834 | case 16: | ||||
1835 | if (Imm > 15) | ||||
1836 | return None; | ||||
1837 | break; | ||||
1838 | case 32: | ||||
1839 | if (Imm > 31) | ||||
1840 | return None; | ||||
1841 | break; | ||||
1842 | case 64: | ||||
1843 | if (Imm > 63) | ||||
1844 | return None; | ||||
1845 | break; | ||||
1846 | } | ||||
1847 | return Imm; | ||||
1848 | } | ||||
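| // [Editor note] For example, on <4 x s32> a splat of 3 yields Imm = 3
| // (valid: at most 31 for 32-bit elements), while a splat of 40 is out of
| // range and returns None.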
1849 | |||||
1850 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, | ||||
1851 | MachineRegisterInfo &MRI) { | ||||
1852 | assert(I.getOpcode() == TargetOpcode::G_SHL); | ||||
1853 | Register DstReg = I.getOperand(0).getReg(); | ||||
1854 | const LLT Ty = MRI.getType(DstReg); | ||||
1855 | Register Src1Reg = I.getOperand(1).getReg(); | ||||
1856 | Register Src2Reg = I.getOperand(2).getReg(); | ||||
1857 | |||||
1858 | if (!Ty.isVector()) | ||||
1859 | return false; | ||||
1860 | |||||
1861 | // Check if we have a vector of constants on RHS that we can select as the | ||||
1862 | // immediate form. | ||||
1863 | Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); | ||||
1864 | |||||
1865 | unsigned Opc = 0; | ||||
1866 | if (Ty == LLT::fixed_vector(2, 64)) { | ||||
1867 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; | ||||
1868 | } else if (Ty == LLT::fixed_vector(4, 32)) { | ||||
1869 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; | ||||
1870 | } else if (Ty == LLT::fixed_vector(2, 32)) { | ||||
1871 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; | ||||
1872 | } else if (Ty == LLT::fixed_vector(4, 16)) { | ||||
1873 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; | ||||
1874 | } else if (Ty == LLT::fixed_vector(8, 16)) { | ||||
1875 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; | ||||
1876 | } else if (Ty == LLT::fixed_vector(16, 8)) { | ||||
1877 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; | ||||
1878 | } else if (Ty == LLT::fixed_vector(8, 8)) { | ||||
1879 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; | ||||
1880 | } else { | ||||
1881 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); | ||||
1882 | return false; | ||||
1883 | } | ||||
1884 | |||||
1885 | auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); | ||||
1886 | if (ImmVal) | ||||
1887 | Shl.addImm(*ImmVal); | ||||
1888 | else | ||||
1889 | Shl.addUse(Src2Reg); | ||||
1890 | constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); | ||||
1891 | I.eraseFromParent(); | ||||
1892 | return true; | ||||
1893 | } | ||||
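| // [Editor note, illustrative] Sketch of the two selections above: a
| // splat-constant RHS such as
| //   %r:fpr(<4 x s32>) = G_SHL %v, %splat_of_3
| // becomes SHLv4i32_shift %v, 3, while a variable RHS keeps the register
| // operand and becomes USHLv4i32 %v, %amt.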
1894 | |||||
1895 | bool AArch64InstructionSelector::selectVectorAshrLshr( | ||||
1896 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||
1897 | assert(I.getOpcode() == TargetOpcode::G_ASHR || | ||||
1898 | I.getOpcode() == TargetOpcode::G_LSHR); | ||||
1899 | Register DstReg = I.getOperand(0).getReg(); | ||||
1900 | const LLT Ty = MRI.getType(DstReg); | ||||
1901 | Register Src1Reg = I.getOperand(1).getReg(); | ||||
1902 | Register Src2Reg = I.getOperand(2).getReg(); | ||||
1903 | |||||
1904 | if (!Ty.isVector()) | ||||
1905 | return false; | ||||
1906 | |||||
1907 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; | ||||
1908 | |||||
1909 | // We expect the immediate case to be lowered in the PostLegalCombiner to | ||||
1910 | // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. | ||||
1911 | |||||
1912 | // There is not a shift right register instruction, but the shift left | ||||
1913 | // register instruction takes a signed value, where negative numbers specify a | ||||
1914 | // right shift. | ||||
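| // [Editor note, illustrative] e.g. G_ASHR %x, %amt on <2 x s64> selects
| // to:
| //   %neg = NEGv2i64 %amt
| //   %r   = SSHLv2i64 %x, %neg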
1915 | |||||
1916 | unsigned Opc = 0; | ||||
1917 | unsigned NegOpc = 0; | ||||
1918 | const TargetRegisterClass *RC = | ||||
1919 | getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); | ||||
1920 | if (Ty == LLT::fixed_vector(2, 64)) { | ||||
1921 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; | ||||
1922 | NegOpc = AArch64::NEGv2i64; | ||||
1923 | } else if (Ty == LLT::fixed_vector(4, 32)) { | ||||
1924 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; | ||||
1925 | NegOpc = AArch64::NEGv4i32; | ||||
1926 | } else if (Ty == LLT::fixed_vector(2, 32)) { | ||||
1927 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; | ||||
1928 | NegOpc = AArch64::NEGv2i32; | ||||
1929 | } else if (Ty == LLT::fixed_vector(4, 16)) { | ||||
1930 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; | ||||
1931 | NegOpc = AArch64::NEGv4i16; | ||||
1932 | } else if (Ty == LLT::fixed_vector(8, 16)) { | ||||
1933 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; | ||||
1934 | NegOpc = AArch64::NEGv8i16; | ||||
1935 | } else if (Ty == LLT::fixed_vector(16, 8)) { | ||||
1936 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; | ||||
1937 | NegOpc = AArch64::NEGv16i8; | ||||
1938 | } else if (Ty == LLT::fixed_vector(8, 8)) { | ||||
1939 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; | ||||
1940 | NegOpc = AArch64::NEGv8i8; | ||||
1941 | } else { | ||||
1942 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); | ||||
1943 | return false; | ||||
1944 | } | ||||
1945 | |||||
1946 | auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); | ||||
1947 | constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); | ||||
1948 | auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); | ||||
1949 | constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); | ||||
1950 | I.eraseFromParent(); | ||||
1951 | return true; | ||||
1952 | } | ||||
1953 | |||||
1954 | bool AArch64InstructionSelector::selectVaStartAAPCS( | ||||
1955 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { | ||||
1956 | return false; | ||||
1957 | } | ||||
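| // [Editor note] AAPCS va_start is not implemented by this selector;
| // returning false marks the instruction as unselected, so GlobalISel
| // falls back (or reports a selection failure) for such functions.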
1958 | |||||
1959 | bool AArch64InstructionSelector::selectVaStartDarwin( | ||||
1960 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { | ||||
1961 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); | ||||
1962 | Register ListReg = I.getOperand(0).getReg(); | ||||
1963 | |||||
1964 | Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | ||||
1965 | |||||
1966 | auto MIB = | ||||
1967 | BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) | ||||
1968 | .addDef(ArgsAddrReg) | ||||
1969 | .addFrameIndex(FuncInfo->getVarArgsStackIndex()) | ||||
1970 | .addImm(0) | ||||
1971 | .addImm(0); | ||||
1972 | |||||
1973 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); | ||||
1974 | |||||
1975 | MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) | ||||
1976 | .addUse(ArgsAddrReg) | ||||
1977 | .addUse(ListReg) | ||||
1978 | .addImm(0) | ||||
1979 | .addMemOperand(*I.memoperands_begin()); | ||||
1980 | |||||
1981 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); | ||||
1982 | I.eraseFromParent(); | ||||
1983 | return true; | ||||
1984 | } | ||||
1985 | |||||
1986 | void AArch64InstructionSelector::materializeLargeCMVal( | ||||
1987 | MachineInstr &I, const Value *V, unsigned OpFlags) { | ||||
1988 | MachineBasicBlock &MBB = *I.getParent(); | ||||
1989 | MachineFunction &MF = *MBB.getParent(); | ||||
1990 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
1991 | |||||
1992 | auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); | ||||
1993 | MovZ->addOperand(MF, I.getOperand(1)); | ||||
1994 | MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | | ||||
1995 | AArch64II::MO_NC); | ||||
1996 | MovZ->addOperand(MF, MachineOperand::CreateImm(0)); | ||||
1997 | constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); | ||||
1998 | |||||
1999 | auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, | ||||
2000 | Register ForceDstReg) { | ||||
2001 | Register DstReg = ForceDstReg | ||||
2002 | ? ForceDstReg | ||||
2003 | : MRI.createVirtualRegister(&AArch64::GPR64RegClass); | ||||
2004 | auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); | ||||
2005 | if (auto *GV = dyn_cast<GlobalValue>(V)) { | ||||
2006 | MovI->addOperand(MF, MachineOperand::CreateGA( | ||||
2007 | GV, MovZ->getOperand(1).getOffset(), Flags)); | ||||
2008 | } else { | ||||
2009 | MovI->addOperand( | ||||
2010 | MF, MachineOperand::CreateBA(cast<BlockAddress>(V), | ||||
2011 | MovZ->getOperand(1).getOffset(), Flags)); | ||||
2012 | } | ||||
2013 | MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); | ||||
2014 | constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); | ||||
2015 | return DstReg; | ||||
2016 | }; | ||||
2017 | Register DstReg = BuildMovK(MovZ.getReg(0), | ||||
2018 | AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); | ||||
2019 | DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); | ||||
2020 | BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); | ||||
2021 | } | ||||
2022 | |||||
2023 | bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { | ||||
2024 | MachineBasicBlock &MBB = *I.getParent(); | ||||
2025 | MachineFunction &MF = *MBB.getParent(); | ||||
2026 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
2027 | |||||
2028 | switch (I.getOpcode()) { | ||||
2029 | case TargetOpcode::G_STORE: { | ||||
2030 | bool Changed = contractCrossBankCopyIntoStore(I, MRI); | ||||
2031 | MachineOperand &SrcOp = I.getOperand(0); | ||||
2032 | if (MRI.getType(SrcOp.getReg()).isPointer()) { | ||||
2033 | // Allow matching with imported patterns for stores of pointers. Unlike | ||||
2034 | // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy | ||||
2035 | // and constrain. | ||||
2036 | auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp); | ||||
2037 | Register NewSrc = Copy.getReg(0); | ||||
2038 | SrcOp.setReg(NewSrc); | ||||
2039 | RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI); | ||||
2040 | Changed = true; | ||||
2041 | } | ||||
2042 | return Changed; | ||||
2043 | } | ||||
2044 | case TargetOpcode::G_PTR_ADD: | ||||
2045 | return convertPtrAddToAdd(I, MRI); | ||||
2046 | case TargetOpcode::G_LOAD: { | ||||
2047 | // For scalar loads of pointers, we try to convert the dest type from p0 | ||||
2048 | // to s64 so that our imported patterns can match. Like with the G_PTR_ADD | ||||
2049 | // conversion, this should be ok because all users should have been | ||||
2050 | // selected already, so the type doesn't matter for them. | ||||
2051 | Register DstReg = I.getOperand(0).getReg(); | ||||
2052 | const LLT DstTy = MRI.getType(DstReg); | ||||
2053 | if (!DstTy.isPointer()) | ||||
2054 | return false; | ||||
2055 | MRI.setType(DstReg, LLT::scalar(64)); | ||||
2056 | return true; | ||||
2057 | } | ||||
2058 | case AArch64::G_DUP: { | ||||
2059 | // Convert the element type from p0 to s64 to help selection. | ||||
2060 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||
2061 | if (!DstTy.getElementType().isPointer()) | ||||
2062 | return false; | ||||
2063 | auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); | ||||
2064 | MRI.setType(I.getOperand(0).getReg(), | ||||
2065 | DstTy.changeElementType(LLT::scalar(64))); | ||||
2066 | MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass); | ||||
2067 | I.getOperand(1).setReg(NewSrc.getReg(0)); | ||||
2068 | return true; | ||||
2069 | } | ||||
2070 | case TargetOpcode::G_UITOFP: | ||||
2071 | case TargetOpcode::G_SITOFP: { | ||||
2072 | // If both source and destination regbanks are FPR, then convert the opcode | ||||
2073 | // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant. | ||||
2074 | // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank | ||||
2075 | // copy. | ||||
2076 | Register SrcReg = I.getOperand(1).getReg(); | ||||
2077 | LLT SrcTy = MRI.getType(SrcReg); | ||||
2078 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||
2079 | if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) | ||||
2080 | return false; | ||||
2081 | |||||
2082 | if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { | ||||
2083 | if (I.getOpcode() == TargetOpcode::G_SITOFP) | ||||
2084 | I.setDesc(TII.get(AArch64::G_SITOF)); | ||||
2085 | else | ||||
2086 | I.setDesc(TII.get(AArch64::G_UITOF)); | ||||
2087 | return true; | ||||
2088 | } | ||||
2089 | return false; | ||||
2090 | } | ||||
2091 | default: | ||||
2092 | return false; | ||||
2093 | } | ||||
2094 | } | ||||
2095 | |||||
2096 | /// This lowering tries to look for G_PTR_ADD instructions and then converts | ||||
2097 | /// them to a standard G_ADD with a COPY on the source. | ||||
2098 | /// | ||||
2099 | /// The motivation behind this is to expose the add semantics to the imported | ||||
2100 | /// tablegen patterns. We shouldn't need to check for uses being loads/stores, | ||||
2101 | /// because the selector works bottom up, uses before defs. By the time we | ||||
2102 | /// end up trying to select a G_PTR_ADD, we will already have attempted to | ||||
2103 | /// fold it into addressing modes, and failed. | ||||
2104 | bool AArch64InstructionSelector::convertPtrAddToAdd( | ||||
2105 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||
2106 | assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); | ||||
2107 | Register DstReg = I.getOperand(0).getReg(); | ||||
2108 | Register AddOp1Reg = I.getOperand(1).getReg(); | ||||
2109 | const LLT PtrTy = MRI.getType(DstReg); | ||||
2110 | if (PtrTy.getAddressSpace() != 0) | ||||
2111 | return false; | ||||
2112 | |||||
2113 | const LLT CastPtrTy = | ||||
2114 | PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64); | ||||
2115 | auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); | ||||
2116 | // Set regbanks on the registers. | ||||
2117 | if (PtrTy.isVector()) | ||||
2118 | MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); | ||||
2119 | else | ||||
2120 | MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); | ||||
2121 | |||||
2122 | // Now turn the %dst(p0) = G_PTR_ADD %base, off into: | ||||
2123 | // %dst(intty) = G_ADD %intbase, off | ||||
2124 | I.setDesc(TII.get(TargetOpcode::G_ADD)); | ||||
2125 | MRI.setType(DstReg, CastPtrTy); | ||||
2126 | I.getOperand(1).setReg(PtrToInt.getReg(0)); | ||||
2127 | if (!select(*PtrToInt)) { | ||||
2128 | LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); | ||||
2129 | return false; | ||||
2130 | } | ||||
2131 | |||||
2132 | // Also take the opportunity here to try to do some optimization. | ||||
2133 | // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. | ||||
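 | // e.g. %neg = G_SUB 0, %x ; %dst = G_ADD %intbase, %neg | ||||
 | //      ---> %dst = G_SUB %intbase, %x | ||||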
2134 | Register NegatedReg; | ||||
2135 | if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) | ||||
2136 | return true; | ||||
2137 | I.getOperand(2).setReg(NegatedReg); | ||||
2138 | I.setDesc(TII.get(TargetOpcode::G_SUB)); | ||||
2139 | return true; | ||||
2140 | } | ||||
2141 | |||||
2142 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, | ||||
2143 | MachineRegisterInfo &MRI) { | ||||
2144 | // We try to match the immediate variant of LSL, which is actually an alias | ||||
2145 | // for a special case of UBFM. Otherwise, we fall back to the imported | ||||
2146 | // selector which will match the register variant. | ||||
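 | // e.g. for 64 bits, "lsl x0, x1, #shift" is the alias of | ||||
 | // "ubfm x0, x1, #((64 - shift) % 64), #(63 - shift)". | ||||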
2147 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); | ||||
2148 | const auto &MO = I.getOperand(2); | ||||
2149 | auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI); | ||||
2150 | if (!VRegAndVal) | ||||
2151 | return false; | ||||
2152 | |||||
2153 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||
2154 | if (DstTy.isVector()) | ||||
2155 | return false; | ||||
2156 | bool Is64Bit = DstTy.getSizeInBits() == 64; | ||||
2157 | auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); | ||||
2158 | auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); | ||||
2159 | |||||
2160 | if (!Imm1Fn || !Imm2Fn) | ||||
2161 | return false; | ||||
2162 | |||||
2163 | auto NewI = | ||||
2164 | MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, | ||||
2165 | {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); | ||||
2166 | |||||
2167 | for (auto &RenderFn : *Imm1Fn) | ||||
2168 | RenderFn(NewI); | ||||
2169 | for (auto &RenderFn : *Imm2Fn) | ||||
2170 | RenderFn(NewI); | ||||
2171 | |||||
2172 | I.eraseFromParent(); | ||||
2173 | return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); | ||||
2174 | } | ||||
2175 | |||||
2176 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( | ||||
2177 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||
2178 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); | ||||
2179 | // If we're storing a scalar, it doesn't matter what register bank that | ||||
2180 | // scalar is on. All that matters is the size. | ||||
2181 | // | ||||
2182 | // So, if we see something like this (with a 32-bit scalar as an example): | ||||
2183 | // | ||||
2184 | // %x:gpr(s32) = ... something ... | ||||
2185 | // %y:fpr(s32) = COPY %x:gpr(s32) | ||||
2186 | // G_STORE %y:fpr(s32) | ||||
2187 | // | ||||
2188 | // We can fix this up into something like this: | ||||
2189 | // | ||||
2190 | // G_STORE %x:gpr(s32) | ||||
2191 | // | ||||
2192 | // And then continue the selection process normally. | ||||
2193 | Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); | ||||
2194 | if (!DefDstReg.isValid()) | ||||
2195 | return false; | ||||
2196 | LLT DefDstTy = MRI.getType(DefDstReg); | ||||
2197 | Register StoreSrcReg = I.getOperand(0).getReg(); | ||||
2198 | LLT StoreSrcTy = MRI.getType(StoreSrcReg); | ||||
2199 | |||||
2200 | // If we get something strange like a physical register, then we shouldn't | ||||
2201 | // go any further. | ||||
2202 | if (!DefDstTy.isValid()) | ||||
2203 | return false; | ||||
2204 | |||||
2205 | // Are the source and dst types the same size? | ||||
2206 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) | ||||
2207 | return false; | ||||
2208 | |||||
2209 | if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == | ||||
2210 | RBI.getRegBank(DefDstReg, MRI, TRI)) | ||||
2211 | return false; | ||||
2212 | |||||
2213 | // We have a cross-bank copy, which is entering a store. Let's fold it. | ||||
2214 | I.getOperand(0).setReg(DefDstReg); | ||||
2215 | return true; | ||||
2216 | } | ||||
2217 | |||||
2218 | bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { | ||||
2219 | assert(I.getParent() && "Instruction should be in a basic block!"); | ||||
2220 | assert(I.getParent()->getParent() && "Instruction should be in a function!"); | ||||
2221 | |||||
2222 | MachineBasicBlock &MBB = *I.getParent(); | ||||
2223 | MachineFunction &MF = *MBB.getParent(); | ||||
2224 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
2225 | |||||
2226 | switch (I.getOpcode()) { | ||||
2227 | case AArch64::G_DUP: { | ||||
2228 | // Before selecting a DUP instruction, check if it is better selected as a | ||||
2229 | // MOV or load from a constant pool. | ||||
2230 | Register Src = I.getOperand(1).getReg(); | ||||
2231 | auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI); | ||||
2232 | if (!ValAndVReg) | ||||
2233 | return false; | ||||
2234 | LLVMContext &Ctx = MF.getFunction().getContext(); | ||||
2235 | Register Dst = I.getOperand(0).getReg(); | ||||
2236 | auto *CV = ConstantDataVector::getSplat( | ||||
2237 | MRI.getType(Dst).getNumElements(), | ||||
2238 | ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), | ||||
2239 | ValAndVReg->Value)); | ||||
2240 | if (!emitConstantVector(Dst, CV, MIB, MRI)) | ||||
2241 | return false; | ||||
2242 | I.eraseFromParent(); | ||||
2243 | return true; | ||||
2244 | } | ||||
2245 | case TargetOpcode::G_SEXT: | ||||
2246 | // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV | ||||
2247 | // over a normal extend. | ||||
2248 | if (selectUSMovFromExtend(I, MRI)) | ||||
2249 | return true; | ||||
2250 | return false; | ||||
2251 | case TargetOpcode::G_BR: | ||||
2252 | return false; | ||||
2253 | case TargetOpcode::G_SHL: | ||||
2254 | return earlySelectSHL(I, MRI); | ||||
2255 | case TargetOpcode::G_CONSTANT: { | ||||
2256 | bool IsZero = false; | ||||
2257 | if (I.getOperand(1).isCImm()) | ||||
2258 | IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; | ||||
2259 | else if (I.getOperand(1).isImm()) | ||||
2260 | IsZero = I.getOperand(1).getImm() == 0; | ||||
2261 | |||||
2262 | if (!IsZero) | ||||
2263 | return false; | ||||
2264 | |||||
2265 | Register DefReg = I.getOperand(0).getReg(); | ||||
2266 | LLT Ty = MRI.getType(DefReg); | ||||
2267 | if (Ty.getSizeInBits() == 64) { | ||||
2268 | I.getOperand(1).ChangeToRegister(AArch64::XZR, false); | ||||
2269 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); | ||||
2270 | } else if (Ty.getSizeInBits() == 32) { | ||||
2271 | I.getOperand(1).ChangeToRegister(AArch64::WZR, false); | ||||
2272 | RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); | ||||
2273 | } else | ||||
2274 | return false; | ||||
2275 | |||||
2276 | I.setDesc(TII.get(TargetOpcode::COPY)); | ||||
2277 | return true; | ||||
2278 | } | ||||
2279 | |||||
2280 | case TargetOpcode::G_ADD: { | ||||
2281 | // Check if this is being fed by a G_ICMP on either side. | ||||
2282 | // | ||||
2283 | // (cmp pred, x, y) + z | ||||
2284 | // | ||||
2285 | // In the above case, when the cmp is true, we increment z by 1. So, we can | ||||
2286 | // fold the add into the cset for the cmp by using cinc. | ||||
2287 | // | ||||
2288 | // FIXME: This would probably be a lot nicer in PostLegalizerLowering. | ||||
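 | // e.g. for "z + (x == y)" this emits, roughly: | ||||
 | //   cmp x, y | ||||
 | //   csinc dst, z, z, ne   ; i.e. "cinc dst, z, eq" | ||||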
2289 | Register AddDst = I.getOperand(0).getReg(); | ||||
2290 | Register AddLHS = I.getOperand(1).getReg(); | ||||
2291 | Register AddRHS = I.getOperand(2).getReg(); | ||||
2292 | // Only handle scalars. | ||||
2293 | LLT Ty = MRI.getType(AddLHS); | ||||
2294 | if (Ty.isVector()) | ||||
2295 | return false; | ||||
2296 | // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 | ||||
2297 | // bits. | ||||
2298 | unsigned Size = Ty.getSizeInBits(); | ||||
2299 | if (Size != 32 && Size != 64) | ||||
2300 | return false; | ||||
2301 | auto MatchCmp = [&](Register Reg) -> MachineInstr * { | ||||
2302 | if (!MRI.hasOneNonDBGUse(Reg)) | ||||
2303 | return nullptr; | ||||
2304 | // If the LHS of the add is 32 bits, then we want to fold a 32-bit | ||||
2305 | // compare. | ||||
2306 | if (Size == 32) | ||||
2307 | return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI); | ||||
2308 | // We model scalar compares using 32-bit destinations right now. | ||||
2309 | // If it's a 64-bit compare, it'll have 64-bit sources. | ||||
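 | // i.e. match %c:(s32) = G_ICMP pred, %a:(s64), %b ; %r:(s64) = G_ZEXT %c. | ||||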
2310 | Register ZExt; | ||||
2311 | if (!mi_match(Reg, MRI, | ||||
2312 | m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt)))))) | ||||
2313 | return nullptr; | ||||
2314 | auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI); | ||||
2315 | if (!Cmp || | ||||
2316 | MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64) | ||||
2317 | return nullptr; | ||||
2318 | return Cmp; | ||||
2319 | }; | ||||
2320 | // Try to match | ||||
2321 | // z + (cmp pred, x, y) | ||||
2322 | MachineInstr *Cmp = MatchCmp(AddRHS); | ||||
2323 | if (!Cmp) { | ||||
2324 | // (cmp pred, x, y) + z | ||||
2325 | std::swap(AddLHS, AddRHS); | ||||
2326 | Cmp = MatchCmp(AddRHS); | ||||
2327 | if (!Cmp) | ||||
2328 | return false; | ||||
2329 | } | ||||
2330 | auto &PredOp = Cmp->getOperand(1); | ||||
2331 | auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); | ||||
2332 | const AArch64CC::CondCode InvCC = | ||||
2333 | changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); | ||||
2334 | MIB.setInstrAndDebugLoc(I); | ||||
2335 | emitIntegerCompare(/*LHS=*/Cmp->getOperand(2), | ||||
2336 | /*RHS=*/Cmp->getOperand(3), PredOp, MIB); | ||||
2337 | emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB); | ||||
2338 | I.eraseFromParent(); | ||||
2339 | return true; | ||||
2340 | } | ||||
2341 | case TargetOpcode::G_OR: { | ||||
2342 | // Look for operations that take the lower `Width=Size-ShiftImm` bits of | ||||
2343 | // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via | ||||
2344 | // shifting and masking that we can replace with a BFI (encoded as a BFM). | ||||
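 | // e.g. with Size = 32, ShiftImm = 8 (so MaskImm == 0xff): | ||||
 | //   %dst = G_OR (G_SHL %s, 8), (G_AND %m, 0xff) | ||||
 | //   ---> BFMWri %m, %s, 24, 23 (the "bfi %dst, %s, #8, #24" alias), which | ||||
 | //   inserts the low 24 bits of %s into bits [31:8] of %m. | ||||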
2345 | Register Dst = I.getOperand(0).getReg(); | ||||
2346 | LLT Ty = MRI.getType(Dst); | ||||
2347 | |||||
2348 | if (!Ty.isScalar()) | ||||
2349 | return false; | ||||
2350 | |||||
2351 | unsigned Size = Ty.getSizeInBits(); | ||||
2352 | if (Size != 32 && Size != 64) | ||||
2353 | return false; | ||||
2354 | |||||
2355 | Register ShiftSrc; | ||||
2356 | int64_t ShiftImm; | ||||
2357 | Register MaskSrc; | ||||
2358 | int64_t MaskImm; | ||||
2359 | if (!mi_match( | ||||
2360 | Dst, MRI, | ||||
2361 | m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))), | ||||
2362 | m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm)))))) | ||||
2363 | return false; | ||||
2364 | |||||
2365 | if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) | ||||
2366 | return false; | ||||
2367 | |||||
2368 | int64_t Immr = Size - ShiftImm; | ||||
2369 | int64_t Imms = Size - ShiftImm - 1; | ||||
2370 | unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; | ||||
2371 | emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB); | ||||
2372 | I.eraseFromParent(); | ||||
2373 | return true; | ||||
2374 | } | ||||
2375 | default: | ||||
2376 | return false; | ||||
2377 | } | ||||
2378 | } | ||||
2379 | |||||
2380 | bool AArch64InstructionSelector::select(MachineInstr &I) { | ||||
2381 | assert(I.getParent() && "Instruction should be in a basic block!"); | ||||
2382 | assert(I.getParent()->getParent() && "Instruction should be in a function!"); | ||||
2383 | |||||
2384 | MachineBasicBlock &MBB = *I.getParent(); | ||||
2385 | MachineFunction &MF = *MBB.getParent(); | ||||
2386 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
2387 | |||||
2388 | const AArch64Subtarget *Subtarget = | ||||
2389 | &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); | ||||
2390 | if (Subtarget->requiresStrictAlign()) { | ||||
2391 | // We don't support this feature yet. | ||||
2392 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); | ||||
2393 | return false; | ||||
2394 | } | ||||
2395 | |||||
2396 | MIB.setInstrAndDebugLoc(I); | ||||
2397 | |||||
2398 | unsigned Opcode = I.getOpcode(); | ||||
2399 | // G_PHI requires the same handling as PHI. | ||||
2400 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { | ||||
2401 | // Certain non-generic instructions also need some special handling. | ||||
2402 | |||||
2403 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) | ||||
2404 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2405 | |||||
2406 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { | ||||
2407 | const Register DefReg = I.getOperand(0).getReg(); | ||||
2408 | const LLT DefTy = MRI.getType(DefReg); | ||||
2409 | |||||
2410 | const RegClassOrRegBank &RegClassOrBank = | ||||
2411 | MRI.getRegClassOrRegBank(DefReg); | ||||
2412 | |||||
2413 | const TargetRegisterClass *DefRC | ||||
2414 | = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); | ||||
2415 | if (!DefRC) { | ||||
2416 | if (!DefTy.isValid()) { | ||||
2417 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); | ||||
2418 | return false; | ||||
2419 | } | ||||
2420 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); | ||||
2421 | DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); | ||||
2422 | if (!DefRC) { | ||||
2423 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); | ||||
2424 | return false; | ||||
2425 | } | ||||
2426 | } | ||||
2427 | |||||
2428 | I.setDesc(TII.get(TargetOpcode::PHI)); | ||||
2429 | |||||
2430 | return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); | ||||
2431 | } | ||||
2432 | |||||
2433 | if (I.isCopy()) | ||||
2434 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||
2435 | |||||
2436 | return true; | ||||
2437 | } | ||||
2438 | |||||
2439 | |||||
2440 | if (I.getNumOperands() != I.getNumExplicitOperands()) { | ||||
2441 | LLVM_DEBUG( | ||||
2442 | dbgs() << "Generic instruction has unexpected implicit operands\n"); | ||||
2443 | return false; | ||||
2444 | } | ||||
2445 | |||||
2446 | // Try to do some lowering before we start instruction selecting. These | ||||
2447 | // lowerings are purely transformations on the input G_MIR and so selection | ||||
2448 | // must continue after any modification of the instruction. | ||||
2449 | if (preISelLower(I)) { | ||||
2450 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. | ||||
2451 | } | ||||
2452 | |||||
2453 | // There may be patterns that the importer can't handle optimally, but | ||||
2454 | // still selects to a suboptimal sequence, so our custom C++ selection | ||||
2455 | // code later never has a chance to work on them. Therefore, we have an | ||||
2456 | // early selection attempt here to give priority to certain selection | ||||
2457 | // routines over the imported ones. | ||||
2458 | if (earlySelect(I)) | ||||
2459 | return true; | ||||
2460 | |||||
2461 | if (selectImpl(I, *CoverageInfo)) | ||||
2462 | return true; | ||||
2463 | |||||
2464 | LLT Ty = | ||||
2465 | I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; | ||||
2466 | |||||
2467 | switch (Opcode) { | ||||
2468 | case TargetOpcode::G_SBFX: | ||||
2469 | case TargetOpcode::G_UBFX: { | ||||
2470 | static const unsigned OpcTable[2][2] = { | ||||
2471 | {AArch64::UBFMWri, AArch64::UBFMXri}, | ||||
2472 | {AArch64::SBFMWri, AArch64::SBFMXri}}; | ||||
2473 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; | ||||
2474 | unsigned Size = Ty.getSizeInBits(); | ||||
2475 | unsigned Opc = OpcTable[IsSigned][Size == 64]; | ||||
2476 | auto Cst1 = | ||||
2477 | getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI); | ||||
2478 | assert(Cst1 && "Should have gotten a constant for src 1?"); | ||||
2479 | auto Cst2 = | ||||
2480 | getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI); | ||||
2481 | assert(Cst2 && "Should have gotten a constant for src 2?"); | ||||
2482 | auto LSB = Cst1->Value.getZExtValue(); | ||||
2483 | auto Width = Cst2->Value.getZExtValue(); | ||||
2484 | auto BitfieldInst = | ||||
2485 | MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)}) | ||||
2486 | .addImm(LSB) | ||||
2487 | .addImm(LSB + Width - 1); | ||||
2488 | I.eraseFromParent(); | ||||
2489 | return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); | ||||
2490 | } | ||||
2491 | case TargetOpcode::G_BRCOND: | ||||
2492 | return selectCompareBranch(I, MF, MRI); | ||||
2493 | |||||
2494 | case TargetOpcode::G_BRINDIRECT: { | ||||
2495 | I.setDesc(TII.get(AArch64::BR)); | ||||
2496 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2497 | } | ||||
2498 | |||||
2499 | case TargetOpcode::G_BRJT: | ||||
2500 | return selectBrJT(I, MRI); | ||||
2501 | |||||
2502 | case AArch64::G_ADD_LOW: { | ||||
2503 | // This op may have been separated from its ADRP companion by the localizer | ||||
2504 | // or some other code motion pass. Given that many CPUs will try to | ||||
2505 | // macro-fuse these operations anyway, select this into a MOVaddr pseudo, | ||||
2506 | // which will later be expanded into an ADRP+ADD pair after scheduling. | ||||
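 | // The MOVaddr pseudo later becomes the usual small-code-model pair, | ||||
 | // roughly: | ||||
 | //   adrp x0, sym | ||||
 | //   add x0, x0, :lo12:sym | ||||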
2507 | MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); | ||||
2508 | if (BaseMI->getOpcode() != AArch64::ADRP) { | ||||
2509 | I.setDesc(TII.get(AArch64::ADDXri)); | ||||
2510 | I.addOperand(MachineOperand::CreateImm(0)); | ||||
2511 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2512 | } | ||||
2513 | assert(TM.getCodeModel() == CodeModel::Small && | ||||
2514 | "Expected small code model"); | ||||
2515 | auto Op1 = BaseMI->getOperand(1); | ||||
2516 | auto Op2 = I.getOperand(2); | ||||
2517 | auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) | ||||
2518 | .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), | ||||
2519 | Op1.getTargetFlags()) | ||||
2520 | .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), | ||||
2521 | Op2.getTargetFlags()); | ||||
2522 | I.eraseFromParent(); | ||||
2523 | return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); | ||||
2524 | } | ||||
2525 | |||||
2526 | case TargetOpcode::G_BSWAP: { | ||||
2527 | // Handle vector types for G_BSWAP directly. | ||||
2528 | Register DstReg = I.getOperand(0).getReg(); | ||||
2529 | LLT DstTy = MRI.getType(DstReg); | ||||
2530 | |||||
2531 | // We should only get vector types here; everything else is handled by the | ||||
2532 | // importer right now. | ||||
2533 | if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { | ||||
2534 | LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); | ||||
2535 | return false; | ||||
2536 | } | ||||
2537 | |||||
2538 | // Only handle 4 and 2 element vectors for now. | ||||
2539 | // TODO: 16-bit elements. | ||||
2540 | unsigned NumElts = DstTy.getNumElements(); | ||||
2541 | if (NumElts != 4 && NumElts != 2) { | ||||
2542 | LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); | ||||
2543 | return false; | ||||
2544 | } | ||||
2545 | |||||
2546 | // Choose the correct opcode for the supported types. Right now, that's | ||||
2547 | // v2s32, v4s32, and v2s64. | ||||
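 | // e.g. a v4s32 G_BSWAP selects to "rev32 v0.16b, v0.16b", which | ||||
 | // byte-reverses each 32-bit lane. | ||||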
2548 | unsigned Opc = 0; | ||||
2549 | unsigned EltSize = DstTy.getElementType().getSizeInBits(); | ||||
2550 | if (EltSize == 32) | ||||
2551 | Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8 | ||||
2552 | : AArch64::REV32v16i8; | ||||
2553 | else if (EltSize == 64) | ||||
2554 | Opc = AArch64::REV64v16i8; | ||||
2555 | |||||
2556 | // We should always get something by the time we get here... | ||||
2557 | assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); | ||||
2558 | |||||
2559 | I.setDesc(TII.get(Opc)); | ||||
2560 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2561 | } | ||||
2562 | |||||
2563 | case TargetOpcode::G_FCONSTANT: | ||||
2564 | case TargetOpcode::G_CONSTANT: { | ||||
2565 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; | ||||
2566 | |||||
2567 | const LLT s8 = LLT::scalar(8); | ||||
2568 | const LLT s16 = LLT::scalar(16); | ||||
2569 | const LLT s32 = LLT::scalar(32); | ||||
2570 | const LLT s64 = LLT::scalar(64); | ||||
2571 | const LLT s128 = LLT::scalar(128); | ||||
2572 | const LLT p0 = LLT::pointer(0, 64); | ||||
2573 | |||||
2574 | const Register DefReg = I.getOperand(0).getReg(); | ||||
2575 | const LLT DefTy = MRI.getType(DefReg); | ||||
2576 | const unsigned DefSize = DefTy.getSizeInBits(); | ||||
2577 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | ||||
2578 | |||||
2579 | // FIXME: Redundant check, but even less readable when factored out. | ||||
2580 | if (isFP) { | ||||
2581 | if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { | ||||
2582 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty | ||||
2583 | << " constant, expected: " << s16 << " or " << s32 | ||||
2584 | << " or " << s64 << " or " << s128 << '\n'); | ||||
2585 | return false; | ||||
2586 | } | ||||
2587 | |||||
2588 | if (RB.getID() != AArch64::FPRRegBankID) { | ||||
2589 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty | ||||
2590 | << " constant on bank: " << RB | ||||
2591 | << ", expected: FPR\n"); | ||||
2592 | return false; | ||||
2593 | } | ||||
2594 | |||||
2595 | // The case when we have 0.0 is covered by tablegen. Reject it here so we | ||||
2596 | // can be sure tablegen works correctly and isn't rescued by this code. | ||||
2597 | // The exception is FP128, where tablegen does not cover 0.0; we handle | ||||
2598 | // that scenario in the code here. | ||||
2599 | if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) | ||||
2600 | return false; | ||||
2601 | } else { | ||||
2602 | // s32 and s64 are covered by tablegen. | ||||
2603 | if (Ty != p0 && Ty != s8 && Ty != s16) { | ||||
2604 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty | ||||
2605 | << " constant, expected: " << s32 << ", " << s64 | ||||
2606 | << ", or " << p0 << '\n'); | ||||
2607 | return false; | ||||
2608 | } | ||||
2609 | |||||
2610 | if (RB.getID() != AArch64::GPRRegBankID) { | ||||
2611 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty | ||||
2612 | << " constant on bank: " << RB | ||||
2613 | << ", expected: GPR\n"); | ||||
2614 | return false; | ||||
2615 | } | ||||
2616 | } | ||||
2617 | |||||
2618 | if (isFP) { | ||||
2619 | const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize); | ||||
2620 | // For 16, 64, and 128b values, emit a constant pool load. | ||||
2621 | switch (DefSize) { | ||||
2622 | default: | ||||
2623 | llvm_unreachable("Unexpected destination size for G_FCONSTANT?"); | ||||
2624 | case 32: | ||||
2625 | // For s32, use a cp load if we have optsize/minsize. | ||||
2626 | if (!shouldOptForSize(&MF)) | ||||
2627 | break; | ||||
2628 | LLVM_FALLTHROUGH; | ||||
2629 | case 16: | ||||
2630 | case 64: | ||||
2631 | case 128: { | ||||
2632 | auto *FPImm = I.getOperand(1).getFPImm(); | ||||
2633 | auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); | ||||
2634 | if (!LoadMI) { | ||||
2635 | LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); | ||||
2636 | return false; | ||||
2637 | } | ||||
2638 | MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); | ||||
2639 | I.eraseFromParent(); | ||||
2640 | return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); | ||||
2641 | } | ||||
2642 | } | ||||
2643 | |||||
2644 | // Either emit an FMOV, or emit a normal mov into a GPR and copy it to the FPR. | ||||
2645 | assert(DefSize == 32 && | ||||
2646 | "Expected constant pool loads for all sizes other than 32!"); | ||||
2647 | const Register DefGPRReg = | ||||
2648 | MRI.createVirtualRegister(&AArch64::GPR32RegClass); | ||||
2649 | MachineOperand &RegOp = I.getOperand(0); | ||||
2650 | RegOp.setReg(DefGPRReg); | ||||
2651 | MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); | ||||
2652 | MIB.buildCopy({DefReg}, {DefGPRReg}); | ||||
2653 | |||||
2654 | if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { | ||||
2655 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); | ||||
2656 | return false; | ||||
2657 | } | ||||
2658 | |||||
2659 | MachineOperand &ImmOp = I.getOperand(1); | ||||
2660 | // FIXME: Is going through int64_t always correct? | ||||
2661 | ImmOp.ChangeToImmediate( | ||||
2662 | ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); | ||||
2663 | } else if (I.getOperand(1).isCImm()) { | ||||
2664 | uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); | ||||
2665 | I.getOperand(1).ChangeToImmediate(Val); | ||||
2666 | } else if (I.getOperand(1).isImm()) { | ||||
2667 | uint64_t Val = I.getOperand(1).getImm(); | ||||
2668 | I.getOperand(1).ChangeToImmediate(Val); | ||||
2669 | } | ||||
2670 | |||||
2671 | const unsigned MovOpc = | ||||
2672 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; | ||||
2673 | I.setDesc(TII.get(MovOpc)); | ||||
2674 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2675 | return true; | ||||
2676 | } | ||||
2677 | case TargetOpcode::G_EXTRACT: { | ||||
2678 | Register DstReg = I.getOperand(0).getReg(); | ||||
2679 | Register SrcReg = I.getOperand(1).getReg(); | ||||
2680 | LLT SrcTy = MRI.getType(SrcReg); | ||||
2681 | LLT DstTy = MRI.getType(DstReg); | ||||
2682 | (void)DstTy; | ||||
2683 | unsigned SrcSize = SrcTy.getSizeInBits(); | ||||
2684 | |||||
2685 | if (SrcTy.getSizeInBits() > 64) { | ||||
2686 | // This should be an extract of an s128, which is like a vector extract. | ||||
2687 | if (SrcTy.getSizeInBits() != 128) | ||||
2688 | return false; | ||||
2689 | // Only support extracting 64 bits from an s128 at the moment. | ||||
2690 | if (DstTy.getSizeInBits() != 64) | ||||
2691 | return false; | ||||
2692 | |||||
2693 | unsigned Offset = I.getOperand(2).getImm(); | ||||
2694 | if (Offset % 64 != 0) | ||||
2695 | return false; | ||||
2696 | |||||
2697 | // Check we have the right regbank always. | ||||
2698 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||
2699 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | ||||
2700 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); | ||||
2701 | |||||
2702 | if (SrcRB.getID() == AArch64::GPRRegBankID) { | ||||
2703 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | ||||
2704 | .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); | ||||
2705 | I.eraseFromParent(); | ||||
2706 | return true; | ||||
2707 | } | ||||
2708 | |||||
2709 | // Emit the same code as a vector extract. | ||||
2710 | // Offset must be a multiple of 64. | ||||
2711 | unsigned LaneIdx = Offset / 64; | ||||
2712 | MachineInstr *Extract = emitExtractVectorElt( | ||||
2713 | DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); | ||||
2714 | if (!Extract) | ||||
2715 | return false; | ||||
2716 | I.eraseFromParent(); | ||||
2717 | return true; | ||||
2718 | } | ||||
2719 | |||||
2720 | I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); | ||||
2721 | MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + | ||||
2722 | Ty.getSizeInBits() - 1); | ||||
2723 | |||||
2724 | if (SrcSize < 64) { | ||||
2725 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && | ||||
2726 | "unexpected G_EXTRACT types"); | ||||
2727 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2728 | } | ||||
2729 | |||||
2730 | DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); | ||||
2731 | MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); | ||||
2732 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) | ||||
2733 | .addReg(DstReg, 0, AArch64::sub_32); | ||||
2734 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), | ||||
2735 | AArch64::GPR32RegClass, MRI); | ||||
2736 | I.getOperand(0).setReg(DstReg); | ||||
2737 | |||||
2738 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2739 | } | ||||
2740 | |||||
2741 | case TargetOpcode::G_INSERT: { | ||||
2742 | LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); | ||||
2743 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||
2744 | unsigned DstSize = DstTy.getSizeInBits(); | ||||
2745 | // Larger inserts are vectors, same-size ones should be something else by | ||||
2746 | // now (split up or turned into COPYs). | ||||
2747 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) | ||||
2748 | return false; | ||||
2749 | |||||
2750 | I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); | ||||
2751 | unsigned LSB = I.getOperand(3).getImm(); | ||||
2752 | unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); | ||||
2753 | I.getOperand(3).setImm((DstSize - LSB) % DstSize); | ||||
2754 | MachineInstrBuilder(MF, I).addImm(Width - 1); | ||||
2755 | |||||
2756 | if (DstSize < 64) { | ||||
2757 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && | ||||
2758 | "unexpected G_INSERT types"); | ||||
2759 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2760 | } | ||||
2761 | |||||
2762 | Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); | ||||
2763 | BuildMI(MBB, I.getIterator(), I.getDebugLoc(), | ||||
2764 | TII.get(AArch64::SUBREG_TO_REG)) | ||||
2765 | .addDef(SrcReg) | ||||
2766 | .addImm(0) | ||||
2767 | .addUse(I.getOperand(2).getReg()) | ||||
2768 | .addImm(AArch64::sub_32); | ||||
2769 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), | ||||
2770 | AArch64::GPR32RegClass, MRI); | ||||
2771 | I.getOperand(2).setReg(SrcReg); | ||||
2772 | |||||
2773 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2774 | } | ||||
2775 | case TargetOpcode::G_FRAME_INDEX: { | ||||
2776 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). | ||||
2777 | if (Ty != LLT::pointer(0, 64)) { | ||||
2778 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty | ||||
2779 | << ", expected: " << LLT::pointer(0, 64) << '\n'); | ||||
2780 | return false; | ||||
2781 | } | ||||
2782 | I.setDesc(TII.get(AArch64::ADDXri)); | ||||
2783 | |||||
2784 | // MOs for a #0 shifted immediate. | ||||
2785 | I.addOperand(MachineOperand::CreateImm(0)); | ||||
2786 | I.addOperand(MachineOperand::CreateImm(0)); | ||||
2787 | |||||
2788 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2789 | } | ||||
2790 | |||||
2791 | case TargetOpcode::G_GLOBAL_VALUE: { | ||||
2792 | auto GV = I.getOperand(1).getGlobal(); | ||||
2793 | if (GV->isThreadLocal()) | ||||
2794 | return selectTLSGlobalValue(I, MRI); | ||||
2795 | |||||
2796 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); | ||||
2797 | if (OpFlags & AArch64II::MO_GOT) { | ||||
2798 | I.setDesc(TII.get(AArch64::LOADgot)); | ||||
2799 | I.getOperand(1).setTargetFlags(OpFlags); | ||||
2800 | } else if (TM.getCodeModel() == CodeModel::Large) { | ||||
2801 | // Materialize the global using movz/movk instructions. | ||||
2802 | materializeLargeCMVal(I, GV, OpFlags); | ||||
2803 | I.eraseFromParent(); | ||||
2804 | return true; | ||||
2805 | } else if (TM.getCodeModel() == CodeModel::Tiny) { | ||||
2806 | I.setDesc(TII.get(AArch64::ADR)); | ||||
2807 | I.getOperand(1).setTargetFlags(OpFlags); | ||||
2808 | } else { | ||||
2809 | I.setDesc(TII.get(AArch64::MOVaddr)); | ||||
2810 | I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); | ||||
2811 | MachineInstrBuilder MIB(MF, I); | ||||
2812 | MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), | ||||
2813 | OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); | ||||
2814 | } | ||||
2815 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2816 | } | ||||
2817 | |||||
2818 | case TargetOpcode::G_ZEXTLOAD: | ||||
2819 | case TargetOpcode::G_LOAD: | ||||
2820 | case TargetOpcode::G_STORE: { | ||||
2821 | GLoadStore &LdSt = cast<GLoadStore>(I); | ||||
2822 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; | ||||
2823 | LLT PtrTy = MRI.getType(LdSt.getPointerReg()); | ||||
2824 | |||||
2825 | if (PtrTy != LLT::pointer(0, 64)) { | ||||
2826 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy | ||||
2827 | << ", expected: " << LLT::pointer(0, 64) << '\n'); | ||||
2828 | return false; | ||||
2829 | } | ||||
2830 | |||||
2831 | uint64_t MemSizeInBytes = LdSt.getMemSize(); | ||||
2832 | unsigned MemSizeInBits = LdSt.getMemSizeInBits(); | ||||
2833 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); | ||||
2834 | |||||
2835 | // Need special instructions for atomics that affect ordering. | ||||
2836 | if (Order != AtomicOrdering::NotAtomic && | ||||
2837 | Order != AtomicOrdering::Unordered && | ||||
2838 | Order != AtomicOrdering::Monotonic) { | ||||
2839 | assert(!isa<GZExtLoad>(LdSt)); | ||||
2840 | if (MemSizeInBytes > 64) | ||||
2841 | return false; | ||||
2842 | |||||
2843 | if (isa<GLoad>(LdSt)) { | ||||
2844 | static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH, | ||||
2845 | AArch64::LDARW, AArch64::LDARX}; | ||||
2846 | I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); | ||||
2847 | } else { | ||||
2848 | static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, | ||||
2849 | AArch64::STLRW, AArch64::STLRX}; | ||||
2850 | Register ValReg = LdSt.getReg(0); | ||||
2851 | if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { | ||||
2852 | // Emit a subreg copy of 32 bits. | ||||
2853 | Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | ||||
2854 | MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {}) | ||||
2855 | .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32); | ||||
2856 | I.getOperand(0).setReg(NewVal); | ||||
2857 | } | ||||
2858 | I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); | ||||
2859 | } | ||||
2860 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||
2861 | return true; | ||||
2862 | } | ||||
2863 | |||||
2864 | #ifndef NDEBUG | ||||
2865 | const Register PtrReg = LdSt.getPointerReg(); | ||||
2866 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); | ||||
2867 | // Check that the pointer register is valid. | ||||
2868 | assert(PtrRB.getID() == AArch64::GPRRegBankID && | ||||
2869 | "Load/Store pointer operand isn't a GPR"); | ||||
2870 | assert(MRI.getType(PtrReg).isPointer() && | ||||
2871 | "Load/Store pointer operand isn't a pointer"); | ||||
2872 | #endif | ||||
2873 | |||||
2874 | const Register ValReg = LdSt.getReg(0); | ||||
2875 | const LLT ValTy = MRI.getType(ValReg); | ||||
2876 | const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); | ||||
2877 | |||||
2878 | // The code below doesn't support truncating stores, so we need to split it | ||||
2879 | // again. | ||||
2880 | if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { | ||||
2881 | unsigned SubReg; | ||||
2882 | LLT MemTy = LdSt.getMMO().getMemoryType(); | ||||
2883 | auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); | ||||
2884 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||
2885 | return false; | ||||
2886 | |||||
2887 | // Generate a subreg copy. | ||||
2888 | auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) | ||||
2889 | .addReg(ValReg, 0, SubReg) | ||||
2890 | .getReg(0); | ||||
2891 | RBI.constrainGenericRegister(Copy, *RC, MRI); | ||||
2892 | LdSt.getOperand(0).setReg(Copy); | ||||
2893 | } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { | ||||
2894 | // If this is an any-extending load from the FPR bank, split it into a regular | ||||
2895 | // load + extend. | ||||
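 | // e.g. an any-extending s32 load into an s64 FPR becomes, roughly: | ||||
 | //   %new:fpr(s32) = G_LOAD %ptr   ; selected as a plain 32-bit load | ||||
 | //   %old:fpr(s64) = SUBREG_TO_REG 0, %new, ssub | ||||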
2896 | if (RB.getID() == AArch64::FPRRegBankID) { | ||||
2897 | unsigned SubReg; | ||||
2898 | LLT MemTy = LdSt.getMMO().getMemoryType(); | ||||
2899 | auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); | ||||
2900 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||
2901 | return false; | ||||
2902 | Register OldDst = LdSt.getReg(0); | ||||
2903 | Register NewDst = | ||||
2904 | MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType()); | ||||
2905 | LdSt.getOperand(0).setReg(NewDst); | ||||
2906 | MRI.setRegBank(NewDst, RB); | ||||
2907 | // Generate a SUBREG_TO_REG to extend it. | ||||
2908 | MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator())); | ||||
2909 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) | ||||
2910 | .addImm(0) | ||||
2911 | .addUse(NewDst) | ||||
2912 | .addImm(SubReg); | ||||
2913 | auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI); | ||||
2914 | RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI); | ||||
2915 | MIB.setInstr(LdSt); | ||||
2916 | } | ||||
2917 | } | ||||
2918 | |||||
2919 | // Helper lambda for partially selecting I. Either returns the original | ||||
2920 | // instruction with an updated opcode, or a new instruction. | ||||
2921 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { | ||||
2922 | bool IsStore = isa<GStore>(I); | ||||
2923 | const unsigned NewOpc = | ||||
2924 | selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); | ||||
2925 | if (NewOpc == I.getOpcode()) | ||||
2926 | return nullptr; | ||||
2927 | // Check if we can fold anything into the addressing mode. | ||||
2928 | auto AddrModeFns = | ||||
2929 | selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); | ||||
2930 | if (!AddrModeFns) { | ||||
2931 | // Can't fold anything. Use the original instruction. | ||||
2932 | I.setDesc(TII.get(NewOpc)); | ||||
2933 | I.addOperand(MachineOperand::CreateImm(0)); | ||||
2934 | return &I; | ||||
2935 | } | ||||
2936 | |||||
2937 | // Folded something. Create a new instruction and return it. | ||||
2938 | auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); | ||||
2939 | Register CurValReg = I.getOperand(0).getReg(); | ||||
2940 | IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); | ||||
2941 | NewInst.cloneMemRefs(I); | ||||
2942 | for (auto &Fn : *AddrModeFns) | ||||
2943 | Fn(NewInst); | ||||
2944 | I.eraseFromParent(); | ||||
2945 | return &*NewInst; | ||||
2946 | }; | ||||
2947 | |||||
2948 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); | ||||
2949 | if (!LoadStore) | ||||
2950 | return false; | ||||
2951 | |||||
2952 | // If we're storing a 0, use WZR/XZR. | ||||
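 | // e.g. emit "str wzr, [x0]" rather than materializing the zero in a | ||||
 | // register first. | ||||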
2953 | if (Opcode == TargetOpcode::G_STORE) { | ||||
2954 | auto CVal = getIConstantVRegValWithLookThrough( | ||||
2955 | LoadStore->getOperand(0).getReg(), MRI); | ||||
2956 | if (CVal && CVal->Value == 0) { | ||||
2957 | switch (LoadStore->getOpcode()) { | ||||
2958 | case AArch64::STRWui: | ||||
2959 | case AArch64::STRHHui: | ||||
2960 | case AArch64::STRBBui: | ||||
2961 | LoadStore->getOperand(0).setReg(AArch64::WZR); | ||||
2962 | break; | ||||
2963 | case AArch64::STRXui: | ||||
2964 | LoadStore->getOperand(0).setReg(AArch64::XZR); | ||||
2965 | break; | ||||
2966 | } | ||||
2967 | } | ||||
2968 | } | ||||
2969 | |||||
2970 | if (IsZExtLoad) { | ||||
2971 | // The zextload from a smaller type to i32 should be handled by the | ||||
2972 | // importer. | ||||
2973 | if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) | ||||
2974 | return false; | ||||
2975 | // If we have a ZEXTLOAD then change the load's type to be a narrower reg | ||||
2976 | // and zero_extend with SUBREG_TO_REG. | ||||
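      // E.g. (illustrative):
      //   %dst:gpr(s64) = G_ZEXTLOAD %p :: (load (s16))
      // becomes
      //   %ld:gpr32 = LDRHHui %p, 0
      //   %dst:gpr64all = SUBREG_TO_REG 0, %ld, %subreg.sub_32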
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      Register DstReg = LoadStore->getOperand(0).getReg();
      LoadStore->getOperand(0).setReg(LdReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(0)
          .addUse(LdReg)
          .addImm(AArch64::sub_32);
      constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
  }

  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
      return false;
    }

    if (Ty != LLT::scalar(64)) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
                        << ", expected: " << LLT::scalar(64) << '\n');
      return false;
    }

    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
                                                             : AArch64::UMULHrr;
    I.setDesc(TII.get(NewOpc));

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorAshrLshr(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);

    // These shifts were legalized to have 64-bit shift amounts because we
    // want to take advantage of the selection patterns that assume the
    // immediates are s64s. However, selectBinaryOp assumes both operands
    // have the same bit size.
    {
      Register SrcReg = I.getOperand(1).getReg();
      Register ShiftReg = I.getOperand(2).getReg();
      const LLT ShiftTy = MRI.getType(ShiftReg);
      const LLT SrcTy = MRI.getType(SrcReg);
      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
          ShiftTy.getSizeInBits() == 64) {
        assert(!ShiftTy.isVector() && "unexpected vector shift ty");
        assert(MRI.getVRegDef(ShiftReg) &&
               "could not find a vreg definition for shift amount");
        // Insert a subregister copy to implement a 64->32 trunc.
        auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                         .addReg(ShiftReg, 0, AArch64::sub_32);
        MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
        I.getOperand(2).setReg(Trunc.getReg(0));
      }
    }
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_OR: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned OpSize = Ty.getSizeInBits();

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type be always reset in setDesc?

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO: {
    // Emit the operation and get the correct condition code.
    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
                                  I.getOperand(2), I.getOperand(3), MIB);

    // Now, put the overflow result in the register given by the first operand
    // to the overflow op. CSINC increments the result when the predicate is
    // false, so to get the increment when it's true, we need to use the
    // inverse. In this case, we want to increment when carry is set.
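    // E.g. (illustrative) for G_UADDO, emitOverflowOp emits an ADDS and
    // returns AArch64CC::HS (carry set), so the inverted code below is LO and
    // the CSINC materializes the overflow bit:
    //   %carry:gpr32 = CSINCWr $wzr, $wzr, LO   ; 1 iff the carry was set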
    Register ZReg = AArch64::WZR;
    emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
              getInvertedCondCode(OpAndCC.second), MIB);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
    // TODO: Implement arbitrary cases
    if (!MaskVal || !isShiftedMask_64(*MaskVal))
      return false;

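    // E.g. (illustrative) aligning a pointer down to 16 bytes:
    //   %out:gpr(p0) = G_PTRMASK %ptr, %mask   ; %mask = G_CONSTANT i64 -16
    // becomes an ANDXri of %ptr with the logical-immediate encoding of
    // 0xfffffffffffffff0.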
    uint64_t Mask = *MaskVal;
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(
        AArch64_AM::encodeLogicalImmediate(Mask, 64));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

    if (DstRB.getID() != SrcRB.getID()) {
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
      return false;
    }

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      const TargetRegisterClass *DstRC =
          getRegClassForTypeOnBank(DstTy, DstRB, RBI);
      if (!DstRC)
        return false;

      const TargetRegisterClass *SrcRC =
          getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
      if (!SrcRC)
        return false;

      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      if (DstRC == SrcRC) {
        // Nothing to be done
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
                 SrcTy == LLT::scalar(64)) {
        llvm_unreachable("TableGen can import this case");
        return false;
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
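        // Together with the COPY below this is (illustrative):
        //   %dst:gpr32 = COPY %src.sub_32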
      } else {
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      I.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
      if (DstTy == LLT::fixed_vector(4, 16) &&
          SrcTy == LLT::fixed_vector(4, 32)) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        constrainSelectedInstRegOperands(I, TII, TRI, RBI);
        return true;
      }

      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
        MachineInstr *Extract = emitExtractVectorElt(
            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
        if (!Extract)
          return false;
        I.eraseFromParent();
        return true;
      }

      // We might have a vector G_PTRTOINT, in which case just emit a COPY.
      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));
        return selectCopy(I, TII, MRI, TRI, RBI);
      }
    }

    return false;
  }

  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
      return true;

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
    if (RBDst.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
                        << ", expected: GPR\n");
      return false;
    }

    const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                        << ", expected: GPR\n");
      return false;
    }

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();

    if (DstSize == 0) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
      return false;
    }

    if (DstSize != 64 && DstSize > 32) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
                        << ", expected: 32 or 64\n");
      return false;
    }
    // At this point G_ANYEXT is just like a plain COPY, but we need
    // to explicitly form the 64-bit value if the destination is 64 bits.
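    // E.g. (illustrative) for s32 -> s64:
    //   %ext:gpr64all = SUBREG_TO_REG 0, %src, %subreg.sub_32
    //   %dst = COPY %ext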
    if (DstSize > 32) {
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
          .addDef(ExtSrc)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
    }
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
      return true;

    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    unsigned DstSize = DstTy.getSizeInBits();
    unsigned SrcSize = SrcTy.getSizeInBits();

    // SEXT_INREG has the same src reg size as dst; the size of the value to
    // be extended is encoded in the imm.
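    // E.g. (illustrative) %d(s64) = G_SEXT_INREG %s(s64), 8 sign-extends the
    // low 8 bits of %s, so SrcSize becomes 8 here.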
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();

    if (DstTy.isVector())
      return false; // Should be handled by imported patterns.

    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");

    MachineInstr *ExtI;

    // First check if we're extending the result of a load which has a dest
    // type smaller than 32 bits; if so, this zext is redundant. GPR32 is the
    // smallest GPR register on AArch64 and all loads which are smaller
    // automatically zero-extend the upper bits. E.g.
    // %v(s8) = G_LOAD %p, :: (load 1)
    // %v2(s32) = G_ZEXT %v(s8)
    if (!IsSigned) {
      auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
      bool IsGPR =
          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
      if (LoadMI && IsGPR) {
        const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
        unsigned BytesLoaded = MemOp->getSize();
        if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
          return selectCopy(I, TII, MRI, TRI, RBI);
      }

      // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
      // + SUBREG_TO_REG.
      //
      // If we are zero extending from 32 bits to 64 bits, it's possible that
      // the instruction implicitly does the zero extend for us. In that case,
      // we only need the SUBREG_TO_REG.
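      // E.g. (illustrative):
      //   %or:gpr32 = ORRWrs $wzr, %src, 0    ; mov; also zeroes the top half
      //   %dst:gpr64 = SUBREG_TO_REG 0, %or, %subreg.sub_32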
      if (IsGPR && SrcSize == 32 && DstSize == 64) {
        // Unlike with the G_LOAD case, we don't want to look through copies
        // here. (See isDef32.)
        MachineInstr *Def = MRI.getVRegDef(SrcReg);
        Register SubregToRegSrc = SrcReg;

        // Does the instruction implicitly zero extend?
        if (!Def || !isDef32(*Def)) {
          // No. Zero out using an OR.
          Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          const Register ZReg = AArch64::WZR;
          MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
          SubregToRegSrc = OrDst;
        }

        MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
            .addImm(0)
            .addUse(SubregToRegSrc)
            .addImm(AArch64::sub_32);

        if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
                                          MRI)) {
          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
          return false;
        }

        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
                                          MRI)) {
          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
          return false;
        }

        I.eraseFromParent();
        return true;
      }
    }

    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        // FIXME: Can we avoid manually doing this?
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
                                          MRI)) {
          LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
                            << " operand\n");
          return false;
        }
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
                     .addImm(0)
                     .addUse(SrcReg)
                     .addImm(AArch64::sub_32)
                     .getReg(0);
      }

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
                            {DefReg}, {SrcReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
                            {DefReg}, {SrcReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else {
      return false;
    }

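    // The immediates 0 and SrcSize - 1 pick out the low SrcSize bits; e.g.
    // SBFMWri %src, 0, 7 is an SXTB, and UBFMXri %src, 0, 15 is a UXTH into
    // a 64-bit register (illustrative).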
    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
    if (NewOpc == Opcode)
      return false;

    I.setDesc(TII.get(NewOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    I.setFlags(MachineInstr::NoFPExcept);

    return true;
  }

  case TargetOpcode::G_FREEZE:
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_INTTOPTR:
    // The importer is currently unable to import pointer types since they
    // didn't exist in SelectionDAG.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_BITCAST:
    // Imported SelectionDAG rules can handle every bitcast except those that
    // bitcast from a type to the same type. Ideally, these shouldn't occur
    // but we might not run an optimizer that deletes them. The other exception
    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
    // of them.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
                        << ", expected: " << LLT::scalar(1) << '\n');
      return false;
    }

    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();

    if (tryOptSelect(Sel))
      return true;

    // Make sure to use an unused vreg instead of wzr, so that the peephole
    // optimizations will be able to optimize these.
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                     .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
      return false;
    Sel.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_ICMP: {
    if (Ty.isVector())
      return selectVectorICmp(I, MRI);

    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
    const AArch64CC::CondCode InvCC =
        changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
              /*Src2=*/AArch64::WZR, InvCC, MIB);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_FCMP: {
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
                       Pred) ||
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const TargetRegisterClass *DstRC =
        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
    return true;
  }
  case TargetOpcode::G_BLOCK_ADDR: {
    if (TM.getCodeModel() == CodeModel::Large) {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      return true;
    } else {
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                       .addBlockAddress(I.getOperand(1).getBlockAddress(),
                                        /* Offset */ 0, AArch64II::MO_PAGE)
                       .addBlockAddress(
                           I.getOperand(1).getBlockAddress(), /* Offset */ 0,
                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
    }
  }
  case AArch64::G_DUP: {
    // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by
    // imported patterns. Do it manually here. Avoiding generating an s16 GPR
    // is difficult because at RBS we may end up pessimizing the fpr case if
    // we decided to add an anyextend to fix this. Manual selection is the
    // most robust solution for now.
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
      return false; // We expect the fpr regbank case to be imported.
    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
    if (VecTy == LLT::fixed_vector(8, 8))
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
    else if (VecTy == LLT::fixed_vector(16, 8))
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
    else if (VecTy == LLT::fixed_vector(4, 16))
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
    else if (VecTy == LLT::fixed_vector(8, 16))
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));
    else
      return false;
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return selectIntrinsicTrunc(I, MRI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return selectIntrinsicRound(I, MRI);
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_ADD:
    return selectReduction(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
  }

  return false;
}

bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  Register VecReg = I.getOperand(1).getReg();
  LLT VecTy = MRI.getType(VecReg);
  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
    // For <2 x i32>, ADDPv2i32 generates an FPR64 value, so we need to emit
    // a subregister copy afterwards.
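    // I.e. (illustrative):
    //   %sum:fpr64 = ADDPv2i32 %vec, %vec   ; lane 0 = %vec[0] + %vec[1]
    //   %dst:fpr32 = COPY %sum.ssub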
    if (VecTy == LLT::fixed_vector(2, 32)) {
      Register DstReg = I.getOperand(0).getReg();
      auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
                                 {VecReg, VecReg});
      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                      .addReg(AddP.getReg(0), 0, AArch64::ssub)
                      .getReg(0);
      RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
    }

    unsigned Opc = 0;
    if (VecTy == LLT::fixed_vector(16, 8))
      Opc = AArch64::ADDVv16i8v;
    else if (VecTy == LLT::fixed_vector(8, 16))
      Opc = AArch64::ADDVv8i16v;
    else if (VecTy == LLT::fixed_vector(4, 32))
      Opc = AArch64::ADDVv4i32v;
    else if (VecTy == LLT::fixed_vector(2, 64))
      Opc = AArch64::ADDPv2i64p;
    else {
      LLVM_DEBUG(dbgs() << "Unhandled type for add reduction\n");
      return false;
    }
    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
    unsigned Opc = 0;
    if (VecTy == LLT::fixed_vector(2, 32))
      Opc = AArch64::FADDPv2i32p;
    else if (VecTy == LLT::fixed_vector(2, 64))
      Opc = AArch64::FADDPv2i64p;
    else {
      LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction\n");
      return false;
    }
    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  return false;
}

bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
                                            MachineRegisterInfo &MRI) {
  unsigned Mopcode;
  switch (GI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode in selectMOPS");
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
    break;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
    break;
  case TargetOpcode::G_MEMSET:
    // For tagged memset, see llvm.aarch64.mops.memset.tag.
    Mopcode = AArch64::MOPSMemorySetPseudo;
    break;
  }

  auto &DstPtr = GI.getOperand(0);
  auto &SrcOrVal = GI.getOperand(1);
  auto &Size = GI.getOperand(2);

  // Create copies of the registers that can be clobbered.
  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());

  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;

  // Constrain to specific registers.
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

  MIB.buildCopy(DstPtrCopy, DstPtr);
  MIB.buildCopy(SrcValCopy, SrcOrVal);
  MIB.buildCopy(SizeCopy, Size);

  // The new instruction uses the copied registers because it must update them.
  // The defs are not used since they don't exist in G_MEM*; they are still
  // tied.
  // Note: the order of operands differs from G_MEMSET, G_MEMCPY, G_MEMMOVE.
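  // E.g. for a G_MEMSET this builds (illustrative):
  //   %dstDef, %sizeDef = MOPSMemorySetPseudo %dstCopy, %sizeCopy, %valCopy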
  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  if (IsSet) {
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
  } else {
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  }

  GI.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register Index = I.getOperand(2).getReg();

  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
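  // That is, roughly (illustrative, with a hypothetical label):
  //   adrp xN, .LJTI0_0
  //   add  xN, xN, :lo12:.LJTI0_0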
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  MF.getFrameInfo().setAdjustsStack(true);

  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
  const GlobalValue &GV = *GlobalOp.getGlobal();

  auto LoadGOT =
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
          .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);

  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
                  .addImm(0);

  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not
  // be silly).
  MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
      .addUse(AArch64::X0, RegState::Implicit)
      .addDef(AArch64::X0, RegState::Implicit)
      .addRegMask(TRI.getTLSCallPreservedMask());

  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
                               MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // First index is the element size: 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b.
  // Second index is the number of elements: 0 == v2, 1 == v4, 2 == v8,
  // 3 == v16.
  // Third index is the cc opcode:
  // 0 == eq
  // 1 == ugt
  // 2 == uge
  // 3 == ult
  // 4 == ule
  // 5 == sgt
  // 6 == sge
  // 7 == slt
  // 8 == sle
  // ne is done by negating the 'eq' result.

  // The table below assumes that for some comparisons the operands will be
  // commuted:
  // ult op == commute + ugt op
  // ule op == commute + uge op
  // slt op == commute + sgt op
  // sle op == commute + sge op
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // This table obviously should be tablegen'd when we have our GISel native
  // tablegen selector.

  static const unsigned OpcTable[4][4][9] = {
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
      },
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
  };
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
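  // E.g. (illustrative) a <4 x s32> ICMP_SLT gives EltIdx = 2,
  // NumEltsIdx = 1 and PredIdx = 7, selecting CMGTv4i32 with the operands
  // swapped.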
3970 | unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; | ||||
3971 | if (!Opc) { | ||||
3972 | LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not map G_ICMP to cmp opcode" ; } } while (false); | ||||
3973 | return false; | ||||
3974 | } | ||||
3975 | |||||
3976 | const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||
3977 | const TargetRegisterClass *SrcRC = | ||||
3978 | getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); | ||||
3979 | if (!SrcRC) { | ||||
3980 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not determine source register class.\n" ; } } while (false); | ||||
3981 | return false; | ||||
3982 | } | ||||
3983 | |||||
3984 | unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; | ||||
3985 | if (SrcTy.getSizeInBits() == 128) | ||||
3986 | NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; | ||||
3987 | |||||
3988 | if (SwapOperands) | ||||
3989 | std::swap(SrcReg, Src2Reg); | ||||
3990 | |||||
3991 | auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); | ||||
3992 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); | ||||
3993 | |||||
3994 | // Invert if we had a 'ne' cc. | ||||
3995 | if (NotOpc) { | ||||
3996 | Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); | ||||
3997 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); | ||||
3998 | } else { | ||||
3999 | MIB.buildCopy(DstReg, Cmp.getReg(0)); | ||||
4000 | } | ||||
4001 | RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); | ||||
4002 | I.eraseFromParent(); | ||||
4003 | return true; | ||||
4004 | } | ||||
4005 | |||||
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
    auto Ins =
        MIRBuilder
            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
            .addImm(SubregIndex);
    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
    return &*Ins;
  };

  switch (EltSize) {
  case 16:
    return BuildFn(AArch64::hsub);
  case 32:
    return BuildFn(AArch64::ssub);
  case 64:
    return BuildFn(AArch64::dsub);
  default:
    return nullptr;
  }
}

bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI =
        emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addImm(0)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm.
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addImm(0)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
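  // BFMXri with immr=32 and imms=31 is the BFI alias: it inserts the low 32
  // bits of the second source register into bits [63:32] of the first,
  // producing the merged 64-bit value.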
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  switch (EltSize) {
  case 8:
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    break;
  case 16:
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    // Unknown size, bail out.
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}

MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // If the lane index is 0, we just use a subregister COPY.
  if (LaneIdx == 0) {
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
    return &*Copy;
  }

  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);

  // Make sure that we actually constrain the initial copy.
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  return LaneCopyMI;
}

bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");

  // Need the lane index to determine the correct copy opcode.
  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the index to extract from.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
                                               LaneIdx, MIB);
  if (!Extract)
    return false;

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  }

  // We implement a split vector operation by treating the sub-vectors as
  // scalars and extracting them.
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    MachineInstr *Extract =
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    if (!Extract)
      return false;
  }
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
                              WideTy.getScalarSizeInBits() * NumElts);
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subreg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(SubReg);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Alignment);
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);

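  // Use the standard small-code-model addressing sequence: ADRP materializes
  // the 4KiB page containing the constant pool entry, and the load below
  // applies the low 12-bit page offset.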
  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 4:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 2:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }
  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
                                                    MachineMemOperand::MOLoad,
                                                    Size, Align(Size)));
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}

/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}

MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}

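// Shared helper for emitADD/emitADDS/emitSUBS. Each row of
// AddrModeAndSizeToOpcode is an addressing-mode variant ([0] immediate,
// [1] shifted register, [2] register-register, [3] negated immediate,
// [4] extended register); the column picks the 64-bit vs. 32-bit opcode.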
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
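  // CMN is an alias for ADDS with a discarded destination, so emit ADDS into a
  // scratch register and let only the NZCV flags be consumed.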
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
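  // CSET is an alias for CSINC Wd, WZR, WZR with the *inverted* condition, so
  // invert each condition code before emitting the CSINCs below.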
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
                     MIRBuilder);
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}

MachineInstr *
AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
                                          MachineIRBuilder &MIRBuilder,
                                          Optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return nullptr;

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }
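  // Rows select the register-register vs. immediate (compare-against-0.0)
  // form; columns select the 32-bit vs. 64-bit opcode.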
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

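  // Schematically, for two 64-bit source vectors the emitted MIR looks roughly
  // like (virtual registers and types elided):
  //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, dsub
  //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, dsub
  //   %dst:fpr128 = INSvi64lane %w1, 1, %w2, 0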
  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}

MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  auto &MRI = *MIRBuilder.getMRI();
  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
  // If we used a register class, then this won't necessarily have an LLT.
  // Compute the size based off whether or not we have a class or bank.
  unsigned Size;
  if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  // Some opcodes use s1.
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  return &*CSINC;
}

std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
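  // Map each overflow op to the flag that signals it: signed overflow sets the
  // V flag (VS), an unsigned add that wraps sets the carry (HS), and an
  // unsigned subtract that borrows clears the carry (LO).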
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  }
}

/// Returns true if \p Val is a tree of AND/OR/CMP operations that can be
/// expressed as a conjunction.
/// \param CanNegate    Set to true if we can negate the whole sub-tree just by
///                     changing the conditions on the CMP tests.
///                     (this means we can call emitConjunctionRec() with
///                      Negate==true on this sub-tree)
/// \param MustBeFirst  Set to true if this subtree needs to be negated and we
///                     cannot do the negation naturally. We are required to
///                     emit the subtree first in this case.
/// \param WillNegate   Is true if we are called when the result of this
///                     subexpression must be negated. This happens when the
///                     outer expression is an OR. We can use this fact to know
///                     that we have a double negation (or (or ...) ...) that
///                     can be implemented for free.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();
  if (Opcode == TargetOpcode::G_TRUNC) {
    // Look through a trunc.
    Val = ValDef->getOperand(1).getReg();
    ValDef = MRI.getVRegDef(Val);
    Opcode = ValDef->getOpcode();
  }
  if (isa<GAnyCmp>(ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    Register O0 = ValDef->getOperand(1).getReg();
    Register O1 = ValDef->getOperand(2).getReg();
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
      return false;

    if (MustBeFirstL && MustBeFirstR)
      return false;

    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}

MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  // TODO: emit CMN as an optimization.
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(LHS);
  assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
  unsigned CCmpOpc;
  if (CmpInst::isIntPredicate(CC)) {
    CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
  } else {
    switch (OpTy.getSizeInBits()) {
    case 16:
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
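  // CCMP sets the flags from the comparison when Predicate holds; otherwise it
  // sets them to the immediate NZCV value. Choose NZCV so that the *inverted*
  // output condition holds, which lets the rest of the chain short-circuit.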
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  auto CCmp =
      MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
  return &*CCmp;
}
4915 | |||||
4916 | MachineInstr *AArch64InstructionSelector::emitConjunctionRec( | ||||
4917 | Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, | ||||
4918 | AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { | ||||
4919 | // We're at a tree leaf, produce a conditional comparison operation. | ||||
4920 | auto &MRI = *MIB.getMRI(); | ||||
4921 | MachineInstr *ValDef = MRI.getVRegDef(Val); | ||||
4922 | unsigned Opcode = ValDef->getOpcode(); | ||||
4923 | if (Opcode == TargetOpcode::G_TRUNC) { | ||||
4924 | // Look through a trunc. | ||||
4925 | Val = ValDef->getOperand(1).getReg(); | ||||
4926 | ValDef = MRI.getVRegDef(Val); | ||||
4927 | Opcode = ValDef->getOpcode(); | ||||
4928 | } | ||||
4929 | if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) { | ||||
4930 | Register LHS = Cmp->getLHSReg(); | ||||
4931 | Register RHS = Cmp->getRHSReg(); | ||||
4932 | CmpInst::Predicate CC = Cmp->getCond(); | ||||
4933 | if (Negate) | ||||
4934 | CC = CmpInst::getInversePredicate(CC); | ||||
4935 | if (isa<GICmp>(Cmp)) { | ||||
4936 | OutCC = changeICMPPredToAArch64CC(CC); | ||||
4937 | } else { | ||||
4938 | // Handle special FP cases. | ||||
4939 | AArch64CC::CondCode ExtraCC; | ||||
4940 | changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); | ||||
4941 | // Some floating point conditions can't be tested with a single condition | ||||
4942 | // code. Construct an additional comparison in this case. | ||||
4943 | if (ExtraCC != AArch64CC::AL) { | ||||
4944 | MachineInstr *ExtraCmp; | ||||
4945 | if (!CCOp) | ||||
4946 | ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC); | ||||
4947 | else | ||||
4948 | ExtraCmp = | ||||
4949 | emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB); | ||||
4950 | CCOp = ExtraCmp->getOperand(0).getReg(); | ||||
4951 | Predicate = ExtraCC; | ||||
4952 | } | ||||
4953 | } | ||||
4954 | |||||
4955 | // Produce a normal comparison if we are first in the chain | ||||
4956 | if (!CCOp) { | ||||
4957 | auto Dst = MRI.cloneVirtualRegister(LHS); | ||||
4958 | if (isa<GICmp>(Cmp)) | ||||
4959 | return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB); | ||||
4960 | return emitFPCompare(Cmp->getOperand(2).getReg(), | ||||
4961 | Cmp->getOperand(3).getReg(), MIB); | ||||
4962 | } | ||||
4963 | // Otherwise produce a ccmp. | ||||
4964 | return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); | ||||
4965 | } | ||||
4966 | assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree"); | ||||
4967 | |||||
4968 | bool IsOR = Opcode == TargetOpcode::G_OR; | ||||
4969 | |||||
4970 | Register LHS = ValDef->getOperand(1).getReg(); | ||||
4971 | bool CanNegateL; | ||||
4972 | bool MustBeFirstL; | ||||
4973 | bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI); | ||||
4974 | assert(ValidL && "Valid conjunction/disjunction tree"); | ||||
4975 | (void)ValidL; | ||||
4976 | |||||
4977 | Register RHS = ValDef->getOperand(2).getReg(); | ||||
4978 | bool CanNegateR; | ||||
4979 | bool MustBeFirstR; | ||||
4980 | bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI); | ||||
4981 | assert(ValidR && "Valid conjunction/disjunction tree"); | ||||
4982 | (void)ValidR; | ||||
4983 | |||||
4984 | // Swap sub-tree that must come first to the right side. | ||||
4985 | if (MustBeFirstL) { | ||||
4986 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); | ||||
4987 | std::swap(LHS, RHS); | ||||
4988 | std::swap(CanNegateL, CanNegateR); | ||||
4989 | std::swap(MustBeFirstL, MustBeFirstR); | ||||
4990 | } | ||||
4991 | |||||
4992 | bool NegateR; | ||||
4993 | bool NegateAfterR; | ||||
4994 | bool NegateL; | ||||
4995 | bool NegateAfterAll; | ||||
4996 | if (Opcode == TargetOpcode::G_OR) { | ||||
4997 | // Swap the sub-tree that we can negate naturally to the left. | ||||
4998 | if (!CanNegateL) { | ||||
4999 | assert(CanNegateR && "at least one side must be negatable"); | ||||
5000 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); | ||||
5001 | assert(!Negate); | ||||
5002 | std::swap(LHS, RHS); | ||||
5003 | NegateR = false; | ||||
5004 | NegateAfterR = true; | ||||
5005 | } else { | ||||
5006 | // Negate the left sub-tree if possible, otherwise negate the result. | ||||
5007 | NegateR = CanNegateR; | ||||
5008 | NegateAfterR = !CanNegateR; | ||||
5009 | } | ||||
5010 | NegateL = true; | ||||
5011 | NegateAfterAll = !Negate; | ||||
5012 | } else { | ||||
5013 | assert(Opcode == TargetOpcode::G_AND && | ||||
5014 | "Valid conjunction/disjunction tree"); | ||||
5015 | assert(!Negate && "Valid conjunction/disjunction tree"); | ||||
5016 | |||||
5017 | NegateL = false; | ||||
5018 | NegateR = false; | ||||
5019 | NegateAfterR = false; | ||||
5020 | NegateAfterAll = false; | ||||
5021 | } | ||||
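| // Illustrative walk-through (editorial sketch): for a top-level G_OR with | ||||
| // both sides negatable and Negate == false, the flags above come out as | ||||
| // NegateL = NegateR = NegateAfterAll = true, computing !(!L && !R), i.e. | ||||
| // L || R by De Morgan's law, since a ccmp chain natively models only | ||||
| // conjunctions. | ||||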
5022 | |||||
5023 | // Emit sub-trees. | ||||
5024 | AArch64CC::CondCode RHSCC; | ||||
5025 | MachineInstr *CmpR = | ||||
5026 | emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB); | ||||
5027 | if (NegateAfterR) | ||||
5028 | RHSCC = AArch64CC::getInvertedCondCode(RHSCC); | ||||
5029 | MachineInstr *CmpL = emitConjunctionRec( | ||||
5030 | LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB); | ||||
5031 | if (NegateAfterAll) | ||||
5032 | OutCC = AArch64CC::getInvertedCondCode(OutCC); | ||||
5033 | return CmpL; | ||||
5034 | } | ||||
5035 | |||||
5036 | MachineInstr *AArch64InstructionSelector::emitConjunction( | ||||
5037 | Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { | ||||
5038 | bool DummyCanNegate; | ||||
5039 | bool DummyMustBeFirst; | ||||
5040 | if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false, | ||||
5041 | *MIB.getMRI())) | ||||
5042 | return nullptr; | ||||
5043 | return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB); | ||||
5044 | } | ||||
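| // Usage sketch (illustrative operands, not from the upstream source): for | ||||
| //   %c:gpr(s1) = G_AND (G_ICMP eq, %x, 0), (G_ICMP slt, %y, %z) | ||||
| // the recursion emits roughly | ||||
| //   cmp  y, z            ; plain SUBS for the right leaf, yielding "lt" | ||||
| //   ccmp x, {0}, #0, lt  ; if "lt" failed, NZCV := 0000, which fails "eq" | ||||
| // and returns OutCC == EQ for the caller's csel/cset. | ||||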
5045 | |||||
5046 | bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, | ||||
5047 | MachineInstr &CondMI) { | ||||
5048 | AArch64CC::CondCode AArch64CC; | ||||
5049 | MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB); | ||||
5050 | if (!ConjMI) | ||||
5051 | return false; | ||||
5052 | |||||
5053 | emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB); | ||||
5054 | SelI.eraseFromParent(); | ||||
5055 | return true; | ||||
5056 | } | ||||
5057 | |||||
5058 | bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { | ||||
5059 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||
5060 | // We want to recognize this pattern: | ||||
5061 | // | ||||
5062 | // $z = G_FCMP pred, $x, $y | ||||
5063 | // ... | ||||
5064 | // $w = G_SELECT $z, $a, $b | ||||
5065 | // | ||||
5066 | // Where the value of $z is *only* ever used by the G_SELECT (possibly with | ||||
5067 | // some copies/truncs in between.) | ||||
5068 | // | ||||
5069 | // If we see this, then we can emit something like this: | ||||
5070 | // | ||||
5071 | // fcmp $x, $y | ||||
5072 | // fcsel $w, $a, $b, pred | ||||
5073 | // | ||||
5074 | // Rather than emitting both of the rather long sequences in the standard | ||||
5075 | // G_FCMP/G_SELECT select methods. | ||||
5076 | |||||
5077 | // First, check if the condition is defined by a compare. | ||||
5078 | MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); | ||||
5079 | while (CondDef) { | ||||
5080 | // We can only fold if all of the defs have one use. | ||||
5081 | Register CondDefReg = CondDef->getOperand(0).getReg(); | ||||
5082 | if (!MRI.hasOneNonDBGUse(CondDefReg)) { | ||||
5083 | // Unless it's another select. | ||||
5084 | for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { | ||||
5085 | if (CondDef == &UI) | ||||
5086 | continue; | ||||
5087 | if (UI.getOpcode() != TargetOpcode::G_SELECT) | ||||
5088 | return false; | ||||
5089 | } | ||||
5090 | } | ||||
5091 | |||||
5092 | // We can skip over G_TRUNC since the condition is 1-bit. | ||||
5093 | // Truncating/extending can have no impact on the value. | ||||
5094 | unsigned Opc = CondDef->getOpcode(); | ||||
5095 | if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) | ||||
5096 | break; | ||||
5097 | |||||
5098 | // Can't see past copies from physregs. | ||||
5099 | if (Opc == TargetOpcode::COPY && | ||||
5100 | Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) | ||||
5101 | return false; | ||||
5102 | |||||
5103 | CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); | ||||
5104 | } | ||||
5105 | |||||
5106 | // Is the condition defined by a compare? | ||||
5107 | if (!CondDef) | ||||
5108 | return false; | ||||
5109 | |||||
5110 | unsigned CondOpc = CondDef->getOpcode(); | ||||
5111 | if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) { | ||||
5112 | if (tryOptSelectConjunction(I, *CondDef)) | ||||
5113 | return true; | ||||
5114 | return false; | ||||
5115 | } | ||||
5116 | |||||
5117 | AArch64CC::CondCode CondCode; | ||||
5118 | if (CondOpc == TargetOpcode::G_ICMP) { | ||||
5119 | auto Pred = | ||||
5120 | static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); | ||||
5121 | CondCode = changeICMPPredToAArch64CC(Pred); | ||||
5122 | emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), | ||||
5123 | CondDef->getOperand(1), MIB); | ||||
5124 | } else { | ||||
5125 | // Get the condition code for the select. | ||||
5126 | auto Pred = | ||||
5127 | static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); | ||||
5128 | AArch64CC::CondCode CondCode2; | ||||
5129 | changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); | ||||
5130 | |||||
5131 | // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two | ||||
5132 | // instructions to emit the comparison. | ||||
5133 | // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be | ||||
5134 | // unnecessary. | ||||
5135 | if (CondCode2 != AArch64CC::AL) | ||||
5136 | return false; | ||||
5137 | |||||
5138 | if (!emitFPCompare(CondDef->getOperand(2).getReg(), | ||||
5139 | CondDef->getOperand(3).getReg(), MIB)) { | ||||
5140 | LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); | ||||
5141 | return false; | ||||
5142 | } | ||||
5143 | } | ||||
5144 | |||||
5145 | // Emit the select. | ||||
5146 | emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), | ||||
5147 | I.getOperand(3).getReg(), CondCode, MIB); | ||||
5148 | I.eraseFromParent(); | ||||
5149 | return true; | ||||
5150 | } | ||||
5151 | |||||
5152 | MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( | ||||
5153 | MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, | ||||
5154 | MachineIRBuilder &MIRBuilder) const { | ||||
5155 | assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && | ||||
5156 | "Unexpected MachineOperand"); | ||||
5157 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | ||||
5158 | // We want to find this sort of thing: | ||||
5159 | // x = G_SUB 0, y | ||||
5160 | // G_ICMP z, x | ||||
5161 | // | ||||
5162 | // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. | ||||
5163 | // e.g: | ||||
5164 | // | ||||
5165 | // cmn z, y | ||||
5166 | |||||
5167 | // Check if the RHS or LHS of the G_ICMP is defined by a SUB | ||||
5168 | MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); | ||||
5169 | MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); | ||||
5170 | auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); | ||||
5171 | // Given this: | ||||
5172 | // | ||||
5173 | // x = G_SUB 0, y | ||||
5174 | // G_ICMP x, z | ||||
5175 | // | ||||
5176 | // Produce this: | ||||
5177 | // | ||||
5178 | // cmn y, z | ||||
5179 | if (isCMN(LHSDef, P, MRI)) | ||||
5180 | return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); | ||||
5181 | |||||
5182 | // Same idea here, but with the RHS of the compare instead: | ||||
5183 | // | ||||
5184 | // Given this: | ||||
5185 | // | ||||
5186 | // x = G_SUB 0, y | ||||
5187 | // G_ICMP z, x | ||||
5188 | // | ||||
5189 | // Produce this: | ||||
5190 | // | ||||
5191 | // cmn z, y | ||||
5192 | if (isCMN(RHSDef, P, MRI)) | ||||
5193 | return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); | ||||
5194 | |||||
5195 | // Given this: | ||||
5196 | // | ||||
5197 | // z = G_AND x, y | ||||
5198 | // G_ICMP z, 0 | ||||
5199 | // | ||||
5200 | // Produce this if the compare is signed: | ||||
5201 | // | ||||
5202 | // tst x, y | ||||
5203 | if (!CmpInst::isUnsigned(P) && LHSDef && | ||||
5204 | LHSDef->getOpcode() == TargetOpcode::G_AND) { | ||||
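| // Editorial note: TST is ANDS writing to the zero register; it sets N and | ||||
| // Z from the result but clears C and V, so unsigned condition codes, | ||||
| // which read C, cannot be fed by it; hence the isUnsigned() guard. | ||||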
5205 | // Make sure that the RHS is 0. | ||||
5206 | auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI); | ||||
5207 | if (!ValAndVReg || ValAndVReg->Value != 0) | ||||
5208 | return nullptr; | ||||
5209 | |||||
5210 | return emitTST(LHSDef->getOperand(1), | ||||
5211 | LHSDef->getOperand(2), MIRBuilder); | ||||
5212 | } | ||||
5213 | |||||
5214 | return nullptr; | ||||
5215 | } | ||||
5216 | |||||
5217 | bool AArch64InstructionSelector::selectShuffleVector( | ||||
5218 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||
5219 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||
5220 | Register Src1Reg = I.getOperand(1).getReg(); | ||||
5221 | const LLT Src1Ty = MRI.getType(Src1Reg); | ||||
5222 | Register Src2Reg = I.getOperand(2).getReg(); | ||||
5223 | const LLT Src2Ty = MRI.getType(Src2Reg); | ||||
5224 | ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); | ||||
5225 | |||||
5226 | MachineBasicBlock &MBB = *I.getParent(); | ||||
5227 | MachineFunction &MF = *MBB.getParent(); | ||||
5228 | LLVMContext &Ctx = MF.getFunction().getContext(); | ||||
5229 | |||||
5230 | // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if | ||||
5231 | // it's originated from a <1 x T> type. Those should have been lowered into | ||||
5232 | // G_BUILD_VECTOR earlier. | ||||
5233 | if (!Src1Ty.isVector() || !Src2Ty.isVector()) { | ||||
5234 | LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); | ||||
5235 | return false; | ||||
5236 | } | ||||
5237 | |||||
5238 | unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; | ||||
5239 | |||||
5240 | SmallVector<Constant *, 64> CstIdxs; | ||||
5241 | for (int Val : Mask) { | ||||
5242 | // For now, we'll just assume any undef indexes are 0. This should be | ||||
5243 | // optimized in the future, e.g. to select DUP etc. | ||||
5244 | Val = Val < 0 ? 0 : Val; | ||||
5245 | for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { | ||||
5246 | unsigned Offset = Byte + Val * BytesPerElt; | ||||
5247 | CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); | ||||
5248 | } | ||||
5249 | } | ||||
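| // Worked example (illustrative): for a <4 x s32> shuffle with mask | ||||
| // <0, 4, 1, 5>, BytesPerElt == 4, so the TBL index vector holds bytes | ||||
| // <0..3, 16..19, 4..7, 20..23> into the 32-byte concatenation of the two | ||||
| // sources. | ||||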
5250 | |||||
5251 | // Use a constant pool to load the index vector for TBL. | ||||
5252 | Constant *CPVal = ConstantVector::get(CstIdxs); | ||||
5253 | MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB); | ||||
5254 | if (!IndexLoad) { | ||||
5255 | LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); | ||||
5256 | return false; | ||||
5257 | } | ||||
5258 | |||||
5259 | if (DstTy.getSizeInBits() != 128) { | ||||
5260 | assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); | ||||
5261 | // This case can be done with TBL1. | ||||
5262 | MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB); | ||||
5263 | if (!Concat) { | ||||
5264 | LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); | ||||
5265 | return false; | ||||
5266 | } | ||||
5267 | |||||
5268 | // The constant pool load will be 64 bits, so we need to widen it into an FPR128 reg. | ||||
5269 | IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, | ||||
5270 | IndexLoad->getOperand(0).getReg(), MIB); | ||||
5271 | |||||
5272 | auto TBL1 = MIB.buildInstr( | ||||
5273 | AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, | ||||
5274 | {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); | ||||
5275 | constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); | ||||
5276 | |||||
5277 | auto Copy = | ||||
5278 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) | ||||
5279 | .addReg(TBL1.getReg(0), 0, AArch64::dsub); | ||||
5280 | RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); | ||||
5281 | I.eraseFromParent(); | ||||
5282 | return true; | ||||
5283 | } | ||||
5284 | |||||
5285 | // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive | ||||
5286 | // Q registers for regalloc. | ||||
5287 | SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; | ||||
5288 | auto RegSeq = createQTuple(Regs, MIB); | ||||
5289 | auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, | ||||
5290 | {RegSeq, IndexLoad->getOperand(0)}); | ||||
5291 | constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); | ||||
5292 | I.eraseFromParent(); | ||||
5293 | return true; | ||||
5294 | } | ||||
5295 | |||||
5296 | MachineInstr *AArch64InstructionSelector::emitLaneInsert( | ||||
5297 | Optional<Register> DstReg, Register SrcReg, Register EltReg, | ||||
5298 | unsigned LaneIdx, const RegisterBank &RB, | ||||
5299 | MachineIRBuilder &MIRBuilder) const { | ||||
5300 | MachineInstr *InsElt = nullptr; | ||||
5301 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; | ||||
5302 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | ||||
5303 | |||||
5304 | // Create a register to define with the insert if one wasn't passed in. | ||||
5305 | if (!DstReg) | ||||
5306 | DstReg = MRI.createVirtualRegister(DstRC); | ||||
5307 | |||||
5308 | unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); | ||||
5309 | unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; | ||||
5310 | |||||
5311 | if (RB.getID() == AArch64::FPRRegBankID) { | ||||
5312 | auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); | ||||
5313 | InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) | ||||
5314 | .addImm(LaneIdx) | ||||
5315 | .addUse(InsSub->getOperand(0).getReg()) | ||||
5316 | .addImm(0); | ||||
5317 | } else { | ||||
5318 | InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) | ||||
5319 | .addImm(LaneIdx) | ||||
5320 | .addUse(EltReg); | ||||
5321 | } | ||||
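| // e.g. (illustrative): for a 32-bit element, the GPR bank selects | ||||
| // INSvi32gpr directly, while the FPR bank first widens the element with | ||||
| // emitScalarToVector and then inserts lane 0 of that copy via | ||||
| // INSvi32lane. | ||||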
5322 | |||||
5323 | constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); | ||||
5324 | return InsElt; | ||||
5325 | } | ||||
5326 | |||||
5327 | bool AArch64InstructionSelector::selectUSMovFromExtend( | ||||
5328 | MachineInstr &MI, MachineRegisterInfo &MRI) { | ||||
5329 | if (MI.getOpcode() != TargetOpcode::G_SEXT && | ||||
5330 | MI.getOpcode() != TargetOpcode::G_ZEXT && | ||||
5331 | MI.getOpcode() != TargetOpcode::G_ANYEXT) | ||||
5332 | return false; | ||||
5333 | bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; | ||||
5334 | const Register DefReg = MI.getOperand(0).getReg(); | ||||
5335 | const LLT DstTy = MRI.getType(DefReg); | ||||
5336 | unsigned DstSize = DstTy.getSizeInBits(); | ||||
5337 | |||||
5338 | if (DstSize != 32 && DstSize != 64) | ||||
5339 | return false; | ||||
5340 | |||||
5341 | MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT, | ||||
5342 | MI.getOperand(1).getReg(), MRI); | ||||
5343 | int64_t Lane; | ||||
5344 | if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane))) | ||||
5345 | return false; | ||||
5346 | Register Src0 = Extract->getOperand(1).getReg(); | ||||
5347 | |||||
5348 | const LLT &VecTy = MRI.getType(Src0); | ||||
5349 | |||||
5350 | if (VecTy.getSizeInBits() != 128) { | ||||
5351 | const MachineInstr *ScalarToVector = emitScalarToVector( | ||||
5352 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB); | ||||
5353 | assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!"); | ||||
5354 | Src0 = ScalarToVector->getOperand(0).getReg(); | ||||
5355 | } | ||||
5356 | |||||
5357 | unsigned Opcode; | ||||
5358 | if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) | ||||
5359 | Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; | ||||
5360 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) | ||||
5361 | Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; | ||||
5362 | else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) | ||||
5363 | Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; | ||||
5364 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) | ||||
5365 | Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; | ||||
5366 | else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) | ||||
5367 | Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8; | ||||
5368 | else | ||||
5369 | llvm_unreachable("Unexpected type combo for S/UMov!"); | ||||
5370 | |||||
5371 | // We may need to generate one of these, depending on the type and sign of the | ||||
5372 | // input: | ||||
5373 | // DstReg = SMOV Src0, Lane; | ||||
5374 | // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; | ||||
5375 | MachineInstr *ExtI = nullptr; | ||||
5376 | if (DstSize == 64 && !IsSigned) { | ||||
5377 | Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | ||||
5378 | MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane); | ||||
5379 | ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) | ||||
5380 | .addImm(0) | ||||
5381 | .addUse(NewReg) | ||||
5382 | .addImm(AArch64::sub_32); | ||||
5383 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); | ||||
5384 | } else | ||||
5385 | ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane); | ||||
5386 | |||||
5387 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); | ||||
5388 | MI.eraseFromParent(); | ||||
5389 | return true; | ||||
5390 | } | ||||
5391 | |||||
5392 | bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, | ||||
5393 | MachineRegisterInfo &MRI) { | ||||
5394 | assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); | ||||
5395 | |||||
5396 | // Get information on the destination. | ||||
5397 | Register DstReg = I.getOperand(0).getReg(); | ||||
5398 | const LLT DstTy = MRI.getType(DstReg); | ||||
5399 | unsigned VecSize = DstTy.getSizeInBits(); | ||||
5400 | |||||
5401 | // Get information on the element we want to insert into the destination. | ||||
5402 | Register EltReg = I.getOperand(2).getReg(); | ||||
5403 | const LLT EltTy = MRI.getType(EltReg); | ||||
5404 | unsigned EltSize = EltTy.getSizeInBits(); | ||||
5405 | if (EltSize < 16 || EltSize > 64) | ||||
5406 | return false; // Don't support all element types yet. | ||||
5407 | |||||
5408 | // Find the definition of the index. Bail out if it's not defined by a | ||||
5409 | // G_CONSTANT. | ||||
5410 | Register IdxReg = I.getOperand(3).getReg(); | ||||
5411 | auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI); | ||||
5412 | if (!VRegAndVal) | ||||
5413 | return false; | ||||
5414 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); | ||||
5415 | |||||
5416 | // Perform the lane insert. | ||||
5417 | Register SrcReg = I.getOperand(1).getReg(); | ||||
5418 | const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); | ||||
5419 | |||||
5420 | if (VecSize < 128) { | ||||
5421 | // If the vector we're inserting into is smaller than 128 bits, widen it | ||||
5422 | // to 128 to do the insert. | ||||
5423 | MachineInstr *ScalarToVec = | ||||
5424 | emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); | ||||
5425 | if (!ScalarToVec) | ||||
5426 | return false; | ||||
5427 | SrcReg = ScalarToVec->getOperand(0).getReg(); | ||||
5428 | } | ||||
5429 | |||||
5430 | // Create an insert into a new FPR128 register. | ||||
5431 | // Note that if our vector is already 128 bits, we end up emitting an extra | ||||
5432 | // register. | ||||
5433 | MachineInstr *InsMI = | ||||
5434 | emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB); | ||||
5435 | |||||
5436 | if (VecSize < 128) { | ||||
5437 | // If we had to widen to perform the insert, then we have to demote back to | ||||
5438 | // the original size to get the result we want. | ||||
5439 | Register DemoteVec = InsMI->getOperand(0).getReg(); | ||||
5440 | const TargetRegisterClass *RC = | ||||
5441 | getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); | ||||
5442 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { | ||||
5443 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); | ||||
5444 | return false; | ||||
5445 | } | ||||
5446 | unsigned SubReg = 0; | ||||
5447 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||
5448 | return false; | ||||
5449 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { | ||||
5450 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize | ||||
5451 | << "\n"); | ||||
5452 | return false; | ||||
5453 | } | ||||
5454 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | ||||
5455 | .addReg(DemoteVec, 0, SubReg); | ||||
5456 | RBI.constrainGenericRegister(DstReg, *RC, MRI); | ||||
5457 | } else { | ||||
5458 | // No widening needed. | ||||
5459 | InsMI->getOperand(0).setReg(DstReg); | ||||
5460 | constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); | ||||
5461 | } | ||||
5462 | |||||
5463 | I.eraseFromParent(); | ||||
5464 | return true; | ||||
5465 | } | ||||
5466 | |||||
5467 | MachineInstr * | ||||
5468 | AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, | ||||
5469 | MachineIRBuilder &MIRBuilder, | ||||
5470 | MachineRegisterInfo &MRI) { | ||||
5471 | LLT DstTy = MRI.getType(Dst); | ||||
5472 | unsigned DstSize = DstTy.getSizeInBits(); | ||||
5473 | if (CV->isNullValue()) { | ||||
5474 | if (DstSize == 128) { | ||||
5475 | auto Mov = | ||||
5476 | MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); | ||||
5477 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); | ||||
5478 | return &*Mov; | ||||
5479 | } | ||||
5480 | |||||
5481 | if (DstSize == 64) { | ||||
5482 | auto Mov = | ||||
5483 | MIRBuilder | ||||
5484 | .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) | ||||
5485 | .addImm(0); | ||||
5486 | auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) | ||||
5487 | .addReg(Mov.getReg(0), 0, AArch64::dsub); | ||||
5488 | RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); | ||||
5489 | return &*Copy; | ||||
5490 | } | ||||
5491 | } | ||||
5492 | |||||
5493 | auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); | ||||
5494 | if (!CPLoad) { | ||||
5495 | LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!"); | ||||
5496 | return nullptr; | ||||
5497 | } | ||||
5498 | |||||
5499 | auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); | ||||
5500 | RBI.constrainGenericRegister( | ||||
5501 | Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); | ||||
5502 | return &*Copy; | ||||
5503 | } | ||||
5504 | |||||
5505 | bool AArch64InstructionSelector::tryOptConstantBuildVec( | ||||
5506 | MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { | ||||
5507 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); | ||||
5508 | unsigned DstSize = DstTy.getSizeInBits(); | ||||
5509 | assert(DstSize <= 128 && "Unexpected build_vec type!"); | ||||
5510 | if (DstSize < 32) | ||||
5511 | return false; | ||||
5512 | // Check if we're building a constant vector, in which case we want to | ||||
5513 | // generate a constant pool load instead of a vector insert sequence. | ||||
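| // e.g. (illustrative): a <4 x s32> G_BUILD_VECTOR whose four operands are | ||||
| // all G_CONSTANTs becomes a single constant-pool load via | ||||
| // emitConstantVector instead of a scalar-to-vector plus lane-insert chain. | ||||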
5514 | SmallVector<Constant *, 16> Csts; | ||||
5515 | for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { | ||||
5516 | // Try to find G_CONSTANT or G_FCONSTANT | ||||
5517 | auto *OpMI = | ||||
5518 | getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); | ||||
5519 | if (OpMI) | ||||
5520 | Csts.emplace_back( | ||||
5521 | const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); | ||||
5522 | else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, | ||||
5523 | I.getOperand(Idx).getReg(), MRI))) | ||||
5524 | Csts.emplace_back( | ||||
5525 | const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); | ||||
5526 | else | ||||
5527 | return false; | ||||
5528 | } | ||||
5529 | Constant *CV = ConstantVector::get(Csts); | ||||
5530 | if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) | ||||
5531 | return false; | ||||
5532 | I.eraseFromParent(); | ||||
5533 | return true; | ||||
5534 | } | ||||
5535 | |||||
5536 | bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( | ||||
5537 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||
5538 | // Given: | ||||
5539 | // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef | ||||
5540 | // | ||||
5541 | // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. | ||||
5542 | Register Dst = I.getOperand(0).getReg(); | ||||
5543 | Register EltReg = I.getOperand(1).getReg(); | ||||
5544 | LLT EltTy = MRI.getType(EltReg); | ||||
5545 | // If the destination vector isn't on the same register bank as its | ||||
5546 | // elements, then this can't be a SUBREG_TO_REG. | ||||
5547 | const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); | ||||
5548 | const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI); | ||||
5549 | if (EltRB != DstRB) | ||||
5550 | return false; | ||||
5551 | if (any_of(make_range(I.operands_begin() + 2, I.operands_end()), | ||||
5552 | [&MRI](const MachineOperand &Op) { | ||||
5553 | return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), | ||||
5554 | MRI); | ||||
5555 | })) | ||||
5556 | return false; | ||||
5557 | unsigned SubReg; | ||||
5558 | const TargetRegisterClass *EltRC = | ||||
5559 | getMinClassForRegBank(EltRB, EltTy.getSizeInBits()); | ||||
5560 | if (!EltRC) | ||||
5561 | return false; | ||||
5562 | const TargetRegisterClass *DstRC = | ||||
5563 | getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits()); | ||||
5564 | if (!DstRC) | ||||
5565 | return false; | ||||
5566 | if (!getSubRegForClass(EltRC, TRI, SubReg)) | ||||
5567 | return false; | ||||
5568 | auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {}) | ||||
5569 | .addImm(0) | ||||
5570 | .addUse(EltReg) | ||||
5571 | .addImm(SubReg); | ||||
5572 | I.eraseFromParent(); | ||||
5573 | constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI); | ||||
5574 | return RBI.constrainGenericRegister(Dst, *DstRC, MRI); | ||||
5575 | } | ||||
5576 | |||||
5577 | bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, | ||||
5578 | MachineRegisterInfo &MRI) { | ||||
5579 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); | ||||
5580 | // Until we port more of the optimized selections, for now just use a vector | ||||
5581 | // insert sequence. | ||||
5582 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||
5583 | const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); | ||||
5584 | unsigned EltSize = EltTy.getSizeInBits(); | ||||
5585 | |||||
5586 | if (tryOptConstantBuildVec(I, DstTy, MRI)) | ||||
5587 | return true; | ||||
5588 | if (tryOptBuildVecToSubregToReg(I, MRI)) | ||||
5589 | return true; | ||||
5590 | |||||
5591 | if (EltSize < 16 || EltSize > 64) | ||||
5592 | return false; // Don't support all element types yet. | ||||
5593 | const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); | ||||
5594 | |||||
5595 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; | ||||
5596 | MachineInstr *ScalarToVec = | ||||
5597 | emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, | ||||
5598 | I.getOperand(1).getReg(), MIB); | ||||
5599 | if (!ScalarToVec) | ||||
5600 | return false; | ||||
5601 | |||||
5602 | Register DstVec = ScalarToVec->getOperand(0).getReg(); | ||||
5603 | unsigned DstSize = DstTy.getSizeInBits(); | ||||
5604 | |||||
5605 | // Keep track of the last MI we inserted. Later on, we might be able to save | ||||
5606 | // a copy using it. | ||||
5607 | MachineInstr *PrevMI = nullptr; | ||||
5608 | for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { | ||||
5609 | // Note that if we don't do a subregister copy, we can end up making an | ||||
5610 | // extra register. | ||||
5611 | PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, | ||||
5612 | MIB); | ||||
5613 | DstVec = PrevMI->getOperand(0).getReg(); | ||||
5614 | } | ||||
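| // e.g. (illustrative): a <4 x s32> G_BUILD_VECTOR lowers to one | ||||
| // scalar-to-vector move for operand 1 plus three lane inserts for | ||||
| // operands 2..4, each insert reading the DstVec defined by the previous | ||||
| // one. | ||||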
5615 | |||||
5616 | // If DstTy's size in bits is less than 128, then emit a subregister copy | ||||
5617 | // from DstVec to the last register we've defined. | ||||
5618 | if (DstSize < 128) { | ||||
5619 | // Force this to be FPR using the destination vector. | ||||
5620 | const TargetRegisterClass *RC = | ||||
5621 | getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); | ||||
5622 | if (!RC) | ||||
5623 | return false; | ||||
5624 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { | ||||
5625 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); | ||||
5626 | return false; | ||||
5627 | } | ||||
5628 | |||||
5629 | unsigned SubReg = 0; | ||||
5630 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||
5631 | return false; | ||||
5632 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { | ||||
5633 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize | ||||
5634 | << "\n"); | ||||
5635 | return false; | ||||
5636 | } | ||||
5637 | |||||
5638 | Register Reg = MRI.createVirtualRegister(RC); | ||||
5639 | Register DstReg = I.getOperand(0).getReg(); | ||||
5640 | |||||
5641 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg); | ||||
5642 | MachineOperand &RegOp = I.getOperand(1); | ||||
5643 | RegOp.setReg(Reg); | ||||
5644 | RBI.constrainGenericRegister(DstReg, *RC, MRI); | ||||
5645 | } else { | ||||
5646 | // We don't need a subregister copy. Save a copy by re-using the | ||||
5647 | // destination register on the final insert. | ||||
5648 | assert(PrevMI && "PrevMI was null?"); | ||||
5649 | PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); | ||||
5650 | constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); | ||||
5651 | } | ||||
5652 | |||||
5653 | I.eraseFromParent(); | ||||
5654 | return true; | ||||
5655 | } | ||||
5656 | |||||
5657 | bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, | ||||
5658 | unsigned NumVecs, | ||||
5659 | MachineInstr &I) { | ||||
5660 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); | ||||
5661 | assert(Opc && "Expected an opcode?"); | ||||
5662 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors"); | ||||
5663 | auto &MRI = *MIB.getMRI(); | ||||
5664 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||
5665 | unsigned Size = Ty.getSizeInBits(); | ||||
5666 | assert((Size == 64 || Size == 128) && | ||||
5667 | "Destination must be 64 bits or 128 bits?"); | ||||
5668 | unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; | ||||
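| // e.g. (illustrative): an aarch64.neon.ld2 of two <4 x s32> results loads | ||||
| // a QQ register tuple, and the loop below copies the individual vectors | ||||
| // out of qsub0 and qsub1. | ||||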
5669 | auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg(); | ||||
5670 | assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?"); | ||||
5671 | auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr}); | ||||
5672 | Load.cloneMemRefs(I); | ||||
5673 | constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); | ||||
5674 | Register SelectedLoadDst = Load->getOperand(0).getReg(); | ||||
5675 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { | ||||
5676 | auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {}) | ||||
5677 | .addReg(SelectedLoadDst, 0, SubReg + Idx); | ||||
5678 | // Emit the subreg copies and immediately select them. | ||||
5679 | // FIXME: We should refactor our copy code into an emitCopy helper and | ||||
5680 | // clean up uses of this pattern elsewhere in the selector. | ||||
5681 | selectCopy(*Vec, TII, MRI, TRI, RBI); | ||||
5682 | } | ||||
5683 | return true; | ||||
5684 | } | ||||
5685 | |||||
5686 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( | ||||
5687 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||
5688 | // Find the intrinsic ID. | ||||
5689 | unsigned IntrinID = I.getIntrinsicID(); | ||||
5690 | |||||
5691 | const LLT S8 = LLT::scalar(8); | ||||
5692 | const LLT S16 = LLT::scalar(16); | ||||
5693 | const LLT S32 = LLT::scalar(32); | ||||
5694 | const LLT S64 = LLT::scalar(64); | ||||
5695 | const LLT P0 = LLT::pointer(0, 64); | ||||
5696 | // Select the instruction. | ||||
5697 | switch (IntrinID) { | ||||
| |||||
5698 | default: | ||||
5699 | return false; | ||||
5700 | case Intrinsic::aarch64_ldxp: | ||||
5701 | case Intrinsic::aarch64_ldaxp: { | ||||
5702 | auto NewI = MIB.buildInstr( | ||||
5703 | IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, | ||||
5704 | {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, | ||||
5705 | {I.getOperand(3)}); | ||||
5706 | NewI.cloneMemRefs(I); | ||||
5707 | constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); | ||||
5708 | break; | ||||
5709 | } | ||||
5710 | case Intrinsic::trap: | ||||
5711 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); | ||||
5712 | break; | ||||
5713 | case Intrinsic::debugtrap: | ||||
5714 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); | ||||
5715 | break; | ||||
5716 | case Intrinsic::ubsantrap: | ||||
5717 | MIB.buildInstr(AArch64::BRK, {}, {}) | ||||
5718 | .addImm(I.getOperand(1).getImm() | ('U' << 8)); | ||||
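| // e.g. (illustrative): ubsantrap(42) emits BRK #0x552a, encoding the | ||||
| // check kind (0x2a) in the low byte and 'U' (0x55) in the high byte. | ||||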
5719 | break; | ||||
5720 | case Intrinsic::aarch64_neon_ld2: { | ||||
5721 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||
5722 | unsigned Opc = 0; | ||||
5723 | if (Ty == LLT::fixed_vector(8, S8)) | ||||
5724 | Opc = AArch64::LD2Twov8b; | ||||
5725 | else if (Ty == LLT::fixed_vector(16, S8)) | ||||
5726 | Opc = AArch64::LD2Twov16b; | ||||
5727 | else if (Ty == LLT::fixed_vector(4, S16)) | ||||
5728 | Opc = AArch64::LD2Twov4h; | ||||
5729 | else if (Ty == LLT::fixed_vector(8, S16)) | ||||
5730 | Opc = AArch64::LD2Twov8h; | ||||
5731 | else if (Ty == LLT::fixed_vector(2, S32)) | ||||
5732 | Opc = AArch64::LD2Twov2s; | ||||
5733 | else if (Ty == LLT::fixed_vector(4, S32)) | ||||
5734 | Opc = AArch64::LD2Twov4s; | ||||
5735 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | ||||
5736 | Opc = AArch64::LD2Twov2d; | ||||
5737 | else if (Ty == S64 || Ty == P0) | ||||
5738 | Opc = AArch64::LD1Twov1d; | ||||
5739 | else | ||||
5740 | llvm_unreachable("Unexpected type for ld2!"); | ||||
5741 | selectVectorLoadIntrinsic(Opc, 2, I); | ||||
5742 | break; | ||||
5743 | } | ||||
5744 | case Intrinsic::aarch64_neon_ld4: { | ||||
5745 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||
5746 | unsigned Opc = 0; | ||||
5747 | if (Ty == LLT::fixed_vector(8, S8)) | ||||
5748 | Opc = AArch64::LD4Fourv8b; | ||||
5749 | else if (Ty == LLT::fixed_vector(16, S8)) | ||||
5750 | Opc = AArch64::LD4Fourv16b; | ||||
5751 | else if (Ty == LLT::fixed_vector(4, S16)) | ||||
5752 | Opc = AArch64::LD4Fourv4h; | ||||
5753 | else if (Ty == LLT::fixed_vector(8, S16)) | ||||
5754 | Opc = AArch64::LD4Fourv8h; | ||||
5755 | else if (Ty == LLT::fixed_vector(2, S32)) | ||||
5756 | Opc = AArch64::LD4Fourv2s; | ||||
5757 | else if (Ty == LLT::fixed_vector(4, S32)) | ||||
5758 | Opc = AArch64::LD4Fourv4s; | ||||
5759 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | ||||
5760 | Opc = AArch64::LD4Fourv2d; | ||||
5761 | else if (Ty == S64 || Ty == P0) | ||||
5762 | Opc = AArch64::LD1Fourv1d; | ||||
5763 | else | ||||
5764 | llvm_unreachable("Unexpected type for ld4!"); | ||||
5765 | selectVectorLoadIntrinsic(Opc, 4, I); | ||||
5766 | break; | ||||
5767 | } | ||||
5768 | case Intrinsic::aarch64_neon_st2: { | ||||
5769 | Register Src1 = I.getOperand(1).getReg(); | ||||
5770 | Register Src2 = I.getOperand(2).getReg(); | ||||
5771 | Register Ptr = I.getOperand(3).getReg(); | ||||
5772 | LLT Ty = MRI.getType(Src1); | ||||
5773 | unsigned Opc; | ||||
5774 | if (Ty == LLT::fixed_vector(8, S8)) | ||||
5775 | Opc = AArch64::ST2Twov8b; | ||||
5776 | else if (Ty == LLT::fixed_vector(16, S8)) | ||||
5777 | Opc = AArch64::ST2Twov16b; | ||||
5778 | else if (Ty == LLT::fixed_vector(4, S16)) | ||||
5779 | Opc = AArch64::ST2Twov4h; | ||||
5780 | else if (Ty == LLT::fixed_vector(8, S16)) | ||||
5781 | Opc = AArch64::ST2Twov8h; | ||||
5782 | else if (Ty == LLT::fixed_vector(2, S32)) | ||||
5783 | Opc = AArch64::ST2Twov2s; | ||||
5784 | else if (Ty == LLT::fixed_vector(4, S32)) | ||||
5785 | Opc = AArch64::ST2Twov4s; | ||||
5786 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | ||||
5787 | Opc = AArch64::ST2Twov2d; | ||||
5788 | else if (Ty == S64 || Ty == P0) | ||||
5789 | Opc = AArch64::ST1Twov1d; | ||||
5790 | else | ||||
5791 | llvm_unreachable("Unexpected type for st2!"); | ||||
5792 | SmallVector<Register, 2> Regs = {Src1, Src2}; | ||||
5793 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) | ||||
5794 | : createDTuple(Regs, MIB); | ||||
5795 | auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr}); | ||||
5796 | Store.cloneMemRefs(I); | ||||
5797 | constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); | ||||
5798 | break; | ||||
5799 | } | ||||
5800 | case Intrinsic::aarch64_mops_memset_tag: { | ||||
5801 | // Transform | ||||
5802 | // %dst:gpr(p0) = \ | ||||
5803 | // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), | ||||
5804 | // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) | ||||
5805 | // where %dst is updated, into | ||||
5806 | // %Rd:GPR64common, %Rn:GPR64) = \ | ||||
5807 | // MOPSMemorySetTaggingPseudo \ | ||||
5808 | // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 | ||||
5809 | // where Rd and Rn are tied. | ||||
5810 | // It is expected that %val has been extended to s64 in legalization. | ||||
5811 | // Note that the order of the size/value operands is swapped. | ||||
5812 | |||||
5813 | Register DstDef = I.getOperand(0).getReg(); | ||||
5814 | // I.getOperand(1) is the intrinsic function | ||||
5815 | Register DstUse = I.getOperand(2).getReg(); | ||||
5816 | Register ValUse = I.getOperand(3).getReg(); | ||||
5817 | Register SizeUse = I.getOperand(4).getReg(); | ||||
5818 | |||||
5819 | // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. | ||||
5820 | // Therefore an additional virtual register is required for the updated size | ||||
5821 | // operand. This value is not accessible via the semantics of the intrinsic. | ||||
5822 | Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64)); | ||||
5823 | |||||
5824 | auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo, | ||||
5825 | {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); | ||||
5826 | Memset.cloneMemRefs(I); | ||||
5827 | constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); | ||||
5828 | break; | ||||
5829 | } | ||||
5830 | } | ||||
5831 | |||||
5832 | I.eraseFromParent(); | ||||
5833 | return true; | ||||
5834 | } | ||||
5835 | |||||
5836 | bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, | ||||
5837 | MachineRegisterInfo &MRI) { | ||||
5838 | unsigned IntrinID = I.getIntrinsicID(); | ||||
5839 | |||||
5840 | switch (IntrinID) { | ||||
5841 | default: | ||||
5842 | break; | ||||
5843 | case Intrinsic::aarch64_crypto_sha1h: { | ||||
5844 | Register DstReg = I.getOperand(0).getReg(); | ||||
5845 | Register SrcReg = I.getOperand(2).getReg(); | ||||
5846 | |||||
5847 | // FIXME: Should this be an assert? | ||||
5848 | if (MRI.getType(DstReg).getSizeInBits() != 32 || | ||||
5849 | MRI.getType(SrcReg).getSizeInBits() != 32) | ||||
5850 | return false; | ||||
5851 | |||||
5852 | // The operation has to happen on FPRs. Set up some new FPR registers for | ||||
5853 | // the source and destination if they are on GPRs. | ||||
5854 | if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { | ||||
5855 | SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); | ||||
5856 | MIB.buildCopy({SrcReg}, {I.getOperand(2)}); | ||||
5857 | |||||
5858 | // Make sure the copy ends up getting constrained properly. | ||||
5859 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), | ||||
5860 | AArch64::GPR32RegClass, MRI); | ||||
5861 | } | ||||
5862 | |||||
5863 | if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) | ||||
5864 | DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); | ||||
5865 | |||||
5866 | // Actually insert the instruction. | ||||
5867 | auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); | ||||
5868 | constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); | ||||
5869 | |||||
5870 | // Did we create a new register for the destination? | ||||
5871 | if (DstReg != I.getOperand(0).getReg()) { | ||||
5872 | // Yep. Copy the result of the instruction back into the original | ||||
5873 | // destination. | ||||
5874 | MIB.buildCopy({I.getOperand(0)}, {DstReg}); | ||||
5875 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), | ||||
5876 | AArch64::GPR32RegClass, MRI); | ||||
5877 | } | ||||
5878 | |||||
5879 | I.eraseFromParent(); | ||||
5880 | return true; | ||||
5881 | } | ||||
5882 | case Intrinsic::ptrauth_sign: { | ||||
5883 | Register DstReg = I.getOperand(0).getReg(); | ||||
5884 | Register ValReg = I.getOperand(2).getReg(); | ||||
5885 | uint64_t Key = I.getOperand(3).getImm(); | ||||
5886 | Register DiscReg = I.getOperand(4).getReg(); | ||||
5887 | auto DiscVal = getIConstantVRegVal(DiscReg, MRI); | ||||
5888 | bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue(); | ||||
5889 | |||||
5890 | if (Key > 3) | ||||
5891 | return false; | ||||
5892 | |||||
5893 | unsigned Opcodes[][4] = { | ||||
5894 | {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB}, | ||||
5895 | {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}}; | ||||
5896 | unsigned Opcode = Opcodes[IsDiscZero][Key]; | ||||
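| // e.g. (illustrative): key 0 (IA) selects PACIZA %val when the | ||||
| // discriminator is a constant zero, and PACIA %val, %disc otherwise. | ||||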
5897 | |||||
5898 | auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg}); | ||||
5899 | |||||
5900 | if (!IsDiscZero) { | ||||
5901 | PAC.addUse(DiscReg); | ||||
5902 | RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI); | ||||
5903 | } | ||||
5904 | |||||
5905 | RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); | ||||
5906 | I.eraseFromParent(); | ||||
5907 | return true; | ||||
5908 | } | ||||
5909 | case Intrinsic::frameaddress: | ||||
5910 | case Intrinsic::returnaddress: { | ||||
5911 | MachineFunction &MF = *I.getParent()->getParent(); | ||||
5912 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
5913 | |||||
5914 | unsigned Depth = I.getOperand(2).getImm(); | ||||
5915 | Register DstReg = I.getOperand(0).getReg(); | ||||
5916 | RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); | ||||
5917 | |||||
5918 | if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { | ||||
5919 | if (!MFReturnAddr) { | ||||
5920 | // Insert the copy from LR/X30 into the entry block, before it can be | ||||
5921 | // clobbered by anything. | ||||
5922 | MFI.setReturnAddressIsTaken(true); | ||||
5923 | MFReturnAddr = getFunctionLiveInPhysReg( | ||||
5924 | MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc()); | ||||
5925 | } | ||||
5926 | |||||
5927 | if (STI.hasPAuth()) { | ||||
5928 | MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); | ||||
5929 | } else { | ||||
5930 | MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); | ||||
5931 | MIB.buildInstr(AArch64::XPACLRI); | ||||
5932 | MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); | ||||
5933 | } | ||||
5934 | |||||
5935 | I.eraseFromParent(); | ||||
5936 | return true; | ||||
5937 | } | ||||
5938 | |||||
5939 | MFI.setFrameAddressIsTaken(true); | ||||
5940 | Register FrameAddr(AArch64::FP); | ||||
5941 | while (Depth--) { | ||||
5942 | Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); | ||||
5943 | auto Ldr = | ||||
5944 | MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0); | ||||
5945 | constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); | ||||
5946 | FrameAddr = NextFrame; | ||||
5947 | } | ||||
5948 | |||||
5949 | if (IntrinID == Intrinsic::frameaddress) | ||||
5950 | MIB.buildCopy({DstReg}, {FrameAddr}); | ||||
5951 | else { | ||||
5952 | MFI.setReturnAddressIsTaken(true); | ||||
5953 | |||||
5954 | if (STI.hasPAuth()) { | ||||
5955 | Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | ||||
5956 | MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); | ||||
5957 | MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); | ||||
5958 | } else { | ||||
5959 | MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) | ||||
5960 | .addImm(1); | ||||
5961 | MIB.buildInstr(AArch64::XPACLRI); | ||||
5962 | MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); | ||||
5963 | } | ||||
5964 | } | ||||
5965 | |||||
5966 | I.eraseFromParent(); | ||||
5967 | return true; | ||||
5968 | } | ||||
5969 | case Intrinsic::swift_async_context_addr: | ||||
5970 | auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()}, | ||||
5971 | {Register(AArch64::FP)}) | ||||
5972 | .addImm(8) | ||||
5973 | .addImm(0); | ||||
5974 | constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI); | ||||
5975 | |||||
5976 | MF->getFrameInfo().setFrameAddressIsTaken(true); | ||||
5977 | MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); | ||||
5978 | I.eraseFromParent(); | ||||
5979 | return true; | ||||
5980 | } | ||||
5981 | return false; | ||||
5982 | } | ||||
5983 | |||||
5984 | InstructionSelector::ComplexRendererFns | ||||
5985 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { | ||||
5986 | auto MaybeImmed = getImmedFromMO(Root); | ||||
5987 | if (MaybeImmed == None || *MaybeImmed > 31) | ||||
5988 | return None; | ||||
5989 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; | ||||
5990 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||
5991 | } | ||||
5992 | |||||
5993 | InstructionSelector::ComplexRendererFns | ||||
5994 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { | ||||
5995 | auto MaybeImmed = getImmedFromMO(Root); | ||||
5996 | if (MaybeImmed == None || *MaybeImmed > 31) | ||||
5997 | return None; | ||||
5998 | uint64_t Enc = 31 - *MaybeImmed; | ||||
5999 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||
6000 | } | ||||
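     | // Together, ShiftA/ShiftB render the two operands used when a left shift | ||||
     | // is selected as a UBFM/SBFM bitfield move: e.g. a 32-bit shl by 5 uses | ||||
     | // immr = (32 - 5) & 31 = 27 and imms = 31 - 5 = 26. | ||||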
6001 | |||||
6002 | InstructionSelector::ComplexRendererFns | ||||
6003 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { | ||||
6004 | auto MaybeImmed = getImmedFromMO(Root); | ||||
6005 | if (MaybeImmed == None || *MaybeImmed > 63) | ||||
6006 | return None; | ||||
6007 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; | ||||
6008 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||
6009 | } | ||||
6010 | |||||
6011 | InstructionSelector::ComplexRendererFns | ||||
6012 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { | ||||
6013 | auto MaybeImmed = getImmedFromMO(Root); | ||||
6014 | if (MaybeImmed == None || *MaybeImmed > 63) | ||||
6015 | return None; | ||||
6016 | uint64_t Enc = 63 - *MaybeImmed; | ||||
6017 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||
6018 | } | ||||
6019 | |||||
6020 | /// Helper to select an immediate value that can be represented as a 12-bit | ||||
6021 | /// value shifted left by either 0 or 12. If it is possible to do so, return | ||||
6022 | /// the immediate and shift value. If not, return None. | ||||
6023 | /// | ||||
6024 | /// Used by selectArithImmed and selectNegArithImmed. | ||||
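     | /// E.g. 0xabc is encodable directly (shift 0) and 0xabc000 as 0xabc with | ||||
     | /// a shift of 12, while 0xabc00 is rejected: its low 12 bits are nonzero | ||||
     | /// and it does not fit in 12 bits unshifted. | ||||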
6025 | InstructionSelector::ComplexRendererFns | ||||
6026 | AArch64InstructionSelector::select12BitValueWithLeftShift( | ||||
6027 | uint64_t Immed) const { | ||||
6028 | unsigned ShiftAmt; | ||||
6029 | if (Immed >> 12 == 0) { | ||||
6030 | ShiftAmt = 0; | ||||
6031 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { | ||||
6032 | ShiftAmt = 12; | ||||
6033 | Immed = Immed >> 12; | ||||
6034 | } else | ||||
6035 | return None; | ||||
6036 | |||||
6037 | unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); | ||||
6038 | return {{ | ||||
6039 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, | ||||
6040 | [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, | ||||
6041 | }}; | ||||
6042 | } | ||||
6043 | |||||
6044 | /// SelectArithImmed - Select an immediate value that can be represented as | ||||
6045 | /// a 12-bit value shifted left by either 0 or 12. If so, return renderers | ||||
6046 | /// for the 12-bit value and the shifter operand encoding. | ||||
6047 | InstructionSelector::ComplexRendererFns | ||||
6048 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { | ||||
6049 | // This function is called from the addsub_shifted_imm ComplexPattern, | ||||
6050 | // which lists [imm] as the list of opcodes it's interested in; however, | ||||
6051 | // we still need to check whether the operand is actually an immediate | ||||
6052 | // here because the ComplexPattern opcode list is only used in | ||||
6053 | // root-level opcode matching. | ||||
6054 | auto MaybeImmed = getImmedFromMO(Root); | ||||
6055 | if (MaybeImmed == None) | ||||
6056 | return None; | ||||
6057 | return select12BitValueWithLeftShift(*MaybeImmed); | ||||
6058 | } | ||||
6059 | |||||
6060 | /// SelectNegArithImmed - As above, but negates the value before trying to | ||||
6061 | /// select it. | ||||
6062 | InstructionSelector::ComplexRendererFns | ||||
6063 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { | ||||
6064 | // We need a register here, because we need to know if we have a 64 or 32 | ||||
6065 | // bit immediate. | ||||
6066 | if (!Root.isReg()) | ||||
6067 | return None; | ||||
6068 | auto MaybeImmed = getImmedFromMO(Root); | ||||
6069 | if (MaybeImmed == None) | ||||
6070 | return None; | ||||
6071 | uint64_t Immed = *MaybeImmed; | ||||
6072 | |||||
6073 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" | ||||
6074 | // have the opposite effect on the C flag, so this pattern mustn't match under | ||||
6075 | // those circumstances. | ||||
6076 | if (Immed == 0) | ||||
6077 | return None; | ||||
6078 | |||||
6079 | // Check whether we're dealing with a 32-bit or a 64-bit type on the | ||||
6080 | // root. | ||||
6081 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||
6082 | if (MRI.getType(Root.getReg()).getSizeInBits() == 32) | ||||
6083 | Immed = ~((uint32_t)Immed) + 1; | ||||
6084 | else | ||||
6085 | Immed = ~Immed + 1ULL; | ||||
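     | // E.g. for a 32-bit compare against -16 (0xfffffff0), the negation yields | ||||
     | // 16, so "cmp wN, #-16" can be selected as "cmn wN, #16". | ||||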
6086 | |||||
6087 | if (Immed & 0xFFFFFFFFFF000000ULL) | ||||
6088 | return None; | ||||
6089 | |||||
6090 | Immed &= 0xFFFFFFULL; | ||||
6091 | return select12BitValueWithLeftShift(Immed); | ||||
6092 | } | ||||
6093 | |||||
6094 | /// Return true if it is worth folding MI into an extended register. That is, | ||||
6095 | /// if it's safe to pull it into the addressing mode of a load or store as a | ||||
6096 | /// shift. | ||||
6097 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( | ||||
6098 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { | ||||
6099 | // Always fold if there is one use, or if we're optimizing for size. | ||||
6100 | Register DefReg = MI.getOperand(0).getReg(); | ||||
6101 | if (MRI.hasOneNonDBGUse(DefReg) || | ||||
6102 | MI.getParent()->getParent()->getFunction().hasOptSize()) | ||||
6103 | return true; | ||||
6104 | |||||
6105 | // It's better to avoid folding and recomputing shifts when we don't have a | ||||
6106 | // fastpath. | ||||
6107 | if (!STI.hasLSLFast()) | ||||
6108 | return false; | ||||
6109 | |||||
6110 | // We have a fastpath, so folding a shift in and potentially computing it | ||||
6111 | // many times may be beneficial. Check if this is only used in memory ops. | ||||
6112 | // If it is, then we should fold. | ||||
6113 | return all_of(MRI.use_nodbg_instructions(DefReg), | ||||
6114 | [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); | ||||
6115 | } | ||||
6116 | |||||
6117 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { | ||||
6118 | switch (Type) { | ||||
6119 | case AArch64_AM::SXTB: | ||||
6120 | case AArch64_AM::SXTH: | ||||
6121 | case AArch64_AM::SXTW: | ||||
6122 | return true; | ||||
6123 | default: | ||||
6124 | return false; | ||||
6125 | } | ||||
6126 | } | ||||
6127 | |||||
6128 | InstructionSelector::ComplexRendererFns | ||||
6129 | AArch64InstructionSelector::selectExtendedSHL( | ||||
6130 | MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, | ||||
6131 | unsigned SizeInBytes, bool WantsExt) const { | ||||
6132 | assert(Base.isReg() && "Expected base to be a register operand"); | ||||
6133 | assert(Offset.isReg() && "Expected offset to be a register operand"); | ||||
6134 | |||||
6135 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||
6136 | MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); | ||||
6137 | if (!OffsetInst) | ||||
6138 | return None; | ||||
6139 | |||||
6140 | unsigned OffsetOpc = OffsetInst->getOpcode(); | ||||
6141 | bool LookedThroughZExt = false; | ||||
6142 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { | ||||
6143 | // Try to look through a ZEXT. | ||||
6144 | if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) | ||||
6145 | return None; | ||||
6146 | |||||
6147 | OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); | ||||
6148 | OffsetOpc = OffsetInst->getOpcode(); | ||||
6149 | LookedThroughZExt = true; | ||||
6150 | |||||
6151 | if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) | ||||
6152 | return None; | ||||
6153 | } | ||||
6154 | // Make sure that the memory op is a valid size. | ||||
6155 | int64_t LegalShiftVal = Log2_32(SizeInBytes); | ||||
6156 | if (LegalShiftVal == 0) | ||||
6157 | return None; | ||||
6158 | if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) | ||||
6159 | return None; | ||||
6160 | |||||
6161 | // Now, try to find the specific G_CONSTANT. Start by assuming that the | ||||
6162 | // register we will offset is the LHS, and the register containing the | ||||
6163 | // constant is the RHS. | ||||
6164 | Register OffsetReg = OffsetInst->getOperand(1).getReg(); | ||||
6165 | Register ConstantReg = OffsetInst->getOperand(2).getReg(); | ||||
6166 | auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | ||||
6167 | if (!ValAndVReg) { | ||||
6168 | // We didn't get a constant on the RHS. If the opcode is a shift, then | ||||
6169 | // we're done. | ||||
6170 | if (OffsetOpc == TargetOpcode::G_SHL) | ||||
6171 | return None; | ||||
6172 | |||||
6173 | // If we have a G_MUL, we can use either register. Try looking at the RHS. | ||||
6174 | std::swap(OffsetReg, ConstantReg); | ||||
6175 | ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); | ||||
6176 | if (!ValAndVReg) | ||||
6177 | return None; | ||||
6178 | } | ||||
6179 | |||||
6180 | // The value must fit into 3 bits, and must be positive. Make sure that is | ||||
6181 | // true. | ||||
6182 | int64_t ImmVal = ValAndVReg->Value.getSExtValue(); | ||||
6183 | |||||
6184 | // Since we're going to pull this into a shift, the constant value must be | ||||
6185 | // a power of 2. If we got a multiply, then we need to check this. | ||||
6186 | if (OffsetOpc == TargetOpcode::G_MUL) { | ||||
6187 | if (!isPowerOf2_32(ImmVal)) | ||||
6188 | return None; | ||||
6189 | |||||
6190 | // Got a power of 2. So, the amount we'll shift is the log base-2 of that. | ||||
6191 | ImmVal = Log2_32(ImmVal); | ||||
6192 | } | ||||
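     | // E.g. a G_MUL by 8 shifts by log2(8) = 3, matching an ldr/str with | ||||
     | // "lsl #3" for an 8-byte access. | ||||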
6193 | |||||
6194 | if ((ImmVal & 0x7) != ImmVal) | ||||
6195 | return None; | ||||
6196 | |||||
6197 | // We are only allowed to shift by LegalShiftVal. This shift value is built | ||||
6198 | // into the instruction, so we can't just use whatever we want. | ||||
6199 | if (ImmVal != LegalShiftVal) | ||||
6200 | return None; | ||||
6201 | |||||
6202 | unsigned SignExtend = 0; | ||||
6203 | if (WantsExt) { | ||||
6204 | // Check if the offset is defined by an extend, unless we looked through a | ||||
6205 | // G_ZEXT earlier. | ||||
6206 | if (!LookedThroughZExt) { | ||||
6207 | MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); | ||||
6208 | auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); | ||||
6209 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||
6210 | return None; | ||||
6211 | |||||
6212 | SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; | ||||
6213 | // We only support SXTW for signed extension here. | ||||
6214 | if (SignExtend && Ext != AArch64_AM::SXTW) | ||||
6215 | return None; | ||||
6216 | OffsetReg = ExtInst->getOperand(1).getReg(); | ||||
6217 | } | ||||
6218 | |||||
6219 | // Need a 32-bit wide register here. | ||||
6220 | MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); | ||||
6221 | OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB); | ||||
6222 | } | ||||
6223 | |||||
6224 | // We can use the LHS of the GEP as the base, and the LHS of the shift as an | ||||
6225 | // offset. Signify that we are shifting by setting the shift flag to 1. | ||||
6226 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, | ||||
6227 | [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, | ||||
6228 | [=](MachineInstrBuilder &MIB) { | ||||
6229 | // Need to add both immediates here to make sure that they are both | ||||
6230 | // added to the instruction. | ||||
6231 | MIB.addImm(SignExtend); | ||||
6232 | MIB.addImm(1); | ||||
6233 | }}}; | ||||
6234 | } | ||||
6235 | |||||
6236 | /// This is used for computing addresses like this: | ||||
6237 | /// | ||||
6238 | /// ldr x1, [x2, x3, lsl #3] | ||||
6239 | /// | ||||
6240 | /// Where x2 is the base register, and x3 is an offset register. The shift-left | ||||
6241 | /// is a constant value specific to this load instruction. That is, we'll never | ||||
6242 | /// see anything other than a 3 here (which corresponds to the size of the | ||||
6243 | /// element being loaded). | ||||
6244 | InstructionSelector::ComplexRendererFns | ||||
6245 | AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( | ||||
6246 | MachineOperand &Root, unsigned SizeInBytes) const { | ||||
6247 | if (!Root.isReg()) | ||||
6248 | return None; | ||||
6249 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||
6250 | |||||
6251 | // We want to find something like this: | ||||
6252 | // | ||||
6253 | // val = G_CONSTANT LegalShiftVal | ||||
6254 | // shift = G_SHL off_reg val | ||||
6255 | // ptr = G_PTR_ADD base_reg shift | ||||
6256 | // x = G_LOAD ptr | ||||
6257 | // | ||||
6258 | // And fold it into this addressing mode: | ||||
6259 | // | ||||
6260 | // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] | ||||
6261 | |||||
6262 | // Check if we can find the G_PTR_ADD. | ||||
6263 | MachineInstr *PtrAdd = | ||||
6264 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | ||||
6265 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) | ||||
6266 | return None; | ||||
6267 | |||||
6268 | // Now, try to match an opcode which will match our specific offset. | ||||
6269 | // We want a G_SHL or a G_MUL. | ||||
6270 | MachineInstr *OffsetInst = | ||||
6271 | getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); | ||||
6272 | return selectExtendedSHL(Root, PtrAdd->getOperand(1), | ||||
6273 | OffsetInst->getOperand(0), SizeInBytes, | ||||
6274 | /*WantsExt=*/false); | ||||
6275 | } | ||||
6276 | |||||
6277 | /// This is used for computing addresses like this: | ||||
6278 | /// | ||||
6279 | /// ldr x1, [x2, x3] | ||||
6280 | /// | ||||
6281 | /// Where x2 is the base register, and x3 is an offset register. | ||||
6282 | /// | ||||
6283 | /// When it is possible (or profitable) to fold a G_PTR_ADD into the | ||||
6284 | /// address calculation, this will do so. Otherwise, it will return None. | ||||
6285 | InstructionSelector::ComplexRendererFns | ||||
6286 | AArch64InstructionSelector::selectAddrModeRegisterOffset( | ||||
6287 | MachineOperand &Root) const { | ||||
6288 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||
6289 | |||||
6290 | // We need a GEP. | ||||
6291 | MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); | ||||
6292 | if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) | ||||
6293 | return None; | ||||
6294 | |||||
6295 | // If this is used more than once, let's not bother folding. | ||||
6296 | // TODO: Check if they are memory ops. If they are, then we can still fold | ||||
6297 | // without having to recompute anything. | ||||
6298 | if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) | ||||
6299 | return None; | ||||
6300 | |||||
6301 | // Base is the GEP's LHS, offset is its RHS. | ||||
6302 | return {{[=](MachineInstrBuilder &MIB) { | ||||
6303 | MIB.addUse(Gep->getOperand(1).getReg()); | ||||
6304 | }, | ||||
6305 | [=](MachineInstrBuilder &MIB) { | ||||
6306 | MIB.addUse(Gep->getOperand(2).getReg()); | ||||
6307 | }, | ||||
6308 | [=](MachineInstrBuilder &MIB) { | ||||
6309 | // Need to add both immediates here to make sure that they are both | ||||
6310 | // added to the instruction. | ||||
6311 | MIB.addImm(0); | ||||
6312 | MIB.addImm(0); | ||||
6313 | }}}; | ||||
6314 | } | ||||
6315 | |||||
6316 | /// This is intended to be equivalent to selectAddrModeXRO in | ||||
6317 | /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. | ||||
6318 | InstructionSelector::ComplexRendererFns | ||||
6319 | AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, | ||||
6320 | unsigned SizeInBytes) const { | ||||
6321 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||
6322 | if (!Root.isReg()) | ||||
6323 | return None; | ||||
6324 | MachineInstr *PtrAdd = | ||||
6325 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | ||||
6326 | if (!PtrAdd) | ||||
6327 | return None; | ||||
6328 | |||||
6329 | // Check for an immediate which cannot be encoded in the [base + imm] | ||||
6330 | // addressing mode, and can't be encoded in an add/sub. If this happens, we'll | ||||
6331 | // end up with code like: | ||||
6332 | // | ||||
6333 | // mov x0, wide | ||||
6334 | // add x1, base, x0 | ||||
6335 | // ldr x2, [x1, x0] | ||||
6336 | // | ||||
6337 | // In this situation, we can use the [base, xreg] addressing mode to save an | ||||
6338 | // add/sub: | ||||
6339 | // | ||||
6340 | // mov x0, wide | ||||
6341 | // ldr x2, [base, x0] | ||||
6342 | auto ValAndVReg = | ||||
6343 | getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); | ||||
6344 | if (ValAndVReg) { | ||||
6345 | unsigned Scale = Log2_32(SizeInBytes); | ||||
6346 | int64_t ImmOff = ValAndVReg->Value.getSExtValue(); | ||||
6347 | |||||
6348 | // Skip immediates that can be selected in the load/store addressing | ||||
6349 | // mode. | ||||
6350 | if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && | ||||
6351 | ImmOff < (0x1000 << Scale)) | ||||
6352 | return None; | ||||
6353 | |||||
6354 | // Helper lambda to decide whether or not it is preferable to emit an add. | ||||
6355 | auto isPreferredADD = [](int64_t ImmOff) { | ||||
6356 | // Constants in [0x0, 0xfff] can be encoded in an add. | ||||
6357 | if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) | ||||
6358 | return true; | ||||
6359 | |||||
6360 | // Can it be encoded in an add lsl #12? | ||||
6361 | if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) | ||||
6362 | return false; | ||||
6363 | |||||
6364 | // It can be encoded in an add lsl #12, but we may not want to. If it is | ||||
6365 | // possible to select this as a single movz, then prefer that. A single | ||||
6366 | // movz is faster than an add with a shift. | ||||
6367 | return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && | ||||
6368 | (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; | ||||
6369 | }; | ||||
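     | // E.g. ImmOff = 0x4000 can be materialized by a single movz, so the add | ||||
     | // lsl #12 form is not preferred; ImmOff = 0x5a000 cannot, so for it the | ||||
     | // single "add base, #0x5a, lsl #12" is preferred and we bail out below. | ||||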
6370 | |||||
6371 | // If the immediate can be encoded in a single add/sub, then bail out. | ||||
6372 | if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) | ||||
6373 | return None; | ||||
6374 | } | ||||
6375 | |||||
6376 | // Try to fold shifts into the addressing mode. | ||||
6377 | auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); | ||||
6378 | if (AddrModeFns) | ||||
6379 | return AddrModeFns; | ||||
6380 | |||||
6381 | // If that doesn't work, see if it's possible to fold in registers from | ||||
6382 | // a GEP. | ||||
6383 | return selectAddrModeRegisterOffset(Root); | ||||
6384 | } | ||||
6385 | |||||
6386 | /// This is used for computing addresses like this: | ||||
6387 | /// | ||||
6388 | /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] | ||||
6389 | /// | ||||
6390 | /// Where we have a 64-bit base register, a 32-bit offset register, and an | ||||
6391 | /// extend (which may or may not be signed). | ||||
6392 | InstructionSelector::ComplexRendererFns | ||||
6393 | AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, | ||||
6394 | unsigned SizeInBytes) const { | ||||
6395 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||
6396 | |||||
6397 | MachineInstr *PtrAdd = | ||||
6398 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | ||||
6399 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) | ||||
6400 | return None; | ||||
6401 | |||||
6402 | MachineOperand &LHS = PtrAdd->getOperand(1); | ||||
6403 | MachineOperand &RHS = PtrAdd->getOperand(2); | ||||
6404 | MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); | ||||
6405 | |||||
6406 | // The first case is the same as selectAddrModeXRO, except we need an extend. | ||||
6407 | // In this case, we try to find a shift and extend, and fold them into the | ||||
6408 | // addressing mode. | ||||
6409 | // | ||||
6410 | // E.g. | ||||
6411 | // | ||||
6412 | // off_reg = G_Z/S/ANYEXT ext_reg | ||||
6413 | // val = G_CONSTANT LegalShiftVal | ||||
6414 | // shift = G_SHL off_reg val | ||||
6415 | // ptr = G_PTR_ADD base_reg shift | ||||
6416 | // x = G_LOAD ptr | ||||
6417 | // | ||||
6418 | // In this case we can get a load like this: | ||||
6419 | // | ||||
6420 | // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] | ||||
6421 | auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), | ||||
6422 | SizeInBytes, /*WantsExt=*/true); | ||||
6423 | if (ExtendedShl) | ||||
6424 | return ExtendedShl; | ||||
6425 | |||||
6426 | // There was no shift. We can still try to fold in a G_Z/S/ANYEXT on its own. | ||||
6427 | // | ||||
6428 | // e.g. | ||||
6429 | // ldr something, [base_reg, ext_reg, sxtw] | ||||
6430 | if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) | ||||
6431 | return None; | ||||
6432 | |||||
6433 | // Check if this is an extend. We'll get an extend type if it is. | ||||
6434 | AArch64_AM::ShiftExtendType Ext = | ||||
6435 | getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); | ||||
6436 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||
6437 | return None; | ||||
6438 | |||||
6439 | // Need a 32-bit wide register. | ||||
6440 | MachineIRBuilder MIB(*PtrAdd); | ||||
6441 | Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), | ||||
6442 | AArch64::GPR32RegClass, MIB); | ||||
6443 | unsigned SignExtend = Ext == AArch64_AM::SXTW; | ||||
6444 | |||||
6445 | // Base is LHS, offset is ExtReg. | ||||
6446 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, | ||||
6447 | [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, | ||||
6448 | [=](MachineInstrBuilder &MIB) { | ||||
6449 | MIB.addImm(SignExtend); | ||||
6450 | MIB.addImm(0); | ||||
6451 | }}}; | ||||
6452 | } | ||||
6453 | |||||
6454 | /// Select a "register plus unscaled signed 9-bit immediate" address. This | ||||
6455 | /// should only match when there is an offset that is not valid for a scaled | ||||
6456 | /// immediate addressing mode. The "Size" argument is the size in bytes of the | ||||
6457 | /// memory reference, which is needed here to know what is valid for a scaled | ||||
6458 | /// immediate. | ||||
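     | /// E.g. a 4-byte load with offset 3 cannot use the scaled form (the | ||||
     | /// offset must be a multiple of 4), but 3 fits the unscaled signed 9-bit | ||||
     | /// range [-256, 255], so LDUR-style addressing is used instead. | ||||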
6459 | InstructionSelector::ComplexRendererFns | ||||
6460 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, | ||||
6461 | unsigned Size) const { | ||||
6462 | MachineRegisterInfo &MRI = | ||||
6463 | Root.getParent()->getParent()->getParent()->getRegInfo(); | ||||
6464 | |||||
6465 | if (!Root.isReg()) | ||||
6466 | return None; | ||||
6467 | |||||
6468 | if (!isBaseWithConstantOffset(Root, MRI)) | ||||
6469 | return None; | ||||
6470 | |||||
6471 | MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); | ||||
6472 | if (!RootDef) | ||||
6473 | return None; | ||||
6474 | |||||
6475 | MachineOperand &OffImm = RootDef->getOperand(2); | ||||
6476 | if (!OffImm.isReg()) | ||||
6477 | return None; | ||||
6478 | MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); | ||||
6479 | if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) | ||||
6480 | return None; | ||||
6481 | int64_t RHSC; | ||||
6482 | MachineOperand &RHSOp1 = RHS->getOperand(1); | ||||
6483 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) | ||||
6484 | return None; | ||||
6485 | RHSC = RHSOp1.getCImm()->getSExtValue(); | ||||
6486 | |||||
6487 | // If the offset is valid as a scaled immediate, don't match here. | ||||
6488 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) | ||||
6489 | return None; | ||||
6490 | if (RHSC >= -256 && RHSC < 256) { | ||||
6491 | MachineOperand &Base = RootDef->getOperand(1); | ||||
6492 | return {{ | ||||
6493 | [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, | ||||
6494 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, | ||||
6495 | }}; | ||||
6496 | } | ||||
6497 | return None; | ||||
6498 | } | ||||
6499 | |||||
6500 | InstructionSelector::ComplexRendererFns | ||||
6501 | AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, | ||||
6502 | unsigned Size, | ||||
6503 | MachineRegisterInfo &MRI) const { | ||||
6504 | if (RootDef.getOpcode() != AArch64::G_ADD_LOW) | ||||
6505 | return None; | ||||
6506 | MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); | ||||
6507 | if (Adrp.getOpcode() != AArch64::ADRP) | ||||
6508 | return None; | ||||
6509 | |||||
6510 | // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. | ||||
6511 | auto Offset = Adrp.getOperand(1).getOffset(); | ||||
6512 | if (Offset % Size != 0) | ||||
6513 | return None; | ||||
6514 | |||||
6515 | auto GV = Adrp.getOperand(1).getGlobal(); | ||||
6516 | if (GV->isThreadLocal()) | ||||
6517 | return None; | ||||
6518 | |||||
6519 | auto &MF = *RootDef.getParent()->getParent(); | ||||
6520 | if (GV->getPointerAlignment(MF.getDataLayout()) < Size) | ||||
6521 | return None; | ||||
6522 | |||||
6523 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); | ||||
6524 | MachineIRBuilder MIRBuilder(RootDef); | ||||
6525 | Register AdrpReg = Adrp.getOperand(0).getReg(); | ||||
6526 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, | ||||
6527 | [=](MachineInstrBuilder &MIB) { | ||||
6528 | MIB.addGlobalAddress(GV, Offset, | ||||
6529 | OpFlags | AArch64II::MO_PAGEOFF | | ||||
6530 | AArch64II::MO_NC); | ||||
6531 | }}}; | ||||
6532 | } | ||||
6533 | |||||
6534 | /// Select a "register plus scaled unsigned 12-bit immediate" address. The | ||||
6535 | /// "Size" argument is the size in bytes of the memory reference, which | ||||
6536 | /// determines the scale. | ||||
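     | /// E.g. for an 8-byte access the immediate is offset / 8, so offsets that | ||||
     | /// are multiples of 8 in [0, 32760] can be encoded directly. | ||||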
6537 | InstructionSelector::ComplexRendererFns | ||||
6538 | AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, | ||||
6539 | unsigned Size) const { | ||||
6540 | MachineFunction &MF = *Root.getParent()->getParent()->getParent(); | ||||
6541 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
6542 | |||||
6543 | if (!Root.isReg()) | ||||
6544 | return None; | ||||
6545 | |||||
6546 | MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); | ||||
6547 | if (!RootDef) | ||||
6548 | return None; | ||||
6549 | |||||
6550 | if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { | ||||
6551 | return {{ | ||||
6552 | [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, | ||||
6553 | [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, | ||||
6554 | }}; | ||||
6555 | } | ||||
6556 | |||||
6557 | CodeModel::Model CM = MF.getTarget().getCodeModel(); | ||||
6558 | // Check if we can fold in the ADD of small code model ADRP + ADD address. | ||||
6559 | if (CM == CodeModel::Small) { | ||||
6560 | auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); | ||||
6561 | if (OpFns) | ||||
6562 | return OpFns; | ||||
6563 | } | ||||
6564 | |||||
6565 | if (isBaseWithConstantOffset(Root, MRI)) { | ||||
6566 | MachineOperand &LHS = RootDef->getOperand(1); | ||||
6567 | MachineOperand &RHS = RootDef->getOperand(2); | ||||
6568 | MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); | ||||
6569 | MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); | ||||
6570 | if (LHSDef && RHSDef) { | ||||
6571 | int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); | ||||
6572 | unsigned Scale = Log2_32(Size); | ||||
6573 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { | ||||
6574 | if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) | ||||
6575 | return {{ | ||||
6576 | [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, | ||||
6577 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, | ||||
6578 | }}; | ||||
6579 | |||||
6580 | return {{ | ||||
6581 | [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, | ||||
6582 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, | ||||
6583 | }}; | ||||
6584 | } | ||||
6585 | } | ||||
6586 | } | ||||
6587 | |||||
6588 | // Before falling back to our general case, check if the unscaled | ||||
6589 | // instructions can handle this. If so, that's preferable. | ||||
6590 | if (selectAddrModeUnscaled(Root, Size).hasValue()) | ||||
6591 | return None; | ||||
6592 | |||||
6593 | return {{ | ||||
6594 | [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, | ||||
6595 | [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, | ||||
6596 | }}; | ||||
6597 | } | ||||
6598 | |||||
6599 | /// Given a shift instruction, return the correct shift type for that | ||||
6600 | /// instruction. | ||||
6601 | static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { | ||||
6602 | switch (MI.getOpcode()) { | ||||
6603 | default: | ||||
6604 | return AArch64_AM::InvalidShiftExtend; | ||||
6605 | case TargetOpcode::G_SHL: | ||||
6606 | return AArch64_AM::LSL; | ||||
6607 | case TargetOpcode::G_LSHR: | ||||
6608 | return AArch64_AM::LSR; | ||||
6609 | case TargetOpcode::G_ASHR: | ||||
6610 | return AArch64_AM::ASR; | ||||
6611 | case TargetOpcode::G_ROTR: | ||||
6612 | return AArch64_AM::ROR; | ||||
6613 | } | ||||
6614 | } | ||||
6615 | |||||
6616 | /// Select a "shifted register" operand. If the value is not shifted, set the | ||||
6617 | /// shift operand to a default value of "lsl 0". | ||||
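     | /// E.g. a G_SHL by 3 feeding a G_ADD can fold as "add x0, x1, x2, lsl #3". | ||||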
6618 | InstructionSelector::ComplexRendererFns | ||||
6619 | AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root, | ||||
6620 | bool AllowROR) const { | ||||
6621 | if (!Root.isReg()) | ||||
6622 | return None; | ||||
6623 | MachineRegisterInfo &MRI = | ||||
6624 | Root.getParent()->getParent()->getParent()->getRegInfo(); | ||||
6625 | |||||
6626 | // Check if the operand is defined by an instruction which corresponds to | ||||
6627 | // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. | ||||
6628 | MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); | ||||
6629 | if (!ShiftInst) | ||||
6630 | return None; | ||||
6631 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); | ||||
6632 | if (ShType == AArch64_AM::InvalidShiftExtend) | ||||
6633 | return None; | ||||
6634 | if (ShType == AArch64_AM::ROR && !AllowROR) | ||||
6635 | return None; | ||||
6636 | if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) | ||||
6637 | return None; | ||||
6638 | |||||
6639 | // Need an immediate on the RHS. | ||||
6640 | MachineOperand &ShiftRHS = ShiftInst->getOperand(2); | ||||
6641 | auto Immed = getImmedFromMO(ShiftRHS); | ||||
6642 | if (!Immed) | ||||
6643 | return None; | ||||
6644 | |||||
6645 | // We have something that we can fold. Fold in the shift's LHS and RHS into | ||||
6646 | // the instruction. | ||||
6647 | MachineOperand &ShiftLHS = ShiftInst->getOperand(1); | ||||
6648 | Register ShiftReg = ShiftLHS.getReg(); | ||||
6649 | |||||
6650 | unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); | ||||
6651 | unsigned Val = *Immed & (NumBits - 1); | ||||
6652 | unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); | ||||
6653 | |||||
6654 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, | ||||
6655 | [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; | ||||
6656 | } | ||||
6657 | |||||
6658 | AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( | ||||
6659 | MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { | ||||
6660 | unsigned Opc = MI.getOpcode(); | ||||
6661 | |||||
6662 | // Handle explicit extend instructions first. | ||||
6663 | if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { | ||||
6664 | unsigned Size; | ||||
6665 | if (Opc == TargetOpcode::G_SEXT) | ||||
6666 | Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); | ||||
6667 | else | ||||
6668 | Size = MI.getOperand(2).getImm(); | ||||
6669 | assert(Size != 64 && "Extend from 64 bits?"); | ||||
6670 | switch (Size) { | ||||
6671 | case 8: | ||||
6672 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; | ||||
6673 | case 16: | ||||
6674 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; | ||||
6675 | case 32: | ||||
6676 | return AArch64_AM::SXTW; | ||||
6677 | default: | ||||
6678 | return AArch64_AM::InvalidShiftExtend; | ||||
6679 | } | ||||
6680 | } | ||||
6681 | |||||
6682 | if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { | ||||
6683 | unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); | ||||
6684 | assert(Size != 64 && "Extend from 64 bits?"); | ||||
6685 | switch (Size) { | ||||
6686 | case 8: | ||||
6687 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; | ||||
6688 | case 16: | ||||
6689 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; | ||||
6690 | case 32: | ||||
6691 | return AArch64_AM::UXTW; | ||||
6692 | default: | ||||
6693 | return AArch64_AM::InvalidShiftExtend; | ||||
6694 | } | ||||
6695 | } | ||||
6696 | |||||
6697 | // Don't have an explicit extend. Try to handle a G_AND with a constant mask | ||||
6698 | // on the RHS. | ||||
6699 | if (Opc != TargetOpcode::G_AND) | ||||
6700 | return AArch64_AM::InvalidShiftExtend; | ||||
6701 | |||||
6702 | Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); | ||||
6703 | if (!MaybeAndMask) | ||||
6704 | return AArch64_AM::InvalidShiftExtend; | ||||
6705 | uint64_t AndMask = *MaybeAndMask; | ||||
6706 | switch (AndMask) { | ||||
6707 | default: | ||||
6708 | return AArch64_AM::InvalidShiftExtend; | ||||
6709 | case 0xFF: | ||||
6710 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; | ||||
6711 | case 0xFFFF: | ||||
6712 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; | ||||
6713 | case 0xFFFFFFFF: | ||||
6714 | return AArch64_AM::UXTW; | ||||
6715 | } | ||||
6716 | } | ||||
6717 | |||||
6718 | Register AArch64InstructionSelector::moveScalarRegClass( | ||||
6719 | Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { | ||||
6720 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||
6721 | auto Ty = MRI.getType(Reg); | ||||
6722 | assert(!Ty.isVector() && "Expected scalars only!"); | ||||
6723 | if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) | ||||
6724 | return Reg; | ||||
6725 | |||||
6726 | // Create a copy and immediately select it. | ||||
6727 | // FIXME: We should have an emitCopy function? | ||||
6728 | auto Copy = MIB.buildCopy({&RC}, {Reg}); | ||||
6729 | selectCopy(*Copy, TII, MRI, TRI, RBI); | ||||
6730 | return Copy.getReg(0); | ||||
6731 | } | ||||
6732 | |||||
6733 | /// Select an "extended register" operand. This operand folds in an extend | ||||
6734 | /// followed by an optional left shift. | ||||
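     | /// E.g. a G_SEXT from s16 followed by a G_SHL by 2 can fold as | ||||
     | /// "add x0, x1, w2, sxth #2" (shift amounts up to 4 are allowed). | ||||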
6735 | InstructionSelector::ComplexRendererFns | ||||
6736 | AArch64InstructionSelector::selectArithExtendedRegister( | ||||
6737 | MachineOperand &Root) const { | ||||
6738 | if (!Root.isReg()) | ||||
6739 | return None; | ||||
6740 | MachineRegisterInfo &MRI = | ||||
6741 | Root.getParent()->getParent()->getParent()->getRegInfo(); | ||||
6742 | |||||
6743 | uint64_t ShiftVal = 0; | ||||
6744 | Register ExtReg; | ||||
6745 | AArch64_AM::ShiftExtendType Ext; | ||||
6746 | MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); | ||||
6747 | if (!RootDef) | ||||
6748 | return None; | ||||
6749 | |||||
6750 | if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) | ||||
6751 | return None; | ||||
6752 | |||||
6753 | // Check if we can fold a shift and an extend. | ||||
6754 | if (RootDef->getOpcode() == TargetOpcode::G_SHL) { | ||||
6755 | // Look for a constant on the RHS of the shift. | ||||
6756 | MachineOperand &RHS = RootDef->getOperand(2); | ||||
6757 | Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); | ||||
6758 | if (!MaybeShiftVal) | ||||
6759 | return None; | ||||
6760 | ShiftVal = *MaybeShiftVal; | ||||
6761 | if (ShiftVal > 4) | ||||
6762 | return None; | ||||
6763 | // Look for a valid extend instruction on the LHS of the shift. | ||||
6764 | MachineOperand &LHS = RootDef->getOperand(1); | ||||
6765 | MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); | ||||
6766 | if (!ExtDef) | ||||
6767 | return None; | ||||
6768 | Ext = getExtendTypeForInst(*ExtDef, MRI); | ||||
6769 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||
6770 | return None; | ||||
6771 | ExtReg = ExtDef->getOperand(1).getReg(); | ||||
6772 | } else { | ||||
6773 | // Didn't get a shift. Try just folding an extend. | ||||
6774 | Ext = getExtendTypeForInst(*RootDef, MRI); | ||||
6775 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||
6776 | return None; | ||||
6777 | ExtReg = RootDef->getOperand(1).getReg(); | ||||
6778 | |||||
6779 | // If we have a 32 bit instruction which zeroes out the high half of a | ||||
6780 | // register, we get an implicit zero extend for free. Check if we have one. | ||||
6781 | // FIXME: We actually emit the extend right now even though we don't have | ||||
6782 | // to. | ||||
6783 | if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { | ||||
6784 | MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); | ||||
6785 | if (ExtInst && isDef32(*ExtInst)) | ||||
6786 | return None; | ||||
6787 | } | ||||
6788 | } | ||||
6789 | |||||
6790 | // We require a GPR32 here. Narrow the ExtReg if needed using a subregister | ||||
6791 | // copy. | ||||
6792 | MachineIRBuilder MIB(*RootDef); | ||||
6793 | ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); | ||||
6794 | |||||
6795 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, | ||||
6796 | [=](MachineInstrBuilder &MIB) { | ||||
6797 | MIB.addImm(getArithExtendImm(Ext, ShiftVal)); | ||||
6798 | }}}; | ||||
6799 | } | ||||
6800 | |||||
6801 | void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, | ||||
6802 | const MachineInstr &MI, | ||||
6803 | int OpIdx) const { | ||||
6804 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | ||||
6805 | assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && | ||||
6806 | "Expected G_CONSTANT"); | ||||
6807 | Optional<int64_t> CstVal = | ||||
6808 | getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); | ||||
6809 | assert(CstVal && "Expected constant value")(static_cast <bool> (CstVal && "Expected constant value" ) ? void (0) : __assert_fail ("CstVal && \"Expected constant value\"" , "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6809, __extension__ __PRETTY_FUNCTION__)); | ||||
6810 | MIB.addImm(CstVal.getValue()); | ||||
6811 | } | ||||
6812 | |||||
6813 | void AArch64InstructionSelector::renderLogicalImm32( | ||||
6814 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { | ||||
6815 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && | ||||
6816 | "Expected G_CONSTANT"); | ||||
6817 | uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); | ||||
6818 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); | ||||
6819 | MIB.addImm(Enc); | ||||
6820 | } | ||||
6821 | |||||
6822 | void AArch64InstructionSelector::renderLogicalImm64( | ||||
6823 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { | ||||
6824 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && | ||||
6825 | "Expected G_CONSTANT"); | ||||
6826 | uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); | ||||
6827 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); | ||||
6828 | MIB.addImm(Enc); | ||||
6829 | } | ||||
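     | // Logical immediates are encoded as (N, immr, imms) bitmask patterns of a | ||||
     | // rotated run of set bits in a replicated element; e.g. 0x00ff00ff (the | ||||
     | // element 0x00ff replicated) is encodable, while 0x12345678 is not. | ||||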
6830 | |||||
6831 | void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB, | ||||
6832 | const MachineInstr &MI, | ||||
6833 | int OpIdx) const { | ||||
6834 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | ||||
6835 | "Expected G_FCONSTANT"); | ||||
6836 | MIB.addImm( | ||||
6837 | AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | ||||
6838 | } | ||||
6839 | |||||
6840 | void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB, | ||||
6841 | const MachineInstr &MI, | ||||
6842 | int OpIdx) const { | ||||
6843 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | ||||
6844 | "Expected G_FCONSTANT"); | ||||
6845 | MIB.addImm( | ||||
6846 | AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | ||||
6847 | } | ||||
6848 | |||||
6849 | void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB, | ||||
6850 | const MachineInstr &MI, | ||||
6851 | int OpIdx) const { | ||||
6852 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | ||||
6853 | "Expected G_FCONSTANT"); | ||||
6854 | MIB.addImm( | ||||
6855 | AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | ||||
6856 | } | ||||
6857 | |||||
6858 | void AArch64InstructionSelector::renderFPImm32SIMDModImmType4( | ||||
6859 | MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { | ||||
6860 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && | ||||
6861 | "Expected G_FCONSTANT"); | ||||
6862 | MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1) | ||||
6863 | .getFPImm() | ||||
6864 | ->getValueAPF() | ||||
6865 | .bitcastToAPInt() | ||||
6866 | .getZExtValue())); | ||||
6867 | } | ||||
6868 | |||||
6869 | bool AArch64InstructionSelector::isLoadStoreOfNumBytes( | ||||
6870 | const MachineInstr &MI, unsigned NumBytes) const { | ||||
6871 | if (!MI.mayLoadOrStore()) | ||||
6872 | return false; | ||||
6873 | assert(MI.hasOneMemOperand() && | ||||
6874 | "Expected load/store to have only one mem op!"); | ||||
6875 | return (*MI.memoperands_begin())->getSize() == NumBytes; | ||||
6876 | } | ||||
6877 | |||||
6878 | bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { | ||||
6879 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | ||||
6880 | if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) | ||||
6881 | return false; | ||||
6882 | |||||
6883 | // Only return true if we know the operation will zero-out the high half of | ||||
6884 | // the 64-bit register. Truncates can be subregister copies, which don't | ||||
6885 | // zero out the high bits. Copies and other copy-like instructions can be | ||||
6886 | // fed by truncates, or could be lowered as subregister copies. | ||||
6887 | switch (MI.getOpcode()) { | ||||
6888 | default: | ||||
6889 | return true; | ||||
6890 | case TargetOpcode::COPY: | ||||
6891 | case TargetOpcode::G_BITCAST: | ||||
6892 | case TargetOpcode::G_TRUNC: | ||||
6893 | case TargetOpcode::G_PHI: | ||||
6894 | return false; | ||||
6895 | } | ||||
6896 | } | ||||
6897 | |||||
6898 | |||||
6899 | // Perform fixups on the given PHI instruction's operands to force them all | ||||
6900 | // to be the same as the destination regbank. | ||||
6901 | static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, | ||||
6902 | const AArch64RegisterBankInfo &RBI) { | ||||
6903 | assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI"); | ||||
6904 | Register DstReg = MI.getOperand(0).getReg(); | ||||
6905 | const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg); | ||||
6906 | assert(DstRB && "Expected PHI dst to have regbank assigned"); | ||||
6907 | MachineIRBuilder MIB(MI); | ||||
6908 | |||||
6909 | // Go through each operand and ensure it has the same regbank. | ||||
6910 | for (MachineOperand &MO : llvm::drop_begin(MI.operands())) { | ||||
6911 | if (!MO.isReg()) | ||||
6912 | continue; | ||||
6913 | Register OpReg = MO.getReg(); | ||||
6914 | const RegisterBank *RB = MRI.getRegBankOrNull(OpReg); | ||||
6915 | if (RB != DstRB) { | ||||
6916 | // Insert a cross-bank copy. | ||||
6917 | auto *OpDef = MRI.getVRegDef(OpReg); | ||||
6918 | const LLT &Ty = MRI.getType(OpReg); | ||||
6919 | MachineBasicBlock &OpDefBB = *OpDef->getParent(); | ||||
6920 | |||||
6921 | // Any instruction we insert must appear after all PHIs in the block | ||||
6922 | // for the block to be valid MIR. | ||||
6923 | MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator()); | ||||
6924 | if (InsertPt != OpDefBB.end() && InsertPt->isPHI()) | ||||
6925 | InsertPt = OpDefBB.getFirstNonPHI(); | ||||
6926 | MIB.setInsertPt(*OpDef->getParent(), InsertPt); | ||||
6927 | auto Copy = MIB.buildCopy(Ty, OpReg); | ||||
6928 | MRI.setRegBank(Copy.getReg(0), *DstRB); | ||||
6929 | MO.setReg(Copy.getReg(0)); | ||||
6930 | } | ||||
6931 | } | ||||
6932 | } | ||||
6933 | |||||
6934 | void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { | ||||
6935 | // We're looking for PHIs; build a list so we don't invalidate iterators. | ||||
6936 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||
6937 | SmallVector<MachineInstr *, 32> Phis; | ||||
6938 | for (auto &BB : MF) { | ||||
6939 | for (auto &MI : BB) { | ||||
6940 | if (MI.getOpcode() == TargetOpcode::G_PHI) | ||||
6941 | Phis.emplace_back(&MI); | ||||
6942 | } | ||||
6943 | } | ||||
6944 | |||||
6945 | for (auto *MI : Phis) { | ||||
6946 | // We need to do some work here if the operand types are < 16 bit and they | ||||
6947 | // are split across fpr/gpr banks. Since all types <32b on gpr | ||||
6948 | // end up being assigned gpr32 regclasses, we can end up with PHIs here | ||||
6949 | // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't | ||||
6950 | // be selecting heterogeneous regbanks for operands if possible, but we | ||||
6951 | // still need to be able to deal with it here. | ||||
6952 | // | ||||
6953 | // To fix this, if we have a gpr-bank operand < 32b in size and at least | ||||
6954 | // one other operand is on the fpr bank, then we add cross-bank copies | ||||
6955 | // to homogenize the operand banks. For simplicity the bank that we choose | ||||
6956 | // to settle on is whatever bank the def operand has. For example: | ||||
6957 | // | ||||
6958 | // %endbb: | ||||
6959 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 | ||||
6960 | // => | ||||
6961 | // %bb2: | ||||
6962 | // ... | ||||
6963 | // %in2_copy:gpr(s16) = COPY %in2:fpr(s16) | ||||
6964 | // ... | ||||
6965 | // %endbb: | ||||
6966 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 | ||||
6967 | bool HasGPROp = false, HasFPROp = false; | ||||
6968 | for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) { | ||||
6969 | if (!MO.isReg()) | ||||
6970 | continue; | ||||
6971 | const LLT &Ty = MRI.getType(MO.getReg()); | ||||
6972 | if (!Ty.isValid() || !Ty.isScalar()) | ||||
6973 | break; | ||||
6974 | if (Ty.getSizeInBits() >= 32) | ||||
6975 | break; | ||||
6976 | const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()); | ||||
6977 | // If for some reason we don't have a regbank yet, don't try anything. | ||||
6978 | if (!RB) | ||||
6979 | break; | ||||
6980 | |||||
6981 | if (RB->getID() == AArch64::GPRRegBankID) | ||||
6982 | HasGPROp = true; | ||||
6983 | else | ||||
6984 | HasFPROp = true; | ||||
6985 | } | ||||
6986 | // We have heterogeneous regbanks, so we need to fix this up. | ||||
6987 | if (HasGPROp && HasFPROp) | ||||
6988 | fixupPHIOpBanks(*MI, MRI, RBI); | ||||
6989 | } | ||||
6990 | } | ||||
6991 | |||||
6992 | namespace llvm { | ||||
6993 | InstructionSelector * | ||||
6994 | createAArch64InstructionSelector(const AArch64TargetMachine &TM, | ||||
6995 | AArch64Subtarget &Subtarget, | ||||
6996 | AArch64RegisterBankInfo &RBI) { | ||||
6997 | return new AArch64InstructionSelector(TM, Subtarget, RBI); | ||||
6998 | } | ||||
6999 | } |
1 | // <tuple> -*- C++ -*- |
2 | |
3 | // Copyright (C) 2007-2020 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | /** @file include/tuple |
26 | * This is a Standard C++ Library header. |
27 | */ |
28 | |
29 | #ifndef _GLIBCXX_TUPLE1 |
30 | #define _GLIBCXX_TUPLE1 1 |
31 | |
32 | #pragma GCC system_header |
33 | |
34 | #if __cplusplus201402L < 201103L |
35 | # include <bits/c++0x_warning.h> |
36 | #else |
37 | |
38 | #include <utility> |
39 | #include <array> |
40 | #include <bits/uses_allocator.h> |
41 | #include <bits/invoke.h> |
42 | #if __cplusplus201402L > 201703L |
43 | # include <compare> |
44 | # define __cpp_lib_constexpr_tuple 201811L |
45 | #endif |
46 | |
47 | namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default"))) |
48 | { |
49 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
50 | |
51 | /** |
52 | * @addtogroup utilities |
53 | * @{ |
54 | */ |
55 | |
56 | template<typename... _Elements> |
57 | class tuple; |
58 | |
59 | template<typename _Tp> |
60 | struct __is_empty_non_tuple : is_empty<_Tp> { }; |
61 | |
62 | // Using EBO for elements that are tuples causes ambiguous base errors. |
63 | template<typename _El0, typename... _El> |
64 | struct __is_empty_non_tuple<tuple<_El0, _El...>> : false_type { }; |
65 | |
66 | // Use the Empty Base-class Optimization for empty, non-final types. |
67 | template<typename _Tp> |
68 | using __empty_not_final |
69 | = typename conditional<__is_final(_Tp), false_type, |
70 | __is_empty_non_tuple<_Tp>>::type; |
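
To illustrate the dispatch just above (a sketch, not part of the header; E and F are hypothetical types, and __empty_not_final is the alias defined here):

    struct E { };        // empty and non-final: eligible for the EBO path
    struct F final { };  // empty but final: cannot be used as a base class
    static_assert(__empty_not_final<E>::value, "stored as a base class");
    static_assert(!__empty_not_final<F>::value, "stored as a data member");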
71 | |
72 | template<std::size_t _Idx, typename _Head, |
73 | bool = __empty_not_final<_Head>::value> |
74 | struct _Head_base; |
75 | |
76 | template<std::size_t _Idx, typename _Head> |
77 | struct _Head_base<_Idx, _Head, true> |
78 | : public _Head |
79 | { |
80 | constexpr _Head_base() |
81 | : _Head() { } |
82 | |
83 | constexpr _Head_base(const _Head& __h) |
84 | : _Head(__h) { } |
85 | |
86 | constexpr _Head_base(const _Head_base&) = default; |
87 | constexpr _Head_base(_Head_base&&) = default; |
88 | |
89 | template<typename _UHead> |
90 | constexpr _Head_base(_UHead&& __h) |
91 | : _Head(std::forward<_UHead>(__h)) { } |
92 | |
93 | _Head_base(allocator_arg_t, __uses_alloc0) |
94 | : _Head() { } |
95 | |
96 | template<typename _Alloc> |
97 | _Head_base(allocator_arg_t, __uses_alloc1<_Alloc> __a) |
98 | : _Head(allocator_arg, *__a._M_a) { } |
99 | |
100 | template<typename _Alloc> |
101 | _Head_base(allocator_arg_t, __uses_alloc2<_Alloc> __a) |
102 | : _Head(*__a._M_a) { } |
103 | |
104 | template<typename _UHead> |
105 | _Head_base(__uses_alloc0, _UHead&& __uhead) |
106 | : _Head(std::forward<_UHead>(__uhead)) { } |
107 | |
108 | template<typename _Alloc, typename _UHead> |
109 | _Head_base(__uses_alloc1<_Alloc> __a, _UHead&& __uhead) |
110 | : _Head(allocator_arg, *__a._M_a, std::forward<_UHead>(__uhead)) { } |
111 | |
112 | template<typename _Alloc, typename _UHead> |
113 | _Head_base(__uses_alloc2<_Alloc> __a, _UHead&& __uhead) |
114 | : _Head(std::forward<_UHead>(__uhead), *__a._M_a) { } |
115 | |
116 | static constexpr _Head& |
117 | _M_head(_Head_base& __b) noexcept { return __b; } |
118 | |
119 | static constexpr const _Head& |
120 | _M_head(const _Head_base& __b) noexcept { return __b; } |
121 | }; |
122 | |
123 | template<std::size_t _Idx, typename _Head> |
124 | struct _Head_base<_Idx, _Head, false> |
125 | { |
126 | constexpr _Head_base() |
127 | : _M_head_impl() { } |
128 | |
129 | constexpr _Head_base(const _Head& __h) |
130 | : _M_head_impl(__h) { } |
131 | |
132 | constexpr _Head_base(const _Head_base&) = default; |
133 | constexpr _Head_base(_Head_base&&) = default; |
134 | |
135 | template<typename _UHead> |
136 | constexpr _Head_base(_UHead&& __h) |
137 | : _M_head_impl(std::forward<_UHead>(__h)) { } |
138 | |
139 | _GLIBCXX20_CONSTEXPR |
140 | _Head_base(allocator_arg_t, __uses_alloc0) |
141 | : _M_head_impl() { } |
142 | |
143 | template<typename _Alloc> |
144 | _Head_base(allocator_arg_t, __uses_alloc1<_Alloc> __a) |
145 | : _M_head_impl(allocator_arg, *__a._M_a) { } |
146 | |
147 | template<typename _Alloc> |
148 | _Head_base(allocator_arg_t, __uses_alloc2<_Alloc> __a) |
149 | : _M_head_impl(*__a._M_a) { } |
150 | |
151 | template<typename _UHead> |
152 | _GLIBCXX20_CONSTEXPR |
153 | _Head_base(__uses_alloc0, _UHead&& __uhead) |
154 | : _M_head_impl(std::forward<_UHead>(__uhead)) { } |
155 | |
156 | template<typename _Alloc, typename _UHead> |
157 | _Head_base(__uses_alloc1<_Alloc> __a, _UHead&& __uhead) |
158 | : _M_head_impl(allocator_arg, *__a._M_a, std::forward<_UHead>(__uhead)) |
159 | { } |
160 | |
161 | template<typename _Alloc, typename _UHead> |
162 | _Head_base(__uses_alloc2<_Alloc> __a, _UHead&& __uhead) |
163 | : _M_head_impl(std::forward<_UHead>(__uhead), *__a._M_a) { } |
164 | |
165 | static constexpr _Head& |
166 | _M_head(_Head_base& __b) noexcept { return __b._M_head_impl; } |
167 | |
168 | static constexpr const _Head& |
169 | _M_head(const _Head_base& __b) noexcept { return __b._M_head_impl; } |
170 | |
171 | _Head _M_head_impl; |
172 | }; |
173 | |
174 | /** |
175 | * Contains the actual implementation of the @c tuple template, stored |
176 | * as a recursive inheritance hierarchy from the first element (most |
177 | * derived class) to the last (least derived class). The @c Idx |
178 | * parameter gives the 0-based index of the element stored at this |
179 | * point in the hierarchy; we use it to implement a constant-time |
180 | * get() operation. |
181 | */ |
182 | template<std::size_t _Idx, typename... _Elements> |
183 | struct _Tuple_impl; |
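
For example, tuple<int, char> instantiates the hierarchy sketched below (illustrative; the exact bases are spelled out by the specializations that follow):

    // tuple<int, char>
    //   : _Tuple_impl<0, int, char>      // most derived, owns element 0
    //       : _Tuple_impl<1, char>       // owns element 1
    //           : _Head_base<1, char>
    //       , _Head_base<0, int>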
184 | |
185 | /** |
186 | * Recursive tuple implementation. Here we store the @c Head element |
187 | * and derive from a @c Tuple_impl containing the remaining elements |
188 | * (which contains the @c Tail). |
189 | */ |
190 | template<std::size_t _Idx, typename _Head, typename... _Tail> |
191 | struct _Tuple_impl<_Idx, _Head, _Tail...> |
192 | : public _Tuple_impl<_Idx + 1, _Tail...>, |
193 | private _Head_base<_Idx, _Head> |
194 | { |
195 | template<std::size_t, typename...> friend class _Tuple_impl; |
196 | |
197 | typedef _Tuple_impl<_Idx + 1, _Tail...> _Inherited; |
198 | typedef _Head_base<_Idx, _Head> _Base; |
199 | |
200 | static constexpr _Head& |
201 | _M_head(_Tuple_impl& __t) noexcept { return _Base::_M_head(__t); } |
202 | |
203 | static constexpr const _Head& |
204 | _M_head(const _Tuple_impl& __t) noexcept { return _Base::_M_head(__t); } |
205 | |
206 | static constexpr _Inherited& |
207 | _M_tail(_Tuple_impl& __t) noexcept { return __t; } |
208 | |
209 | static constexpr const _Inherited& |
210 | _M_tail(const _Tuple_impl& __t) noexcept { return __t; } |
211 | |
212 | constexpr _Tuple_impl() |
213 | : _Inherited(), _Base() { } |
214 | |
215 | explicit |
216 | constexpr _Tuple_impl(const _Head& __head, const _Tail&... __tail) |
217 | : _Inherited(__tail...), _Base(__head) { } |
218 | |
219 | template<typename _UHead, typename... _UTail, typename = typename |
220 | enable_if<sizeof...(_Tail) == sizeof...(_UTail)>::type> |
221 | explicit |
222 | constexpr _Tuple_impl(_UHead&& __head, _UTail&&... __tail) |
223 | : _Inherited(std::forward<_UTail>(__tail)...), |
224 | _Base(std::forward<_UHead>(__head)) { } |
225 | |
226 | constexpr _Tuple_impl(const _Tuple_impl&) = default; |
227 | |
228 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
229 | // 2729. Missing SFINAE on std::pair::operator= |
230 | _Tuple_impl& operator=(const _Tuple_impl&) = delete; |
231 | |
232 | constexpr |
233 | _Tuple_impl(_Tuple_impl&& __in) |
234 | noexcept(__and_<is_nothrow_move_constructible<_Head>, |
235 | is_nothrow_move_constructible<_Inherited>>::value) |
236 | : _Inherited(std::move(_M_tail(__in))), |
237 | _Base(std::forward<_Head>(_M_head(__in))) { } |
238 | |
239 | template<typename... _UElements> |
240 | constexpr _Tuple_impl(const _Tuple_impl<_Idx, _UElements...>& __in) |
241 | : _Inherited(_Tuple_impl<_Idx, _UElements...>::_M_tail(__in)), |
242 | _Base(_Tuple_impl<_Idx, _UElements...>::_M_head(__in)) { } |
243 | |
244 | template<typename _UHead, typename... _UTails> |
245 | constexpr _Tuple_impl(_Tuple_impl<_Idx, _UHead, _UTails...>&& __in) |
246 | : _Inherited(std::move |
247 | (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_tail(__in))), |
248 | _Base(std::forward<_UHead> |
249 | (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in))) { } |
250 | |
251 | template<typename _Alloc> |
252 | _GLIBCXX20_CONSTEXPR |
253 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a) |
254 | : _Inherited(__tag, __a), |
255 | _Base(__tag, __use_alloc<_Head>(__a)) { } |
256 | |
257 | template<typename _Alloc> |
258 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
259 | const _Head& __head, const _Tail&... __tail) |
260 | : _Inherited(__tag, __a, __tail...), |
261 | _Base(__use_alloc<_Head, _Alloc, _Head>(__a), __head) { } |
262 | |
263 | template<typename _Alloc, typename _UHead, typename... _UTail, |
264 | typename = typename enable_if<sizeof...(_Tail) |
265 | == sizeof...(_UTail)>::type> |
266 | _GLIBCXX20_CONSTEXPR |
267 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
268 | _UHead&& __head, _UTail&&... __tail) |
269 | : _Inherited(__tag, __a, std::forward<_UTail>(__tail)...), |
270 | _Base(__use_alloc<_Head, _Alloc, _UHead>(__a), |
271 | std::forward<_UHead>(__head)) { } |
272 | |
273 | template<typename _Alloc> |
274 | _GLIBCXX20_CONSTEXPR |
275 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
276 | const _Tuple_impl& __in) |
277 | : _Inherited(__tag, __a, _M_tail(__in)), |
278 | _Base(__use_alloc<_Head, _Alloc, _Head>(__a), _M_head(__in)) { } |
279 | |
280 | template<typename _Alloc> |
281 | _GLIBCXX20_CONSTEXPR |
282 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
283 | _Tuple_impl&& __in) |
284 | : _Inherited(__tag, __a, std::move(_M_tail(__in))), |
285 | _Base(__use_alloc<_Head, _Alloc, _Head>(__a), |
286 | std::forward<_Head>(_M_head(__in))) { } |
287 | |
288 | template<typename _Alloc, typename _UHead, typename... _UTails> |
289 | _GLIBCXX20_CONSTEXPR |
290 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
291 | const _Tuple_impl<_Idx, _UHead, _UTails...>& __in) |
292 | : _Inherited(__tag, __a, |
293 | _Tuple_impl<_Idx, _UHead, _UTails...>::_M_tail(__in)), |
294 | _Base(__use_alloc<_Head, _Alloc, const _UHead&>(__a), |
295 | _Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in)) { } |
296 | |
297 | template<typename _Alloc, typename _UHead, typename... _UTails> |
298 | _GLIBCXX20_CONSTEXPR |
299 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
300 | _Tuple_impl<_Idx, _UHead, _UTails...>&& __in) |
301 | : _Inherited(__tag, __a, std::move |
302 | (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_tail(__in))), |
303 | _Base(__use_alloc<_Head, _Alloc, _UHead>(__a), |
304 | std::forward<_UHead> |
305 | (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in))) { } |
306 | |
307 | template<typename... _UElements> |
308 | _GLIBCXX20_CONSTEXPR |
309 | void |
310 | _M_assign(const _Tuple_impl<_Idx, _UElements...>& __in) |
311 | { |
312 | _M_head(*this) = _Tuple_impl<_Idx, _UElements...>::_M_head(__in); |
313 | _M_tail(*this)._M_assign( |
314 | _Tuple_impl<_Idx, _UElements...>::_M_tail(__in)); |
315 | } |
316 | |
317 | template<typename _UHead, typename... _UTails> |
318 | _GLIBCXX20_CONSTEXPR |
319 | void |
320 | _M_assign(_Tuple_impl<_Idx, _UHead, _UTails...>&& __in) |
321 | { |
322 | _M_head(*this) = std::forward<_UHead> |
323 | (_Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in)); |
324 | _M_tail(*this)._M_assign( |
325 | std::move(_Tuple_impl<_Idx, _UHead, _UTails...>::_M_tail(__in))); |
326 | } |
327 | |
328 | protected: |
329 | _GLIBCXX20_CONSTEXPR |
330 | void |
331 | _M_swap(_Tuple_impl& __in) |
332 | { |
333 | using std::swap; |
334 | swap(_M_head(*this), _M_head(__in)); |
335 | _Inherited::_M_swap(_M_tail(__in)); |
336 | } |
337 | }; |
338 | |
339 | // Basis case of inheritance recursion. |
340 | template<std::size_t _Idx, typename _Head> |
341 | struct _Tuple_impl<_Idx, _Head> |
342 | : private _Head_base<_Idx, _Head> |
343 | { |
344 | template<std::size_t, typename...> friend class _Tuple_impl; |
345 | |
346 | typedef _Head_base<_Idx, _Head> _Base; |
347 | |
348 | static constexpr _Head& |
349 | _M_head(_Tuple_impl& __t) noexcept { return _Base::_M_head(__t); } |
350 | |
351 | static constexpr const _Head& |
352 | _M_head(const _Tuple_impl& __t) noexcept { return _Base::_M_head(__t); } |
353 | |
354 | constexpr _Tuple_impl() |
355 | : _Base() { } |
356 | |
357 | explicit |
358 | constexpr _Tuple_impl(const _Head& __head) |
359 | : _Base(__head) { } |
360 | |
361 | template<typename _UHead> |
362 | explicit |
363 | constexpr _Tuple_impl(_UHead&& __head) |
364 | : _Base(std::forward<_UHead>(__head)) { } |
365 | |
366 | constexpr _Tuple_impl(const _Tuple_impl&) = default; |
367 | |
368 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
369 | // 2729. Missing SFINAE on std::pair::operator= |
370 | _Tuple_impl& operator=(const _Tuple_impl&) = delete; |
371 | |
372 | constexpr |
373 | _Tuple_impl(_Tuple_impl&& __in) |
374 | noexcept(is_nothrow_move_constructible<_Head>::value) |
375 | : _Base(std::forward<_Head>(_M_head(__in))) { } |
376 | |
377 | template<typename _UHead> |
378 | constexpr _Tuple_impl(const _Tuple_impl<_Idx, _UHead>& __in) |
379 | : _Base(_Tuple_impl<_Idx, _UHead>::_M_head(__in)) { } |
380 | |
381 | template<typename _UHead> |
382 | constexpr _Tuple_impl(_Tuple_impl<_Idx, _UHead>&& __in) |
383 | : _Base(std::forward<_UHead>(_Tuple_impl<_Idx, _UHead>::_M_head(__in))) |
384 | { } |
385 | |
386 | template<typename _Alloc> |
387 | _GLIBCXX20_CONSTEXPR |
388 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a) |
389 | : _Base(__tag, __use_alloc<_Head>(__a)) { } |
390 | |
391 | template<typename _Alloc> |
392 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
393 | const _Head& __head) |
394 | : _Base(__use_alloc<_Head, _Alloc, _Head>(__a), __head) { } |
395 | |
396 | template<typename _Alloc, typename _UHead> |
397 | _GLIBCXX20_CONSTEXPR |
398 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
399 | _UHead&& __head) |
400 | : _Base(__use_alloc<_Head, _Alloc, _UHead>(__a), |
401 | std::forward<_UHead>(__head)) { } |
402 | |
403 | template<typename _Alloc> |
404 | _GLIBCXX20_CONSTEXPR |
405 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
406 | const _Tuple_impl& __in) |
407 | : _Base(__use_alloc<_Head, _Alloc, _Head>(__a), _M_head(__in)) { } |
408 | |
409 | template<typename _Alloc> |
410 | _GLIBCXX20_CONSTEXPR |
411 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
412 | _Tuple_impl&& __in) |
413 | : _Base(__use_alloc<_Head, _Alloc, _Head>(__a), |
414 | std::forward<_Head>(_M_head(__in))) { } |
415 | |
416 | template<typename _Alloc, typename _UHead> |
417 | _GLIBCXX20_CONSTEXPR |
418 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
419 | const _Tuple_impl<_Idx, _UHead>& __in) |
420 | : _Base(__use_alloc<_Head, _Alloc, const _UHead&>(__a), |
421 | _Tuple_impl<_Idx, _UHead>::_M_head(__in)) { } |
422 | |
423 | template<typename _Alloc, typename _UHead> |
424 | _GLIBCXX20_CONSTEXPR |
425 | _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a, |
426 | _Tuple_impl<_Idx, _UHead>&& __in) |
427 | : _Base(__use_alloc<_Head, _Alloc, _UHead>(__a), |
428 | std::forward<_UHead>(_Tuple_impl<_Idx, _UHead>::_M_head(__in))) |
429 | { } |
430 | |
431 | template<typename _UHead> |
432 | _GLIBCXX20_CONSTEXPR |
433 | void |
434 | _M_assign(const _Tuple_impl<_Idx, _UHead>& __in) |
435 | { |
436 | _M_head(*this) = _Tuple_impl<_Idx, _UHead>::_M_head(__in); |
437 | } |
438 | |
439 | template<typename _UHead> |
440 | _GLIBCXX20_CONSTEXPR |
441 | void |
442 | _M_assign(_Tuple_impl<_Idx, _UHead>&& __in) |
443 | { |
444 | _M_head(*this) |
445 | = std::forward<_UHead>(_Tuple_impl<_Idx, _UHead>::_M_head(__in)); |
446 | } |
447 | |
448 | protected: |
449 | _GLIBCXX20_CONSTEXPR |
450 | void |
451 | _M_swap(_Tuple_impl& __in) |
452 | { |
453 | using std::swap; |
454 | swap(_M_head(*this), _M_head(__in)); |
455 | } |
456 | }; |
457 | |
458 | // Concept utility functions, reused in conditionally-explicit |
459 | // constructors. |
460 | template<bool, typename... _Types> |
461 | struct _TupleConstraints |
462 | { |
463 | // Constraint for a non-explicit constructor. |
464 | // True iff each Ti in _Types... can be constructed from Ui in _UTypes... |
465 | // and every Ui is implicitly convertible to Ti. |
466 | template<typename... _UTypes> |
467 | static constexpr bool __is_implicitly_constructible() |
468 | { |
469 | return __and_<is_constructible<_Types, _UTypes>..., |
470 | is_convertible<_UTypes, _Types>... |
471 | >::value; |
472 | } |
473 | |
474 | // Constraint for an explicit constructor.
475 | // True iff each Ti in _Types... can be constructed from Ui in _UTypes... |
476 | // but not every Ui is implicitly convertible to Ti. |
477 | template<typename... _UTypes> |
478 | static constexpr bool __is_explicitly_constructible() |
479 | { |
480 | return __and_<is_constructible<_Types, _UTypes>..., |
481 | __not_<__and_<is_convertible<_UTypes, _Types>...>> |
482 | >::value; |
483 | } |
484 | |
485 | static constexpr bool __is_implicitly_default_constructible() |
486 | { |
487 | return __and_<std::__is_implicitly_default_constructible<_Types>... |
488 | >::value; |
489 | } |
490 | |
491 | static constexpr bool __is_explicitly_default_constructible() |
492 | { |
493 | return __and_<is_default_constructible<_Types>..., |
494 | __not_<__and_< |
495 | std::__is_implicitly_default_constructible<_Types>...> |
496 | >>::value; |
497 | } |
498 | }; |
499 | |
500 | // Partial specialization used when a required precondition isn't met, |
501 | // e.g. when sizeof...(_Types) != sizeof...(_UTypes). |
502 | template<typename... _Types> |
503 | struct _TupleConstraints<false, _Types...> |
504 | { |
505 | template<typename... _UTypes> |
506 | static constexpr bool __is_implicitly_constructible() |
507 | { return false; } |
508 | |
509 | template<typename... _UTypes> |
510 | static constexpr bool __is_explicitly_constructible() |
511 | { return false; } |
512 | }; |
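
These predicates drive the conditionally-explicit constructors below. A user-level sketch of the effect (A and B are hypothetical types):

    struct A { A(int); };           // int converts implicitly to A
    struct B { explicit B(int); };  // int converts only explicitly to B
    std::tuple<A> ta = {1};    // OK: the non-explicit constructor is selected
    std::tuple<B> tb{1};       // OK: direct-initialization uses the explicit one
    // std::tuple<B> tb2 = {1};  // ill-formed: copy-init with an explicit ctor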
513 | |
514 | /// Primary class template, tuple |
515 | template<typename... _Elements> |
516 | class tuple : public _Tuple_impl<0, _Elements...> |
517 | { |
518 | typedef _Tuple_impl<0, _Elements...> _Inherited; |
519 | |
520 | template<bool _Cond> |
521 | using _TCC = _TupleConstraints<_Cond, _Elements...>; |
522 | |
523 | // Constraint for non-explicit default constructor |
524 | template<bool _Dummy> |
525 | using _ImplicitDefaultCtor = __enable_if_t< |
526 | _TCC<_Dummy>::__is_implicitly_default_constructible(), |
527 | bool>; |
528 | |
529 | // Constraint for explicit default constructor |
530 | template<bool _Dummy> |
531 | using _ExplicitDefaultCtor = __enable_if_t< |
532 | _TCC<_Dummy>::__is_explicitly_default_constructible(), |
533 | bool>; |
534 | |
535 | // Constraint for non-explicit constructors |
536 | template<bool _Cond, typename... _Args> |
537 | using _ImplicitCtor = __enable_if_t< |
538 | _TCC<_Cond>::template __is_implicitly_constructible<_Args...>(), |
539 | bool>; |
540 | |
541 | // Constraint for explicit constructors
542 | template<bool _Cond, typename... _Args> |
543 | using _ExplicitCtor = __enable_if_t< |
544 | _TCC<_Cond>::template __is_explicitly_constructible<_Args...>(), |
545 | bool>; |
546 | |
547 | template<typename... _UElements> |
548 | static constexpr |
549 | __enable_if_t<sizeof...(_UElements) == sizeof...(_Elements), bool> |
550 | __assignable() |
551 | { return __and_<is_assignable<_Elements&, _UElements>...>::value; } |
552 | |
553 | // Condition for noexcept-specifier of an assignment operator. |
554 | template<typename... _UElements> |
555 | static constexpr bool __nothrow_assignable() |
556 | { |
557 | return |
558 | __and_<is_nothrow_assignable<_Elements&, _UElements>...>::value; |
559 | } |
560 | |
561 | // Condition for noexcept-specifier of a constructor. |
562 | template<typename... _UElements> |
563 | static constexpr bool __nothrow_constructible() |
564 | { |
565 | return |
566 | __and_<is_nothrow_constructible<_Elements, _UElements>...>::value; |
567 | } |
568 | |
569 | // Constraint for tuple(_UTypes&&...) where sizeof...(_UTypes) == 1. |
570 | template<typename _Up> |
571 | static constexpr bool __valid_args() |
572 | { |
573 | return sizeof...(_Elements) == 1 |
574 | && !is_same<tuple, __remove_cvref_t<_Up>>::value; |
575 | } |
576 | |
577 | // Constraint for tuple(_UTypes&&...) where sizeof...(_UTypes) > 1. |
578 | template<typename, typename, typename... _Tail> |
579 | static constexpr bool __valid_args() |
580 | { return (sizeof...(_Tail) + 2) == sizeof...(_Elements); } |
581 | |
582 | /* Constraint for constructors with a tuple<UTypes...> parameter ensures |
583 | * that the constructor is only viable when it would not interfere with |
584 | * tuple(UTypes&&...) or tuple(const tuple&) or tuple(tuple&&). |
585 | * Such constructors are only viable if: |
586 | * either sizeof...(Types) != 1, |
587 | * or (when Types... expands to T and UTypes... expands to U) |
588 | * is_convertible_v<TUPLE, T>, is_constructible_v<T, TUPLE>, |
589 | * and is_same_v<T, U> are all false. |
590 | */ |
591 | template<typename _Tuple, typename = tuple, |
592 | typename = __remove_cvref_t<_Tuple>> |
593 | struct _UseOtherCtor |
594 | : false_type |
595 | { }; |
596 | // If TUPLE is convertible to the single element in *this, |
597 | // then TUPLE should match tuple(UTypes&&...) instead. |
598 | template<typename _Tuple, typename _Tp, typename _Up> |
599 | struct _UseOtherCtor<_Tuple, tuple<_Tp>, tuple<_Up>> |
600 | : __or_<is_convertible<_Tuple, _Tp>, is_constructible<_Tp, _Tuple>> |
601 | { }; |
602 | // If TUPLE and *this each have a single element of the same type, |
603 | // then TUPLE should match a copy/move constructor instead. |
604 | template<typename _Tuple, typename _Tp> |
605 | struct _UseOtherCtor<_Tuple, tuple<_Tp>, tuple<_Tp>> |
606 | : true_type |
607 | { }; |
608 | |
609 | // Return true iff sizeof...(Types) == 1 && tuple_size_v<TUPLE> == 1 |
610 | // and the single element in Types can be initialized from TUPLE, |
611 | // or is the same type as tuple_element_t<0, TUPLE>. |
612 | template<typename _Tuple> |
613 | static constexpr bool __use_other_ctor() |
614 | { return _UseOtherCtor<_Tuple>::value; } |
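
The effect of this constraint, sketched with a nested tuple:

    std::tuple<int> inner(42);
    // tuple<tuple<int>>'s single element is constructible from `inner`,
    // so _UseOtherCtor is true and the converting constructor steps aside;
    // tuple(UTypes&&...) is chosen and `outer` stores a copy of `inner`
    // as its one element instead of attempting an element-wise conversion.
    std::tuple<std::tuple<int>> outer(inner);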
615 | |
616 | public: |
617 | template<typename _Dummy = void, |
618 | _ImplicitDefaultCtor<is_void<_Dummy>::value> = true> |
619 | constexpr |
620 | tuple() |
621 | noexcept(__and_<is_nothrow_default_constructible<_Elements>...>::value) |
622 | : _Inherited() { } |
623 | |
624 | template<typename _Dummy = void, |
625 | _ExplicitDefaultCtor<is_void<_Dummy>::value> = false> |
626 | explicit constexpr |
627 | tuple() |
628 | noexcept(__and_<is_nothrow_default_constructible<_Elements>...>::value) |
629 | : _Inherited() { } |
630 | |
631 | template<bool _NotEmpty = (sizeof...(_Elements) >= 1), |
632 | _ImplicitCtor<_NotEmpty, const _Elements&...> = true> |
633 | constexpr |
634 | tuple(const _Elements&... __elements) |
635 | noexcept(__nothrow_constructible<const _Elements&...>()) |
636 | : _Inherited(__elements...) { } |
637 | |
638 | template<bool _NotEmpty = (sizeof...(_Elements) >= 1), |
639 | _ExplicitCtor<_NotEmpty, const _Elements&...> = false> |
640 | explicit constexpr |
641 | tuple(const _Elements&... __elements) |
642 | noexcept(__nothrow_constructible<const _Elements&...>()) |
643 | : _Inherited(__elements...) { } |
644 | |
645 | template<typename... _UElements, |
646 | bool _Valid = __valid_args<_UElements...>(), |
647 | _ImplicitCtor<_Valid, _UElements...> = true> |
648 | constexpr |
649 | tuple(_UElements&&... __elements) |
650 | noexcept(__nothrow_constructible<_UElements...>()) |
651 | : _Inherited(std::forward<_UElements>(__elements)...) { } |
652 | |
653 | template<typename... _UElements, |
654 | bool _Valid = __valid_args<_UElements...>(), |
655 | _ExplicitCtor<_Valid, _UElements...> = false> |
656 | explicit constexpr |
657 | tuple(_UElements&&... __elements) |
658 | noexcept(__nothrow_constructible<_UElements...>()) |
659 | : _Inherited(std::forward<_UElements>(__elements)...) { } |
660 | |
661 | constexpr tuple(const tuple&) = default; |
662 | |
663 | constexpr tuple(tuple&&) = default; |
664 | |
665 | template<typename... _UElements, |
666 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
667 | && !__use_other_ctor<const tuple<_UElements...>&>(), |
668 | _ImplicitCtor<_Valid, const _UElements&...> = true> |
669 | constexpr |
670 | tuple(const tuple<_UElements...>& __in) |
671 | noexcept(__nothrow_constructible<const _UElements&...>()) |
672 | : _Inherited(static_cast<const _Tuple_impl<0, _UElements...>&>(__in)) |
673 | { } |
674 | |
675 | template<typename... _UElements, |
676 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
677 | && !__use_other_ctor<const tuple<_UElements...>&>(), |
678 | _ExplicitCtor<_Valid, const _UElements&...> = false> |
679 | explicit constexpr |
680 | tuple(const tuple<_UElements...>& __in) |
681 | noexcept(__nothrow_constructible<const _UElements&...>()) |
682 | : _Inherited(static_cast<const _Tuple_impl<0, _UElements...>&>(__in)) |
683 | { } |
684 | |
685 | template<typename... _UElements, |
686 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
687 | && !__use_other_ctor<tuple<_UElements...>&&>(), |
688 | _ImplicitCtor<_Valid, _UElements...> = true> |
689 | constexpr |
690 | tuple(tuple<_UElements...>&& __in) |
691 | noexcept(__nothrow_constructible<_UElements...>()) |
692 | : _Inherited(static_cast<_Tuple_impl<0, _UElements...>&&>(__in)) { } |
693 | |
694 | template<typename... _UElements, |
695 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
696 | && !__use_other_ctor<tuple<_UElements...>&&>(), |
697 | _ExplicitCtor<_Valid, _UElements...> = false> |
698 | explicit constexpr |
699 | tuple(tuple<_UElements...>&& __in) |
700 | noexcept(__nothrow_constructible<_UElements...>()) |
701 | : _Inherited(static_cast<_Tuple_impl<0, _UElements...>&&>(__in)) { } |
702 | |
703 | // Allocator-extended constructors. |
704 | |
705 | template<typename _Alloc, |
706 | _ImplicitDefaultCtor<is_object<_Alloc>::value> = true> |
707 | _GLIBCXX20_CONSTEXPR |
708 | tuple(allocator_arg_t __tag, const _Alloc& __a) |
709 | : _Inherited(__tag, __a) { } |
710 | |
711 | template<typename _Alloc, bool _NotEmpty = (sizeof...(_Elements) >= 1), |
712 | _ImplicitCtor<_NotEmpty, const _Elements&...> = true> |
713 | _GLIBCXX20_CONSTEXPR |
714 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
715 | const _Elements&... __elements) |
716 | : _Inherited(__tag, __a, __elements...) { } |
717 | |
718 | template<typename _Alloc, bool _NotEmpty = (sizeof...(_Elements) >= 1), |
719 | _ExplicitCtor<_NotEmpty, const _Elements&...> = false> |
720 | _GLIBCXX20_CONSTEXPR |
721 | explicit |
722 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
723 | const _Elements&... __elements) |
724 | : _Inherited(__tag, __a, __elements...) { } |
725 | |
726 | template<typename _Alloc, typename... _UElements, |
727 | bool _Valid = __valid_args<_UElements...>(), |
728 | _ImplicitCtor<_Valid, _UElements...> = true> |
729 | _GLIBCXX20_CONSTEXPR |
730 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
731 | _UElements&&... __elements) |
732 | : _Inherited(__tag, __a, std::forward<_UElements>(__elements)...) |
733 | { } |
734 | |
735 | template<typename _Alloc, typename... _UElements, |
736 | bool _Valid = __valid_args<_UElements...>(), |
737 | _ExplicitCtor<_Valid, _UElements...> = false> |
738 | _GLIBCXX20_CONSTEXPR |
739 | explicit |
740 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
741 | _UElements&&... __elements) |
742 | : _Inherited(__tag, __a, std::forward<_UElements>(__elements)...) |
743 | { } |
744 | |
745 | template<typename _Alloc> |
746 | _GLIBCXX20_CONSTEXPR |
747 | tuple(allocator_arg_t __tag, const _Alloc& __a, const tuple& __in) |
748 | : _Inherited(__tag, __a, static_cast<const _Inherited&>(__in)) { } |
749 | |
750 | template<typename _Alloc> |
751 | _GLIBCXX20_CONSTEXPR |
752 | tuple(allocator_arg_t __tag, const _Alloc& __a, tuple&& __in) |
753 | : _Inherited(__tag, __a, static_cast<_Inherited&&>(__in)) { } |
754 | |
755 | template<typename _Alloc, typename... _UElements, |
756 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
757 | && !__use_other_ctor<const tuple<_UElements...>&>(), |
758 | _ImplicitCtor<_Valid, const _UElements&...> = true> |
759 | _GLIBCXX20_CONSTEXPR |
760 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
761 | const tuple<_UElements...>& __in) |
762 | : _Inherited(__tag, __a, |
763 | static_cast<const _Tuple_impl<0, _UElements...>&>(__in)) |
764 | { } |
765 | |
766 | template<typename _Alloc, typename... _UElements, |
767 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
768 | && !__use_other_ctor<const tuple<_UElements...>&>(), |
769 | _ExplicitCtor<_Valid, const _UElements&...> = false> |
770 | _GLIBCXX20_CONSTEXPR |
771 | explicit |
772 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
773 | const tuple<_UElements...>& __in) |
774 | : _Inherited(__tag, __a, |
775 | static_cast<const _Tuple_impl<0, _UElements...>&>(__in)) |
776 | { } |
777 | |
778 | template<typename _Alloc, typename... _UElements, |
779 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
780 | && !__use_other_ctor<tuple<_UElements...>&&>(), |
781 | _ImplicitCtor<_Valid, _UElements...> = true> |
782 | _GLIBCXX20_CONSTEXPR |
783 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
784 | tuple<_UElements...>&& __in) |
785 | : _Inherited(__tag, __a, |
786 | static_cast<_Tuple_impl<0, _UElements...>&&>(__in)) |
787 | { } |
788 | |
789 | template<typename _Alloc, typename... _UElements, |
790 | bool _Valid = (sizeof...(_Elements) == sizeof...(_UElements)) |
791 | && !__use_other_ctor<tuple<_UElements...>&&>(), |
792 | _ExplicitCtor<_Valid, _UElements...> = false> |
793 | _GLIBCXX20_CONSTEXPR |
794 | explicit |
795 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
796 | tuple<_UElements...>&& __in) |
797 | : _Inherited(__tag, __a, |
798 | static_cast<_Tuple_impl<0, _UElements...>&&>(__in)) |
799 | { } |
800 | |
801 | // tuple assignment |
802 | |
803 | _GLIBCXX20_CONSTEXPR |
804 | tuple& |
805 | operator=(typename conditional<__assignable<const _Elements&...>(), |
806 | const tuple&, |
807 | const __nonesuch&>::type __in) |
808 | noexcept(__nothrow_assignable<const _Elements&...>()) |
809 | { |
810 | this->_M_assign(__in); |
811 | return *this; |
812 | } |
813 | |
814 | _GLIBCXX20_CONSTEXPR |
815 | tuple& |
816 | operator=(typename conditional<__assignable<_Elements...>(), |
817 | tuple&&, |
818 | __nonesuch&&>::type __in) |
819 | noexcept(__nothrow_assignable<_Elements...>()) |
820 | { |
821 | this->_M_assign(std::move(__in)); |
822 | return *this; |
823 | } |
824 | |
825 | template<typename... _UElements> |
826 | _GLIBCXX20_CONSTEXPR |
827 | __enable_if_t<__assignable<const _UElements&...>(), tuple&> |
828 | operator=(const tuple<_UElements...>& __in) |
829 | noexcept(__nothrow_assignable<const _UElements&...>()) |
830 | { |
831 | this->_M_assign(__in); |
832 | return *this; |
833 | } |
834 | |
835 | template<typename... _UElements> |
836 | _GLIBCXX20_CONSTEXPR |
837 | __enable_if_t<__assignable<_UElements...>(), tuple&> |
838 | operator=(tuple<_UElements...>&& __in) |
839 | noexcept(__nothrow_assignable<_UElements...>()) |
840 | { |
841 | this->_M_assign(std::move(__in)); |
842 | return *this; |
843 | } |
844 | |
845 | // tuple swap |
846 | _GLIBCXX20_CONSTEXPR |
847 | void |
848 | swap(tuple& __in) |
849 | noexcept(__and_<__is_nothrow_swappable<_Elements>...>::value) |
850 | { _Inherited::_M_swap(__in); } |
851 | }; |
852 | |
853 | #if __cpp_deduction_guides >= 201606 |
854 | template<typename... _UTypes> |
855 | tuple(_UTypes...) -> tuple<_UTypes...>; |
856 | template<typename _T1, typename _T2> |
857 | tuple(pair<_T1, _T2>) -> tuple<_T1, _T2>; |
858 | template<typename _Alloc, typename... _UTypes> |
859 | tuple(allocator_arg_t, _Alloc, _UTypes...) -> tuple<_UTypes...>; |
860 | template<typename _Alloc, typename _T1, typename _T2> |
861 | tuple(allocator_arg_t, _Alloc, pair<_T1, _T2>) -> tuple<_T1, _T2>; |
862 | template<typename _Alloc, typename... _UTypes> |
863 | tuple(allocator_arg_t, _Alloc, tuple<_UTypes...>) -> tuple<_UTypes...>; |
864 | #endif |
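
With these guides (C++17 and later), class template argument deduction works as sketched:

    std::tuple t1{1, 2.5};             // deduces tuple<int, double>
    std::tuple t2{std::pair{1, 'a'}};  // deduces tuple<int, char> via the pair guide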
865 | |
866 | // Explicit specialization, zero-element tuple. |
867 | template<> |
868 | class tuple<> |
869 | { |
870 | public: |
871 | void swap(tuple&) noexcept { /* no-op */ } |
872 | // We need the default since we're going to define no-op |
873 | // allocator constructors. |
874 | tuple() = default; |
875 | // No-op allocator constructors. |
876 | template<typename _Alloc> |
877 | _GLIBCXX20_CONSTEXPR |
878 | tuple(allocator_arg_t, const _Alloc&) noexcept { } |
879 | template<typename _Alloc> |
880 | _GLIBCXX20_CONSTEXPR |
881 | tuple(allocator_arg_t, const _Alloc&, const tuple&) noexcept { } |
882 | }; |
883 | |
884 | /// Partial specialization, 2-element tuple. |
885 | /// Includes construction and assignment from a pair. |
886 | template<typename _T1, typename _T2> |
887 | class tuple<_T1, _T2> : public _Tuple_impl<0, _T1, _T2> |
888 | { |
889 | typedef _Tuple_impl<0, _T1, _T2> _Inherited; |
890 | |
891 | // Constraint for non-explicit default constructor |
892 | template<bool _Dummy, typename _U1, typename _U2> |
893 | using _ImplicitDefaultCtor = __enable_if_t< |
894 | _TupleConstraints<_Dummy, _U1, _U2>:: |
895 | __is_implicitly_default_constructible(), |
896 | bool>; |
897 | |
898 | // Constraint for explicit default constructor |
899 | template<bool _Dummy, typename _U1, typename _U2> |
900 | using _ExplicitDefaultCtor = __enable_if_t< |
901 | _TupleConstraints<_Dummy, _U1, _U2>:: |
902 | __is_explicitly_default_constructible(), |
903 | bool>; |
904 | |
905 | template<bool _Dummy> |
906 | using _TCC = _TupleConstraints<_Dummy, _T1, _T2>; |
907 | |
908 | // Constraint for non-explicit constructors |
909 | template<bool _Cond, typename _U1, typename _U2> |
910 | using _ImplicitCtor = __enable_if_t< |
911 | _TCC<_Cond>::template __is_implicitly_constructible<_U1, _U2>(), |
912 | bool>; |
913 | |
914 | // Constraint for explicit constructors
915 | template<bool _Cond, typename _U1, typename _U2> |
916 | using _ExplicitCtor = __enable_if_t< |
917 | _TCC<_Cond>::template __is_explicitly_constructible<_U1, _U2>(), |
918 | bool>; |
919 | |
920 | template<typename _U1, typename _U2> |
921 | static constexpr bool __assignable() |
922 | { |
923 | return __and_<is_assignable<_T1&, _U1>, |
924 | is_assignable<_T2&, _U2>>::value; |
925 | } |
926 | |
927 | template<typename _U1, typename _U2> |
928 | static constexpr bool __nothrow_assignable() |
929 | { |
930 | return __and_<is_nothrow_assignable<_T1&, _U1>, |
931 | is_nothrow_assignable<_T2&, _U2>>::value; |
932 | } |
933 | |
934 | template<typename _U1, typename _U2> |
935 | static constexpr bool __nothrow_constructible() |
936 | { |
937 | return __and_<is_nothrow_constructible<_T1, _U1>, |
938 | is_nothrow_constructible<_T2, _U2>>::value; |
939 | } |
940 | |
941 | static constexpr bool __nothrow_default_constructible() |
942 | { |
943 | return __and_<is_nothrow_default_constructible<_T1>, |
944 | is_nothrow_default_constructible<_T2>>::value; |
945 | } |
946 | |
947 | template<typename _U1> |
948 | static constexpr bool __is_alloc_arg() |
949 | { return is_same<__remove_cvref_t<_U1>, allocator_arg_t>::value; } |
950 | |
951 | public: |
952 | template<bool _Dummy = true, |
953 | _ImplicitDefaultCtor<_Dummy, _T1, _T2> = true> |
954 | constexpr |
955 | tuple() |
956 | noexcept(__nothrow_default_constructible()) |
957 | : _Inherited() { } |
958 | |
959 | template<bool _Dummy = true, |
960 | _ExplicitDefaultCtor<_Dummy, _T1, _T2> = false> |
961 | explicit constexpr |
962 | tuple() |
963 | noexcept(__nothrow_default_constructible()) |
964 | : _Inherited() { } |
965 | |
966 | template<bool _Dummy = true, |
967 | _ImplicitCtor<_Dummy, const _T1&, const _T2&> = true> |
968 | constexpr |
969 | tuple(const _T1& __a1, const _T2& __a2) |
970 | noexcept(__nothrow_constructible<const _T1&, const _T2&>()) |
971 | : _Inherited(__a1, __a2) { } |
972 | |
973 | template<bool _Dummy = true, |
974 | _ExplicitCtor<_Dummy, const _T1&, const _T2&> = false> |
975 | explicit constexpr |
976 | tuple(const _T1& __a1, const _T2& __a2) |
977 | noexcept(__nothrow_constructible<const _T1&, const _T2&>()) |
978 | : _Inherited(__a1, __a2) { } |
979 | |
980 | template<typename _U1, typename _U2, |
981 | _ImplicitCtor<!__is_alloc_arg<_U1>(), _U1, _U2> = true> |
982 | constexpr |
983 | tuple(_U1&& __a1, _U2&& __a2) |
984 | noexcept(__nothrow_constructible<_U1, _U2>()) |
985 | : _Inherited(std::forward<_U1>(__a1), std::forward<_U2>(__a2)) { } |
986 | |
987 | template<typename _U1, typename _U2, |
988 | _ExplicitCtor<!__is_alloc_arg<_U1>(), _U1, _U2> = false> |
989 | explicit constexpr |
990 | tuple(_U1&& __a1, _U2&& __a2) |
991 | noexcept(__nothrow_constructible<_U1, _U2>()) |
992 | : _Inherited(std::forward<_U1>(__a1), std::forward<_U2>(__a2)) { } |
993 | |
994 | constexpr tuple(const tuple&) = default; |
995 | |
996 | constexpr tuple(tuple&&) = default; |
997 | |
998 | template<typename _U1, typename _U2, |
999 | _ImplicitCtor<true, const _U1&, const _U2&> = true> |
1000 | constexpr |
1001 | tuple(const tuple<_U1, _U2>& __in) |
1002 | noexcept(__nothrow_constructible<const _U1&, const _U2&>()) |
1003 | : _Inherited(static_cast<const _Tuple_impl<0, _U1, _U2>&>(__in)) { } |
1004 | |
1005 | template<typename _U1, typename _U2, |
1006 | _ExplicitCtor<true, const _U1&, const _U2&> = false> |
1007 | explicit constexpr |
1008 | tuple(const tuple<_U1, _U2>& __in) |
1009 | noexcept(__nothrow_constructible<const _U1&, const _U2&>()) |
1010 | : _Inherited(static_cast<const _Tuple_impl<0, _U1, _U2>&>(__in)) { } |
1011 | |
1012 | template<typename _U1, typename _U2, |
1013 | _ImplicitCtor<true, _U1, _U2> = true> |
1014 | constexpr |
1015 | tuple(tuple<_U1, _U2>&& __in) |
1016 | noexcept(__nothrow_constructible<_U1, _U2>()) |
1017 | : _Inherited(static_cast<_Tuple_impl<0, _U1, _U2>&&>(__in)) { } |
1018 | |
1019 | template<typename _U1, typename _U2, |
1020 | _ExplicitCtor<true, _U1, _U2> = false> |
1021 | explicit constexpr |
1022 | tuple(tuple<_U1, _U2>&& __in) |
1023 | noexcept(__nothrow_constructible<_U1, _U2>()) |
1024 | : _Inherited(static_cast<_Tuple_impl<0, _U1, _U2>&&>(__in)) { } |
1025 | |
1026 | template<typename _U1, typename _U2, |
1027 | _ImplicitCtor<true, const _U1&, const _U2&> = true> |
1028 | constexpr |
1029 | tuple(const pair<_U1, _U2>& __in) |
1030 | noexcept(__nothrow_constructible<const _U1&, const _U2&>()) |
1031 | : _Inherited(__in.first, __in.second) { } |
1032 | |
1033 | template<typename _U1, typename _U2, |
1034 | _ExplicitCtor<true, const _U1&, const _U2&> = false> |
1035 | explicit constexpr |
1036 | tuple(const pair<_U1, _U2>& __in) |
1037 | noexcept(__nothrow_constructible<const _U1&, const _U2&>()) |
1038 | : _Inherited(__in.first, __in.second) { } |
1039 | |
1040 | template<typename _U1, typename _U2, |
1041 | _ImplicitCtor<true, _U1, _U2> = true> |
1042 | constexpr |
1043 | tuple(pair<_U1, _U2>&& __in) |
1044 | noexcept(__nothrow_constructible<_U1, _U2>()) |
1045 | : _Inherited(std::forward<_U1>(__in.first), |
1046 | std::forward<_U2>(__in.second)) { } |
1047 | |
1048 | template<typename _U1, typename _U2, |
1049 | _ExplicitCtor<true, _U1, _U2> = false> |
1050 | explicit constexpr |
1051 | tuple(pair<_U1, _U2>&& __in) |
1052 | noexcept(__nothrow_constructible<_U1, _U2>()) |
1053 | : _Inherited(std::forward<_U1>(__in.first), |
1054 | std::forward<_U2>(__in.second)) { } |
1055 | |
1056 | // Allocator-extended constructors. |
1057 | |
1058 | template<typename _Alloc, |
1059 | _ImplicitDefaultCtor<is_object<_Alloc>::value, _T1, _T2> = true> |
1060 | _GLIBCXX20_CONSTEXPR |
1061 | tuple(allocator_arg_t __tag, const _Alloc& __a) |
1062 | : _Inherited(__tag, __a) { } |
1063 | |
1064 | template<typename _Alloc, bool _Dummy = true, |
1065 | _ImplicitCtor<_Dummy, const _T1&, const _T2&> = true> |
1066 | _GLIBCXX20_CONSTEXPR |
1067 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1068 | const _T1& __a1, const _T2& __a2) |
1069 | : _Inherited(__tag, __a, __a1, __a2) { } |
1070 | |
1071 | template<typename _Alloc, bool _Dummy = true, |
1072 | _ExplicitCtor<_Dummy, const _T1&, const _T2&> = false> |
1073 | explicit |
1074 | _GLIBCXX20_CONSTEXPR |
1075 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1076 | const _T1& __a1, const _T2& __a2) |
1077 | : _Inherited(__tag, __a, __a1, __a2) { } |
1078 | |
1079 | template<typename _Alloc, typename _U1, typename _U2, |
1080 | _ImplicitCtor<true, _U1, _U2> = true> |
1081 | _GLIBCXX20_CONSTEXPR |
1082 | tuple(allocator_arg_t __tag, const _Alloc& __a, _U1&& __a1, _U2&& __a2) |
1083 | : _Inherited(__tag, __a, std::forward<_U1>(__a1), |
1084 | std::forward<_U2>(__a2)) { } |
1085 | |
1086 | template<typename _Alloc, typename _U1, typename _U2, |
1087 | _ExplicitCtor<true, _U1, _U2> = false> |
1088 | explicit |
1089 | _GLIBCXX20_CONSTEXPR |
1090 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1091 | _U1&& __a1, _U2&& __a2) |
1092 | : _Inherited(__tag, __a, std::forward<_U1>(__a1), |
1093 | std::forward<_U2>(__a2)) { } |
1094 | |
1095 | template<typename _Alloc> |
1096 | _GLIBCXX20_CONSTEXPR |
1097 | tuple(allocator_arg_t __tag, const _Alloc& __a, const tuple& __in) |
1098 | : _Inherited(__tag, __a, static_cast<const _Inherited&>(__in)) { } |
1099 | |
1100 | template<typename _Alloc> |
1101 | _GLIBCXX20_CONSTEXPR |
1102 | tuple(allocator_arg_t __tag, const _Alloc& __a, tuple&& __in) |
1103 | : _Inherited(__tag, __a, static_cast<_Inherited&&>(__in)) { } |
1104 | |
1105 | template<typename _Alloc, typename _U1, typename _U2, |
1106 | _ImplicitCtor<true, const _U1&, const _U2&> = true> |
1107 | _GLIBCXX20_CONSTEXPR |
1108 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1109 | const tuple<_U1, _U2>& __in) |
1110 | : _Inherited(__tag, __a, |
1111 | static_cast<const _Tuple_impl<0, _U1, _U2>&>(__in)) |
1112 | { } |
1113 | |
1114 | template<typename _Alloc, typename _U1, typename _U2, |
1115 | _ExplicitCtor<true, const _U1&, const _U2&> = false> |
1116 | explicit |
1117 | _GLIBCXX20_CONSTEXPR |
1118 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1119 | const tuple<_U1, _U2>& __in) |
1120 | : _Inherited(__tag, __a, |
1121 | static_cast<const _Tuple_impl<0, _U1, _U2>&>(__in)) |
1122 | { } |
1123 | |
1124 | template<typename _Alloc, typename _U1, typename _U2, |
1125 | _ImplicitCtor<true, _U1, _U2> = true> |
1126 | _GLIBCXX20_CONSTEXPR |
1127 | tuple(allocator_arg_t __tag, const _Alloc& __a, tuple<_U1, _U2>&& __in) |
1128 | : _Inherited(__tag, __a, static_cast<_Tuple_impl<0, _U1, _U2>&&>(__in)) |
1129 | { } |
1130 | |
1131 | template<typename _Alloc, typename _U1, typename _U2, |
1132 | _ExplicitCtor<true, _U1, _U2> = false> |
1133 | explicit |
1134 | _GLIBCXX20_CONSTEXPR |
1135 | tuple(allocator_arg_t __tag, const _Alloc& __a, tuple<_U1, _U2>&& __in) |
1136 | : _Inherited(__tag, __a, static_cast<_Tuple_impl<0, _U1, _U2>&&>(__in)) |
1137 | { } |
1138 | |
1139 | template<typename _Alloc, typename _U1, typename _U2, |
1140 | _ImplicitCtor<true, const _U1&, const _U2&> = true> |
1141 | _GLIBCXX20_CONSTEXPR |
1142 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1143 | const pair<_U1, _U2>& __in) |
1144 | : _Inherited(__tag, __a, __in.first, __in.second) { } |
1145 | |
1146 | template<typename _Alloc, typename _U1, typename _U2, |
1147 | _ExplicitCtor<true, const _U1&, const _U2&> = false> |
1148 | explicit |
1149 | _GLIBCXX20_CONSTEXPR |
1150 | tuple(allocator_arg_t __tag, const _Alloc& __a, |
1151 | const pair<_U1, _U2>& __in) |
1152 | : _Inherited(__tag, __a, __in.first, __in.second) { } |
1153 | |
1154 | template<typename _Alloc, typename _U1, typename _U2, |
1155 | _ImplicitCtor<true, _U1, _U2> = true> |
1156 | _GLIBCXX20_CONSTEXPR |
1157 | tuple(allocator_arg_t __tag, const _Alloc& __a, pair<_U1, _U2>&& __in) |
1158 | : _Inherited(__tag, __a, std::forward<_U1>(__in.first), |
1159 | std::forward<_U2>(__in.second)) { } |
1160 | |
1161 | template<typename _Alloc, typename _U1, typename _U2, |
1162 | _ExplicitCtor<true, _U1, _U2> = false> |
1163 | explicit |
1164 | _GLIBCXX20_CONSTEXPR |
1165 | tuple(allocator_arg_t __tag, const _Alloc& __a, pair<_U1, _U2>&& __in) |
1166 | : _Inherited(__tag, __a, std::forward<_U1>(__in.first), |
1167 | std::forward<_U2>(__in.second)) { } |
1168 | |
1169 | // Tuple assignment. |
1170 | |
1171 | _GLIBCXX20_CONSTEXPR |
1172 | tuple& |
1173 | operator=(typename conditional<__assignable<const _T1&, const _T2&>(), |
1174 | const tuple&, |
1175 | const __nonesuch&>::type __in) |
1176 | noexcept(__nothrow_assignable<const _T1&, const _T2&>()) |
1177 | { |
1178 | this->_M_assign(__in); |
1179 | return *this; |
1180 | } |
1181 | |
1182 | _GLIBCXX20_CONSTEXPR |
1183 | tuple& |
1184 | operator=(typename conditional<__assignable<_T1, _T2>(), |
1185 | tuple&&, |
1186 | __nonesuch&&>::type __in) |
1187 | noexcept(__nothrow_assignable<_T1, _T2>()) |
1188 | { |
1189 | this->_M_assign(std::move(__in)); |
1190 | return *this; |
1191 | } |
1192 | |
1193 | template<typename _U1, typename _U2> |
1194 | _GLIBCXX20_CONSTEXPR |
1195 | __enable_if_t<__assignable<const _U1&, const _U2&>(), tuple&> |
1196 | operator=(const tuple<_U1, _U2>& __in) |
1197 | noexcept(__nothrow_assignable<const _U1&, const _U2&>()) |
1198 | { |
1199 | this->_M_assign(__in); |
1200 | return *this; |
1201 | } |
1202 | |
1203 | template<typename _U1, typename _U2> |
1204 | _GLIBCXX20_CONSTEXPR |
1205 | __enable_if_t<__assignable<_U1, _U2>(), tuple&> |
1206 | operator=(tuple<_U1, _U2>&& __in) |
1207 | noexcept(__nothrow_assignable<_U1, _U2>()) |
1208 | { |
1209 | this->_M_assign(std::move(__in)); |
1210 | return *this; |
1211 | } |
1212 | |
1213 | template<typename _U1, typename _U2> |
1214 | _GLIBCXX20_CONSTEXPR |
1215 | __enable_if_t<__assignable<const _U1&, const _U2&>(), tuple&> |
1216 | operator=(const pair<_U1, _U2>& __in) |
1217 | noexcept(__nothrow_assignable<const _U1&, const _U2&>()) |
1218 | { |
1219 | this->_M_head(*this) = __in.first; |
1220 | this->_M_tail(*this)._M_head(*this) = __in.second; |
1221 | return *this; |
1222 | } |
1223 | |
1224 | template<typename _U1, typename _U2> |
1225 | _GLIBCXX20_CONSTEXPR |
1226 | __enable_if_t<__assignable<_U1, _U2>(), tuple&> |
1227 | operator=(pair<_U1, _U2>&& __in) |
1228 | noexcept(__nothrow_assignable<_U1, _U2>()) |
1229 | { |
1230 | this->_M_head(*this) = std::forward<_U1>(__in.first); |
1231 | this->_M_tail(*this)._M_head(*this) = std::forward<_U2>(__in.second); |
1232 | return *this; |
1233 | } |
1234 | |
1235 | _GLIBCXX20_CONSTEXPR |
1236 | void |
1237 | swap(tuple& __in) |
1238 | noexcept(__and_<__is_nothrow_swappable<_T1>, |
1239 | __is_nothrow_swappable<_T2>>::value) |
1240 | { _Inherited::_M_swap(__in); } |
1241 | }; |
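
A short sketch of the pair interoperation this specialization provides:

    std::pair<int, double> p{1, 2.5};
    std::tuple<int, double> t = p;   // tuple(const pair<_U1, _U2>&)
    t = std::make_pair(3, 4.5);      // operator=(pair<_U1, _U2>&&)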
1242 | |
1243 | |
1244 | /// class tuple_size |
1245 | template<typename... _Elements> |
1246 | struct tuple_size<tuple<_Elements...>> |
1247 | : public integral_constant<std::size_t, sizeof...(_Elements)> { }; |
1248 | |
1249 | #if __cplusplus > 201402L
1250 | template <typename _Tp> |
1251 | inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value; |
1252 | #endif |
1253 | |
1254 | /** |
1255 | * Recursive case for tuple_element: strip off the first element in |
1256 | * the tuple and retrieve the (i-1)th element of the remaining tuple. |
1257 | */ |
1258 | template<std::size_t __i, typename _Head, typename... _Tail> |
1259 | struct tuple_element<__i, tuple<_Head, _Tail...> > |
1260 | : tuple_element<__i - 1, tuple<_Tail...> > { }; |
1261 | |
1262 | /** |
1263 | * Basis case for tuple_element: The first element is the one we're seeking. |
1264 | */ |
1265 | template<typename _Head, typename... _Tail> |
1266 | struct tuple_element<0, tuple<_Head, _Tail...> > |
1267 | { |
1268 | typedef _Head type; |
1269 | }; |
1270 | |
1271 | /** |
1272 | * Error case for tuple_element: invalid index. |
1273 | */ |
1274 | template<size_t __i> |
1275 | struct tuple_element<__i, tuple<>> |
1276 | { |
1277 | static_assert(__i < tuple_size<tuple<>>::value, |
1278 | "tuple index is in range"); |
1279 | }; |
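
The recursion resolves an index by peeling off leading elements; for example:

    // tuple_element<1, tuple<int, char, long>>
    //   -> tuple_element<0, tuple<char, long>>  // strip int, decrement index
    //   -> type = char                          // basis case
    static_assert(std::is_same<
        std::tuple_element<1, std::tuple<int, char, long>>::type,
        char>::value, "");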
1280 | |
1281 | template<std::size_t __i, typename _Head, typename... _Tail> |
1282 | constexpr _Head& |
1283 | __get_helper(_Tuple_impl<__i, _Head, _Tail...>& __t) noexcept |
1284 | { return _Tuple_impl<__i, _Head, _Tail...>::_M_head(__t); } |
1285 | |
1286 | template<std::size_t __i, typename _Head, typename... _Tail> |
1287 | constexpr const _Head& |
1288 | __get_helper(const _Tuple_impl<__i, _Head, _Tail...>& __t) noexcept |
1289 | { return _Tuple_impl<__i, _Head, _Tail...>::_M_head(__t); } |
1290 | |
1291 | /// Return a reference to the ith element of a tuple. |
1292 | template<std::size_t __i, typename... _Elements> |
1293 | constexpr __tuple_element_t<__i, tuple<_Elements...>>& |
1294 | get(tuple<_Elements...>& __t) noexcept |
1295 | { return std::__get_helper<__i>(__t); } |
1296 | |
1297 | /// Return a const reference to the ith element of a const tuple. |
1298 | template<std::size_t __i, typename... _Elements> |
1299 | constexpr const __tuple_element_t<__i, tuple<_Elements...>>& |
1300 | get(const tuple<_Elements...>& __t) noexcept |
1301 | { return std::__get_helper<__i>(__t); } |
1302 | |
1303 | /// Return an rvalue reference to the ith element of a tuple rvalue. |
1304 | template<std::size_t __i, typename... _Elements> |
1305 | constexpr __tuple_element_t<__i, tuple<_Elements...>>&& |
1306 | get(tuple<_Elements...>&& __t) noexcept |
1307 | { |
1308 | typedef __tuple_element_t<__i, tuple<_Elements...>> __element_type; |
1309 | return std::forward<__element_type&&>(std::get<__i>(__t)); |
1310 | } |
1311 | |
1312 | /// Return a const rvalue reference to the ith element of a const tuple rvalue. |
1313 | template<std::size_t __i, typename... _Elements> |
1314 | constexpr const __tuple_element_t<__i, tuple<_Elements...>>&& |
1315 | get(const tuple<_Elements...>&& __t) noexcept |
1316 | { |
1317 | typedef __tuple_element_t<__i, tuple<_Elements...>> __element_type; |
1318 | return std::forward<const __element_type&&>(std::get<__i>(__t)); |
1319 | } |
1320 | |
1321 | #if __cplusplus >= 201402L
1322 | |
1323 | #define __cpp_lib_tuples_by_type 201304
1324 | |
1325 | template<typename _Head, size_t __i, typename... _Tail> |
1326 | constexpr _Head& |
1327 | __get_helper2(_Tuple_impl<__i, _Head, _Tail...>& __t) noexcept |
1328 | { return _Tuple_impl<__i, _Head, _Tail...>::_M_head(__t); } |
1329 | |
1330 | template<typename _Head, size_t __i, typename... _Tail> |
1331 | constexpr const _Head& |
1332 | __get_helper2(const _Tuple_impl<__i, _Head, _Tail...>& __t) noexcept |
1333 | { return _Tuple_impl<__i, _Head, _Tail...>::_M_head(__t); } |
1334 | |
1335 | /// Return a reference to the unique element of type _Tp of a tuple. |
1336 | template <typename _Tp, typename... _Types> |
1337 | constexpr _Tp& |
1338 | get(tuple<_Types...>& __t) noexcept |
1339 | { return std::__get_helper2<_Tp>(__t); } |
1340 | |
1341 | /// Return an rvalue reference to the unique element of type _Tp of a tuple rvalue.
1342 | template <typename _Tp, typename... _Types> |
1343 | constexpr _Tp&& |
1344 | get(tuple<_Types...>&& __t) noexcept |
1345 | { return std::forward<_Tp&&>(std::__get_helper2<_Tp>(__t)); } |
1346 | |
1347 | /// Return a const reference to the unique element of type _Tp of a tuple. |
1348 | template <typename _Tp, typename... _Types> |
1349 | constexpr const _Tp& |
1350 | get(const tuple<_Types...>& __t) noexcept |
1351 | { return std::__get_helper2<_Tp>(__t); } |
1352 | |
1353 | /// Return a const rvalue reference to the unique element of type _Tp of
1354 | /// a const tuple rvalue.
1355 | template <typename _Tp, typename... _Types> |
1356 | constexpr const _Tp&& |
1357 | get(const tuple<_Types...>&& __t) noexcept |
1358 | { return std::forward<const _Tp&&>(std::__get_helper2<_Tp>(__t)); } |
1359 | #endif |
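
Typical usage of the get overloads above, by index and (since C++14) by unique type:

    std::tuple<int, std::string> t{1, "one"};
    int& i = std::get<0>(t);                    // by index
    std::string& s = std::get<std::string>(t);  // by type, unique within t
    std::string m = std::get<1>(std::move(t));  // rvalue overload moves the element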
1360 | |
1361 | // This class performs the comparison operations on tuples |
1362 | template<typename _Tp, typename _Up, size_t __i, size_t __size> |
1363 | struct __tuple_compare |
1364 | { |
1365 | static constexpr bool |
1366 | __eq(const _Tp& __t, const _Up& __u) |
1367 | { |
1368 | return bool(std::get<__i>(__t) == std::get<__i>(__u)) |
1369 | && __tuple_compare<_Tp, _Up, __i + 1, __size>::__eq(__t, __u); |
1370 | } |
1371 | |
1372 | static constexpr bool |
1373 | __less(const _Tp& __t, const _Up& __u) |
1374 | { |
1375 | return bool(std::get<__i>(__t) < std::get<__i>(__u)) |
1376 | || (!bool(std::get<__i>(__u) < std::get<__i>(__t)) |
1377 | && __tuple_compare<_Tp, _Up, __i + 1, __size>::__less(__t, __u)); |
1378 | } |
1379 | }; |
1380 | |
1381 | template<typename _Tp, typename _Up, size_t __size> |
1382 | struct __tuple_compare<_Tp, _Up, __size, __size> |
1383 | { |
1384 | static constexpr bool |
1385 | __eq(const _Tp&, const _Up&) { return true; } |
1386 | |
1387 | static constexpr bool |
1388 | __less(const _Tp&, const _Up&) { return false; } |
1389 | }; |
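
This yields the usual lexicographic ordering, built from only == and < on the elements; for example:

    static_assert(std::make_tuple(1, 2) < std::make_tuple(1, 3), "");
    static_assert(!(std::make_tuple(2, 0) < std::make_tuple(1, 9)), "");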
1390 | |
1391 | template<typename... _TElements, typename... _UElements> |
1392 | constexpr bool |
1393 | operator==(const tuple<_TElements...>& __t, |
1394 | const tuple<_UElements...>& __u) |
1395 | { |
1396 | static_assert(sizeof...(_TElements) == sizeof...(_UElements), |
1397 | "tuple objects can only be compared if they have equal sizes."); |
1398 | using __compare = __tuple_compare<tuple<_TElements...>, |
1399 | tuple<_UElements...>, |
1400 | 0, sizeof...(_TElements)>; |
1401 | return __compare::__eq(__t, __u); |
1402 | } |
1403 | |
1404 | #if __cpp_lib_three_way_comparison |
1405 | template<typename _Cat, typename _Tp, typename _Up> |
1406 | constexpr _Cat |
1407 | __tuple_cmp(const _Tp&, const _Up&, index_sequence<>) |
1408 | { return _Cat::equivalent; } |
1409 | |
1410 | template<typename _Cat, typename _Tp, typename _Up, |
1411 | size_t _Idx0, size_t... _Idxs> |
1412 | constexpr _Cat |
1413 | __tuple_cmp(const _Tp& __t, const _Up& __u, |
1414 | index_sequence<_Idx0, _Idxs...>) |
1415 | { |
1416 | auto __c |
1417 | = __detail::__synth3way(std::get<_Idx0>(__t), std::get<_Idx0>(__u)); |
1418 | if (__c != 0) |
1419 | return __c; |
1420 | return std::__tuple_cmp<_Cat>(__t, __u, index_sequence<_Idxs...>()); |
1421 | } |
1422 | |
1423 | template<typename... _Tps, typename... _Ups> |
1424 | constexpr |
1425 | common_comparison_category_t<__detail::__synth3way_t<_Tps, _Ups>...> |
1426 | operator<=>(const tuple<_Tps...>& __t, const tuple<_Ups...>& __u) |
1427 | { |
1428 | using _Cat |
1429 | = common_comparison_category_t<__detail::__synth3way_t<_Tps, _Ups>...>; |
1430 | return std::__tuple_cmp<_Cat>(__t, __u, index_sequence_for<_Tps...>()); |
1431 | } |
1432 | #else |
1433 | template<typename... _TElements, typename... _UElements> |
1434 | constexpr bool |
1435 | operator<(const tuple<_TElements...>& __t, |
1436 | const tuple<_UElements...>& __u) |
1437 | { |
1438 | static_assert(sizeof...(_TElements) == sizeof...(_UElements), |
1439 | "tuple objects can only be compared if they have equal sizes."); |
1440 | using __compare = __tuple_compare<tuple<_TElements...>, |
1441 | tuple<_UElements...>, |
1442 | 0, sizeof...(_TElements)>; |
1443 | return __compare::__less(__t, __u); |
1444 | } |
1445 | |
1446 | template<typename... _TElements, typename... _UElements> |
1447 | constexpr bool |
1448 | operator!=(const tuple<_TElements...>& __t, |
1449 | const tuple<_UElements...>& __u) |
1450 | { return !(__t == __u); } |
1451 | |
1452 | template<typename... _TElements, typename... _UElements> |
1453 | constexpr bool |
1454 | operator>(const tuple<_TElements...>& __t, |
1455 | const tuple<_UElements...>& __u) |
1456 | { return __u < __t; } |
1457 | |
1458 | template<typename... _TElements, typename... _UElements> |
1459 | constexpr bool |
1460 | operator<=(const tuple<_TElements...>& __t, |
1461 | const tuple<_UElements...>& __u) |
1462 | { return !(__u < __t); } |
1463 | |
1464 | template<typename... _TElements, typename... _UElements> |
1465 | constexpr bool |
1466 | operator>=(const tuple<_TElements...>& __t, |
1467 | const tuple<_UElements...>& __u) |
1468 | { return !(__t < __u); } |
1469 | #endif // three_way_comparison |
1470 | |
1471 | // NB: DR 705. |
1472 | template<typename... _Elements> |
1473 | constexpr tuple<typename __decay_and_strip<_Elements>::__type...> |
1474 | make_tuple(_Elements&&... __args) |
1475 | { |
1476 | typedef tuple<typename __decay_and_strip<_Elements>::__type...> |
1477 | __result_type; |
1478 | return __result_type(std::forward<_Elements>(__args)...); |
1479 | } |
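
Note the __decay_and_strip step: arguments decay, and a std::reference_wrapper is unwrapped into a real reference. For example:

    int x = 0;
    auto t = std::make_tuple(1, std::ref(x));  // std::tuple<int, int&>
    std::get<1>(t) = 5;                        // writes through to x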
1480 | |
1481 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
1482 | // 2275. Why is forward_as_tuple not constexpr? |
1483 | /// std::forward_as_tuple |
1484 | template<typename... _Elements> |
1485 | constexpr tuple<_Elements&&...> |
1486 | forward_as_tuple(_Elements&&... __args) noexcept |
1487 | { return tuple<_Elements&&...>(std::forward<_Elements>(__args)...); } |
1488 | |
1489 | template<size_t, typename, typename, size_t> |
1490 | struct __make_tuple_impl; |
1491 | |
1492 | template<size_t _Idx, typename _Tuple, typename... _Tp, size_t _Nm> |
1493 | struct __make_tuple_impl<_Idx, tuple<_Tp...>, _Tuple, _Nm> |
1494 | : __make_tuple_impl<_Idx + 1, |
1495 | tuple<_Tp..., __tuple_element_t<_Idx, _Tuple>>, |
1496 | _Tuple, _Nm> |
1497 | { }; |
1498 | |
1499 | template<std::size_t _Nm, typename _Tuple, typename... _Tp> |
1500 | struct __make_tuple_impl<_Nm, tuple<_Tp...>, _Tuple, _Nm> |
1501 | { |
1502 | typedef tuple<_Tp...> __type; |
1503 | }; |
1504 | |
1505 | template<typename _Tuple> |
1506 | struct __do_make_tuple |
1507 | : __make_tuple_impl<0, tuple<>, _Tuple, std::tuple_size<_Tuple>::value> |
1508 | { }; |
1509 | |
1510 | // Returns the std::tuple equivalent of a tuple-like type. |
1511 | template<typename _Tuple> |
1512 | struct __make_tuple |
1513 | : public __do_make_tuple<__remove_cvref_t<_Tuple>> |
1514 | { }; |
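     | // Editorial sketch of the helpers above: __make_tuple maps any
     | // tuple-like type, with cv-qualifiers and references stripped, to the
     | // corresponding std::tuple, e.g.
     | //   is_same<typename __make_tuple<pair<int, char>&>::__type,
     | //           tuple<int, char>>::value  // true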
1515 | |
1516 | // Combines several std::tuple instances into a single one.
1517 | template<typename...> |
1518 | struct __combine_tuples; |
1519 | |
1520 | template<> |
1521 | struct __combine_tuples<> |
1522 | { |
1523 | typedef tuple<> __type; |
1524 | }; |
1525 | |
1526 | template<typename... _Ts> |
1527 | struct __combine_tuples<tuple<_Ts...>> |
1528 | { |
1529 | typedef tuple<_Ts...> __type; |
1530 | }; |
1531 | |
1532 | template<typename... _T1s, typename... _T2s, typename... _Rem> |
1533 | struct __combine_tuples<tuple<_T1s...>, tuple<_T2s...>, _Rem...> |
1534 | { |
1535 | typedef typename __combine_tuples<tuple<_T1s..., _T2s...>, |
1536 | _Rem...>::__type __type; |
1537 | }; |
1538 | |
1539 | // Computes the result type of tuple_cat given a set of tuple-like types. |
1540 | template<typename... _Tpls> |
1541 | struct __tuple_cat_result |
1542 | { |
1543 | typedef typename __combine_tuples |
1544 | <typename __make_tuple<_Tpls>::__type...>::__type __type; |
1545 | }; |
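     | // Editorial sketch: __combine_tuples folds left-to-right, so
     | // __tuple_cat_result preserves argument order, e.g.
     | //   typename __tuple_cat_result<tuple<int>, pair<char, long>&>::__type
     | //   // is tuple<int, char, long>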
1546 | |
1547 | // Helper to determine the index set for the first tuple-like |
1548 | // type of a given set. |
1549 | template<typename...> |
1550 | struct __make_1st_indices; |
1551 | |
1552 | template<> |
1553 | struct __make_1st_indices<> |
1554 | { |
1555 | typedef std::_Index_tuple<> __type; |
1556 | }; |
1557 | |
1558 | template<typename _Tp, typename... _Tpls> |
1559 | struct __make_1st_indices<_Tp, _Tpls...> |
1560 | { |
1561 | typedef typename std::_Build_index_tuple<std::tuple_size< |
1562 | typename std::remove_reference<_Tp>::type>::value>::__type __type; |
1563 | }; |
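     | // Editorial note: for a first argument whose tuple_size is N this
     | // yields _Index_tuple<0, ..., N-1>; an empty argument list produces
     | // _Index_tuple<>, which selects the terminating __tuple_concater
     | // specialization below.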
1564 | |
1565 | // Performs the actual concatenation by step-wise expanding tuple-like |
1566 | // objects into the elements, which are finally forwarded into the |
1567 | // result tuple. |
1568 | template<typename _Ret, typename _Indices, typename... _Tpls> |
1569 | struct __tuple_concater; |
1570 | |
1571 | template<typename _Ret, std::size_t... _Is, typename _Tp, typename... _Tpls> |
1572 | struct __tuple_concater<_Ret, std::_Index_tuple<_Is...>, _Tp, _Tpls...> |
1573 | { |
1574 | template<typename... _Us> |
1575 | static constexpr _Ret |
1576 | _S_do(_Tp&& __tp, _Tpls&&... __tps, _Us&&... __us) |
1577 | { |
1578 | typedef typename __make_1st_indices<_Tpls...>::__type __idx; |
1579 | typedef __tuple_concater<_Ret, __idx, _Tpls...> __next; |
1580 | return __next::_S_do(std::forward<_Tpls>(__tps)..., |
1581 | std::forward<_Us>(__us)..., |
1582 | std::get<_Is>(std::forward<_Tp>(__tp))...); |
1583 | } |
1584 | }; |
1585 | |
1586 | template<typename _Ret> |
1587 | struct __tuple_concater<_Ret, std::_Index_tuple<>> |
1588 | { |
1589 | template<typename... _Us> |
1590 | static constexpr _Ret |
1591 | _S_do(_Us&&... __us) |
1592 | { |
1593 | return _Ret(std::forward<_Us>(__us)...); |
1594 | } |
1595 | }; |
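     | // Editorial trace (hypothetical two-tuple call): each _S_do step peels
     | // one tuple off the front and appends its elements to the pack __us:
     | //   tuple_cat(t1, t2)
     | //     -> _S_do(t1, t2)                            // __us empty
     | //     -> _S_do(t2, get<0>(t1), ...)               // t1 expanded
     | //     -> _S_do(get<0>(t1), ..., get<0>(t2), ...)  // t2 expanded
     | //     -> _Ret(__us...)                            // base case builds result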
1596 | |
1597 | /// tuple_cat |
1598 | template<typename... _Tpls, typename = typename |
1599 | enable_if<__and_<__is_tuple_like<_Tpls>...>::value>::type> |
1600 | constexpr auto |
1601 | tuple_cat(_Tpls&&... __tpls) |
1602 | -> typename __tuple_cat_result<_Tpls...>::__type |
1603 | { |
1604 | typedef typename __tuple_cat_result<_Tpls...>::__type __ret; |
1605 | typedef typename __make_1st_indices<_Tpls...>::__type __idx; |
1606 | typedef __tuple_concater<__ret, __idx, _Tpls...> __concater; |
1607 | return __concater::_S_do(std::forward<_Tpls>(__tpls)...); |
1608 | } |
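     | // Usage sketch (editorial; names are illustrative): tuple_cat accepts
     | // any mix of tuple-like arguments (tuple, pair, array) and
     | // concatenates their elements in order.
     | //   #include <tuple>
     | //   #include <utility>
     | //   auto t = std::tuple_cat(std::make_tuple(1, 2.0),
     | //                           std::make_pair('x', "y"));
     | //   // decltype(t) is std::tuple<int, double, char, const char*>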
1609 | |
1610 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
1611 | // 2301. Why is tie not constexpr? |
1612 | /// tie |
1613 | template<typename... _Elements> |
1614 | constexpr tuple<_Elements&...> |
1615 | tie(_Elements&... __args) noexcept |
1616 | { return tuple<_Elements&...>(__args...); } |
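     | // Usage sketch (editorial; names are illustrative): tie builds a tuple
     | // of lvalue references, typically to unpack another tuple on
     | // assignment; std::ignore (defined below) discards a position.
     | //   #include <tuple>
     | //   int a = 0; char c = 0;
     | //   std::tie(a, std::ignore, c) = std::make_tuple(1, 2.5, 'x');
     | //   // a == 1, c == 'x'; the 2.5 is assigned to std::ignore and dropped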
1617 | |
1618 | /// swap |
1619 | template<typename... _Elements> |
1620 | _GLIBCXX20_CONSTEXPR |
1621 | inline |
1622 | #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
1623 | // Constrained free swap overload, see p0185r1 |
1624 | typename enable_if<__and_<__is_swappable<_Elements>...>::value |
1625 | >::type |
1626 | #else |
1627 | void |
1628 | #endif |
1629 | swap(tuple<_Elements...>& __x, tuple<_Elements...>& __y) |
1630 | noexcept(noexcept(__x.swap(__y))) |
1631 | { __x.swap(__y); } |
1632 | |
1633 | #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
1634 | template<typename... _Elements> |
1635 | _GLIBCXX20_CONSTEXPR |
1636 | typename enable_if<!__and_<__is_swappable<_Elements>...>::value>::type |
1637 | swap(tuple<_Elements...>&, tuple<_Elements...>&) = delete; |
1638 | #endif |
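     | // Editorial note: when some element type is not swappable, the
     | // constrained overload drops out and the deleted overload above is
     | // selected instead of the generic std::swap, so
     | // is_swappable<tuple<...>> correctly reports false (see P0185R1).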
1639 | |
1640 | // A class (and instance) which can be used in 'tie' when an element |
1641 | // of a tuple is not required. |
1642 | // _GLIBCXX14_CONSTEXPR |
1643 | // 2933. PR for LWG 2773 could be clearer |
1644 | struct _Swallow_assign |
1645 | { |
1646 | template<class _Tp> |
1647 | _GLIBCXX14_CONSTEXPR const _Swallow_assign&
1648 | operator=(const _Tp&) const |
1649 | { return *this; } |
1650 | }; |
1651 | |
1652 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
1653 | // 2773. Making std::ignore constexpr |
1654 | _GLIBCXX17_INLINE constexpr _Swallow_assign ignore{}; |
1655 | |
1656 | /// Partial specialization for tuples |
1657 | template<typename... _Types, typename _Alloc> |
1658 | struct uses_allocator<tuple<_Types...>, _Alloc> : true_type { }; |
1659 | |
1660 | // See stl_pair.h... |
1661 | /** "piecewise construction" using a tuple of arguments for each member. |
1662 | * |
1663 | * @param __first Arguments for the first member of the pair. |
1664 | * @param __second Arguments for the second member of the pair. |
1665 | * |
1666 | * The elements of each tuple will be used as the constructor arguments |
1667 | * for the data members of the pair. |
1668 | */ |
1669 | template<class _T1, class _T2> |
1670 | template<typename... _Args1, typename... _Args2> |
1671 | _GLIBCXX20_CONSTEXPR |
1672 | inline |
1673 | pair<_T1, _T2>:: |
1674 | pair(piecewise_construct_t, |
1675 | tuple<_Args1...> __first, tuple<_Args2...> __second) |
1676 | : pair(__first, __second, |
1677 | typename _Build_index_tuple<sizeof...(_Args1)>::__type(), |
1678 | typename _Build_index_tuple<sizeof...(_Args2)>::__type()) |
1679 | { } |
1680 | |
1681 | template<class _T1, class _T2> |
1682 | template<typename... _Args1, std::size_t... _Indexes1, |
1683 | typename... _Args2, std::size_t... _Indexes2> |
1684 | _GLIBCXX20_CONSTEXPR inline |
1685 | pair<_T1, _T2>:: |
1686 | pair(tuple<_Args1...>& __tuple1, tuple<_Args2...>& __tuple2, |
1687 | _Index_tuple<_Indexes1...>, _Index_tuple<_Indexes2...>) |
1688 | : first(std::forward<_Args1>(std::get<_Indexes1>(__tuple1))...), |
1689 | second(std::forward<_Args2>(std::get<_Indexes2>(__tuple2))...) |
1690 | { } |
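     | // Usage sketch (editorial; names are illustrative): each tuple's
     | // elements are forwarded directly to the matching member's
     | // constructor, with no intermediate _T1/_T2 temporaries.
     | //   #include <string>
     | //   #include <tuple>
     | //   #include <utility>
     | //   std::pair<std::string, std::string> p(std::piecewise_construct,
     | //       std::forward_as_tuple(3, 'x'), std::forward_as_tuple("abc"));
     | //   // p.first == "xxx" (string(3, 'x')), p.second == "abc"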
1691 | |
1692 | #if __cplusplus >= 201703L
1693 | |
1694 | // Unpack a std::tuple into a type trait and use its value. |
1695 | // For cv std::tuple<_Up> the result is _Trait<_Tp, cv _Up...>::value. |
1696 | // For cv std::tuple<_Up>& the result is _Trait<_Tp, cv _Up&...>::value. |
1697 | // Otherwise the result is false (because we don't know if std::get throws). |
1698 | template<template<typename...> class _Trait, typename _Tp, typename _Tuple> |
1699 | inline constexpr bool __unpack_std_tuple = false; |
1700 | |
1701 | template<template<typename...> class _Trait, typename _Tp, typename... _Up> |
1702 | inline constexpr bool __unpack_std_tuple<_Trait, _Tp, tuple<_Up...>> |
1703 | = _Trait<_Tp, _Up...>::value; |
1704 | |
1705 | template<template<typename...> class _Trait, typename _Tp, typename... _Up> |
1706 | inline constexpr bool __unpack_std_tuple<_Trait, _Tp, tuple<_Up...>&> |
1707 | = _Trait<_Tp, _Up&...>::value; |
1708 | |
1709 | template<template<typename...> class _Trait, typename _Tp, typename... _Up> |
1710 | inline constexpr bool __unpack_std_tuple<_Trait, _Tp, const tuple<_Up...>> |
1711 | = _Trait<_Tp, const _Up...>::value; |
1712 | |
1713 | template<template<typename...> class _Trait, typename _Tp, typename... _Up> |
1714 | inline constexpr bool __unpack_std_tuple<_Trait, _Tp, const tuple<_Up...>&> |
1715 | = _Trait<_Tp, const _Up&...>::value; |
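     | // Editorial sketch: these specializations let apply and
     | // make_from_tuple compute their noexcept-specifications, e.g.
     | //   __unpack_std_tuple<is_nothrow_invocable, F, tuple<int>&>
     | //   // == is_nothrow_invocable<F, int&>::value
     | // while an unrecognized tuple-like type conservatively yields false.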
1716 | |
1717 | # define __cpp_lib_apply 201603 |
1718 | |
1719 | template <typename _Fn, typename _Tuple, size_t... _Idx> |
1720 | constexpr decltype(auto) |
1721 | __apply_impl(_Fn&& __f, _Tuple&& __t, index_sequence<_Idx...>) |
1722 | { |
1723 | return std::__invoke(std::forward<_Fn>(__f), |
1724 | std::get<_Idx>(std::forward<_Tuple>(__t))...); |
1725 | } |
1726 | |
1727 | template <typename _Fn, typename _Tuple> |
1728 | constexpr decltype(auto) |
1729 | apply(_Fn&& __f, _Tuple&& __t) |
1730 | noexcept(__unpack_std_tuple<is_nothrow_invocable, _Fn, _Tuple>) |
1731 | { |
1732 | using _Indices |
1733 | = make_index_sequence<tuple_size_v<remove_reference_t<_Tuple>>>; |
1734 | return std::__apply_impl(std::forward<_Fn>(__f), |
1735 | std::forward<_Tuple>(__t), |
1736 | _Indices{}); |
1737 | } |
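     | // Usage sketch (editorial; names are illustrative): apply invokes a
     | // callable with the tuple's elements as arguments.
     | //   #include <tuple>
     | //   int sum = std::apply([](int a, int b) { return a + b; },
     | //                        std::make_tuple(2, 3));  // sum == 5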
1738 | |
1739 | #define __cpp_lib_make_from_tuple 201606 |
1740 | |
1741 | template <typename _Tp, typename _Tuple, size_t... _Idx> |
1742 | constexpr _Tp |
1743 | __make_from_tuple_impl(_Tuple&& __t, index_sequence<_Idx...>) |
1744 | { return _Tp(std::get<_Idx>(std::forward<_Tuple>(__t))...); } |
1745 | |
1746 | template <typename _Tp, typename _Tuple> |
1747 | constexpr _Tp |
1748 | make_from_tuple(_Tuple&& __t) |
1749 | noexcept(__unpack_std_tuple<is_nothrow_constructible, _Tp, _Tuple>) |
1750 | { |
1751 | return __make_from_tuple_impl<_Tp>( |
1752 | std::forward<_Tuple>(__t), |
1753 | make_index_sequence<tuple_size_v<remove_reference_t<_Tuple>>>{}); |
1754 | } |
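     | // Usage sketch (editorial; Point is a hypothetical type):
     | //   #include <tuple>
     | //   struct Point { int x, y; Point(int x, int y) : x(x), y(y) { } };
     | //   auto p = std::make_from_tuple<Point>(std::make_tuple(1, 2));
     | //   // p.x == 1, p.y == 2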
1755 | #endif // C++17 |
1756 | |
1757 | /// @} |
1758 | |
1759 | _GLIBCXX_END_NAMESPACE_VERSION |
1760 | } // namespace std |
1761 | |
1762 | #endif // C++11 |
1763 | |
1764 | #endif // _GLIBCXX_TUPLE |