File: | llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp |
Warning: | line 5964, column 67 The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// | ||||||
2 | // | ||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
6 | // | ||||||
7 | //===----------------------------------------------------------------------===// | ||||||
8 | /// \file | ||||||
9 | /// This file implements the targeting of the InstructionSelector class for | ||||||
10 | /// AArch64. | ||||||
11 | /// \todo This should be generated by TableGen. | ||||||
12 | //===----------------------------------------------------------------------===// | ||||||
13 | |||||||
14 | #include "AArch64GlobalISelUtils.h" | ||||||
15 | #include "AArch64InstrInfo.h" | ||||||
16 | #include "AArch64MachineFunctionInfo.h" | ||||||
17 | #include "AArch64RegisterBankInfo.h" | ||||||
18 | #include "AArch64RegisterInfo.h" | ||||||
19 | #include "AArch64Subtarget.h" | ||||||
20 | #include "AArch64TargetMachine.h" | ||||||
21 | #include "AArch64GlobalISelUtils.h" | ||||||
22 | #include "MCTargetDesc/AArch64AddressingModes.h" | ||||||
23 | #include "MCTargetDesc/AArch64MCTargetDesc.h" | ||||||
24 | #include "llvm/ADT/Optional.h" | ||||||
25 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" | ||||||
26 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" | ||||||
27 | #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" | ||||||
28 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" | ||||||
29 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" | ||||||
30 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||||||
31 | #include "llvm/CodeGen/MachineConstantPool.h" | ||||||
32 | #include "llvm/CodeGen/MachineFunction.h" | ||||||
33 | #include "llvm/CodeGen/MachineInstr.h" | ||||||
34 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||||
35 | #include "llvm/CodeGen/MachineMemOperand.h" | ||||||
36 | #include "llvm/CodeGen/MachineOperand.h" | ||||||
37 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||||
38 | #include "llvm/CodeGen/TargetOpcodes.h" | ||||||
39 | #include "llvm/IR/Constants.h" | ||||||
40 | #include "llvm/IR/DerivedTypes.h" | ||||||
41 | #include "llvm/IR/Instructions.h" | ||||||
42 | #include "llvm/IR/PatternMatch.h" | ||||||
43 | #include "llvm/IR/Type.h" | ||||||
44 | #include "llvm/IR/IntrinsicsAArch64.h" | ||||||
45 | #include "llvm/Pass.h" | ||||||
46 | #include "llvm/Support/Debug.h" | ||||||
47 | #include "llvm/Support/raw_ostream.h" | ||||||
48 | |||||||
49 | #define DEBUG_TYPE"aarch64-isel" "aarch64-isel" | ||||||
50 | |||||||
51 | using namespace llvm; | ||||||
52 | using namespace MIPatternMatch; | ||||||
53 | using namespace AArch64GISelUtils; | ||||||
54 | |||||||
55 | namespace llvm { | ||||||
56 | class BlockFrequencyInfo; | ||||||
57 | class ProfileSummaryInfo; | ||||||
58 | } | ||||||
59 | |||||||
60 | namespace { | ||||||
61 | |||||||
62 | #define GET_GLOBALISEL_PREDICATE_BITSET | ||||||
63 | #include "AArch64GenGlobalISel.inc" | ||||||
64 | #undef GET_GLOBALISEL_PREDICATE_BITSET | ||||||
65 | |||||||
66 | class AArch64InstructionSelector : public InstructionSelector { | ||||||
67 | public: | ||||||
68 | AArch64InstructionSelector(const AArch64TargetMachine &TM, | ||||||
69 | const AArch64Subtarget &STI, | ||||||
70 | const AArch64RegisterBankInfo &RBI); | ||||||
71 | |||||||
72 | bool select(MachineInstr &I) override; | ||||||
73 | static const char *getName() { return DEBUG_TYPE"aarch64-isel"; } | ||||||
74 | |||||||
75 | void setupMF(MachineFunction &MF, GISelKnownBits *KB, | ||||||
76 | CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI, | ||||||
77 | BlockFrequencyInfo *BFI) override { | ||||||
78 | InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); | ||||||
79 | MIB.setMF(MF); | ||||||
80 | |||||||
81 | // hasFnAttribute() is expensive to call on every BRCOND selection, so | ||||||
82 | // cache it here for each run of the selector. | ||||||
83 | ProduceNonFlagSettingCondBr = | ||||||
84 | !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); | ||||||
85 | MFReturnAddr = Register(); | ||||||
86 | |||||||
87 | processPHIs(MF); | ||||||
88 | } | ||||||
89 | |||||||
90 | private: | ||||||
91 | /// tblgen-erated 'select' implementation, used as the initial selector for | ||||||
92 | /// the patterns that don't require complex C++. | ||||||
93 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; | ||||||
94 | |||||||
95 | // A lowering phase that runs before any selection attempts. | ||||||
96 | // Returns true if the instruction was modified. | ||||||
97 | bool preISelLower(MachineInstr &I); | ||||||
98 | |||||||
99 | // An early selection function that runs before the selectImpl() call. | ||||||
100 | bool earlySelect(MachineInstr &I); | ||||||
101 | |||||||
102 | // Do some preprocessing of G_PHIs before we begin selection. | ||||||
103 | void processPHIs(MachineFunction &MF); | ||||||
104 | |||||||
105 | bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
106 | |||||||
107 | /// Eliminate same-sized cross-bank copies into stores before selectImpl(). | ||||||
108 | bool contractCrossBankCopyIntoStore(MachineInstr &I, | ||||||
109 | MachineRegisterInfo &MRI); | ||||||
110 | |||||||
111 | bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
112 | |||||||
113 | bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, | ||||||
114 | MachineRegisterInfo &MRI) const; | ||||||
115 | bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, | ||||||
116 | MachineRegisterInfo &MRI) const; | ||||||
117 | |||||||
118 | ///@{ | ||||||
119 | /// Helper functions for selectCompareBranch. | ||||||
120 | bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, | ||||||
121 | MachineIRBuilder &MIB) const; | ||||||
122 | bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, | ||||||
123 | MachineIRBuilder &MIB) const; | ||||||
124 | bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, | ||||||
125 | MachineIRBuilder &MIB) const; | ||||||
126 | bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, | ||||||
127 | MachineBasicBlock *DstMBB, | ||||||
128 | MachineIRBuilder &MIB) const; | ||||||
129 | ///@} | ||||||
130 | |||||||
131 | bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, | ||||||
132 | MachineRegisterInfo &MRI); | ||||||
133 | |||||||
134 | bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
135 | bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
136 | |||||||
137 | // Helper to generate an equivalent of scalar_to_vector into a new register, | ||||||
138 | // returned via 'Dst'. | ||||||
139 | MachineInstr *emitScalarToVector(unsigned EltSize, | ||||||
140 | const TargetRegisterClass *DstRC, | ||||||
141 | Register Scalar, | ||||||
142 | MachineIRBuilder &MIRBuilder) const; | ||||||
143 | |||||||
144 | /// Emit a lane insert into \p DstReg, or a new vector register if None is | ||||||
145 | /// provided. | ||||||
146 | /// | ||||||
147 | /// The lane inserted into is defined by \p LaneIdx. The vector source | ||||||
148 | /// register is given by \p SrcReg. The register containing the element is | ||||||
149 | /// given by \p EltReg. | ||||||
150 | MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg, | ||||||
151 | Register EltReg, unsigned LaneIdx, | ||||||
152 | const RegisterBank &RB, | ||||||
153 | MachineIRBuilder &MIRBuilder) const; | ||||||
154 | |||||||
155 | /// Emit a sequence of instructions representing a constant \p CV for a | ||||||
156 | /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) | ||||||
157 | /// | ||||||
158 | /// \returns the last instruction in the sequence on success, and nullptr | ||||||
159 | /// otherwise. | ||||||
160 | MachineInstr *emitConstantVector(Register Dst, Constant *CV, | ||||||
161 | MachineIRBuilder &MIRBuilder, | ||||||
162 | MachineRegisterInfo &MRI); | ||||||
163 | |||||||
164 | bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
165 | bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, | ||||||
166 | MachineRegisterInfo &MRI); | ||||||
167 | /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a | ||||||
168 | /// SUBREG_TO_REG. | ||||||
169 | bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); | ||||||
170 | bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
171 | bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
172 | bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
173 | |||||||
174 | bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
175 | bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
176 | bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
177 | bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
178 | |||||||
179 | /// Helper function to select vector load intrinsics like | ||||||
180 | /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. | ||||||
181 | /// \p Opc is the opcode that the selected instruction should use. | ||||||
182 | /// \p NumVecs is the number of vector destinations for the instruction. | ||||||
183 | /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. | ||||||
184 | bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, | ||||||
185 | MachineInstr &I); | ||||||
186 | bool selectIntrinsicWithSideEffects(MachineInstr &I, | ||||||
187 | MachineRegisterInfo &MRI); | ||||||
188 | bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
189 | bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
190 | bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const; | ||||||
191 | bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; | ||||||
192 | bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
193 | bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
194 | bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
195 | bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); | ||||||
196 | |||||||
197 | unsigned emitConstantPoolEntry(const Constant *CPVal, | ||||||
198 | MachineFunction &MF) const; | ||||||
199 | MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, | ||||||
200 | MachineIRBuilder &MIRBuilder) const; | ||||||
201 | |||||||
202 | // Emit a vector concat operation. | ||||||
203 | MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1, | ||||||
204 | Register Op2, | ||||||
205 | MachineIRBuilder &MIRBuilder) const; | ||||||
206 | |||||||
207 | // Emit an integer compare between LHS and RHS, which checks for Predicate. | ||||||
208 | MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, | ||||||
209 | MachineOperand &Predicate, | ||||||
210 | MachineIRBuilder &MIRBuilder) const; | ||||||
211 | |||||||
212 | /// Emit a floating point comparison between \p LHS and \p RHS. | ||||||
213 | /// \p Pred if given is the intended predicate to use. | ||||||
214 | MachineInstr *emitFPCompare(Register LHS, Register RHS, | ||||||
215 | MachineIRBuilder &MIRBuilder, | ||||||
216 | Optional<CmpInst::Predicate> = None) const; | ||||||
217 | |||||||
218 | MachineInstr *emitInstr(unsigned Opcode, | ||||||
219 | std::initializer_list<llvm::DstOp> DstOps, | ||||||
220 | std::initializer_list<llvm::SrcOp> SrcOps, | ||||||
221 | MachineIRBuilder &MIRBuilder, | ||||||
222 | const ComplexRendererFns &RenderFns = None) const; | ||||||
223 | /// Helper function to emit an add or sub instruction. | ||||||
224 | /// | ||||||
225 | /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above | ||||||
226 | /// in a specific order. | ||||||
227 | /// | ||||||
228 | /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. | ||||||
229 | /// | ||||||
230 | /// \code | ||||||
231 | /// const std::array<std::array<unsigned, 2>, 4> Table { | ||||||
232 | /// {{AArch64::ADDXri, AArch64::ADDWri}, | ||||||
233 | /// {AArch64::ADDXrs, AArch64::ADDWrs}, | ||||||
234 | /// {AArch64::ADDXrr, AArch64::ADDWrr}, | ||||||
235 | /// {AArch64::SUBXri, AArch64::SUBWri}, | ||||||
236 | /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; | ||||||
237 | /// \endcode | ||||||
238 | /// | ||||||
239 | /// Each row in the table corresponds to a different addressing mode. Each | ||||||
240 | /// column corresponds to a different register size. | ||||||
241 | /// | ||||||
242 | /// \attention Rows must be structured as follows: | ||||||
243 | /// - Row 0: The ri opcode variants | ||||||
244 | /// - Row 1: The rs opcode variants | ||||||
245 | /// - Row 2: The rr opcode variants | ||||||
246 | /// - Row 3: The ri opcode variants for negative immediates | ||||||
247 | /// - Row 4: The rx opcode variants | ||||||
248 | /// | ||||||
249 | /// \attention Columns must be structured as follows: | ||||||
250 | /// - Column 0: The 64-bit opcode variants | ||||||
251 | /// - Column 1: The 32-bit opcode variants | ||||||
252 | /// | ||||||
253 | /// \p Dst is the destination register of the binop to emit. | ||||||
254 | /// \p LHS is the left-hand operand of the binop to emit. | ||||||
255 | /// \p RHS is the right-hand operand of the binop to emit. | ||||||
256 | MachineInstr *emitAddSub( | ||||||
257 | const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, | ||||||
258 | Register Dst, MachineOperand &LHS, MachineOperand &RHS, | ||||||
259 | MachineIRBuilder &MIRBuilder) const; | ||||||
260 | MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, | ||||||
261 | MachineOperand &RHS, | ||||||
262 | MachineIRBuilder &MIRBuilder) const; | ||||||
263 | MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, | ||||||
264 | MachineIRBuilder &MIRBuilder) const; | ||||||
265 | MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, | ||||||
266 | MachineIRBuilder &MIRBuilder) const; | ||||||
267 | MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, | ||||||
268 | MachineIRBuilder &MIRBuilder) const; | ||||||
269 | MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, | ||||||
270 | MachineIRBuilder &MIRBuilder) const; | ||||||
271 | MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, | ||||||
272 | AArch64CC::CondCode CC, | ||||||
273 | MachineIRBuilder &MIRBuilder) const; | ||||||
274 | MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, | ||||||
275 | const RegisterBank &DstRB, LLT ScalarTy, | ||||||
276 | Register VecReg, unsigned LaneIdx, | ||||||
277 | MachineIRBuilder &MIRBuilder) const; | ||||||
278 | |||||||
279 | /// Emit a CSet for an integer compare. | ||||||
280 | /// | ||||||
281 | /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers. | ||||||
282 | MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, | ||||||
283 | MachineIRBuilder &MIRBuilder, | ||||||
284 | Register SrcReg = AArch64::WZR) const; | ||||||
285 | /// Emit a CSet for a FP compare. | ||||||
286 | /// | ||||||
287 | /// \p Dst is expected to be a 32-bit scalar register. | ||||||
288 | MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, | ||||||
289 | MachineIRBuilder &MIRBuilder) const; | ||||||
290 | |||||||
291 | /// Emit the overflow op for \p Opcode. | ||||||
292 | /// | ||||||
293 | /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, | ||||||
294 | /// G_USUBO, etc. | ||||||
295 | std::pair<MachineInstr *, AArch64CC::CondCode> | ||||||
296 | emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, | ||||||
297 | MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; | ||||||
298 | |||||||
299 | /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. | ||||||
300 | /// \p IsNegative is true if the test should be "not zero". | ||||||
301 | /// This will also optimize the test bit instruction when possible. | ||||||
302 | MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, | ||||||
303 | MachineBasicBlock *DstMBB, | ||||||
304 | MachineIRBuilder &MIB) const; | ||||||
305 | |||||||
306 | /// Emit a CB(N)Z instruction which branches to \p DestMBB. | ||||||
307 | MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, | ||||||
308 | MachineBasicBlock *DestMBB, | ||||||
309 | MachineIRBuilder &MIB) const; | ||||||
310 | |||||||
311 | // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. | ||||||
312 | // We use these manually instead of using the importer since it doesn't | ||||||
313 | // support SDNodeXForm. | ||||||
314 | ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; | ||||||
315 | ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; | ||||||
316 | ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; | ||||||
317 | ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; | ||||||
318 | |||||||
319 | ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; | ||||||
320 | ComplexRendererFns selectArithImmed(MachineOperand &Root) const; | ||||||
321 | ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; | ||||||
322 | |||||||
323 | ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, | ||||||
324 | unsigned Size) const; | ||||||
325 | |||||||
326 | ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { | ||||||
327 | return selectAddrModeUnscaled(Root, 1); | ||||||
328 | } | ||||||
329 | ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { | ||||||
330 | return selectAddrModeUnscaled(Root, 2); | ||||||
331 | } | ||||||
332 | ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { | ||||||
333 | return selectAddrModeUnscaled(Root, 4); | ||||||
334 | } | ||||||
335 | ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { | ||||||
336 | return selectAddrModeUnscaled(Root, 8); | ||||||
337 | } | ||||||
338 | ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { | ||||||
339 | return selectAddrModeUnscaled(Root, 16); | ||||||
340 | } | ||||||
341 | |||||||
342 | /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used | ||||||
343 | /// from complex pattern matchers like selectAddrModeIndexed(). | ||||||
344 | ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, | ||||||
345 | MachineRegisterInfo &MRI) const; | ||||||
346 | |||||||
347 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, | ||||||
348 | unsigned Size) const; | ||||||
349 | template <int Width> | ||||||
350 | ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { | ||||||
351 | return selectAddrModeIndexed(Root, Width / 8); | ||||||
352 | } | ||||||
353 | |||||||
354 | bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, | ||||||
355 | const MachineRegisterInfo &MRI) const; | ||||||
356 | ComplexRendererFns | ||||||
357 | selectAddrModeShiftedExtendXReg(MachineOperand &Root, | ||||||
358 | unsigned SizeInBytes) const; | ||||||
359 | |||||||
360 | /// Returns a \p ComplexRendererFns which contains a base, offset, and whether | ||||||
361 | /// or not a shift + extend should be folded into an addressing mode. Returns | ||||||
362 | /// None when this is not profitable or possible. | ||||||
363 | ComplexRendererFns | ||||||
364 | selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, | ||||||
365 | MachineOperand &Offset, unsigned SizeInBytes, | ||||||
366 | bool WantsExt) const; | ||||||
367 | ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; | ||||||
368 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, | ||||||
369 | unsigned SizeInBytes) const; | ||||||
370 | template <int Width> | ||||||
371 | ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { | ||||||
372 | return selectAddrModeXRO(Root, Width / 8); | ||||||
373 | } | ||||||
374 | |||||||
375 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, | ||||||
376 | unsigned SizeInBytes) const; | ||||||
377 | template <int Width> | ||||||
378 | ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { | ||||||
379 | return selectAddrModeWRO(Root, Width / 8); | ||||||
380 | } | ||||||
381 | |||||||
382 | ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const; | ||||||
383 | |||||||
384 | ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { | ||||||
385 | return selectShiftedRegister(Root); | ||||||
386 | } | ||||||
387 | |||||||
388 | ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { | ||||||
389 | // TODO: selectShiftedRegister should allow for rotates on logical shifts. | ||||||
390 | // For now, make them the same. The only difference between the two is that | ||||||
391 | // logical shifts are allowed to fold in rotates. Otherwise, these are | ||||||
392 | // functionally the same. | ||||||
393 | return selectShiftedRegister(Root); | ||||||
394 | } | ||||||
395 | |||||||
396 | /// Given an extend instruction, determine the correct shift-extend type for | ||||||
397 | /// that instruction. | ||||||
398 | /// | ||||||
399 | /// If the instruction is going to be used in a load or store, pass | ||||||
400 | /// \p IsLoadStore = true. | ||||||
401 | AArch64_AM::ShiftExtendType | ||||||
402 | getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, | ||||||
403 | bool IsLoadStore = false) const; | ||||||
404 | |||||||
405 | /// Move \p Reg to \p RC if \p Reg is not already on \p RC. | ||||||
406 | /// | ||||||
407 | /// \returns Either \p Reg if no change was necessary, or the new register | ||||||
408 | /// created by moving \p Reg. | ||||||
409 | /// | ||||||
410 | /// Note: This uses emitCopy right now. | ||||||
411 | Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, | ||||||
412 | MachineIRBuilder &MIB) const; | ||||||
413 | |||||||
414 | ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; | ||||||
415 | |||||||
416 | void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||||
417 | int OpIdx = -1) const; | ||||||
418 | void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, | ||||||
419 | int OpIdx = -1) const; | ||||||
420 | void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, | ||||||
421 | int OpIdx = -1) const; | ||||||
422 | void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||||
423 | int OpIdx = -1) const; | ||||||
424 | void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||||
425 | int OpIdx = -1) const; | ||||||
426 | void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, | ||||||
427 | int OpIdx = -1) const; | ||||||
428 | |||||||
429 | // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. | ||||||
430 | void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); | ||||||
431 | |||||||
432 | // Optimization methods. | ||||||
433 | bool tryOptSelect(MachineInstr &MI); | ||||||
434 | MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, | ||||||
435 | MachineOperand &Predicate, | ||||||
436 | MachineIRBuilder &MIRBuilder) const; | ||||||
437 | |||||||
438 | /// Return true if \p MI is a load or store of \p NumBytes bytes. | ||||||
439 | bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; | ||||||
440 | |||||||
441 | /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit | ||||||
442 | /// register zeroed out. In other words, the result of MI has been explicitly | ||||||
443 | /// zero extended. | ||||||
444 | bool isDef32(const MachineInstr &MI) const; | ||||||
445 | |||||||
446 | const AArch64TargetMachine &TM; | ||||||
447 | const AArch64Subtarget &STI; | ||||||
448 | const AArch64InstrInfo &TII; | ||||||
449 | const AArch64RegisterInfo &TRI; | ||||||
450 | const AArch64RegisterBankInfo &RBI; | ||||||
451 | |||||||
452 | bool ProduceNonFlagSettingCondBr = false; | ||||||
453 | |||||||
454 | // Some cached values used during selection. | ||||||
455 | // We use LR as a live-in register, and we keep track of it here as it can be | ||||||
456 | // clobbered by calls. | ||||||
457 | Register MFReturnAddr; | ||||||
458 | |||||||
459 | MachineIRBuilder MIB; | ||||||
460 | |||||||
461 | #define GET_GLOBALISEL_PREDICATES_DECL | ||||||
462 | #include "AArch64GenGlobalISel.inc" | ||||||
463 | #undef GET_GLOBALISEL_PREDICATES_DECL | ||||||
464 | |||||||
465 | // We declare the temporaries used by selectImpl() in the class to minimize the | ||||||
466 | // cost of constructing placeholder values. | ||||||
467 | #define GET_GLOBALISEL_TEMPORARIES_DECL | ||||||
468 | #include "AArch64GenGlobalISel.inc" | ||||||
469 | #undef GET_GLOBALISEL_TEMPORARIES_DECL | ||||||
470 | }; | ||||||
471 | |||||||
472 | } // end anonymous namespace | ||||||
473 | |||||||
474 | #define GET_GLOBALISEL_IMPL | ||||||
475 | #include "AArch64GenGlobalISel.inc" | ||||||
476 | #undef GET_GLOBALISEL_IMPL | ||||||
477 | |||||||
478 | AArch64InstructionSelector::AArch64InstructionSelector( | ||||||
479 | const AArch64TargetMachine &TM, const AArch64Subtarget &STI, | ||||||
480 | const AArch64RegisterBankInfo &RBI) | ||||||
481 | : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), | ||||||
482 | TRI(*STI.getRegisterInfo()), RBI(RBI), | ||||||
483 | #define GET_GLOBALISEL_PREDICATES_INIT | ||||||
484 | #include "AArch64GenGlobalISel.inc" | ||||||
485 | #undef GET_GLOBALISEL_PREDICATES_INIT | ||||||
486 | #define GET_GLOBALISEL_TEMPORARIES_INIT | ||||||
487 | #include "AArch64GenGlobalISel.inc" | ||||||
488 | #undef GET_GLOBALISEL_TEMPORARIES_INIT | ||||||
489 | { | ||||||
490 | } | ||||||
491 | |||||||
492 | // FIXME: This should be target-independent, inferred from the types declared | ||||||
493 | // for each class in the bank. | ||||||
494 | static const TargetRegisterClass * | ||||||
495 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, | ||||||
496 | const RegisterBankInfo &RBI, | ||||||
497 | bool GetAllRegSet = false) { | ||||||
498 | if (RB.getID() == AArch64::GPRRegBankID) { | ||||||
499 | if (Ty.getSizeInBits() <= 32) | ||||||
500 | return GetAllRegSet ? &AArch64::GPR32allRegClass | ||||||
501 | : &AArch64::GPR32RegClass; | ||||||
502 | if (Ty.getSizeInBits() == 64) | ||||||
503 | return GetAllRegSet ? &AArch64::GPR64allRegClass | ||||||
504 | : &AArch64::GPR64RegClass; | ||||||
505 | if (Ty.getSizeInBits() == 128) | ||||||
506 | return &AArch64::XSeqPairsClassRegClass; | ||||||
507 | return nullptr; | ||||||
508 | } | ||||||
509 | |||||||
510 | if (RB.getID() == AArch64::FPRRegBankID) { | ||||||
511 | switch (Ty.getSizeInBits()) { | ||||||
512 | case 8: | ||||||
513 | return &AArch64::FPR8RegClass; | ||||||
514 | case 16: | ||||||
515 | return &AArch64::FPR16RegClass; | ||||||
516 | case 32: | ||||||
517 | return &AArch64::FPR32RegClass; | ||||||
518 | case 64: | ||||||
519 | return &AArch64::FPR64RegClass; | ||||||
520 | case 128: | ||||||
521 | return &AArch64::FPR128RegClass; | ||||||
522 | } | ||||||
523 | return nullptr; | ||||||
524 | } | ||||||
525 | |||||||
526 | return nullptr; | ||||||
527 | } | ||||||
528 | |||||||
529 | /// Given a register bank, and size in bits, return the smallest register class | ||||||
530 | /// that can represent that combination. | ||||||
531 | static const TargetRegisterClass * | ||||||
532 | getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, | ||||||
533 | bool GetAllRegSet = false) { | ||||||
534 | unsigned RegBankID = RB.getID(); | ||||||
535 | |||||||
536 | if (RegBankID == AArch64::GPRRegBankID) { | ||||||
537 | if (SizeInBits <= 32) | ||||||
538 | return GetAllRegSet ? &AArch64::GPR32allRegClass | ||||||
539 | : &AArch64::GPR32RegClass; | ||||||
540 | if (SizeInBits == 64) | ||||||
541 | return GetAllRegSet ? &AArch64::GPR64allRegClass | ||||||
542 | : &AArch64::GPR64RegClass; | ||||||
543 | if (SizeInBits == 128) | ||||||
544 | return &AArch64::XSeqPairsClassRegClass; | ||||||
545 | } | ||||||
546 | |||||||
547 | if (RegBankID == AArch64::FPRRegBankID) { | ||||||
548 | switch (SizeInBits) { | ||||||
549 | default: | ||||||
550 | return nullptr; | ||||||
551 | case 8: | ||||||
552 | return &AArch64::FPR8RegClass; | ||||||
553 | case 16: | ||||||
554 | return &AArch64::FPR16RegClass; | ||||||
555 | case 32: | ||||||
556 | return &AArch64::FPR32RegClass; | ||||||
557 | case 64: | ||||||
558 | return &AArch64::FPR64RegClass; | ||||||
559 | case 128: | ||||||
560 | return &AArch64::FPR128RegClass; | ||||||
561 | } | ||||||
562 | } | ||||||
563 | |||||||
564 | return nullptr; | ||||||
565 | } | ||||||
566 | |||||||
567 | /// Returns the correct subregister to use for a given register class. | ||||||
568 | static bool getSubRegForClass(const TargetRegisterClass *RC, | ||||||
569 | const TargetRegisterInfo &TRI, unsigned &SubReg) { | ||||||
570 | switch (TRI.getRegSizeInBits(*RC)) { | ||||||
571 | case 8: | ||||||
572 | SubReg = AArch64::bsub; | ||||||
573 | break; | ||||||
574 | case 16: | ||||||
575 | SubReg = AArch64::hsub; | ||||||
576 | break; | ||||||
577 | case 32: | ||||||
578 | if (RC != &AArch64::FPR32RegClass) | ||||||
579 | SubReg = AArch64::sub_32; | ||||||
580 | else | ||||||
581 | SubReg = AArch64::ssub; | ||||||
582 | break; | ||||||
583 | case 64: | ||||||
584 | SubReg = AArch64::dsub; | ||||||
585 | break; | ||||||
586 | default: | ||||||
587 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class." ; } } while (false) | ||||||
588 | dbgs() << "Couldn't find appropriate subregister for register class.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class." ; } } while (false); | ||||||
589 | return false; | ||||||
590 | } | ||||||
591 | |||||||
592 | return true; | ||||||
593 | } | ||||||
594 | |||||||
595 | /// Returns the minimum size the given register bank can hold. | ||||||
596 | static unsigned getMinSizeForRegBank(const RegisterBank &RB) { | ||||||
597 | switch (RB.getID()) { | ||||||
598 | case AArch64::GPRRegBankID: | ||||||
599 | return 32; | ||||||
600 | case AArch64::FPRRegBankID: | ||||||
601 | return 8; | ||||||
602 | default: | ||||||
603 | llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank." , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 603); | ||||||
604 | } | ||||||
605 | } | ||||||
606 | |||||||
607 | /// Create a REG_SEQUENCE instruction using the registers in \p Regs. | ||||||
608 | /// Helper function for functions like createDTuple and createQTuple. | ||||||
609 | /// | ||||||
610 | /// \p RegClassIDs - The list of register class IDs available for some tuple of | ||||||
611 | /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is | ||||||
612 | /// expected to contain between 2 and 4 tuple classes. | ||||||
613 | /// | ||||||
614 | /// \p SubRegs - The list of subregister classes associated with each register | ||||||
615 | /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 | ||||||
616 | /// subregister class. The index of each subregister class is expected to | ||||||
617 | /// correspond with the index of each register class. | ||||||
618 | /// | ||||||
619 | /// \returns Either the destination register of REG_SEQUENCE instruction that | ||||||
620 | /// was created, or the 0th element of \p Regs if \p Regs contains a single | ||||||
621 | /// element. | ||||||
622 | static Register createTuple(ArrayRef<Register> Regs, | ||||||
623 | const unsigned RegClassIDs[], | ||||||
624 | const unsigned SubRegs[], MachineIRBuilder &MIB) { | ||||||
625 | unsigned NumRegs = Regs.size(); | ||||||
626 | if (NumRegs == 1) | ||||||
627 | return Regs[0]; | ||||||
628 | assert(NumRegs >= 2 && NumRegs <= 4 &&(static_cast <bool> (NumRegs >= 2 && NumRegs <= 4 && "Only support between two and 4 registers in a tuple!" ) ? void (0) : __assert_fail ("NumRegs >= 2 && NumRegs <= 4 && \"Only support between two and 4 registers in a tuple!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 629, __extension__ __PRETTY_FUNCTION__)) | ||||||
629 | "Only support between two and 4 registers in a tuple!")(static_cast <bool> (NumRegs >= 2 && NumRegs <= 4 && "Only support between two and 4 registers in a tuple!" ) ? void (0) : __assert_fail ("NumRegs >= 2 && NumRegs <= 4 && \"Only support between two and 4 registers in a tuple!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 629, __extension__ __PRETTY_FUNCTION__)); | ||||||
630 | const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); | ||||||
631 | auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]); | ||||||
632 | auto RegSequence = | ||||||
633 | MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {}); | ||||||
634 | for (unsigned I = 0, E = Regs.size(); I < E; ++I) { | ||||||
635 | RegSequence.addUse(Regs[I]); | ||||||
636 | RegSequence.addImm(SubRegs[I]); | ||||||
637 | } | ||||||
638 | return RegSequence.getReg(0); | ||||||
639 | } | ||||||
640 | |||||||
641 | /// Create a tuple of D-registers using the registers in \p Regs. | ||||||
642 | static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { | ||||||
643 | static const unsigned RegClassIDs[] = { | ||||||
644 | AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; | ||||||
645 | static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, | ||||||
646 | AArch64::dsub2, AArch64::dsub3}; | ||||||
647 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); | ||||||
648 | } | ||||||
649 | |||||||
650 | /// Create a tuple of Q-registers using the registers in \p Regs. | ||||||
651 | static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { | ||||||
652 | static const unsigned RegClassIDs[] = { | ||||||
653 | AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; | ||||||
654 | static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, | ||||||
655 | AArch64::qsub2, AArch64::qsub3}; | ||||||
656 | return createTuple(Regs, RegClassIDs, SubRegs, MIB); | ||||||
657 | } | ||||||
658 | |||||||
659 | static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { | ||||||
660 | auto &MI = *Root.getParent(); | ||||||
661 | auto &MBB = *MI.getParent(); | ||||||
662 | auto &MF = *MBB.getParent(); | ||||||
663 | auto &MRI = MF.getRegInfo(); | ||||||
664 | uint64_t Immed; | ||||||
665 | if (Root.isImm()) | ||||||
666 | Immed = Root.getImm(); | ||||||
667 | else if (Root.isCImm()) | ||||||
668 | Immed = Root.getCImm()->getZExtValue(); | ||||||
669 | else if (Root.isReg()) { | ||||||
670 | auto ValAndVReg = | ||||||
671 | getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); | ||||||
672 | if (!ValAndVReg) | ||||||
673 | return None; | ||||||
674 | Immed = ValAndVReg->Value.getSExtValue(); | ||||||
675 | } else | ||||||
676 | return None; | ||||||
677 | return Immed; | ||||||
678 | } | ||||||
679 | |||||||
680 | /// Check whether \p I is a currently unsupported binary operation: | ||||||
681 | /// - it has an unsized type | ||||||
682 | /// - an operand is not a vreg | ||||||
683 | /// - all operands are not in the same bank | ||||||
684 | /// These are checks that should someday live in the verifier, but right now, | ||||||
685 | /// these are mostly limitations of the aarch64 selector. | ||||||
686 | static bool unsupportedBinOp(const MachineInstr &I, | ||||||
687 | const AArch64RegisterBankInfo &RBI, | ||||||
688 | const MachineRegisterInfo &MRI, | ||||||
689 | const AArch64RegisterInfo &TRI) { | ||||||
690 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||||
691 | if (!Ty.isValid()) { | ||||||
692 | LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n" ; } } while (false); | ||||||
693 | return true; | ||||||
694 | } | ||||||
695 | |||||||
696 | const RegisterBank *PrevOpBank = nullptr; | ||||||
697 | for (auto &MO : I.operands()) { | ||||||
698 | // FIXME: Support non-register operands. | ||||||
699 | if (!MO.isReg()) { | ||||||
700 | LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic inst non-reg operands are unsupported\n" ; } } while (false); | ||||||
701 | return true; | ||||||
702 | } | ||||||
703 | |||||||
704 | // FIXME: Can generic operations have physical registers operands? If | ||||||
705 | // so, this will need to be taught about that, and we'll need to get the | ||||||
706 | // bank out of the minimal class for the register. | ||||||
707 | // Either way, this needs to be documented (and possibly verified). | ||||||
708 | if (!Register::isVirtualRegister(MO.getReg())) { | ||||||
709 | LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic inst has physical register operand\n" ; } } while (false); | ||||||
710 | return true; | ||||||
711 | } | ||||||
712 | |||||||
713 | const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); | ||||||
714 | if (!OpBank) { | ||||||
715 | LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic register has no bank or class\n" ; } } while (false); | ||||||
716 | return true; | ||||||
717 | } | ||||||
718 | |||||||
719 | if (PrevOpBank && OpBank != PrevOpBank) { | ||||||
720 | LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic inst operands have different banks\n" ; } } while (false); | ||||||
721 | return true; | ||||||
722 | } | ||||||
723 | PrevOpBank = OpBank; | ||||||
724 | } | ||||||
725 | return false; | ||||||
726 | } | ||||||
727 | |||||||
728 | /// Select the AArch64 opcode for the basic binary operation \p GenericOpc | ||||||
729 | /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID | ||||||
730 | /// and of size \p OpSize. | ||||||
731 | /// \returns \p GenericOpc if the combination is unsupported. | ||||||
732 | static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, | ||||||
733 | unsigned OpSize) { | ||||||
734 | switch (RegBankID) { | ||||||
735 | case AArch64::GPRRegBankID: | ||||||
736 | if (OpSize == 32) { | ||||||
737 | switch (GenericOpc) { | ||||||
738 | case TargetOpcode::G_SHL: | ||||||
739 | return AArch64::LSLVWr; | ||||||
740 | case TargetOpcode::G_LSHR: | ||||||
741 | return AArch64::LSRVWr; | ||||||
742 | case TargetOpcode::G_ASHR: | ||||||
743 | return AArch64::ASRVWr; | ||||||
744 | default: | ||||||
745 | return GenericOpc; | ||||||
746 | } | ||||||
747 | } else if (OpSize == 64) { | ||||||
748 | switch (GenericOpc) { | ||||||
749 | case TargetOpcode::G_PTR_ADD: | ||||||
750 | return AArch64::ADDXrr; | ||||||
751 | case TargetOpcode::G_SHL: | ||||||
752 | return AArch64::LSLVXr; | ||||||
753 | case TargetOpcode::G_LSHR: | ||||||
754 | return AArch64::LSRVXr; | ||||||
755 | case TargetOpcode::G_ASHR: | ||||||
756 | return AArch64::ASRVXr; | ||||||
757 | default: | ||||||
758 | return GenericOpc; | ||||||
759 | } | ||||||
760 | } | ||||||
761 | break; | ||||||
762 | case AArch64::FPRRegBankID: | ||||||
763 | switch (OpSize) { | ||||||
764 | case 32: | ||||||
765 | switch (GenericOpc) { | ||||||
766 | case TargetOpcode::G_FADD: | ||||||
767 | return AArch64::FADDSrr; | ||||||
768 | case TargetOpcode::G_FSUB: | ||||||
769 | return AArch64::FSUBSrr; | ||||||
770 | case TargetOpcode::G_FMUL: | ||||||
771 | return AArch64::FMULSrr; | ||||||
772 | case TargetOpcode::G_FDIV: | ||||||
773 | return AArch64::FDIVSrr; | ||||||
774 | default: | ||||||
775 | return GenericOpc; | ||||||
776 | } | ||||||
777 | case 64: | ||||||
778 | switch (GenericOpc) { | ||||||
779 | case TargetOpcode::G_FADD: | ||||||
780 | return AArch64::FADDDrr; | ||||||
781 | case TargetOpcode::G_FSUB: | ||||||
782 | return AArch64::FSUBDrr; | ||||||
783 | case TargetOpcode::G_FMUL: | ||||||
784 | return AArch64::FMULDrr; | ||||||
785 | case TargetOpcode::G_FDIV: | ||||||
786 | return AArch64::FDIVDrr; | ||||||
787 | case TargetOpcode::G_OR: | ||||||
788 | return AArch64::ORRv8i8; | ||||||
789 | default: | ||||||
790 | return GenericOpc; | ||||||
791 | } | ||||||
792 | } | ||||||
793 | break; | ||||||
794 | } | ||||||
795 | return GenericOpc; | ||||||
796 | } | ||||||
797 | |||||||
798 | /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, | ||||||
799 | /// appropriate for the (value) register bank \p RegBankID and of memory access | ||||||
800 | /// size \p OpSize. This returns the variant with the base+unsigned-immediate | ||||||
801 | /// addressing mode (e.g., LDRXui). | ||||||
802 | /// \returns \p GenericOpc if the combination is unsupported. | ||||||
803 | static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, | ||||||
804 | unsigned OpSize) { | ||||||
805 | const bool isStore = GenericOpc == TargetOpcode::G_STORE; | ||||||
806 | switch (RegBankID) { | ||||||
807 | case AArch64::GPRRegBankID: | ||||||
808 | switch (OpSize) { | ||||||
809 | case 8: | ||||||
810 | return isStore ? AArch64::STRBBui : AArch64::LDRBBui; | ||||||
811 | case 16: | ||||||
812 | return isStore ? AArch64::STRHHui : AArch64::LDRHHui; | ||||||
813 | case 32: | ||||||
814 | return isStore ? AArch64::STRWui : AArch64::LDRWui; | ||||||
815 | case 64: | ||||||
816 | return isStore ? AArch64::STRXui : AArch64::LDRXui; | ||||||
817 | } | ||||||
818 | break; | ||||||
819 | case AArch64::FPRRegBankID: | ||||||
820 | switch (OpSize) { | ||||||
821 | case 8: | ||||||
822 | return isStore ? AArch64::STRBui : AArch64::LDRBui; | ||||||
823 | case 16: | ||||||
824 | return isStore ? AArch64::STRHui : AArch64::LDRHui; | ||||||
825 | case 32: | ||||||
826 | return isStore ? AArch64::STRSui : AArch64::LDRSui; | ||||||
827 | case 64: | ||||||
828 | return isStore ? AArch64::STRDui : AArch64::LDRDui; | ||||||
829 | } | ||||||
830 | break; | ||||||
831 | } | ||||||
832 | return GenericOpc; | ||||||
833 | } | ||||||
834 | |||||||
835 | #ifndef NDEBUG | ||||||
836 | /// Helper function that verifies that we have a valid copy at the end of | ||||||
837 | /// selectCopy. Verifies that the source and dest have the expected sizes and | ||||||
838 | /// then returns true. | ||||||
839 | static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, | ||||||
840 | const MachineRegisterInfo &MRI, | ||||||
841 | const TargetRegisterInfo &TRI, | ||||||
842 | const RegisterBankInfo &RBI) { | ||||||
843 | const Register DstReg = I.getOperand(0).getReg(); | ||||||
844 | const Register SrcReg = I.getOperand(1).getReg(); | ||||||
845 | const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); | ||||||
846 | const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); | ||||||
847 | |||||||
848 | // Make sure the size of the source and dest line up. | ||||||
849 | assert((static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
850 | (DstSize == SrcSize ||(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
851 | // Copies are a mean to setup initial types, the number of(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
852 | // bits may not exactly match.(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
853 | (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
854 | // Copies are a mean to copy bits around, as long as we are(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
855 | // on the same register class, that's fine. Otherwise, that(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
856 | // means we need some SUBREG_TO_REG or AND & co.(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
857 | (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)) | ||||||
858 | "Copy with different width?!")(static_cast <bool> ((DstSize == SrcSize || (Register:: isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && "Copy with different width?!") ? void (0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 858, __extension__ __PRETTY_FUNCTION__)); | ||||||
859 | |||||||
860 | // Check the size of the destination. | ||||||
861 | assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&(static_cast <bool> ((DstSize <= 64 || DstBank.getID () == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values" ) ? void (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 862, __extension__ __PRETTY_FUNCTION__)) | ||||||
862 | "GPRs cannot get more than 64-bit width values")(static_cast <bool> ((DstSize <= 64 || DstBank.getID () == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values" ) ? void (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 862, __extension__ __PRETTY_FUNCTION__)); | ||||||
863 | |||||||
864 | return true; | ||||||
865 | } | ||||||
866 | #endif | ||||||
867 | |||||||
868 | /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg | ||||||
869 | /// to \p *To. | ||||||
870 | /// | ||||||
871 | /// E.g "To = COPY SrcReg:SubReg" | ||||||
872 | static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, | ||||||
873 | const RegisterBankInfo &RBI, Register SrcReg, | ||||||
874 | const TargetRegisterClass *To, unsigned SubReg) { | ||||||
875 | assert(SrcReg.isValid() && "Expected a valid source register?")(static_cast <bool> (SrcReg.isValid() && "Expected a valid source register?" ) ? void (0) : __assert_fail ("SrcReg.isValid() && \"Expected a valid source register?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 875, __extension__ __PRETTY_FUNCTION__)); | ||||||
876 | assert(To && "Destination register class cannot be null")(static_cast <bool> (To && "Destination register class cannot be null" ) ? void (0) : __assert_fail ("To && \"Destination register class cannot be null\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 876, __extension__ __PRETTY_FUNCTION__)); | ||||||
877 | assert(SubReg && "Expected a valid subregister")(static_cast <bool> (SubReg && "Expected a valid subregister" ) ? void (0) : __assert_fail ("SubReg && \"Expected a valid subregister\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 877, __extension__ __PRETTY_FUNCTION__)); | ||||||
878 | |||||||
879 | MachineIRBuilder MIB(I); | ||||||
880 | auto SubRegCopy = | ||||||
881 | MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg); | ||||||
882 | MachineOperand &RegOp = I.getOperand(1); | ||||||
883 | RegOp.setReg(SubRegCopy.getReg(0)); | ||||||
884 | |||||||
885 | // It's possible that the destination register won't be constrained. Make | ||||||
886 | // sure that happens. | ||||||
887 | if (!Register::isPhysicalRegister(I.getOperand(0).getReg())) | ||||||
888 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); | ||||||
889 | |||||||
890 | return true; | ||||||
891 | } | ||||||
892 | |||||||
893 | /// Helper function to get the source and destination register classes for a | ||||||
894 | /// copy. Returns a std::pair containing the source register class for the | ||||||
895 | /// copy, and the destination register class for the copy. If a register class | ||||||
896 | /// cannot be determined, then it will be nullptr. | ||||||
897 | static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> | ||||||
898 | getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, | ||||||
899 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, | ||||||
900 | const RegisterBankInfo &RBI) { | ||||||
901 | Register DstReg = I.getOperand(0).getReg(); | ||||||
902 | Register SrcReg = I.getOperand(1).getReg(); | ||||||
903 | const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
904 | const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||||
905 | unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); | ||||||
906 | unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); | ||||||
907 | |||||||
908 | // Special casing for cross-bank copies of s1s. We can technically represent | ||||||
909 | // a 1-bit value with any size of register. The minimum size for a GPR is 32 | ||||||
910 | // bits. So, we need to put the FPR on 32 bits as well. | ||||||
911 | // | ||||||
912 | // FIXME: I'm not sure if this case holds true outside of copies. If it does, | ||||||
913 | // then we can pull it into the helpers that get the appropriate class for a | ||||||
914 | // register bank. Or make a new helper that carries along some constraint | ||||||
915 | // information. | ||||||
916 | if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) | ||||||
917 | SrcSize = DstSize = 32; | ||||||
918 | |||||||
919 | return {getMinClassForRegBank(SrcRegBank, SrcSize, true), | ||||||
920 | getMinClassForRegBank(DstRegBank, DstSize, true)}; | ||||||
921 | } | ||||||
922 | |||||||
923 | static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, | ||||||
924 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, | ||||||
925 | const RegisterBankInfo &RBI) { | ||||||
926 | Register DstReg = I.getOperand(0).getReg(); | ||||||
927 | Register SrcReg = I.getOperand(1).getReg(); | ||||||
928 | const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
929 | const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||||
930 | |||||||
931 | // Find the correct register classes for the source and destination registers. | ||||||
932 | const TargetRegisterClass *SrcRC; | ||||||
933 | const TargetRegisterClass *DstRC; | ||||||
934 | std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); | ||||||
935 | |||||||
936 | if (!DstRC) { | ||||||
937 | LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unexpected dest size " << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while (false) | ||||||
938 | << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unexpected dest size " << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while (false); | ||||||
939 | return false; | ||||||
940 | } | ||||||
941 | |||||||
942 | // A couple helpers below, for making sure that the copy we produce is valid. | ||||||
943 | |||||||
944 | // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want | ||||||
945 | // to verify that the src and dst are the same size, since that's handled by | ||||||
946 | // the SUBREG_TO_REG. | ||||||
947 | bool KnownValid = false; | ||||||
948 | |||||||
949 | // Returns true, or asserts if something we don't expect happens. Instead of | ||||||
950 | // returning true, we return isValidCopy() to ensure that we verify the | ||||||
951 | // result. | ||||||
952 | auto CheckCopy = [&]() { | ||||||
953 | // If we have a bitcast or something, we can't have physical registers. | ||||||
954 | assert((I.isCopy() ||(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister (I.getOperand(0).getReg()) && !Register::isPhysicalRegister (I.getOperand(1).getReg()))) && "No phys reg on generic operator!" ) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 957, __extension__ __PRETTY_FUNCTION__)) | ||||||
955 | (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister (I.getOperand(0).getReg()) && !Register::isPhysicalRegister (I.getOperand(1).getReg()))) && "No phys reg on generic operator!" ) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 957, __extension__ __PRETTY_FUNCTION__)) | ||||||
956 | !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister (I.getOperand(0).getReg()) && !Register::isPhysicalRegister (I.getOperand(1).getReg()))) && "No phys reg on generic operator!" ) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 957, __extension__ __PRETTY_FUNCTION__)) | ||||||
957 | "No phys reg on generic operator!")(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister (I.getOperand(0).getReg()) && !Register::isPhysicalRegister (I.getOperand(1).getReg()))) && "No phys reg on generic operator!" ) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 957, __extension__ __PRETTY_FUNCTION__)); | ||||||
958 | bool ValidCopy = true; | ||||||
959 | #ifndef NDEBUG | ||||||
960 | ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); | ||||||
961 | assert(ValidCopy && "Invalid copy.")(static_cast <bool> (ValidCopy && "Invalid copy." ) ? void (0) : __assert_fail ("ValidCopy && \"Invalid copy.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 961, __extension__ __PRETTY_FUNCTION__)); | ||||||
962 | #endif | ||||||
963 | (void)KnownValid; | ||||||
964 | return ValidCopy; | ||||||
965 | }; | ||||||
966 | |||||||
967 | // Is this a copy? If so, then we may need to insert a subregister copy. | ||||||
968 | if (I.isCopy()) { | ||||||
969 | // Yes. Check if there's anything to fix up. | ||||||
970 | if (!SrcRC) { | ||||||
971 | LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't determine source register class\n" ; } } while (false); | ||||||
972 | return false; | ||||||
973 | } | ||||||
974 | |||||||
975 | unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); | ||||||
976 | unsigned DstSize = TRI.getRegSizeInBits(*DstRC); | ||||||
977 | unsigned SubReg; | ||||||
978 | |||||||
979 | // If the source bank doesn't support a subregister copy small enough, | ||||||
980 | // then we first need to copy to the destination bank. | ||||||
981 | if (getMinSizeForRegBank(SrcRegBank) > DstSize) { | ||||||
982 | const TargetRegisterClass *DstTempRC = | ||||||
983 | getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true); | ||||||
984 | getSubRegForClass(DstRC, TRI, SubReg); | ||||||
985 | |||||||
986 | MachineIRBuilder MIB(I); | ||||||
987 | auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg}); | ||||||
988 | copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); | ||||||
989 | } else if (SrcSize > DstSize) { | ||||||
990 | // If the source register is bigger than the destination we need to | ||||||
991 | // perform a subregister copy. | ||||||
992 | const TargetRegisterClass *SubRegRC = | ||||||
993 | getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); | ||||||
994 | getSubRegForClass(SubRegRC, TRI, SubReg); | ||||||
995 | copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); | ||||||
996 | } else if (DstSize > SrcSize) { | ||||||
997 | // If the destination register is bigger than the source we need to do | ||||||
998 | // a promotion using SUBREG_TO_REG. | ||||||
999 | const TargetRegisterClass *PromotionRC = | ||||||
1000 | getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); | ||||||
1001 | getSubRegForClass(SrcRC, TRI, SubReg); | ||||||
1002 | |||||||
1003 | Register PromoteReg = MRI.createVirtualRegister(PromotionRC); | ||||||
1004 | BuildMI(*I.getParent(), I, I.getDebugLoc(), | ||||||
1005 | TII.get(AArch64::SUBREG_TO_REG), PromoteReg) | ||||||
1006 | .addImm(0) | ||||||
1007 | .addUse(SrcReg) | ||||||
1008 | .addImm(SubReg); | ||||||
1009 | MachineOperand &RegOp = I.getOperand(1); | ||||||
1010 | RegOp.setReg(PromoteReg); | ||||||
1011 | |||||||
1012 | // Promise that the copy is implicitly validated by the SUBREG_TO_REG. | ||||||
1013 | KnownValid = true; | ||||||
1014 | } | ||||||
1015 | |||||||
1016 | // If the destination is a physical register, then there's nothing to | ||||||
1017 | // change, so we're done. | ||||||
1018 | if (Register::isPhysicalRegister(DstReg)) | ||||||
1019 | return CheckCopy(); | ||||||
1020 | } | ||||||
1021 | |||||||
1022 | // No need to constrain SrcReg. It will get constrained when we hit another | ||||||
1023 | // of its use or its defs. Copies do not have constraints. | ||||||
1024 | if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { | ||||||
1025 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) << " operand\n"; } } while ( false) | ||||||
1026 | << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) << " operand\n"; } } while ( false); | ||||||
1027 | return false; | ||||||
1028 | } | ||||||
1029 | |||||||
1030 | // If this a GPR ZEXT that we want to just reduce down into a copy. | ||||||
1031 | // The sizes will be mismatched with the source < 32b but that's ok. | ||||||
1032 | if (I.getOpcode() == TargetOpcode::G_ZEXT) { | ||||||
1033 | I.setDesc(TII.get(AArch64::COPY)); | ||||||
1034 | assert(SrcRegBank.getID() == AArch64::GPRRegBankID)(static_cast <bool> (SrcRegBank.getID() == AArch64::GPRRegBankID ) ? void (0) : __assert_fail ("SrcRegBank.getID() == AArch64::GPRRegBankID" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1034, __extension__ __PRETTY_FUNCTION__)); | ||||||
1035 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
1036 | } | ||||||
1037 | |||||||
1038 | I.setDesc(TII.get(AArch64::COPY)); | ||||||
1039 | return CheckCopy(); | ||||||
1040 | } | ||||||
1041 | |||||||
1042 | static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { | ||||||
1043 | if (!DstTy.isScalar() || !SrcTy.isScalar()) | ||||||
1044 | return GenericOpc; | ||||||
1045 | |||||||
1046 | const unsigned DstSize = DstTy.getSizeInBits(); | ||||||
1047 | const unsigned SrcSize = SrcTy.getSizeInBits(); | ||||||
1048 | |||||||
1049 | switch (DstSize) { | ||||||
1050 | case 32: | ||||||
1051 | switch (SrcSize) { | ||||||
1052 | case 32: | ||||||
1053 | switch (GenericOpc) { | ||||||
1054 | case TargetOpcode::G_SITOFP: | ||||||
1055 | return AArch64::SCVTFUWSri; | ||||||
1056 | case TargetOpcode::G_UITOFP: | ||||||
1057 | return AArch64::UCVTFUWSri; | ||||||
1058 | case TargetOpcode::G_FPTOSI: | ||||||
1059 | return AArch64::FCVTZSUWSr; | ||||||
1060 | case TargetOpcode::G_FPTOUI: | ||||||
1061 | return AArch64::FCVTZUUWSr; | ||||||
1062 | default: | ||||||
1063 | return GenericOpc; | ||||||
1064 | } | ||||||
1065 | case 64: | ||||||
1066 | switch (GenericOpc) { | ||||||
1067 | case TargetOpcode::G_SITOFP: | ||||||
1068 | return AArch64::SCVTFUXSri; | ||||||
1069 | case TargetOpcode::G_UITOFP: | ||||||
1070 | return AArch64::UCVTFUXSri; | ||||||
1071 | case TargetOpcode::G_FPTOSI: | ||||||
1072 | return AArch64::FCVTZSUWDr; | ||||||
1073 | case TargetOpcode::G_FPTOUI: | ||||||
1074 | return AArch64::FCVTZUUWDr; | ||||||
1075 | default: | ||||||
1076 | return GenericOpc; | ||||||
1077 | } | ||||||
1078 | default: | ||||||
1079 | return GenericOpc; | ||||||
1080 | } | ||||||
1081 | case 64: | ||||||
1082 | switch (SrcSize) { | ||||||
1083 | case 32: | ||||||
1084 | switch (GenericOpc) { | ||||||
1085 | case TargetOpcode::G_SITOFP: | ||||||
1086 | return AArch64::SCVTFUWDri; | ||||||
1087 | case TargetOpcode::G_UITOFP: | ||||||
1088 | return AArch64::UCVTFUWDri; | ||||||
1089 | case TargetOpcode::G_FPTOSI: | ||||||
1090 | return AArch64::FCVTZSUXSr; | ||||||
1091 | case TargetOpcode::G_FPTOUI: | ||||||
1092 | return AArch64::FCVTZUUXSr; | ||||||
1093 | default: | ||||||
1094 | return GenericOpc; | ||||||
1095 | } | ||||||
1096 | case 64: | ||||||
1097 | switch (GenericOpc) { | ||||||
1098 | case TargetOpcode::G_SITOFP: | ||||||
1099 | return AArch64::SCVTFUXDri; | ||||||
1100 | case TargetOpcode::G_UITOFP: | ||||||
1101 | return AArch64::UCVTFUXDri; | ||||||
1102 | case TargetOpcode::G_FPTOSI: | ||||||
1103 | return AArch64::FCVTZSUXDr; | ||||||
1104 | case TargetOpcode::G_FPTOUI: | ||||||
1105 | return AArch64::FCVTZUUXDr; | ||||||
1106 | default: | ||||||
1107 | return GenericOpc; | ||||||
1108 | } | ||||||
1109 | default: | ||||||
1110 | return GenericOpc; | ||||||
1111 | } | ||||||
1112 | default: | ||||||
1113 | return GenericOpc; | ||||||
1114 | }; | ||||||
1115 | return GenericOpc; | ||||||
1116 | } | ||||||
1117 | |||||||
1118 | MachineInstr * | ||||||
1119 | AArch64InstructionSelector::emitSelect(Register Dst, Register True, | ||||||
1120 | Register False, AArch64CC::CondCode CC, | ||||||
1121 | MachineIRBuilder &MIB) const { | ||||||
1122 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||||
1123 | assert(RBI.getRegBank(False, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)-> getID() == RBI.getRegBank(True, MRI, TRI)->getID() && "Expected both select operands to have the same regbank?") ? void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1125, __extension__ __PRETTY_FUNCTION__)) | ||||||
1124 | RBI.getRegBank(True, MRI, TRI)->getID() &&(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)-> getID() == RBI.getRegBank(True, MRI, TRI)->getID() && "Expected both select operands to have the same regbank?") ? void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1125, __extension__ __PRETTY_FUNCTION__)) | ||||||
1125 | "Expected both select operands to have the same regbank?")(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)-> getID() == RBI.getRegBank(True, MRI, TRI)->getID() && "Expected both select operands to have the same regbank?") ? void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1125, __extension__ __PRETTY_FUNCTION__)); | ||||||
1126 | LLT Ty = MRI.getType(True); | ||||||
1127 | if (Ty.isVector()) | ||||||
1128 | return nullptr; | ||||||
1129 | const unsigned Size = Ty.getSizeInBits(); | ||||||
1130 | assert((Size == 32 || Size == 64) &&(static_cast <bool> ((Size == 32 || Size == 64) && "Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1131, __extension__ __PRETTY_FUNCTION__)) | ||||||
1131 | "Expected 32 bit or 64 bit select only?")(static_cast <bool> ((Size == 32 || Size == 64) && "Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1131, __extension__ __PRETTY_FUNCTION__)); | ||||||
1132 | const bool Is32Bit = Size == 32; | ||||||
1133 | if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { | ||||||
1134 | unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; | ||||||
1135 | auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); | ||||||
1136 | constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); | ||||||
1137 | return &*FCSel; | ||||||
1138 | } | ||||||
1139 | |||||||
1140 | // By default, we'll try and emit a CSEL. | ||||||
1141 | unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; | ||||||
1142 | bool Optimized = false; | ||||||
1143 | auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, | ||||||
1144 | &Optimized](Register &Reg, Register &OtherReg, | ||||||
1145 | bool Invert) { | ||||||
1146 | if (Optimized) | ||||||
1147 | return false; | ||||||
1148 | |||||||
1149 | // Attempt to fold: | ||||||
1150 | // | ||||||
1151 | // %sub = G_SUB 0, %x | ||||||
1152 | // %select = G_SELECT cc, %reg, %sub | ||||||
1153 | // | ||||||
1154 | // Into: | ||||||
1155 | // %select = CSNEG %reg, %x, cc | ||||||
1156 | Register MatchReg; | ||||||
1157 | if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { | ||||||
1158 | Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; | ||||||
1159 | Reg = MatchReg; | ||||||
1160 | if (Invert) { | ||||||
1161 | CC = AArch64CC::getInvertedCondCode(CC); | ||||||
1162 | std::swap(Reg, OtherReg); | ||||||
1163 | } | ||||||
1164 | return true; | ||||||
1165 | } | ||||||
1166 | |||||||
1167 | // Attempt to fold: | ||||||
1168 | // | ||||||
1169 | // %xor = G_XOR %x, -1 | ||||||
1170 | // %select = G_SELECT cc, %reg, %xor | ||||||
1171 | // | ||||||
1172 | // Into: | ||||||
1173 | // %select = CSINV %reg, %x, cc | ||||||
1174 | if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { | ||||||
1175 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||||
1176 | Reg = MatchReg; | ||||||
1177 | if (Invert) { | ||||||
1178 | CC = AArch64CC::getInvertedCondCode(CC); | ||||||
1179 | std::swap(Reg, OtherReg); | ||||||
1180 | } | ||||||
1181 | return true; | ||||||
1182 | } | ||||||
1183 | |||||||
1184 | // Attempt to fold: | ||||||
1185 | // | ||||||
1186 | // %add = G_ADD %x, 1 | ||||||
1187 | // %select = G_SELECT cc, %reg, %add | ||||||
1188 | // | ||||||
1189 | // Into: | ||||||
1190 | // %select = CSINC %reg, %x, cc | ||||||
1191 | if (mi_match(Reg, MRI, | ||||||
1192 | m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)), | ||||||
1193 | m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) { | ||||||
1194 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||||
1195 | Reg = MatchReg; | ||||||
1196 | if (Invert) { | ||||||
1197 | CC = AArch64CC::getInvertedCondCode(CC); | ||||||
1198 | std::swap(Reg, OtherReg); | ||||||
1199 | } | ||||||
1200 | return true; | ||||||
1201 | } | ||||||
1202 | |||||||
1203 | return false; | ||||||
1204 | }; | ||||||
1205 | |||||||
1206 | // Helper lambda which tries to use CSINC/CSINV for the instruction when its | ||||||
1207 | // true/false values are constants. | ||||||
1208 | // FIXME: All of these patterns already exist in tablegen. We should be | ||||||
1209 | // able to import these. | ||||||
1210 | auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, | ||||||
1211 | &Optimized]() { | ||||||
1212 | if (Optimized) | ||||||
1213 | return false; | ||||||
1214 | auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); | ||||||
1215 | auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); | ||||||
1216 | if (!TrueCst && !FalseCst) | ||||||
1217 | return false; | ||||||
1218 | |||||||
1219 | Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; | ||||||
1220 | if (TrueCst && FalseCst) { | ||||||
1221 | int64_t T = TrueCst->Value.getSExtValue(); | ||||||
1222 | int64_t F = FalseCst->Value.getSExtValue(); | ||||||
1223 | |||||||
1224 | if (T == 0 && F == 1) { | ||||||
1225 | // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc | ||||||
1226 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||||
1227 | True = ZReg; | ||||||
1228 | False = ZReg; | ||||||
1229 | return true; | ||||||
1230 | } | ||||||
1231 | |||||||
1232 | if (T == 0 && F == -1) { | ||||||
1233 | // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc | ||||||
1234 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||||
1235 | True = ZReg; | ||||||
1236 | False = ZReg; | ||||||
1237 | return true; | ||||||
1238 | } | ||||||
1239 | } | ||||||
1240 | |||||||
1241 | if (TrueCst) { | ||||||
1242 | int64_t T = TrueCst->Value.getSExtValue(); | ||||||
1243 | if (T == 1) { | ||||||
1244 | // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc | ||||||
1245 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||||
1246 | True = False; | ||||||
1247 | False = ZReg; | ||||||
1248 | CC = AArch64CC::getInvertedCondCode(CC); | ||||||
1249 | return true; | ||||||
1250 | } | ||||||
1251 | |||||||
1252 | if (T == -1) { | ||||||
1253 | // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc | ||||||
1254 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||||
1255 | True = False; | ||||||
1256 | False = ZReg; | ||||||
1257 | CC = AArch64CC::getInvertedCondCode(CC); | ||||||
1258 | return true; | ||||||
1259 | } | ||||||
1260 | } | ||||||
1261 | |||||||
1262 | if (FalseCst) { | ||||||
1263 | int64_t F = FalseCst->Value.getSExtValue(); | ||||||
1264 | if (F == 1) { | ||||||
1265 | // G_SELECT cc, t, 1 -> CSINC t, zreg, cc | ||||||
1266 | Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; | ||||||
1267 | False = ZReg; | ||||||
1268 | return true; | ||||||
1269 | } | ||||||
1270 | |||||||
1271 | if (F == -1) { | ||||||
1272 | // G_SELECT cc, t, -1 -> CSINC t, zreg, cc | ||||||
1273 | Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; | ||||||
1274 | False = ZReg; | ||||||
1275 | return true; | ||||||
1276 | } | ||||||
1277 | } | ||||||
1278 | return false; | ||||||
1279 | }; | ||||||
1280 | |||||||
1281 | Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); | ||||||
1282 | Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); | ||||||
1283 | Optimized |= TryOptSelectCst(); | ||||||
1284 | auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); | ||||||
1285 | constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); | ||||||
1286 | return &*SelectInst; | ||||||
1287 | } | ||||||
1288 | |||||||
1289 | static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { | ||||||
1290 | switch (P) { | ||||||
1291 | default: | ||||||
1292 | llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1292); | ||||||
1293 | case CmpInst::ICMP_NE: | ||||||
1294 | return AArch64CC::NE; | ||||||
1295 | case CmpInst::ICMP_EQ: | ||||||
1296 | return AArch64CC::EQ; | ||||||
1297 | case CmpInst::ICMP_SGT: | ||||||
1298 | return AArch64CC::GT; | ||||||
1299 | case CmpInst::ICMP_SGE: | ||||||
1300 | return AArch64CC::GE; | ||||||
1301 | case CmpInst::ICMP_SLT: | ||||||
1302 | return AArch64CC::LT; | ||||||
1303 | case CmpInst::ICMP_SLE: | ||||||
1304 | return AArch64CC::LE; | ||||||
1305 | case CmpInst::ICMP_UGT: | ||||||
1306 | return AArch64CC::HI; | ||||||
1307 | case CmpInst::ICMP_UGE: | ||||||
1308 | return AArch64CC::HS; | ||||||
1309 | case CmpInst::ICMP_ULT: | ||||||
1310 | return AArch64CC::LO; | ||||||
1311 | case CmpInst::ICMP_ULE: | ||||||
1312 | return AArch64CC::LS; | ||||||
1313 | } | ||||||
1314 | } | ||||||
1315 | |||||||
1316 | /// Return a register which can be used as a bit to test in a TB(N)Z. | ||||||
1317 | static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, | ||||||
1318 | MachineRegisterInfo &MRI) { | ||||||
1319 | assert(Reg.isValid() && "Expected valid register!")(static_cast <bool> (Reg.isValid() && "Expected valid register!" ) ? void (0) : __assert_fail ("Reg.isValid() && \"Expected valid register!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1319, __extension__ __PRETTY_FUNCTION__)); | ||||||
1320 | while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { | ||||||
1321 | unsigned Opc = MI->getOpcode(); | ||||||
1322 | |||||||
1323 | if (!MI->getOperand(0).isReg() || | ||||||
1324 | !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) | ||||||
1325 | break; | ||||||
1326 | |||||||
1327 | // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. | ||||||
1328 | // | ||||||
1329 | // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number | ||||||
1330 | // on the truncated x is the same as the bit number on x. | ||||||
1331 | if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || | ||||||
1332 | Opc == TargetOpcode::G_TRUNC) { | ||||||
1333 | Register NextReg = MI->getOperand(1).getReg(); | ||||||
1334 | // Did we find something worth folding? | ||||||
1335 | if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg)) | ||||||
1336 | break; | ||||||
1337 | |||||||
1338 | // NextReg is worth folding. Keep looking. | ||||||
1339 | Reg = NextReg; | ||||||
1340 | continue; | ||||||
1341 | } | ||||||
1342 | |||||||
1343 | // Attempt to find a suitable operation with a constant on one side. | ||||||
1344 | Optional<uint64_t> C; | ||||||
1345 | Register TestReg; | ||||||
1346 | switch (Opc) { | ||||||
1347 | default: | ||||||
1348 | break; | ||||||
1349 | case TargetOpcode::G_AND: | ||||||
1350 | case TargetOpcode::G_XOR: { | ||||||
1351 | TestReg = MI->getOperand(1).getReg(); | ||||||
1352 | Register ConstantReg = MI->getOperand(2).getReg(); | ||||||
1353 | auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); | ||||||
1354 | if (!VRegAndVal) { | ||||||
1355 | // AND commutes, check the other side for a constant. | ||||||
1356 | // FIXME: Can we canonicalize the constant so that it's always on the | ||||||
1357 | // same side at some point earlier? | ||||||
1358 | std::swap(ConstantReg, TestReg); | ||||||
1359 | VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); | ||||||
1360 | } | ||||||
1361 | if (VRegAndVal) | ||||||
1362 | C = VRegAndVal->Value.getSExtValue(); | ||||||
1363 | break; | ||||||
1364 | } | ||||||
1365 | case TargetOpcode::G_ASHR: | ||||||
1366 | case TargetOpcode::G_LSHR: | ||||||
1367 | case TargetOpcode::G_SHL: { | ||||||
1368 | TestReg = MI->getOperand(1).getReg(); | ||||||
1369 | auto VRegAndVal = | ||||||
1370 | getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); | ||||||
1371 | if (VRegAndVal) | ||||||
1372 | C = VRegAndVal->Value.getSExtValue(); | ||||||
1373 | break; | ||||||
1374 | } | ||||||
1375 | } | ||||||
1376 | |||||||
1377 | // Didn't find a constant or viable register. Bail out of the loop. | ||||||
1378 | if (!C || !TestReg.isValid()) | ||||||
1379 | break; | ||||||
1380 | |||||||
1381 | // We found a suitable instruction with a constant. Check to see if we can | ||||||
1382 | // walk through the instruction. | ||||||
1383 | Register NextReg; | ||||||
1384 | unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); | ||||||
1385 | switch (Opc) { | ||||||
1386 | default: | ||||||
1387 | break; | ||||||
1388 | case TargetOpcode::G_AND: | ||||||
1389 | // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. | ||||||
1390 | if ((*C >> Bit) & 1) | ||||||
1391 | NextReg = TestReg; | ||||||
1392 | break; | ||||||
1393 | case TargetOpcode::G_SHL: | ||||||
1394 | // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in | ||||||
1395 | // the type of the register. | ||||||
1396 | if (*C <= Bit && (Bit - *C) < TestRegSize) { | ||||||
1397 | NextReg = TestReg; | ||||||
1398 | Bit = Bit - *C; | ||||||
1399 | } | ||||||
1400 | break; | ||||||
1401 | case TargetOpcode::G_ASHR: | ||||||
1402 | // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits | ||||||
1403 | // in x | ||||||
1404 | NextReg = TestReg; | ||||||
1405 | Bit = Bit + *C; | ||||||
1406 | if (Bit >= TestRegSize) | ||||||
1407 | Bit = TestRegSize - 1; | ||||||
1408 | break; | ||||||
1409 | case TargetOpcode::G_LSHR: | ||||||
1410 | // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x | ||||||
1411 | if ((Bit + *C) < TestRegSize) { | ||||||
1412 | NextReg = TestReg; | ||||||
1413 | Bit = Bit + *C; | ||||||
1414 | } | ||||||
1415 | break; | ||||||
1416 | case TargetOpcode::G_XOR: | ||||||
1417 | // We can walk through a G_XOR by inverting whether we use tbz/tbnz when | ||||||
1418 | // appropriate. | ||||||
1419 | // | ||||||
1420 | // e.g. If x' = xor x, c, and the b-th bit is set in c then | ||||||
1421 | // | ||||||
1422 | // tbz x', b -> tbnz x, b | ||||||
1423 | // | ||||||
1424 | // Because x' only has the b-th bit set if x does not. | ||||||
1425 | if ((*C >> Bit) & 1) | ||||||
1426 | Invert = !Invert; | ||||||
1427 | NextReg = TestReg; | ||||||
1428 | break; | ||||||
1429 | } | ||||||
1430 | |||||||
1431 | // Check if we found anything worth folding. | ||||||
1432 | if (!NextReg.isValid()) | ||||||
1433 | return Reg; | ||||||
1434 | Reg = NextReg; | ||||||
1435 | } | ||||||
1436 | |||||||
1437 | return Reg; | ||||||
1438 | } | ||||||
1439 | |||||||
1440 | MachineInstr *AArch64InstructionSelector::emitTestBit( | ||||||
1441 | Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, | ||||||
1442 | MachineIRBuilder &MIB) const { | ||||||
1443 | assert(TestReg.isValid())(static_cast <bool> (TestReg.isValid()) ? void (0) : __assert_fail ("TestReg.isValid()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1443, __extension__ __PRETTY_FUNCTION__)); | ||||||
1444 | assert(ProduceNonFlagSettingCondBr &&(static_cast <bool> (ProduceNonFlagSettingCondBr && "Cannot emit TB(N)Z with speculation tracking!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1445, __extension__ __PRETTY_FUNCTION__)) | ||||||
1445 | "Cannot emit TB(N)Z with speculation tracking!")(static_cast <bool> (ProduceNonFlagSettingCondBr && "Cannot emit TB(N)Z with speculation tracking!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1445, __extension__ __PRETTY_FUNCTION__)); | ||||||
1446 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||||
1447 | |||||||
1448 | // Attempt to optimize the test bit by walking over instructions. | ||||||
1449 | TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); | ||||||
1450 | LLT Ty = MRI.getType(TestReg); | ||||||
1451 | unsigned Size = Ty.getSizeInBits(); | ||||||
1452 | assert(!Ty.isVector() && "Expected a scalar!")(static_cast <bool> (!Ty.isVector() && "Expected a scalar!" ) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1452, __extension__ __PRETTY_FUNCTION__)); | ||||||
1453 | assert(Bit < 64 && "Bit is too large!")(static_cast <bool> (Bit < 64 && "Bit is too large!" ) ? void (0) : __assert_fail ("Bit < 64 && \"Bit is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1453, __extension__ __PRETTY_FUNCTION__)); | ||||||
1454 | |||||||
1455 | // When the test register is a 64-bit register, we have to narrow to make | ||||||
1456 | // TBNZW work. | ||||||
1457 | bool UseWReg = Bit < 32; | ||||||
1458 | unsigned NecessarySize = UseWReg ? 32 : 64; | ||||||
1459 | if (Size != NecessarySize) | ||||||
1460 | TestReg = moveScalarRegClass( | ||||||
1461 | TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, | ||||||
1462 | MIB); | ||||||
1463 | |||||||
1464 | static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, | ||||||
1465 | {AArch64::TBZW, AArch64::TBNZW}}; | ||||||
1466 | unsigned Opc = OpcTable[UseWReg][IsNegative]; | ||||||
1467 | auto TestBitMI = | ||||||
1468 | MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); | ||||||
1469 | constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); | ||||||
1470 | return &*TestBitMI; | ||||||
1471 | } | ||||||
1472 | |||||||
1473 | bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( | ||||||
1474 | MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, | ||||||
1475 | MachineIRBuilder &MIB) const { | ||||||
1476 | assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?")(static_cast <bool> (AndInst.getOpcode() == TargetOpcode ::G_AND && "Expected G_AND only?") ? void (0) : __assert_fail ("AndInst.getOpcode() == TargetOpcode::G_AND && \"Expected G_AND only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1476, __extension__ __PRETTY_FUNCTION__)); | ||||||
1477 | // Given something like this: | ||||||
1478 | // | ||||||
1479 | // %x = ...Something... | ||||||
1480 | // %one = G_CONSTANT i64 1 | ||||||
1481 | // %zero = G_CONSTANT i64 0 | ||||||
1482 | // %and = G_AND %x, %one | ||||||
1483 | // %cmp = G_ICMP intpred(ne), %and, %zero | ||||||
1484 | // %cmp_trunc = G_TRUNC %cmp | ||||||
1485 | // G_BRCOND %cmp_trunc, %bb.3 | ||||||
1486 | // | ||||||
1487 | // We want to try and fold the AND into the G_BRCOND and produce either a | ||||||
1488 | // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). | ||||||
1489 | // | ||||||
1490 | // In this case, we'd get | ||||||
1491 | // | ||||||
1492 | // TBNZ %x %bb.3 | ||||||
1493 | // | ||||||
1494 | |||||||
1495 | // Check if the AND has a constant on its RHS which we can use as a mask. | ||||||
1496 | // If it's a power of 2, then it's the same as checking a specific bit. | ||||||
1497 | // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) | ||||||
1498 | auto MaybeBit = getConstantVRegValWithLookThrough( | ||||||
1499 | AndInst.getOperand(2).getReg(), *MIB.getMRI()); | ||||||
1500 | if (!MaybeBit) | ||||||
1501 | return false; | ||||||
1502 | |||||||
1503 | int32_t Bit = MaybeBit->Value.exactLogBase2(); | ||||||
1504 | if (Bit < 0) | ||||||
1505 | return false; | ||||||
1506 | |||||||
1507 | Register TestReg = AndInst.getOperand(1).getReg(); | ||||||
1508 | |||||||
1509 | // Emit a TB(N)Z. | ||||||
1510 | emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); | ||||||
1511 | return true; | ||||||
1512 | } | ||||||
1513 | |||||||
1514 | MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, | ||||||
1515 | bool IsNegative, | ||||||
1516 | MachineBasicBlock *DestMBB, | ||||||
1517 | MachineIRBuilder &MIB) const { | ||||||
1518 | assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!")(static_cast <bool> (ProduceNonFlagSettingCondBr && "CBZ does not set flags!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"CBZ does not set flags!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1518, __extension__ __PRETTY_FUNCTION__)); | ||||||
1519 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||||
1520 | assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI )->getID() == AArch64::GPRRegBankID && "Expected GPRs only?" ) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1522, __extension__ __PRETTY_FUNCTION__)) | ||||||
1521 | AArch64::GPRRegBankID &&(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI )->getID() == AArch64::GPRRegBankID && "Expected GPRs only?" ) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1522, __extension__ __PRETTY_FUNCTION__)) | ||||||
1522 | "Expected GPRs only?")(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI )->getID() == AArch64::GPRRegBankID && "Expected GPRs only?" ) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1522, __extension__ __PRETTY_FUNCTION__)); | ||||||
1523 | auto Ty = MRI.getType(CompareReg); | ||||||
1524 | unsigned Width = Ty.getSizeInBits(); | ||||||
1525 | assert(!Ty.isVector() && "Expected scalar only?")(static_cast <bool> (!Ty.isVector() && "Expected scalar only?" ) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected scalar only?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1525, __extension__ __PRETTY_FUNCTION__)); | ||||||
1526 | assert(Width <= 64 && "Expected width to be at most 64?")(static_cast <bool> (Width <= 64 && "Expected width to be at most 64?" ) ? void (0) : __assert_fail ("Width <= 64 && \"Expected width to be at most 64?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1526, __extension__ __PRETTY_FUNCTION__)); | ||||||
1527 | static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, | ||||||
1528 | {AArch64::CBNZW, AArch64::CBNZX}}; | ||||||
1529 | unsigned Opc = OpcTable[IsNegative][Width == 64]; | ||||||
1530 | auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); | ||||||
1531 | constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); | ||||||
1532 | return &*BranchMI; | ||||||
1533 | } | ||||||
1534 | |||||||
1535 | bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( | ||||||
1536 | MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { | ||||||
1537 | assert(FCmp.getOpcode() == TargetOpcode::G_FCMP)(static_cast <bool> (FCmp.getOpcode() == TargetOpcode:: G_FCMP) ? void (0) : __assert_fail ("FCmp.getOpcode() == TargetOpcode::G_FCMP" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1537, __extension__ __PRETTY_FUNCTION__)); | ||||||
1538 | assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1538, __extension__ __PRETTY_FUNCTION__)); | ||||||
1539 | // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't | ||||||
1540 | // totally clean. Some of them require two branches to implement. | ||||||
1541 | auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); | ||||||
1542 | emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, | ||||||
1543 | Pred); | ||||||
1544 | AArch64CC::CondCode CC1, CC2; | ||||||
1545 | changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); | ||||||
1546 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | ||||||
1547 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); | ||||||
1548 | if (CC2 != AArch64CC::AL) | ||||||
1549 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); | ||||||
1550 | I.eraseFromParent(); | ||||||
1551 | return true; | ||||||
1552 | } | ||||||
1553 | |||||||
/// Attempt to select a G_BRCOND fed by a G_ICMP as a flag-free conditional
/// branch: TB(N)Z (test a single bit) or CB(N)Z (compare against zero).
///
/// \returns true if \p I was erased and replaced by an optimized branch;
/// false if the caller should fall back to CMP + Bcc.
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode:: G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1556, __extension__ __PRETTY_FUNCTION__));
  assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1557, __extension__ __PRETTY_FUNCTION__));
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  Register LHS = ICmp.getOperand(2).getReg();
  Register RHS = ICmp.getOperand(3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    // (x s> -1) holds iff the sign bit of x is clear.
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    // (x s< 0) holds iff the sign bit of x is set.
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(Pred)) {
    if (!VRegAndVal) {
      // eq/ne is symmetric, so retry with the operands swapped in case the
      // constant is on the other side.
      std::swap(RHS, LHS);
      VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
      AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
    }

    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}
1639 | |||||||
1640 | bool AArch64InstructionSelector::selectCompareBranchFedByICmp( | ||||||
1641 | MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { | ||||||
1642 | assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode:: G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1642, __extension__ __PRETTY_FUNCTION__)); | ||||||
1643 | assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1643, __extension__ __PRETTY_FUNCTION__)); | ||||||
1644 | if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) | ||||||
1645 | return true; | ||||||
1646 | |||||||
1647 | // Couldn't optimize. Emit a compare + a Bcc. | ||||||
1648 | MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); | ||||||
1649 | auto PredOp = ICmp.getOperand(1); | ||||||
1650 | emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); | ||||||
1651 | const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( | ||||||
1652 | static_cast<CmpInst::Predicate>(PredOp.getPredicate())); | ||||||
1653 | MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); | ||||||
1654 | I.eraseFromParent(); | ||||||
1655 | return true; | ||||||
1656 | } | ||||||
1657 | |||||||
1658 | bool AArch64InstructionSelector::selectCompareBranch( | ||||||
1659 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { | ||||||
1660 | Register CondReg = I.getOperand(0).getReg(); | ||||||
1661 | MachineInstr *CCMI = MRI.getVRegDef(CondReg); | ||||||
1662 | if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { | ||||||
1663 | CondReg = CCMI->getOperand(1).getReg(); | ||||||
1664 | CCMI = MRI.getVRegDef(CondReg); | ||||||
1665 | } | ||||||
1666 | |||||||
1667 | // Try to select the G_BRCOND using whatever is feeding the condition if | ||||||
1668 | // possible. | ||||||
1669 | unsigned CCMIOpc = CCMI->getOpcode(); | ||||||
1670 | if (CCMIOpc == TargetOpcode::G_FCMP) | ||||||
1671 | return selectCompareBranchFedByFCmp(I, *CCMI, MIB); | ||||||
1672 | if (CCMIOpc == TargetOpcode::G_ICMP) | ||||||
1673 | return selectCompareBranchFedByICmp(I, *CCMI, MIB); | ||||||
1674 | |||||||
1675 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z | ||||||
1676 | // instructions will not be produced, as they are conditional branch | ||||||
1677 | // instructions that do not set flags. | ||||||
1678 | if (ProduceNonFlagSettingCondBr) { | ||||||
1679 | emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, | ||||||
1680 | I.getOperand(1).getMBB(), MIB); | ||||||
1681 | I.eraseFromParent(); | ||||||
1682 | return true; | ||||||
1683 | } | ||||||
1684 | |||||||
1685 | // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. | ||||||
1686 | auto TstMI = | ||||||
1687 | MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); | ||||||
1688 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); | ||||||
1689 | auto Bcc = MIB.buildInstr(AArch64::Bcc) | ||||||
1690 | .addImm(AArch64CC::EQ) | ||||||
1691 | .addMBB(I.getOperand(1).getMBB()); | ||||||
1692 | I.eraseFromParent(); | ||||||
1693 | return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); | ||||||
1694 | } | ||||||
1695 | |||||||
1696 | /// Returns the element immediate value of a vector shift operand if found. | ||||||
1697 | /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. | ||||||
1698 | static Optional<int64_t> getVectorShiftImm(Register Reg, | ||||||
1699 | MachineRegisterInfo &MRI) { | ||||||
1700 | assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand")(static_cast <bool> (MRI.getType(Reg).isVector() && "Expected a *vector* shift operand") ? void (0) : __assert_fail ("MRI.getType(Reg).isVector() && \"Expected a *vector* shift operand\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1700, __extension__ __PRETTY_FUNCTION__)); | ||||||
1701 | MachineInstr *OpMI = MRI.getVRegDef(Reg); | ||||||
1702 | assert(OpMI && "Expected to find a vreg def for vector shift operand")(static_cast <bool> (OpMI && "Expected to find a vreg def for vector shift operand" ) ? void (0) : __assert_fail ("OpMI && \"Expected to find a vreg def for vector shift operand\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1702, __extension__ __PRETTY_FUNCTION__)); | ||||||
1703 | return getAArch64VectorSplatScalar(*OpMI, MRI); | ||||||
1704 | } | ||||||
1705 | |||||||
1706 | /// Matches and returns the shift immediate value for a SHL instruction given | ||||||
1707 | /// a shift operand. | ||||||
1708 | static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { | ||||||
1709 | Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); | ||||||
1710 | if (!ShiftImm) | ||||||
1711 | return None; | ||||||
1712 | // Check the immediate is in range for a SHL. | ||||||
1713 | int64_t Imm = *ShiftImm; | ||||||
1714 | if (Imm < 0) | ||||||
1715 | return None; | ||||||
1716 | switch (SrcTy.getElementType().getSizeInBits()) { | ||||||
1717 | default: | ||||||
1718 | LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift" ; } } while (false); | ||||||
1719 | return None; | ||||||
1720 | case 8: | ||||||
1721 | if (Imm > 7) | ||||||
1722 | return None; | ||||||
1723 | break; | ||||||
1724 | case 16: | ||||||
1725 | if (Imm > 15) | ||||||
1726 | return None; | ||||||
1727 | break; | ||||||
1728 | case 32: | ||||||
1729 | if (Imm > 31) | ||||||
1730 | return None; | ||||||
1731 | break; | ||||||
1732 | case 64: | ||||||
1733 | if (Imm > 63) | ||||||
1734 | return None; | ||||||
1735 | break; | ||||||
1736 | } | ||||||
1737 | return Imm; | ||||||
1738 | } | ||||||
1739 | |||||||
1740 | bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, | ||||||
1741 | MachineRegisterInfo &MRI) { | ||||||
1742 | assert(I.getOpcode() == TargetOpcode::G_SHL)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1742, __extension__ __PRETTY_FUNCTION__)); | ||||||
1743 | Register DstReg = I.getOperand(0).getReg(); | ||||||
1744 | const LLT Ty = MRI.getType(DstReg); | ||||||
1745 | Register Src1Reg = I.getOperand(1).getReg(); | ||||||
1746 | Register Src2Reg = I.getOperand(2).getReg(); | ||||||
1747 | |||||||
1748 | if (!Ty.isVector()) | ||||||
1749 | return false; | ||||||
1750 | |||||||
1751 | // Check if we have a vector of constants on RHS that we can select as the | ||||||
1752 | // immediate form. | ||||||
1753 | Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); | ||||||
1754 | |||||||
1755 | unsigned Opc = 0; | ||||||
1756 | if (Ty == LLT::fixed_vector(2, 64)) { | ||||||
1757 | Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; | ||||||
1758 | } else if (Ty == LLT::fixed_vector(4, 32)) { | ||||||
1759 | Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; | ||||||
1760 | } else if (Ty == LLT::fixed_vector(2, 32)) { | ||||||
1761 | Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; | ||||||
1762 | } else if (Ty == LLT::fixed_vector(4, 16)) { | ||||||
1763 | Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; | ||||||
1764 | } else if (Ty == LLT::fixed_vector(8, 16)) { | ||||||
1765 | Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; | ||||||
1766 | } else if (Ty == LLT::fixed_vector(16, 8)) { | ||||||
1767 | Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; | ||||||
1768 | } else if (Ty == LLT::fixed_vector(8, 8)) { | ||||||
1769 | Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; | ||||||
1770 | } else { | ||||||
1771 | LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; } } while (false); | ||||||
1772 | return false; | ||||||
1773 | } | ||||||
1774 | |||||||
1775 | auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); | ||||||
1776 | if (ImmVal) | ||||||
1777 | Shl.addImm(*ImmVal); | ||||||
1778 | else | ||||||
1779 | Shl.addUse(Src2Reg); | ||||||
1780 | constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); | ||||||
1781 | I.eraseFromParent(); | ||||||
1782 | return true; | ||||||
1783 | } | ||||||
1784 | |||||||
1785 | bool AArch64InstructionSelector::selectVectorAshrLshr( | ||||||
1786 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
1787 | assert(I.getOpcode() == TargetOpcode::G_ASHR ||(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1788, __extension__ __PRETTY_FUNCTION__)) | ||||||
1788 | I.getOpcode() == TargetOpcode::G_LSHR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1788, __extension__ __PRETTY_FUNCTION__)); | ||||||
1789 | Register DstReg = I.getOperand(0).getReg(); | ||||||
1790 | const LLT Ty = MRI.getType(DstReg); | ||||||
1791 | Register Src1Reg = I.getOperand(1).getReg(); | ||||||
1792 | Register Src2Reg = I.getOperand(2).getReg(); | ||||||
1793 | |||||||
1794 | if (!Ty.isVector()) | ||||||
1795 | return false; | ||||||
1796 | |||||||
1797 | bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; | ||||||
1798 | |||||||
1799 | // We expect the immediate case to be lowered in the PostLegalCombiner to | ||||||
1800 | // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. | ||||||
1801 | |||||||
1802 | // There is not a shift right register instruction, but the shift left | ||||||
1803 | // register instruction takes a signed value, where negative numbers specify a | ||||||
1804 | // right shift. | ||||||
1805 | |||||||
1806 | unsigned Opc = 0; | ||||||
1807 | unsigned NegOpc = 0; | ||||||
1808 | const TargetRegisterClass *RC = | ||||||
1809 | getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); | ||||||
1810 | if (Ty == LLT::fixed_vector(2, 64)) { | ||||||
1811 | Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; | ||||||
1812 | NegOpc = AArch64::NEGv2i64; | ||||||
1813 | } else if (Ty == LLT::fixed_vector(4, 32)) { | ||||||
1814 | Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; | ||||||
1815 | NegOpc = AArch64::NEGv4i32; | ||||||
1816 | } else if (Ty == LLT::fixed_vector(2, 32)) { | ||||||
1817 | Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; | ||||||
1818 | NegOpc = AArch64::NEGv2i32; | ||||||
1819 | } else if (Ty == LLT::fixed_vector(4, 16)) { | ||||||
1820 | Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; | ||||||
1821 | NegOpc = AArch64::NEGv4i16; | ||||||
1822 | } else if (Ty == LLT::fixed_vector(8, 16)) { | ||||||
1823 | Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; | ||||||
1824 | NegOpc = AArch64::NEGv8i16; | ||||||
1825 | } else if (Ty == LLT::fixed_vector(16, 8)) { | ||||||
1826 | Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; | ||||||
1827 | NegOpc = AArch64::NEGv16i8; | ||||||
1828 | } else if (Ty == LLT::fixed_vector(8, 8)) { | ||||||
1829 | Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; | ||||||
1830 | NegOpc = AArch64::NEGv8i8; | ||||||
1831 | } else { | ||||||
1832 | LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; } } while (false); | ||||||
1833 | return false; | ||||||
1834 | } | ||||||
1835 | |||||||
1836 | auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); | ||||||
1837 | constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); | ||||||
1838 | auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); | ||||||
1839 | constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); | ||||||
1840 | I.eraseFromParent(); | ||||||
1841 | return true; | ||||||
1842 | } | ||||||
1843 | |||||||
/// va_start lowering for the standard AAPCS ABI is not implemented in this
/// selector; returning false rejects the instruction so selection fails here.
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}
1848 | |||||||
1849 | bool AArch64InstructionSelector::selectVaStartDarwin( | ||||||
1850 | MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { | ||||||
1851 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); | ||||||
1852 | Register ListReg = I.getOperand(0).getReg(); | ||||||
1853 | |||||||
1854 | Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | ||||||
1855 | |||||||
1856 | auto MIB = | ||||||
1857 | BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) | ||||||
1858 | .addDef(ArgsAddrReg) | ||||||
1859 | .addFrameIndex(FuncInfo->getVarArgsStackIndex()) | ||||||
1860 | .addImm(0) | ||||||
1861 | .addImm(0); | ||||||
1862 | |||||||
1863 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); | ||||||
1864 | |||||||
1865 | MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) | ||||||
1866 | .addUse(ArgsAddrReg) | ||||||
1867 | .addUse(ListReg) | ||||||
1868 | .addImm(0) | ||||||
1869 | .addMemOperand(*I.memoperands_begin()); | ||||||
1870 | |||||||
1871 | constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); | ||||||
1872 | I.eraseFromParent(); | ||||||
1873 | return true; | ||||||
1874 | } | ||||||
1875 | |||||||
/// Materialize a large-code-model address (GlobalValue or BlockAddress) into
/// the destination of \p I as a MOVZ followed by three MOVKs, inserting one
/// 16-bit chunk (G0..G3 relocation flags) at a time.
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // MOVZ materializes bits [15:0] (MO_G0); the remaining chunks are inserted
  // with MOVK below. MO_NC suppresses overflow checking on the relocation.
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  // Emit one MOVK inserting the 16-bit chunk selected by Flags at bit
  // position Offset. A nonzero ForceDstReg is used as the destination
  // (needed for the final MOVK, which must define I's original dest reg).
  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    // V is either a GlobalValue or a BlockAddress (see the cast<> below);
    // both reuse the offset taken from the MOVZ's operand.
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  // Chunks G1..G3 go at bit offsets 16/32/48; the final MOVK writes into I's
  // destination register.
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}
1912 | |||||||
/// Pre-selection lowering hook: rewrites certain generic instructions in
/// place so the imported TableGen patterns can match them.
///
/// \returns true if \p I was changed.
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64 bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that assume
    // the immediates are s64s. However, if the shifted type is 32 bits and for
    // some reason we receive input GMIR that has an s64 shift amount that's not
    // a G_CONSTANT, insert a truncate so that we can still select the s32
    // register-register variant.
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return false;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty")(static_cast <bool> (!ShiftTy.isVector() && "unexpected vector shift ty" ) ? void (0) : __assert_fail ("!ShiftTy.isVector() && \"unexpected vector shift ty\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1934, __extension__ __PRETTY_FUNCTION__));
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return false;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount")(static_cast <bool> (AmtMI && "could not find a vreg definition for shift amount" ) ? void (0) : __assert_fail ("AmtMI && \"could not find a vreg definition for shift amount\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 1938, __extension__ __PRETTY_FUNCTION__));
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return true;
  }
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    MachineOperand &SrcOp = I.getOperand(0);
    if (MRI.getType(SrcOp.getReg()).isPointer()) {
      // Allow matching with imported patterns for stores of pointers. Unlike
      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
      // and constrain.
      auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
      Register NewSrc = Copy.getReg(0);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
      Changed = true;
    }
    return Changed;
  }
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(DstReg, LLT::scalar(64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (!DstTy.getElementType().isPointer())
      return false;
    // Copy the pointer scalar through a GPR64 so the splat source is s64 too.
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
    MRI.setType(I.getOperand(0).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
    return true;
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
      return false;

    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      else
        I.setDesc(TII.get(AArch64::G_UITOF));
      return true;
    }
    return false;
  }
  default:
    return false;
  }
}
2014 | |||||||
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
///
/// \returns true if \p I was rewritten (to G_ADD, or further to G_SUB when
/// the offset is a 0-x negate idiom); false on failure.
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_PTR_ADD && \"Expected G_PTR_ADD\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2025, __extension__ __PRETTY_FUNCTION__));
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  // Only address space 0 pointers are handled by this conversion.
  if (PtrTy.getAddressSpace() != 0)
    return false;

  // The integer equivalent of the pointer type: s64, or <2 x s64> for vectors
  // of pointers.
  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  // Select the freshly-built G_PTRTOINT immediately; the main selection walk
  // has already passed this point and won't visit it again.
  if (!select(*PtrToInt)) {
    do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd" ; } } while (false);
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  Register NegatedReg;
  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
    return true;
  I.getOperand(2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}
2060 | |||||||
2061 | bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, | ||||||
2062 | MachineRegisterInfo &MRI) { | ||||||
2063 | // We try to match the immediate variant of LSL, which is actually an alias | ||||||
2064 | // for a special case of UBFM. Otherwise, we fall back to the imported | ||||||
2065 | // selector which will match the register variant. | ||||||
2066 | assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL && "unexpected op") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL && \"unexpected op\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2066, __extension__ __PRETTY_FUNCTION__)); | ||||||
2067 | const auto &MO = I.getOperand(2); | ||||||
2068 | auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI); | ||||||
2069 | if (!VRegAndVal) | ||||||
2070 | return false; | ||||||
2071 | |||||||
2072 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
2073 | if (DstTy.isVector()) | ||||||
2074 | return false; | ||||||
2075 | bool Is64Bit = DstTy.getSizeInBits() == 64; | ||||||
2076 | auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); | ||||||
2077 | auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); | ||||||
2078 | |||||||
2079 | if (!Imm1Fn || !Imm2Fn) | ||||||
2080 | return false; | ||||||
2081 | |||||||
2082 | auto NewI = | ||||||
2083 | MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, | ||||||
2084 | {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); | ||||||
2085 | |||||||
2086 | for (auto &RenderFn : *Imm1Fn) | ||||||
2087 | RenderFn(NewI); | ||||||
2088 | for (auto &RenderFn : *Imm2Fn) | ||||||
2089 | RenderFn(NewI); | ||||||
2090 | |||||||
2091 | I.eraseFromParent(); | ||||||
2092 | return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); | ||||||
2093 | } | ||||||
2094 | |||||||
2095 | bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( | ||||||
2096 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
2097 | assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_STORE && \"Expected G_STORE\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2097, __extension__ __PRETTY_FUNCTION__)); | ||||||
2098 | // If we're storing a scalar, it doesn't matter what register bank that | ||||||
2099 | // scalar is on. All that matters is the size. | ||||||
2100 | // | ||||||
2101 | // So, if we see something like this (with a 32-bit scalar as an example): | ||||||
2102 | // | ||||||
2103 | // %x:gpr(s32) = ... something ... | ||||||
2104 | // %y:fpr(s32) = COPY %x:gpr(s32) | ||||||
2105 | // G_STORE %y:fpr(s32) | ||||||
2106 | // | ||||||
2107 | // We can fix this up into something like this: | ||||||
2108 | // | ||||||
2109 | // G_STORE %x:gpr(s32) | ||||||
2110 | // | ||||||
2111 | // And then continue the selection process normally. | ||||||
2112 | Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); | ||||||
2113 | if (!DefDstReg.isValid()) | ||||||
2114 | return false; | ||||||
2115 | LLT DefDstTy = MRI.getType(DefDstReg); | ||||||
2116 | Register StoreSrcReg = I.getOperand(0).getReg(); | ||||||
2117 | LLT StoreSrcTy = MRI.getType(StoreSrcReg); | ||||||
2118 | |||||||
2119 | // If we get something strange like a physical register, then we shouldn't | ||||||
2120 | // go any further. | ||||||
2121 | if (!DefDstTy.isValid()) | ||||||
2122 | return false; | ||||||
2123 | |||||||
2124 | // Are the source and dst types the same size? | ||||||
2125 | if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) | ||||||
2126 | return false; | ||||||
2127 | |||||||
2128 | if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == | ||||||
2129 | RBI.getRegBank(DefDstReg, MRI, TRI)) | ||||||
2130 | return false; | ||||||
2131 | |||||||
2132 | // We have a cross-bank copy, which is entering a store. Let's fold it. | ||||||
2133 | I.getOperand(0).setReg(DefDstReg); | ||||||
2134 | return true; | ||||||
2135 | } | ||||||
2136 | |||||||
2137 | bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { | ||||||
2138 | assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!" ) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2138, __extension__ __PRETTY_FUNCTION__)); | ||||||
2139 | assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() && "Instruction should be in a function!") ? void (0) : __assert_fail ("I.getParent()->getParent() && \"Instruction should be in a function!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2139, __extension__ __PRETTY_FUNCTION__)); | ||||||
2140 | |||||||
2141 | MachineBasicBlock &MBB = *I.getParent(); | ||||||
2142 | MachineFunction &MF = *MBB.getParent(); | ||||||
2143 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||||
2144 | |||||||
2145 | switch (I.getOpcode()) { | ||||||
2146 | case AArch64::G_DUP: { | ||||||
2147 | // Before selecting a DUP instruction, check if it is better selected as a | ||||||
2148 | // MOV or load from a constant pool. | ||||||
2149 | Register Src = I.getOperand(1).getReg(); | ||||||
2150 | auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI); | ||||||
2151 | if (!ValAndVReg) | ||||||
2152 | return false; | ||||||
2153 | LLVMContext &Ctx = MF.getFunction().getContext(); | ||||||
2154 | Register Dst = I.getOperand(0).getReg(); | ||||||
2155 | auto *CV = ConstantDataVector::getSplat( | ||||||
2156 | MRI.getType(Dst).getNumElements(), | ||||||
2157 | ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), | ||||||
2158 | ValAndVReg->Value)); | ||||||
2159 | if (!emitConstantVector(Dst, CV, MIB, MRI)) | ||||||
2160 | return false; | ||||||
2161 | I.eraseFromParent(); | ||||||
2162 | return true; | ||||||
2163 | } | ||||||
2164 | case TargetOpcode::G_BR: | ||||||
2165 | return false; | ||||||
2166 | case TargetOpcode::G_SHL: | ||||||
2167 | return earlySelectSHL(I, MRI); | ||||||
2168 | case TargetOpcode::G_CONSTANT: { | ||||||
2169 | bool IsZero = false; | ||||||
2170 | if (I.getOperand(1).isCImm()) | ||||||
2171 | IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; | ||||||
2172 | else if (I.getOperand(1).isImm()) | ||||||
2173 | IsZero = I.getOperand(1).getImm() == 0; | ||||||
2174 | |||||||
2175 | if (!IsZero) | ||||||
2176 | return false; | ||||||
2177 | |||||||
2178 | Register DefReg = I.getOperand(0).getReg(); | ||||||
2179 | LLT Ty = MRI.getType(DefReg); | ||||||
2180 | if (Ty.getSizeInBits() == 64) { | ||||||
2181 | I.getOperand(1).ChangeToRegister(AArch64::XZR, false); | ||||||
2182 | RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); | ||||||
2183 | } else if (Ty.getSizeInBits() == 32) { | ||||||
2184 | I.getOperand(1).ChangeToRegister(AArch64::WZR, false); | ||||||
2185 | RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); | ||||||
2186 | } else | ||||||
2187 | return false; | ||||||
2188 | |||||||
2189 | I.setDesc(TII.get(TargetOpcode::COPY)); | ||||||
2190 | return true; | ||||||
2191 | } | ||||||
2192 | |||||||
2193 | case TargetOpcode::G_ADD: { | ||||||
2194 | // Check if this is being fed by a G_ICMP on either side. | ||||||
2195 | // | ||||||
2196 | // (cmp pred, x, y) + z | ||||||
2197 | // | ||||||
2198 | // In the above case, when the cmp is true, we increment z by 1. So, we can | ||||||
2199 | // fold the add into the cset for the cmp by using cinc. | ||||||
2200 | // | ||||||
2201 | // FIXME: This would probably be a lot nicer in PostLegalizerLowering. | ||||||
2202 | Register X = I.getOperand(1).getReg(); | ||||||
2203 | |||||||
2204 | // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out | ||||||
2205 | // early if we see it. | ||||||
2206 | LLT Ty = MRI.getType(X); | ||||||
2207 | if (Ty.isVector() || Ty.getSizeInBits() != 32) | ||||||
2208 | return false; | ||||||
2209 | |||||||
2210 | Register CmpReg = I.getOperand(2).getReg(); | ||||||
2211 | MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI); | ||||||
2212 | if (!Cmp) { | ||||||
2213 | std::swap(X, CmpReg); | ||||||
2214 | Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI); | ||||||
2215 | if (!Cmp) | ||||||
2216 | return false; | ||||||
2217 | } | ||||||
2218 | auto Pred = | ||||||
2219 | static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate()); | ||||||
2220 | emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), | ||||||
2221 | Cmp->getOperand(1), MIB); | ||||||
2222 | emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X); | ||||||
2223 | I.eraseFromParent(); | ||||||
2224 | return true; | ||||||
2225 | } | ||||||
2226 | case TargetOpcode::G_OR: { | ||||||
2227 | // Look for operations that take the lower `Width=Size-ShiftImm` bits of | ||||||
2228 | // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via | ||||||
2229 | // shifting and masking that we can replace with a BFI (encoded as a BFM). | ||||||
2230 | Register Dst = I.getOperand(0).getReg(); | ||||||
2231 | LLT Ty = MRI.getType(Dst); | ||||||
2232 | |||||||
2233 | if (!Ty.isScalar()) | ||||||
2234 | return false; | ||||||
2235 | |||||||
2236 | unsigned Size = Ty.getSizeInBits(); | ||||||
2237 | if (Size != 32 && Size != 64) | ||||||
2238 | return false; | ||||||
2239 | |||||||
2240 | Register ShiftSrc; | ||||||
2241 | int64_t ShiftImm; | ||||||
2242 | Register MaskSrc; | ||||||
2243 | int64_t MaskImm; | ||||||
2244 | if (!mi_match( | ||||||
2245 | Dst, MRI, | ||||||
2246 | m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))), | ||||||
2247 | m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm)))))) | ||||||
2248 | return false; | ||||||
2249 | |||||||
2250 | if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) | ||||||
2251 | return false; | ||||||
2252 | |||||||
2253 | int64_t Immr = Size - ShiftImm; | ||||||
2254 | int64_t Imms = Size - ShiftImm - 1; | ||||||
2255 | unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; | ||||||
2256 | emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB); | ||||||
2257 | I.eraseFromParent(); | ||||||
2258 | return true; | ||||||
2259 | } | ||||||
2260 | default: | ||||||
2261 | return false; | ||||||
2262 | } | ||||||
2263 | } | ||||||
2264 | |||||||
2265 | bool AArch64InstructionSelector::select(MachineInstr &I) { | ||||||
2266 | assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!" ) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2266, __extension__ __PRETTY_FUNCTION__)); | ||||||
2267 | assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() && "Instruction should be in a function!") ? void (0) : __assert_fail ("I.getParent()->getParent() && \"Instruction should be in a function!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2267, __extension__ __PRETTY_FUNCTION__)); | ||||||
2268 | |||||||
2269 | MachineBasicBlock &MBB = *I.getParent(); | ||||||
2270 | MachineFunction &MF = *MBB.getParent(); | ||||||
2271 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||||
2272 | |||||||
2273 | const AArch64Subtarget *Subtarget = | ||||||
2274 | &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); | ||||||
2275 | if (Subtarget->requiresStrictAlign()) { | ||||||
2276 | // We don't support this feature yet. | ||||||
2277 | LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "AArch64 GISel does not support strict-align yet\n" ; } } while (false); | ||||||
2278 | return false; | ||||||
2279 | } | ||||||
2280 | |||||||
2281 | MIB.setInstrAndDebugLoc(I); | ||||||
2282 | |||||||
2283 | unsigned Opcode = I.getOpcode(); | ||||||
2284 | // G_PHI requires same handling as PHI | ||||||
2285 | if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { | ||||||
2286 | // Certain non-generic instructions also need some special handling. | ||||||
2287 | |||||||
2288 | if (Opcode == TargetOpcode::LOAD_STACK_GUARD) | ||||||
2289 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2290 | |||||||
2291 | if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { | ||||||
2292 | const Register DefReg = I.getOperand(0).getReg(); | ||||||
2293 | const LLT DefTy = MRI.getType(DefReg); | ||||||
2294 | |||||||
2295 | const RegClassOrRegBank &RegClassOrBank = | ||||||
2296 | MRI.getRegClassOrRegBank(DefReg); | ||||||
2297 | |||||||
2298 | const TargetRegisterClass *DefRC | ||||||
2299 | = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); | ||||||
2300 | if (!DefRC) { | ||||||
2301 | if (!DefTy.isValid()) { | ||||||
2302 | LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "PHI operand has no type, not a gvreg?\n" ; } } while (false); | ||||||
2303 | return false; | ||||||
2304 | } | ||||||
2305 | const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); | ||||||
2306 | DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); | ||||||
2307 | if (!DefRC) { | ||||||
2308 | LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "PHI operand has unexpected size/bank\n" ; } } while (false); | ||||||
2309 | return false; | ||||||
2310 | } | ||||||
2311 | } | ||||||
2312 | |||||||
2313 | I.setDesc(TII.get(TargetOpcode::PHI)); | ||||||
2314 | |||||||
2315 | return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); | ||||||
2316 | } | ||||||
2317 | |||||||
2318 | if (I.isCopy()) | ||||||
2319 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
2320 | |||||||
2321 | return true; | ||||||
2322 | } | ||||||
2323 | |||||||
2324 | |||||||
2325 | if (I.getNumOperands() != I.getNumExplicitOperands()) { | ||||||
2326 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n" ; } } while (false) | ||||||
2327 | dbgs() << "Generic instruction has unexpected implicit operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n" ; } } while (false); | ||||||
2328 | return false; | ||||||
2329 | } | ||||||
2330 | |||||||
2331 | // Try to do some lowering before we start instruction selecting. These | ||||||
2332 | // lowerings are purely transformations on the input G_MIR and so selection | ||||||
2333 | // must continue after any modification of the instruction. | ||||||
2334 | if (preISelLower(I)) { | ||||||
2335 | Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. | ||||||
2336 | } | ||||||
2337 | |||||||
2338 | // There may be patterns where the importer can't deal with them optimally, | ||||||
2339 | // but does select it to a suboptimal sequence so our custom C++ selection | ||||||
2340 | // code later never has a chance to work on it. Therefore, we have an early | ||||||
2341 | // selection attempt here to give priority to certain selection routines | ||||||
2342 | // over the imported ones. | ||||||
2343 | if (earlySelect(I)) | ||||||
2344 | return true; | ||||||
2345 | |||||||
2346 | if (selectImpl(I, *CoverageInfo)) | ||||||
2347 | return true; | ||||||
2348 | |||||||
2349 | LLT Ty = | ||||||
2350 | I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; | ||||||
2351 | |||||||
2352 | switch (Opcode) { | ||||||
2353 | case TargetOpcode::G_SBFX: | ||||||
2354 | case TargetOpcode::G_UBFX: { | ||||||
2355 | static const unsigned OpcTable[2][2] = { | ||||||
2356 | {AArch64::UBFMWri, AArch64::UBFMXri}, | ||||||
2357 | {AArch64::SBFMWri, AArch64::SBFMXri}}; | ||||||
2358 | bool IsSigned = Opcode == TargetOpcode::G_SBFX; | ||||||
2359 | unsigned Size = Ty.getSizeInBits(); | ||||||
2360 | unsigned Opc = OpcTable[IsSigned][Size == 64]; | ||||||
2361 | auto Cst1 = | ||||||
2362 | getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI); | ||||||
2363 | assert(Cst1 && "Should have gotten a constant for src 1?")(static_cast <bool> (Cst1 && "Should have gotten a constant for src 1?" ) ? void (0) : __assert_fail ("Cst1 && \"Should have gotten a constant for src 1?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2363, __extension__ __PRETTY_FUNCTION__)); | ||||||
2364 | auto Cst2 = | ||||||
2365 | getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI); | ||||||
2366 | assert(Cst2 && "Should have gotten a constant for src 2?")(static_cast <bool> (Cst2 && "Should have gotten a constant for src 2?" ) ? void (0) : __assert_fail ("Cst2 && \"Should have gotten a constant for src 2?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2366, __extension__ __PRETTY_FUNCTION__)); | ||||||
2367 | auto LSB = Cst1->Value.getZExtValue(); | ||||||
2368 | auto Width = Cst2->Value.getZExtValue(); | ||||||
2369 | auto BitfieldInst = | ||||||
2370 | MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)}) | ||||||
2371 | .addImm(LSB) | ||||||
2372 | .addImm(LSB + Width - 1); | ||||||
2373 | I.eraseFromParent(); | ||||||
2374 | return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); | ||||||
2375 | } | ||||||
2376 | case TargetOpcode::G_BRCOND: | ||||||
2377 | return selectCompareBranch(I, MF, MRI); | ||||||
2378 | |||||||
2379 | case TargetOpcode::G_BRINDIRECT: { | ||||||
2380 | I.setDesc(TII.get(AArch64::BR)); | ||||||
2381 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2382 | } | ||||||
2383 | |||||||
2384 | case TargetOpcode::G_BRJT: | ||||||
2385 | return selectBrJT(I, MRI); | ||||||
2386 | |||||||
2387 | case AArch64::G_ADD_LOW: { | ||||||
2388 | // This op may have been separated from it's ADRP companion by the localizer | ||||||
2389 | // or some other code motion pass. Given that many CPUs will try to | ||||||
2390 | // macro fuse these operations anyway, select this into a MOVaddr pseudo | ||||||
2391 | // which will later be expanded into an ADRP+ADD pair after scheduling. | ||||||
2392 | MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); | ||||||
2393 | if (BaseMI->getOpcode() != AArch64::ADRP) { | ||||||
2394 | I.setDesc(TII.get(AArch64::ADDXri)); | ||||||
2395 | I.addOperand(MachineOperand::CreateImm(0)); | ||||||
2396 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2397 | } | ||||||
2398 | assert(TM.getCodeModel() == CodeModel::Small &&(static_cast <bool> (TM.getCodeModel() == CodeModel::Small && "Expected small code model") ? void (0) : __assert_fail ("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2399, __extension__ __PRETTY_FUNCTION__)) | ||||||
2399 | "Expected small code model")(static_cast <bool> (TM.getCodeModel() == CodeModel::Small && "Expected small code model") ? void (0) : __assert_fail ("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2399, __extension__ __PRETTY_FUNCTION__)); | ||||||
2400 | auto Op1 = BaseMI->getOperand(1); | ||||||
2401 | auto Op2 = I.getOperand(2); | ||||||
2402 | auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) | ||||||
2403 | .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), | ||||||
2404 | Op1.getTargetFlags()) | ||||||
2405 | .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), | ||||||
2406 | Op2.getTargetFlags()); | ||||||
2407 | I.eraseFromParent(); | ||||||
2408 | return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); | ||||||
2409 | } | ||||||
2410 | |||||||
2411 | case TargetOpcode::G_BSWAP: { | ||||||
2412 | // Handle vector types for G_BSWAP directly. | ||||||
2413 | Register DstReg = I.getOperand(0).getReg(); | ||||||
2414 | LLT DstTy = MRI.getType(DstReg); | ||||||
2415 | |||||||
2416 | // We should only get vector types here; everything else is handled by the | ||||||
2417 | // importer right now. | ||||||
2418 | if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { | ||||||
2419 | LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Dst type for G_BSWAP currently unsupported.\n" ; } } while (false); | ||||||
2420 | return false; | ||||||
2421 | } | ||||||
2422 | |||||||
2423 | // Only handle 4 and 2 element vectors for now. | ||||||
2424 | // TODO: 16-bit elements. | ||||||
2425 | unsigned NumElts = DstTy.getNumElements(); | ||||||
2426 | if (NumElts != 4 && NumElts != 2) { | ||||||
2427 | LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported number of elements for G_BSWAP.\n" ; } } while (false); | ||||||
2428 | return false; | ||||||
2429 | } | ||||||
2430 | |||||||
2431 | // Choose the correct opcode for the supported types. Right now, that's | ||||||
2432 | // v2s32, v4s32, and v2s64. | ||||||
2433 | unsigned Opc = 0; | ||||||
2434 | unsigned EltSize = DstTy.getElementType().getSizeInBits(); | ||||||
2435 | if (EltSize == 32) | ||||||
2436 | Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8 | ||||||
2437 | : AArch64::REV32v16i8; | ||||||
2438 | else if (EltSize == 64) | ||||||
2439 | Opc = AArch64::REV64v16i8; | ||||||
2440 | |||||||
2441 | // We should always get something by the time we get here... | ||||||
2442 | assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?")(static_cast <bool> (Opc != 0 && "Didn't get an opcode for G_BSWAP?" ) ? void (0) : __assert_fail ("Opc != 0 && \"Didn't get an opcode for G_BSWAP?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2442, __extension__ __PRETTY_FUNCTION__)); | ||||||
2443 | |||||||
2444 | I.setDesc(TII.get(Opc)); | ||||||
2445 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2446 | } | ||||||
2447 | |||||||
2448 | case TargetOpcode::G_FCONSTANT: | ||||||
2449 | case TargetOpcode::G_CONSTANT: { | ||||||
2450 | const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; | ||||||
2451 | |||||||
2452 | const LLT s8 = LLT::scalar(8); | ||||||
2453 | const LLT s16 = LLT::scalar(16); | ||||||
2454 | const LLT s32 = LLT::scalar(32); | ||||||
2455 | const LLT s64 = LLT::scalar(64); | ||||||
2456 | const LLT s128 = LLT::scalar(128); | ||||||
2457 | const LLT p0 = LLT::pointer(0, 64); | ||||||
2458 | |||||||
2459 | const Register DefReg = I.getOperand(0).getReg(); | ||||||
2460 | const LLT DefTy = MRI.getType(DefReg); | ||||||
2461 | const unsigned DefSize = DefTy.getSizeInBits(); | ||||||
2462 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | ||||||
2463 | |||||||
2464 | // FIXME: Redundant check, but even less readable when factored out. | ||||||
2465 | if (isFP) { | ||||||
2466 | if (Ty != s32 && Ty != s64 && Ty != s128) { | ||||||
2467 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 << " or " << s128 << '\n' ; } } while (false) | ||||||
2468 | << " constant, expected: " << s32 << " or " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 << " or " << s128 << '\n' ; } } while (false) | ||||||
2469 | << " or " << s128 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 << " or " << s128 << '\n' ; } } while (false); | ||||||
2470 | return false; | ||||||
2471 | } | ||||||
2472 | |||||||
2473 | if (RB.getID() != AArch64::FPRRegBankID) { | ||||||
2474 | LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize FP " << Ty << " constant on bank: " << RB << ", expected: FPR\n"; } } while (false) | ||||||
2475 | << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize FP " << Ty << " constant on bank: " << RB << ", expected: FPR\n"; } } while (false) | ||||||
2476 | << ", expected: FPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize FP " << Ty << " constant on bank: " << RB << ", expected: FPR\n"; } } while (false); | ||||||
2477 | return false; | ||||||
2478 | } | ||||||
2479 | |||||||
2480 | // The case when we have 0.0 is covered by tablegen. Reject it here so we | ||||||
2481 | // can be sure tablegen works correctly and isn't rescued by this code. | ||||||
2482 | // 0.0 is not covered by tablegen for FP128. So we will handle this | ||||||
2483 | // scenario in the code here. | ||||||
2484 | if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) | ||||||
2485 | return false; | ||||||
2486 | } else { | ||||||
2487 | // s32 and s64 are covered by tablegen. | ||||||
2488 | if (Ty != p0 && Ty != s8 && Ty != s16) { | ||||||
2489 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize integer " << Ty << " constant, expected: " << s32 << ", " << s64 << ", or " << p0 << '\n' ; } } while (false) | ||||||
2490 | << " constant, expected: " << s32 << ", " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize integer " << Ty << " constant, expected: " << s32 << ", " << s64 << ", or " << p0 << '\n' ; } } while (false) | ||||||
2491 | << ", or " << p0 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize integer " << Ty << " constant, expected: " << s32 << ", " << s64 << ", or " << p0 << '\n' ; } } while (false); | ||||||
2492 | return false; | ||||||
2493 | } | ||||||
2494 | |||||||
2495 | if (RB.getID() != AArch64::GPRRegBankID) { | ||||||
2496 | LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize integer " << Ty << " constant on bank: " << RB << ", expected: GPR\n"; } } while (false) | ||||||
2497 | << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize integer " << Ty << " constant on bank: " << RB << ", expected: GPR\n"; } } while (false) | ||||||
2498 | << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unable to materialize integer " << Ty << " constant on bank: " << RB << ", expected: GPR\n"; } } while (false); | ||||||
2499 | return false; | ||||||
2500 | } | ||||||
2501 | } | ||||||
2502 | |||||||
2503 | // We allow G_CONSTANT of types < 32b. | ||||||
2504 | const unsigned MovOpc = | ||||||
2505 | DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; | ||||||
2506 | |||||||
2507 | if (isFP) { | ||||||
2508 | // Either emit a FMOV, or emit a copy to emit a normal mov. | ||||||
2509 | const TargetRegisterClass &GPRRC = | ||||||
2510 | DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; | ||||||
2511 | const TargetRegisterClass &FPRRC = | ||||||
2512 | DefSize == 32 ? AArch64::FPR32RegClass | ||||||
2513 | : (DefSize == 64 ? AArch64::FPR64RegClass | ||||||
2514 | : AArch64::FPR128RegClass); | ||||||
2515 | |||||||
2516 | // For 64b values, emit a constant pool load instead. | ||||||
2517 | // For s32, use a cp load if we have optsize/minsize. | ||||||
2518 | if (DefSize == 64 || DefSize == 128 || | ||||||
2519 | (DefSize == 32 && shouldOptForSize(&MF))) { | ||||||
2520 | auto *FPImm = I.getOperand(1).getFPImm(); | ||||||
2521 | auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); | ||||||
2522 | if (!LoadMI) { | ||||||
2523 | LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to load double constant pool entry\n" ; } } while (false); | ||||||
2524 | return false; | ||||||
2525 | } | ||||||
2526 | MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); | ||||||
2527 | I.eraseFromParent(); | ||||||
2528 | return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); | ||||||
2529 | } | ||||||
2530 | |||||||
2531 | // Nope. Emit a copy and use a normal mov instead. | ||||||
2532 | const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC); | ||||||
2533 | MachineOperand &RegOp = I.getOperand(0); | ||||||
2534 | RegOp.setReg(DefGPRReg); | ||||||
2535 | MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); | ||||||
2536 | MIB.buildCopy({DefReg}, {DefGPRReg}); | ||||||
2537 | |||||||
2538 | if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { | ||||||
2539 | LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain G_FCONSTANT def operand\n" ; } } while (false); | ||||||
2540 | return false; | ||||||
2541 | } | ||||||
2542 | |||||||
2543 | MachineOperand &ImmOp = I.getOperand(1); | ||||||
2544 | // FIXME: Is going through int64_t always correct? | ||||||
2545 | ImmOp.ChangeToImmediate( | ||||||
2546 | ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); | ||||||
2547 | } else if (I.getOperand(1).isCImm()) { | ||||||
2548 | uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); | ||||||
2549 | I.getOperand(1).ChangeToImmediate(Val); | ||||||
2550 | } else if (I.getOperand(1).isImm()) { | ||||||
2551 | uint64_t Val = I.getOperand(1).getImm(); | ||||||
2552 | I.getOperand(1).ChangeToImmediate(Val); | ||||||
2553 | } | ||||||
2554 | |||||||
2555 | I.setDesc(TII.get(MovOpc)); | ||||||
2556 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2557 | return true; | ||||||
2558 | } | ||||||
2559 | case TargetOpcode::G_EXTRACT: { | ||||||
2560 | Register DstReg = I.getOperand(0).getReg(); | ||||||
2561 | Register SrcReg = I.getOperand(1).getReg(); | ||||||
2562 | LLT SrcTy = MRI.getType(SrcReg); | ||||||
2563 | LLT DstTy = MRI.getType(DstReg); | ||||||
2564 | (void)DstTy; | ||||||
2565 | unsigned SrcSize = SrcTy.getSizeInBits(); | ||||||
2566 | |||||||
2567 | if (SrcTy.getSizeInBits() > 64) { | ||||||
2568 | // This should be an extract of an s128, which is like a vector extract. | ||||||
2569 | if (SrcTy.getSizeInBits() != 128) | ||||||
2570 | return false; | ||||||
2571 | // Only support extracting 64 bits from an s128 at the moment. | ||||||
2572 | if (DstTy.getSizeInBits() != 64) | ||||||
2573 | return false; | ||||||
2574 | |||||||
2575 | unsigned Offset = I.getOperand(2).getImm(); | ||||||
2576 | if (Offset % 64 != 0) | ||||||
2577 | return false; | ||||||
2578 | |||||||
2579 | // Check we have the right regbank always. | ||||||
2580 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||||
2581 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
2582 | assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!")(static_cast <bool> (SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!") ? void (0) : __assert_fail ("SrcRB.getID() == DstRB.getID() && \"Wrong extract regbank!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2582, __extension__ __PRETTY_FUNCTION__)); | ||||||
2583 | |||||||
2584 | if (SrcRB.getID() == AArch64::GPRRegBankID) { | ||||||
2585 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | ||||||
2586 | .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); | ||||||
2587 | I.eraseFromParent(); | ||||||
2588 | return true; | ||||||
2589 | } | ||||||
2590 | |||||||
2591 | // Emit the same code as a vector extract. | ||||||
2592 | // Offset must be a multiple of 64. | ||||||
2593 | unsigned LaneIdx = Offset / 64; | ||||||
2594 | MachineInstr *Extract = emitExtractVectorElt( | ||||||
2595 | DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); | ||||||
2596 | if (!Extract) | ||||||
2597 | return false; | ||||||
2598 | I.eraseFromParent(); | ||||||
2599 | return true; | ||||||
2600 | } | ||||||
2601 | |||||||
2602 | I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); | ||||||
2603 | MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + | ||||||
2604 | Ty.getSizeInBits() - 1); | ||||||
2605 | |||||||
2606 | if (SrcSize < 64) { | ||||||
2607 | assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&(static_cast <bool> (SrcSize == 32 && DstTy.getSizeInBits () == 16 && "unexpected G_EXTRACT types") ? void (0) : __assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2608, __extension__ __PRETTY_FUNCTION__)) | ||||||
2608 | "unexpected G_EXTRACT types")(static_cast <bool> (SrcSize == 32 && DstTy.getSizeInBits () == 16 && "unexpected G_EXTRACT types") ? void (0) : __assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2608, __extension__ __PRETTY_FUNCTION__)); | ||||||
2609 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2610 | } | ||||||
2611 | |||||||
2612 | DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); | ||||||
2613 | MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); | ||||||
2614 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) | ||||||
2615 | .addReg(DstReg, 0, AArch64::sub_32); | ||||||
2616 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), | ||||||
2617 | AArch64::GPR32RegClass, MRI); | ||||||
2618 | I.getOperand(0).setReg(DstReg); | ||||||
2619 | |||||||
2620 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2621 | } | ||||||
2622 | |||||||
2623 | case TargetOpcode::G_INSERT: { | ||||||
2624 | LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); | ||||||
2625 | LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
2626 | unsigned DstSize = DstTy.getSizeInBits(); | ||||||
2627 | // Larger inserts are vectors, same-size ones should be something else by | ||||||
2628 | // now (split up or turned into COPYs). | ||||||
2629 | if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) | ||||||
2630 | return false; | ||||||
2631 | |||||||
2632 | I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); | ||||||
2633 | unsigned LSB = I.getOperand(3).getImm(); | ||||||
2634 | unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); | ||||||
2635 | I.getOperand(3).setImm((DstSize - LSB) % DstSize); | ||||||
2636 | MachineInstrBuilder(MF, I).addImm(Width - 1); | ||||||
2637 | |||||||
2638 | if (DstSize < 64) { | ||||||
2639 | assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&(static_cast <bool> (DstSize == 32 && SrcTy.getSizeInBits () == 16 && "unexpected G_INSERT types") ? void (0) : __assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2640, __extension__ __PRETTY_FUNCTION__)) | ||||||
2640 | "unexpected G_INSERT types")(static_cast <bool> (DstSize == 32 && SrcTy.getSizeInBits () == 16 && "unexpected G_INSERT types") ? void (0) : __assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2640, __extension__ __PRETTY_FUNCTION__)); | ||||||
2641 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2642 | } | ||||||
2643 | |||||||
2644 | Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); | ||||||
2645 | BuildMI(MBB, I.getIterator(), I.getDebugLoc(), | ||||||
2646 | TII.get(AArch64::SUBREG_TO_REG)) | ||||||
2647 | .addDef(SrcReg) | ||||||
2648 | .addImm(0) | ||||||
2649 | .addUse(I.getOperand(2).getReg()) | ||||||
2650 | .addImm(AArch64::sub_32); | ||||||
2651 | RBI.constrainGenericRegister(I.getOperand(2).getReg(), | ||||||
2652 | AArch64::GPR32RegClass, MRI); | ||||||
2653 | I.getOperand(2).setReg(SrcReg); | ||||||
2654 | |||||||
2655 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2656 | } | ||||||
2657 | case TargetOpcode::G_FRAME_INDEX: { | ||||||
2658 | // allocas and G_FRAME_INDEX are only supported in addrspace(0). | ||||||
2659 | if (Ty != LLT::pointer(0, 64)) { | ||||||
2660 | LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: " << Ty << ", expected: " << LLT::pointer(0, 64) << '\n'; } } while (false) | ||||||
2661 | << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: " << Ty << ", expected: " << LLT::pointer(0, 64) << '\n'; } } while (false); | ||||||
2662 | return false; | ||||||
2663 | } | ||||||
2664 | I.setDesc(TII.get(AArch64::ADDXri)); | ||||||
2665 | |||||||
2666 | // MOs for a #0 shifted immediate. | ||||||
2667 | I.addOperand(MachineOperand::CreateImm(0)); | ||||||
2668 | I.addOperand(MachineOperand::CreateImm(0)); | ||||||
2669 | |||||||
2670 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2671 | } | ||||||
2672 | |||||||
2673 | case TargetOpcode::G_GLOBAL_VALUE: { | ||||||
2674 | auto GV = I.getOperand(1).getGlobal(); | ||||||
2675 | if (GV->isThreadLocal()) | ||||||
2676 | return selectTLSGlobalValue(I, MRI); | ||||||
2677 | |||||||
2678 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); | ||||||
2679 | if (OpFlags & AArch64II::MO_GOT) { | ||||||
2680 | I.setDesc(TII.get(AArch64::LOADgot)); | ||||||
2681 | I.getOperand(1).setTargetFlags(OpFlags); | ||||||
2682 | } else if (TM.getCodeModel() == CodeModel::Large) { | ||||||
2683 | // Materialize the global using movz/movk instructions. | ||||||
2684 | materializeLargeCMVal(I, GV, OpFlags); | ||||||
2685 | I.eraseFromParent(); | ||||||
2686 | return true; | ||||||
2687 | } else if (TM.getCodeModel() == CodeModel::Tiny) { | ||||||
2688 | I.setDesc(TII.get(AArch64::ADR)); | ||||||
2689 | I.getOperand(1).setTargetFlags(OpFlags); | ||||||
2690 | } else { | ||||||
2691 | I.setDesc(TII.get(AArch64::MOVaddr)); | ||||||
2692 | I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); | ||||||
2693 | MachineInstrBuilder MIB(MF, I); | ||||||
2694 | MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), | ||||||
2695 | OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); | ||||||
2696 | } | ||||||
2697 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2698 | } | ||||||
2699 | |||||||
2700 | case TargetOpcode::G_ZEXTLOAD: | ||||||
2701 | case TargetOpcode::G_LOAD: | ||||||
2702 | case TargetOpcode::G_STORE: { | ||||||
2703 | GLoadStore &LdSt = cast<GLoadStore>(I); | ||||||
2704 | bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; | ||||||
2705 | LLT PtrTy = MRI.getType(LdSt.getPointerReg()); | ||||||
2706 | |||||||
2707 | if (PtrTy != LLT::pointer(0, 64)) { | ||||||
2708 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Load/Store pointer has type: " << PtrTy << ", expected: " << LLT::pointer (0, 64) << '\n'; } } while (false) | ||||||
2709 | << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Load/Store pointer has type: " << PtrTy << ", expected: " << LLT::pointer (0, 64) << '\n'; } } while (false); | ||||||
2710 | return false; | ||||||
2711 | } | ||||||
2712 | |||||||
2713 | uint64_t MemSizeInBytes = LdSt.getMemSize(); | ||||||
2714 | unsigned MemSizeInBits = LdSt.getMemSizeInBits(); | ||||||
2715 | AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); | ||||||
2716 | |||||||
2717 | // Need special instructions for atomics that affect ordering. | ||||||
2718 | if (Order != AtomicOrdering::NotAtomic && | ||||||
2719 | Order != AtomicOrdering::Unordered && | ||||||
2720 | Order != AtomicOrdering::Monotonic) { | ||||||
2721 | assert(!isa<GZExtLoad>(LdSt))(static_cast <bool> (!isa<GZExtLoad>(LdSt)) ? void (0) : __assert_fail ("!isa<GZExtLoad>(LdSt)", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2721, __extension__ __PRETTY_FUNCTION__)); | ||||||
2722 | if (MemSizeInBytes > 64) | ||||||
2723 | return false; | ||||||
2724 | |||||||
2725 | if (isa<GLoad>(LdSt)) { | ||||||
2726 | static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH, | ||||||
2727 | AArch64::LDARW, AArch64::LDARX}; | ||||||
2728 | I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); | ||||||
2729 | } else { | ||||||
2730 | static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, | ||||||
2731 | AArch64::STLRW, AArch64::STLRX}; | ||||||
2732 | I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); | ||||||
2733 | } | ||||||
2734 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2735 | return true; | ||||||
2736 | } | ||||||
2737 | |||||||
2738 | #ifndef NDEBUG | ||||||
2739 | const Register PtrReg = LdSt.getPointerReg(); | ||||||
2740 | const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); | ||||||
2741 | // Sanity-check the pointer register. | ||||||
2742 | assert(PtrRB.getID() == AArch64::GPRRegBankID &&(static_cast <bool> (PtrRB.getID() == AArch64::GPRRegBankID && "Load/Store pointer operand isn't a GPR") ? void ( 0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2743, __extension__ __PRETTY_FUNCTION__)) | ||||||
2743 | "Load/Store pointer operand isn't a GPR")(static_cast <bool> (PtrRB.getID() == AArch64::GPRRegBankID && "Load/Store pointer operand isn't a GPR") ? void ( 0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2743, __extension__ __PRETTY_FUNCTION__)); | ||||||
2744 | assert(MRI.getType(PtrReg).isPointer() &&(static_cast <bool> (MRI.getType(PtrReg).isPointer() && "Load/Store pointer operand isn't a pointer") ? void (0) : __assert_fail ("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2745, __extension__ __PRETTY_FUNCTION__)) | ||||||
2745 | "Load/Store pointer operand isn't a pointer")(static_cast <bool> (MRI.getType(PtrReg).isPointer() && "Load/Store pointer operand isn't a pointer") ? void (0) : __assert_fail ("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2745, __extension__ __PRETTY_FUNCTION__)); | ||||||
2746 | #endif | ||||||
2747 | |||||||
2748 | const Register ValReg = LdSt.getReg(0); | ||||||
2749 | const LLT ValTy = MRI.getType(ValReg); | ||||||
2750 | const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); | ||||||
2751 | |||||||
2752 | // The code below doesn't support truncating stores, so we need to split it | ||||||
2753 | // again. | ||||||
2754 | if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { | ||||||
2755 | unsigned SubReg; | ||||||
2756 | LLT MemTy = LdSt.getMMO().getMemoryType(); | ||||||
2757 | auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); | ||||||
2758 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||||
2759 | return false; | ||||||
2760 | |||||||
2761 | // Generate a subreg copy. | ||||||
2762 | auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) | ||||||
2763 | .addReg(ValReg, 0, SubReg) | ||||||
2764 | .getReg(0); | ||||||
2765 | RBI.constrainGenericRegister(Copy, *RC, MRI); | ||||||
2766 | LdSt.getOperand(0).setReg(Copy); | ||||||
2767 | } | ||||||
2768 | |||||||
2769 | // Helper lambda for partially selecting I. Either returns the original | ||||||
2770 | // instruction with an updated opcode, or a new instruction. | ||||||
2771 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { | ||||||
2772 | bool IsStore = isa<GStore>(I); | ||||||
| |||||||
2773 | const unsigned NewOpc = | ||||||
2774 | selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); | ||||||
2775 | if (NewOpc == I.getOpcode()) | ||||||
2776 | return nullptr; | ||||||
2777 | // Check if we can fold anything into the addressing mode. | ||||||
2778 | auto AddrModeFns = | ||||||
2779 | selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); | ||||||
2780 | if (!AddrModeFns) { | ||||||
2781 | // Can't fold anything. Use the original instruction. | ||||||
2782 | I.setDesc(TII.get(NewOpc)); | ||||||
2783 | I.addOperand(MachineOperand::CreateImm(0)); | ||||||
2784 | return &I; | ||||||
2785 | } | ||||||
2786 | |||||||
2787 | // Folded something. Create a new instruction and return it. | ||||||
2788 | auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); | ||||||
2789 | Register CurValReg = I.getOperand(0).getReg(); | ||||||
2790 | IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); | ||||||
2791 | NewInst.cloneMemRefs(I); | ||||||
2792 | for (auto &Fn : *AddrModeFns) | ||||||
2793 | Fn(NewInst); | ||||||
2794 | I.eraseFromParent(); | ||||||
2795 | return &*NewInst; | ||||||
2796 | }; | ||||||
2797 | |||||||
2798 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); | ||||||
2799 | if (!LoadStore) | ||||||
2800 | return false; | ||||||
2801 | |||||||
2802 | // If we're storing a 0, use WZR/XZR. | ||||||
2803 | if (Opcode == TargetOpcode::G_STORE) { | ||||||
2804 | auto CVal = getConstantVRegValWithLookThrough( | ||||||
2805 | LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, | ||||||
2806 | /*HandleFConstants = */ false); | ||||||
2807 | if (CVal && CVal->Value == 0) { | ||||||
2808 | switch (LoadStore->getOpcode()) { | ||||||
2809 | case AArch64::STRWui: | ||||||
2810 | case AArch64::STRHHui: | ||||||
2811 | case AArch64::STRBBui: | ||||||
2812 | LoadStore->getOperand(0).setReg(AArch64::WZR); | ||||||
2813 | break; | ||||||
2814 | case AArch64::STRXui: | ||||||
2815 | LoadStore->getOperand(0).setReg(AArch64::XZR); | ||||||
2816 | break; | ||||||
2817 | } | ||||||
2818 | } | ||||||
2819 | } | ||||||
2820 | |||||||
2821 | if (IsZExtLoad) { | ||||||
2822 | // The zextload from a smaller type to i32 should be handled by the | ||||||
2823 | // importer. | ||||||
2824 | if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) | ||||||
2825 | return false; | ||||||
2826 | // If we have a ZEXTLOAD then change the load's type to be a narrower reg | ||||||
2827 | // and zero_extend with SUBREG_TO_REG. | ||||||
2828 | Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | ||||||
2829 | Register DstReg = LoadStore->getOperand(0).getReg(); | ||||||
2830 | LoadStore->getOperand(0).setReg(LdReg); | ||||||
2831 | |||||||
2832 | MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); | ||||||
2833 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) | ||||||
2834 | .addImm(0) | ||||||
2835 | .addUse(LdReg) | ||||||
2836 | .addImm(AArch64::sub_32); | ||||||
2837 | constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); | ||||||
2838 | return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, | ||||||
2839 | MRI); | ||||||
2840 | } | ||||||
2841 | return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); | ||||||
2842 | } | ||||||
2843 | |||||||
2844 | case TargetOpcode::G_SMULH: | ||||||
2845 | case TargetOpcode::G_UMULH: { | ||||||
2846 | // Reject the various things we don't support yet. | ||||||
2847 | if (unsupportedBinOp(I, RBI, MRI, TRI)) | ||||||
2848 | return false; | ||||||
2849 | |||||||
2850 | const Register DefReg = I.getOperand(0).getReg(); | ||||||
2851 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | ||||||
2852 | |||||||
2853 | if (RB.getID() != AArch64::GPRRegBankID) { | ||||||
2854 | LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"; } } while (false); | ||||||
2855 | return false; | ||||||
2856 | } | ||||||
2857 | |||||||
2858 | if (Ty != LLT::scalar(64)) { | ||||||
2859 | LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " << Ty << ", expected: " << LLT::scalar(64) << '\n'; } } while (false) | ||||||
2860 | << ", expected: " << LLT::scalar(64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " << Ty << ", expected: " << LLT::scalar(64) << '\n'; } } while (false); | ||||||
2861 | return false; | ||||||
2862 | } | ||||||
2863 | |||||||
2864 | unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr | ||||||
2865 | : AArch64::UMULHrr; | ||||||
2866 | I.setDesc(TII.get(NewOpc)); | ||||||
2867 | |||||||
2868 | // Now that we selected an opcode, we need to constrain the register | ||||||
2869 | // operands to use appropriate classes. | ||||||
2870 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2871 | } | ||||||
2872 | case TargetOpcode::G_LSHR: | ||||||
2873 | case TargetOpcode::G_ASHR: | ||||||
2874 | if (MRI.getType(I.getOperand(0).getReg()).isVector()) | ||||||
2875 | return selectVectorAshrLshr(I, MRI); | ||||||
2876 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
2877 | case TargetOpcode::G_SHL: | ||||||
2878 | if (Opcode == TargetOpcode::G_SHL && | ||||||
2879 | MRI.getType(I.getOperand(0).getReg()).isVector()) | ||||||
2880 | return selectVectorSHL(I, MRI); | ||||||
2881 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
2882 | case TargetOpcode::G_FADD: | ||||||
2883 | case TargetOpcode::G_FSUB: | ||||||
2884 | case TargetOpcode::G_FMUL: | ||||||
2885 | case TargetOpcode::G_FDIV: | ||||||
2886 | case TargetOpcode::G_OR: { | ||||||
2887 | // Reject the various things we don't support yet. | ||||||
2888 | if (unsupportedBinOp(I, RBI, MRI, TRI)) | ||||||
2889 | return false; | ||||||
2890 | |||||||
2891 | const unsigned OpSize = Ty.getSizeInBits(); | ||||||
2892 | |||||||
2893 | const Register DefReg = I.getOperand(0).getReg(); | ||||||
2894 | const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); | ||||||
2895 | |||||||
2896 | const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); | ||||||
2897 | if (NewOpc == I.getOpcode()) | ||||||
2898 | return false; | ||||||
2899 | |||||||
2900 | I.setDesc(TII.get(NewOpc)); | ||||||
2901 | // FIXME: Should the type be always reset in setDesc? | ||||||
2902 | |||||||
2903 | // Now that we selected an opcode, we need to constrain the register | ||||||
2904 | // operands to use appropriate classes. | ||||||
2905 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2906 | } | ||||||
2907 | |||||||
2908 | case TargetOpcode::G_PTR_ADD: { | ||||||
2909 | emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB); | ||||||
2910 | I.eraseFromParent(); | ||||||
2911 | return true; | ||||||
2912 | } | ||||||
2913 | case TargetOpcode::G_SADDO: | ||||||
2914 | case TargetOpcode::G_UADDO: | ||||||
2915 | case TargetOpcode::G_SSUBO: | ||||||
2916 | case TargetOpcode::G_USUBO: { | ||||||
2917 | // Emit the operation and get the correct condition code. | ||||||
2918 | auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), | ||||||
2919 | I.getOperand(2), I.getOperand(3), MIB); | ||||||
2920 | |||||||
2921 | // Now, put the overflow result in the register given by the first operand | ||||||
2922 | // to the overflow op. CSINC increments the result when the predicate is | ||||||
2923 | // false, so to get the increment when it's true, we need to use the | ||||||
2924 | // inverse. In this case, we want to increment when carry is set. | ||||||
2925 | Register ZReg = AArch64::WZR; | ||||||
2926 | auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, | ||||||
2927 | {ZReg, ZReg}) | ||||||
2928 | .addImm(getInvertedCondCode(OpAndCC.second)); | ||||||
2929 | constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); | ||||||
2930 | I.eraseFromParent(); | ||||||
2931 | return true; | ||||||
2932 | } | ||||||
2933 | |||||||
2934 | case TargetOpcode::G_PTRMASK: { | ||||||
2935 | Register MaskReg = I.getOperand(2).getReg(); | ||||||
2936 | Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); | ||||||
2937 | // TODO: Implement arbitrary cases | ||||||
2938 | if (!MaskVal || !isShiftedMask_64(*MaskVal)) | ||||||
2939 | return false; | ||||||
2940 | |||||||
2941 | uint64_t Mask = *MaskVal; | ||||||
2942 | I.setDesc(TII.get(AArch64::ANDXri)); | ||||||
2943 | I.getOperand(2).ChangeToImmediate( | ||||||
2944 | AArch64_AM::encodeLogicalImmediate(Mask, 64)); | ||||||
2945 | |||||||
2946 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
2947 | } | ||||||
2948 | case TargetOpcode::G_PTRTOINT: | ||||||
2949 | case TargetOpcode::G_TRUNC: { | ||||||
2950 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
2951 | const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); | ||||||
2952 | |||||||
2953 | const Register DstReg = I.getOperand(0).getReg(); | ||||||
2954 | const Register SrcReg = I.getOperand(1).getReg(); | ||||||
2955 | |||||||
2956 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
2957 | const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||||
2958 | |||||||
2959 | if (DstRB.getID() != SrcRB.getID()) { | ||||||
2960 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n" ; } } while (false) | ||||||
2961 | dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n" ; } } while (false); | ||||||
2962 | return false; | ||||||
2963 | } | ||||||
2964 | |||||||
2965 | if (DstRB.getID() == AArch64::GPRRegBankID) { | ||||||
2966 | const TargetRegisterClass *DstRC = | ||||||
2967 | getRegClassForTypeOnBank(DstTy, DstRB, RBI); | ||||||
2968 | if (!DstRC) | ||||||
2969 | return false; | ||||||
2970 | |||||||
2971 | const TargetRegisterClass *SrcRC = | ||||||
2972 | getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); | ||||||
2973 | if (!SrcRC) | ||||||
2974 | return false; | ||||||
2975 | |||||||
2976 | if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || | ||||||
2977 | !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { | ||||||
2978 | LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n" ; } } while (false); | ||||||
2979 | return false; | ||||||
2980 | } | ||||||
2981 | |||||||
2982 | if (DstRC == SrcRC) { | ||||||
2983 | // Nothing to be done | ||||||
2984 | } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && | ||||||
2985 | SrcTy == LLT::scalar(64)) { | ||||||
2986 | llvm_unreachable("TableGen can import this case")::llvm::llvm_unreachable_internal("TableGen can import this case" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 2986); | ||||||
2987 | return false; | ||||||
2988 | } else if (DstRC == &AArch64::GPR32RegClass && | ||||||
2989 | SrcRC == &AArch64::GPR64RegClass) { | ||||||
2990 | I.getOperand(1).setSubReg(AArch64::sub_32); | ||||||
2991 | } else { | ||||||
2992 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n" ; } } while (false) | ||||||
2993 | dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n" ; } } while (false); | ||||||
2994 | return false; | ||||||
2995 | } | ||||||
2996 | |||||||
2997 | I.setDesc(TII.get(TargetOpcode::COPY)); | ||||||
2998 | return true; | ||||||
2999 | } else if (DstRB.getID() == AArch64::FPRRegBankID) { | ||||||
3000 | if (DstTy == LLT::fixed_vector(4, 16) && | ||||||
3001 | SrcTy == LLT::fixed_vector(4, 32)) { | ||||||
3002 | I.setDesc(TII.get(AArch64::XTNv4i16)); | ||||||
3003 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3004 | return true; | ||||||
3005 | } | ||||||
3006 | |||||||
3007 | if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { | ||||||
3008 | MachineInstr *Extract = emitExtractVectorElt( | ||||||
3009 | DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); | ||||||
3010 | if (!Extract) | ||||||
3011 | return false; | ||||||
3012 | I.eraseFromParent(); | ||||||
3013 | return true; | ||||||
3014 | } | ||||||
3015 | |||||||
3016 | // We might have a vector G_PTRTOINT, in which case just emit a COPY. | ||||||
3017 | if (Opcode == TargetOpcode::G_PTRTOINT) { | ||||||
3018 | assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector")(static_cast <bool> (DstTy.isVector() && "Expected an FPR ptrtoint to be a vector" ) ? void (0) : __assert_fail ("DstTy.isVector() && \"Expected an FPR ptrtoint to be a vector\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3018, __extension__ __PRETTY_FUNCTION__)); | ||||||
3019 | I.setDesc(TII.get(TargetOpcode::COPY)); | ||||||
3020 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
3021 | } | ||||||
3022 | } | ||||||
3023 | |||||||
3024 | return false; | ||||||
3025 | } | ||||||
3026 | |||||||
3027 | case TargetOpcode::G_ANYEXT: { | ||||||
3028 | const Register DstReg = I.getOperand(0).getReg(); | ||||||
3029 | const Register SrcReg = I.getOperand(1).getReg(); | ||||||
3030 | |||||||
3031 | const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
3032 | if (RBDst.getID() != AArch64::GPRRegBankID) { | ||||||
3033 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDstdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " << RBDst << ", expected: GPR\n"; } } while (false) | ||||||
3034 | << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " << RBDst << ", expected: GPR\n"; } } while (false); | ||||||
3035 | return false; | ||||||
3036 | } | ||||||
3037 | |||||||
3038 | const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||||
3039 | if (RBSrc.getID() != AArch64::GPRRegBankID) { | ||||||
3040 | LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrcdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " << RBSrc << ", expected: GPR\n"; } } while (false) | ||||||
3041 | << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " << RBSrc << ", expected: GPR\n"; } } while (false); | ||||||
3042 | return false; | ||||||
3043 | } | ||||||
3044 | |||||||
3045 | const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); | ||||||
3046 | |||||||
3047 | if (DstSize == 0) { | ||||||
3048 | LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n" ; } } while (false); | ||||||
3049 | return false; | ||||||
3050 | } | ||||||
3051 | |||||||
3052 | if (DstSize != 64 && DstSize > 32) { | ||||||
3053 | LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " << DstSize << ", expected: 32 or 64\n"; } } while (false) | ||||||
3054 | << ", expected: 32 or 64\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " << DstSize << ", expected: 32 or 64\n"; } } while (false); | ||||||
3055 | return false; | ||||||
3056 | } | ||||||
3057 | // At this point G_ANYEXT is just like a plain COPY, but we need | ||||||
3058 | // to explicitly form the 64-bit value if any. | ||||||
3059 | if (DstSize > 32) { | ||||||
3060 | Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); | ||||||
3061 | BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) | ||||||
3062 | .addDef(ExtSrc) | ||||||
3063 | .addImm(0) | ||||||
3064 | .addUse(SrcReg) | ||||||
3065 | .addImm(AArch64::sub_32); | ||||||
3066 | I.getOperand(1).setReg(ExtSrc); | ||||||
3067 | } | ||||||
3068 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
3069 | } | ||||||
3070 | |||||||
3071 | case TargetOpcode::G_ZEXT: | ||||||
3072 | case TargetOpcode::G_SEXT_INREG: | ||||||
3073 | case TargetOpcode::G_SEXT: { | ||||||
3074 | unsigned Opcode = I.getOpcode(); | ||||||
3075 | const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; | ||||||
3076 | const Register DefReg = I.getOperand(0).getReg(); | ||||||
3077 | Register SrcReg = I.getOperand(1).getReg(); | ||||||
3078 | const LLT DstTy = MRI.getType(DefReg); | ||||||
3079 | const LLT SrcTy = MRI.getType(SrcReg); | ||||||
3080 | unsigned DstSize = DstTy.getSizeInBits(); | ||||||
3081 | unsigned SrcSize = SrcTy.getSizeInBits(); | ||||||
3082 | |||||||
3083 | // SEXT_INREG has the same src reg size as dst, the size of the value to be | ||||||
3084 | // extended is encoded in the imm. | ||||||
3085 | if (Opcode == TargetOpcode::G_SEXT_INREG) | ||||||
3086 | SrcSize = I.getOperand(2).getImm(); | ||||||
3087 | |||||||
3088 | if (DstTy.isVector()) | ||||||
3089 | return false; // Should be handled by imported patterns. | ||||||
3090 | |||||||
3091 | assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI) ).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank" ) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3093, __extension__ __PRETTY_FUNCTION__)) | ||||||
3092 | AArch64::GPRRegBankID &&(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI) ).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank" ) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3093, __extension__ __PRETTY_FUNCTION__)) | ||||||
3093 | "Unexpected ext regbank")(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI) ).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank" ) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3093, __extension__ __PRETTY_FUNCTION__)); | ||||||
3094 | |||||||
3095 | MachineInstr *ExtI; | ||||||
3096 | |||||||
3097 | // First check if we're extending the result of a load which has a dest type | ||||||
3098 | // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest | ||||||
3099 | // GPR register on AArch64 and all loads which are smaller automatically | ||||||
3100 | // zero-extend the upper bits. E.g. | ||||||
3101 | // %v(s8) = G_LOAD %p, :: (load 1) | ||||||
3102 | // %v2(s32) = G_ZEXT %v(s8) | ||||||
3103 | if (!IsSigned) { | ||||||
3104 | auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); | ||||||
3105 | bool IsGPR = | ||||||
3106 | RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; | ||||||
3107 | if (LoadMI && IsGPR) { | ||||||
3108 | const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); | ||||||
3109 | unsigned BytesLoaded = MemOp->getSize(); | ||||||
3110 | if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) | ||||||
3111 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
3112 | } | ||||||
3113 | |||||||
3114 | // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) | ||||||
3115 | // + SUBREG_TO_REG. | ||||||
3116 | // | ||||||
3117 | // If we are zero extending from 32 bits to 64 bits, it's possible that | ||||||
3118 | // the instruction implicitly does the zero extend for us. In that case, | ||||||
3119 | // we only need the SUBREG_TO_REG. | ||||||
3120 | if (IsGPR && SrcSize == 32 && DstSize == 64) { | ||||||
3121 | // Unlike with the G_LOAD case, we don't want to look through copies | ||||||
3122 | // here. (See isDef32.) | ||||||
3123 | MachineInstr *Def = MRI.getVRegDef(SrcReg); | ||||||
3124 | Register SubregToRegSrc = SrcReg; | ||||||
3125 | |||||||
3126 | // Does the instruction implicitly zero extend? | ||||||
3127 | if (!Def || !isDef32(*Def)) { | ||||||
3128 | // No. Zero out using an OR. | ||||||
3129 | Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | ||||||
3130 | const Register ZReg = AArch64::WZR; | ||||||
3131 | MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0); | ||||||
3132 | SubregToRegSrc = OrDst; | ||||||
3133 | } | ||||||
3134 | |||||||
3135 | MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) | ||||||
3136 | .addImm(0) | ||||||
3137 | .addUse(SubregToRegSrc) | ||||||
3138 | .addImm(AArch64::sub_32); | ||||||
3139 | |||||||
3140 | if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, | ||||||
3141 | MRI)) { | ||||||
3142 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT destination\n" ; } } while (false); | ||||||
3143 | return false; | ||||||
3144 | } | ||||||
3145 | |||||||
3146 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, | ||||||
3147 | MRI)) { | ||||||
3148 | LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT source\n" ; } } while (false); | ||||||
3149 | return false; | ||||||
3150 | } | ||||||
3151 | |||||||
3152 | I.eraseFromParent(); | ||||||
3153 | return true; | ||||||
3154 | } | ||||||
3155 | } | ||||||
3156 | |||||||
3157 | if (DstSize == 64) { | ||||||
3158 | if (Opcode != TargetOpcode::G_SEXT_INREG) { | ||||||
3159 | // FIXME: Can we avoid manually doing this? | ||||||
3160 | if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, | ||||||
3161 | MRI)) { | ||||||
3162 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain " << TII.getName(Opcode) << " operand\n"; } } while (false) | ||||||
3163 | << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Failed to constrain " << TII.getName(Opcode) << " operand\n"; } } while (false); | ||||||
3164 | return false; | ||||||
3165 | } | ||||||
3166 | SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, | ||||||
3167 | {&AArch64::GPR64RegClass}, {}) | ||||||
3168 | .addImm(0) | ||||||
3169 | .addUse(SrcReg) | ||||||
3170 | .addImm(AArch64::sub_32) | ||||||
3171 | .getReg(0); | ||||||
3172 | } | ||||||
3173 | |||||||
3174 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, | ||||||
3175 | {DefReg}, {SrcReg}) | ||||||
3176 | .addImm(0) | ||||||
3177 | .addImm(SrcSize - 1); | ||||||
3178 | } else if (DstSize <= 32) { | ||||||
3179 | ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, | ||||||
3180 | {DefReg}, {SrcReg}) | ||||||
3181 | .addImm(0) | ||||||
3182 | .addImm(SrcSize - 1); | ||||||
3183 | } else { | ||||||
3184 | return false; | ||||||
3185 | } | ||||||
3186 | |||||||
3187 | constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); | ||||||
3188 | I.eraseFromParent(); | ||||||
3189 | return true; | ||||||
3190 | } | ||||||
3191 | |||||||
3192 | case TargetOpcode::G_SITOFP: | ||||||
3193 | case TargetOpcode::G_UITOFP: | ||||||
3194 | case TargetOpcode::G_FPTOSI: | ||||||
3195 | case TargetOpcode::G_FPTOUI: { | ||||||
3196 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), | ||||||
3197 | SrcTy = MRI.getType(I.getOperand(1).getReg()); | ||||||
3198 | const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); | ||||||
3199 | if (NewOpc == Opcode) | ||||||
3200 | return false; | ||||||
3201 | |||||||
3202 | I.setDesc(TII.get(NewOpc)); | ||||||
3203 | constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3204 | |||||||
3205 | return true; | ||||||
3206 | } | ||||||
3207 | |||||||
3208 | case TargetOpcode::G_FREEZE: | ||||||
3209 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
3210 | |||||||
3211 | case TargetOpcode::G_INTTOPTR: | ||||||
3212 | // The importer is currently unable to import pointer types since they | ||||||
3213 | // didn't exist in SelectionDAG. | ||||||
3214 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
3215 | |||||||
3216 | case TargetOpcode::G_BITCAST: | ||||||
3217 | // Imported SelectionDAG rules can handle every bitcast except those that | ||||||
3218 | // bitcast from a type to the same type. Ideally, these shouldn't occur | ||||||
3219 | // but we might not run an optimizer that deletes them. The other exception | ||||||
3220 | // is bitcasts involving pointer types, as SelectionDAG has no knowledge | ||||||
3221 | // of them. | ||||||
3222 | return selectCopy(I, TII, MRI, TRI, RBI); | ||||||
3223 | |||||||
3224 | case TargetOpcode::G_SELECT: { | ||||||
3225 | if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { | ||||||
3226 | LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_SELECT cond has type: " << Ty << ", expected: " << LLT::scalar(1) << '\n'; } } while (false) | ||||||
3227 | << ", expected: " << LLT::scalar(1) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_SELECT cond has type: " << Ty << ", expected: " << LLT::scalar(1) << '\n'; } } while (false); | ||||||
3228 | return false; | ||||||
3229 | } | ||||||
3230 | |||||||
3231 | const Register CondReg = I.getOperand(1).getReg(); | ||||||
3232 | const Register TReg = I.getOperand(2).getReg(); | ||||||
3233 | const Register FReg = I.getOperand(3).getReg(); | ||||||
3234 | |||||||
3235 | if (tryOptSelect(I)) | ||||||
3236 | return true; | ||||||
3237 | |||||||
3238 | // Make sure to use an unused vreg instead of wzr, so that the peephole | ||||||
3239 | // optimizations will be able to optimize these. | ||||||
3240 | Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); | ||||||
3241 | auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) | ||||||
3242 | .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); | ||||||
3243 | constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); | ||||||
3244 | if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) | ||||||
3245 | return false; | ||||||
3246 | I.eraseFromParent(); | ||||||
3247 | return true; | ||||||
3248 | } | ||||||
3249 | case TargetOpcode::G_ICMP: { | ||||||
3250 | if (Ty.isVector()) | ||||||
3251 | return selectVectorICmp(I, MRI); | ||||||
3252 | |||||||
3253 | if (Ty != LLT::scalar(32)) { | ||||||
3254 | LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ICMP result has type: " << Ty << ", expected: " << LLT::scalar(32) << '\n'; } } while (false) | ||||||
3255 | << ", expected: " << LLT::scalar(32) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "G_ICMP result has type: " << Ty << ", expected: " << LLT::scalar(32) << '\n'; } } while (false); | ||||||
3256 | return false; | ||||||
3257 | } | ||||||
3258 | |||||||
3259 | auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); | ||||||
3260 | emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), | ||||||
3261 | MIB); | ||||||
3262 | emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB); | ||||||
3263 | I.eraseFromParent(); | ||||||
3264 | return true; | ||||||
3265 | } | ||||||
3266 | |||||||
3267 | case TargetOpcode::G_FCMP: { | ||||||
3268 | CmpInst::Predicate Pred = | ||||||
3269 | static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); | ||||||
3270 | if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB, | ||||||
3271 | Pred) || | ||||||
3272 | !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB)) | ||||||
3273 | return false; | ||||||
3274 | I.eraseFromParent(); | ||||||
3275 | return true; | ||||||
3276 | } | ||||||
3277 | case TargetOpcode::G_VASTART: | ||||||
3278 | return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) | ||||||
3279 | : selectVaStartAAPCS(I, MF, MRI); | ||||||
3280 | case TargetOpcode::G_INTRINSIC: | ||||||
3281 | return selectIntrinsic(I, MRI); | ||||||
3282 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: | ||||||
3283 | return selectIntrinsicWithSideEffects(I, MRI); | ||||||
3284 | case TargetOpcode::G_IMPLICIT_DEF: { | ||||||
3285 | I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); | ||||||
3286 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
3287 | const Register DstReg = I.getOperand(0).getReg(); | ||||||
3288 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
3289 | const TargetRegisterClass *DstRC = | ||||||
3290 | getRegClassForTypeOnBank(DstTy, DstRB, RBI); | ||||||
3291 | RBI.constrainGenericRegister(DstReg, *DstRC, MRI); | ||||||
3292 | return true; | ||||||
3293 | } | ||||||
3294 | case TargetOpcode::G_BLOCK_ADDR: { | ||||||
3295 | if (TM.getCodeModel() == CodeModel::Large) { | ||||||
3296 | materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); | ||||||
3297 | I.eraseFromParent(); | ||||||
3298 | return true; | ||||||
3299 | } else { | ||||||
3300 | I.setDesc(TII.get(AArch64::MOVaddrBA)); | ||||||
3301 | auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), | ||||||
3302 | I.getOperand(0).getReg()) | ||||||
3303 | .addBlockAddress(I.getOperand(1).getBlockAddress(), | ||||||
3304 | /* Offset */ 0, AArch64II::MO_PAGE) | ||||||
3305 | .addBlockAddress( | ||||||
3306 | I.getOperand(1).getBlockAddress(), /* Offset */ 0, | ||||||
3307 | AArch64II::MO_NC | AArch64II::MO_PAGEOFF); | ||||||
3308 | I.eraseFromParent(); | ||||||
3309 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); | ||||||
3310 | } | ||||||
3311 | } | ||||||
3312 | case AArch64::G_DUP: { | ||||||
3313 | // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by | ||||||
3314 | // imported patterns. Do it manually here. Avoiding generating s16 gpr is | ||||||
3315 | // difficult because at RBS we may end up pessimizing the fpr case if we | ||||||
3316 | // decided to add an anyextend to fix this. Manual selection is the most | ||||||
3317 | // robust solution for now. | ||||||
3318 | if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != | ||||||
3319 | AArch64::GPRRegBankID) | ||||||
3320 | return false; // We expect the fpr regbank case to be imported. | ||||||
3321 | LLT VecTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
3322 | if (VecTy == LLT::fixed_vector(8, 8)) | ||||||
3323 | I.setDesc(TII.get(AArch64::DUPv8i8gpr)); | ||||||
3324 | else if (VecTy == LLT::fixed_vector(16, 8)) | ||||||
3325 | I.setDesc(TII.get(AArch64::DUPv16i8gpr)); | ||||||
3326 | else if (VecTy == LLT::fixed_vector(4, 16)) | ||||||
3327 | I.setDesc(TII.get(AArch64::DUPv4i16gpr)); | ||||||
3328 | else if (VecTy == LLT::fixed_vector(8, 16)) | ||||||
3329 | I.setDesc(TII.get(AArch64::DUPv8i16gpr)); | ||||||
3330 | else | ||||||
3331 | return false; | ||||||
3332 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3333 | } | ||||||
3334 | case TargetOpcode::G_INTRINSIC_TRUNC: | ||||||
3335 | return selectIntrinsicTrunc(I, MRI); | ||||||
3336 | case TargetOpcode::G_INTRINSIC_ROUND: | ||||||
3337 | return selectIntrinsicRound(I, MRI); | ||||||
3338 | case TargetOpcode::G_BUILD_VECTOR: | ||||||
3339 | return selectBuildVector(I, MRI); | ||||||
3340 | case TargetOpcode::G_MERGE_VALUES: | ||||||
3341 | return selectMergeValues(I, MRI); | ||||||
3342 | case TargetOpcode::G_UNMERGE_VALUES: | ||||||
3343 | return selectUnmergeValues(I, MRI); | ||||||
3344 | case TargetOpcode::G_SHUFFLE_VECTOR: | ||||||
3345 | return selectShuffleVector(I, MRI); | ||||||
3346 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: | ||||||
3347 | return selectExtractElt(I, MRI); | ||||||
3348 | case TargetOpcode::G_INSERT_VECTOR_ELT: | ||||||
3349 | return selectInsertElt(I, MRI); | ||||||
3350 | case TargetOpcode::G_CONCAT_VECTORS: | ||||||
3351 | return selectConcatVectors(I, MRI); | ||||||
3352 | case TargetOpcode::G_JUMP_TABLE: | ||||||
3353 | return selectJumpTable(I, MRI); | ||||||
3354 | case TargetOpcode::G_VECREDUCE_FADD: | ||||||
3355 | case TargetOpcode::G_VECREDUCE_ADD: | ||||||
3356 | return selectReduction(I, MRI); | ||||||
3357 | } | ||||||
3358 | |||||||
3359 | return false; | ||||||
3360 | } | ||||||
3361 | |||||||
3362 | bool AArch64InstructionSelector::selectReduction(MachineInstr &I, | ||||||
3363 | MachineRegisterInfo &MRI) { | ||||||
3364 | Register VecReg = I.getOperand(1).getReg(); | ||||||
3365 | LLT VecTy = MRI.getType(VecReg); | ||||||
3366 | if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { | ||||||
3367 | // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit | ||||||
3368 | // a subregister copy afterwards. | ||||||
3369 | if (VecTy == LLT::fixed_vector(2, 32)) { | ||||||
3370 | Register DstReg = I.getOperand(0).getReg(); | ||||||
3371 | auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, | ||||||
3372 | {VecReg, VecReg}); | ||||||
3373 | auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | ||||||
3374 | .addReg(AddP.getReg(0), 0, AArch64::ssub) | ||||||
3375 | .getReg(0); | ||||||
3376 | RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI); | ||||||
3377 | I.eraseFromParent(); | ||||||
3378 | return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI); | ||||||
3379 | } | ||||||
3380 | |||||||
3381 | unsigned Opc = 0; | ||||||
3382 | if (VecTy == LLT::fixed_vector(16, 8)) | ||||||
3383 | Opc = AArch64::ADDVv16i8v; | ||||||
3384 | else if (VecTy == LLT::fixed_vector(8, 16)) | ||||||
3385 | Opc = AArch64::ADDVv8i16v; | ||||||
3386 | else if (VecTy == LLT::fixed_vector(4, 32)) | ||||||
3387 | Opc = AArch64::ADDVv4i32v; | ||||||
3388 | else if (VecTy == LLT::fixed_vector(2, 64)) | ||||||
3389 | Opc = AArch64::ADDPv2i64p; | ||||||
3390 | else { | ||||||
3391 | LLVM_DEBUG(dbgs() << "Unhandled type for add reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled type for add reduction" ; } } while (false); | ||||||
3392 | return false; | ||||||
3393 | } | ||||||
3394 | I.setDesc(TII.get(Opc)); | ||||||
3395 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3396 | } | ||||||
3397 | |||||||
3398 | if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { | ||||||
3399 | unsigned Opc = 0; | ||||||
3400 | if (VecTy == LLT::fixed_vector(2, 32)) | ||||||
3401 | Opc = AArch64::FADDPv2i32p; | ||||||
3402 | else if (VecTy == LLT::fixed_vector(2, 64)) | ||||||
3403 | Opc = AArch64::FADDPv2i64p; | ||||||
3404 | else { | ||||||
3405 | LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unhandled type for fadd reduction" ; } } while (false); | ||||||
3406 | return false; | ||||||
3407 | } | ||||||
3408 | I.setDesc(TII.get(Opc)); | ||||||
3409 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3410 | } | ||||||
3411 | return false; | ||||||
3412 | } | ||||||
3413 | |||||||
3414 | bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, | ||||||
3415 | MachineRegisterInfo &MRI) { | ||||||
3416 | assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRJT && \"Expected G_BRJT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3416, __extension__ __PRETTY_FUNCTION__)); | ||||||
3417 | Register JTAddr = I.getOperand(0).getReg(); | ||||||
3418 | unsigned JTI = I.getOperand(1).getIndex(); | ||||||
3419 | Register Index = I.getOperand(2).getReg(); | ||||||
3420 | |||||||
3421 | Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); | ||||||
3422 | Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); | ||||||
3423 | |||||||
3424 | MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); | ||||||
3425 | auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, | ||||||
3426 | {TargetReg, ScratchReg}, {JTAddr, Index}) | ||||||
3427 | .addJumpTableIndex(JTI); | ||||||
3428 | // Build the indirect branch. | ||||||
3429 | MIB.buildInstr(AArch64::BR, {}, {TargetReg}); | ||||||
3430 | I.eraseFromParent(); | ||||||
3431 | return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); | ||||||
3432 | } | ||||||
3433 | |||||||
3434 | bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, | ||||||
3435 | MachineRegisterInfo &MRI) { | ||||||
3436 | assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_JUMP_TABLE && \"Expected jump table\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3436, __extension__ __PRETTY_FUNCTION__)); | ||||||
3437 | assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!")(static_cast <bool> (I.getOperand(1).isJTI() && "Jump table op should have a JTI!") ? void (0) : __assert_fail ("I.getOperand(1).isJTI() && \"Jump table op should have a JTI!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3437, __extension__ __PRETTY_FUNCTION__)); | ||||||
3438 | |||||||
3439 | Register DstReg = I.getOperand(0).getReg(); | ||||||
3440 | unsigned JTI = I.getOperand(1).getIndex(); | ||||||
3441 | // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. | ||||||
3442 | auto MovMI = | ||||||
3443 | MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) | ||||||
3444 | .addJumpTableIndex(JTI, AArch64II::MO_PAGE) | ||||||
3445 | .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); | ||||||
3446 | I.eraseFromParent(); | ||||||
3447 | return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); | ||||||
3448 | } | ||||||
3449 | |||||||
3450 | bool AArch64InstructionSelector::selectTLSGlobalValue( | ||||||
3451 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
3452 | if (!STI.isTargetMachO()) | ||||||
3453 | return false; | ||||||
3454 | MachineFunction &MF = *I.getParent()->getParent(); | ||||||
3455 | MF.getFrameInfo().setAdjustsStack(true); | ||||||
3456 | |||||||
3457 | const auto &GlobalOp = I.getOperand(1); | ||||||
3458 | assert(GlobalOp.getOffset() == 0 &&(static_cast <bool> (GlobalOp.getOffset() == 0 && "Shouldn't have an offset on TLS globals!") ? void (0) : __assert_fail ("GlobalOp.getOffset() == 0 && \"Shouldn't have an offset on TLS globals!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3459, __extension__ __PRETTY_FUNCTION__)) | ||||||
3459 | "Shouldn't have an offset on TLS globals!")(static_cast <bool> (GlobalOp.getOffset() == 0 && "Shouldn't have an offset on TLS globals!") ? void (0) : __assert_fail ("GlobalOp.getOffset() == 0 && \"Shouldn't have an offset on TLS globals!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3459, __extension__ __PRETTY_FUNCTION__)); | ||||||
3460 | const GlobalValue &GV = *GlobalOp.getGlobal(); | ||||||
3461 | |||||||
3462 | auto LoadGOT = | ||||||
3463 | MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) | ||||||
3464 | .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); | ||||||
3465 | |||||||
3466 | auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, | ||||||
3467 | {LoadGOT.getReg(0)}) | ||||||
3468 | .addImm(0); | ||||||
3469 | |||||||
3470 | MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); | ||||||
3471 | // TLS calls preserve all registers except those that absolutely must be | ||||||
3472 | // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be | ||||||
3473 | // silly). | ||||||
3474 | MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) | ||||||
3475 | .addUse(AArch64::X0, RegState::Implicit) | ||||||
3476 | .addDef(AArch64::X0, RegState::Implicit) | ||||||
3477 | .addRegMask(TRI.getTLSCallPreservedMask()); | ||||||
3478 | |||||||
3479 | MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); | ||||||
3480 | RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, | ||||||
3481 | MRI); | ||||||
3482 | I.eraseFromParent(); | ||||||
3483 | return true; | ||||||
3484 | } | ||||||
3485 | |||||||
3486 | bool AArch64InstructionSelector::selectIntrinsicTrunc( | ||||||
3487 | MachineInstr &I, MachineRegisterInfo &MRI) const { | ||||||
3488 | const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
3489 | |||||||
3490 | // Select the correct opcode. | ||||||
3491 | unsigned Opc = 0; | ||||||
3492 | if (!SrcTy.isVector()) { | ||||||
3493 | switch (SrcTy.getSizeInBits()) { | ||||||
3494 | default: | ||||||
3495 | case 16: | ||||||
3496 | Opc = AArch64::FRINTZHr; | ||||||
3497 | break; | ||||||
3498 | case 32: | ||||||
3499 | Opc = AArch64::FRINTZSr; | ||||||
3500 | break; | ||||||
3501 | case 64: | ||||||
3502 | Opc = AArch64::FRINTZDr; | ||||||
3503 | break; | ||||||
3504 | } | ||||||
3505 | } else { | ||||||
3506 | unsigned NumElts = SrcTy.getNumElements(); | ||||||
3507 | switch (SrcTy.getElementType().getSizeInBits()) { | ||||||
3508 | default: | ||||||
3509 | break; | ||||||
3510 | case 16: | ||||||
3511 | if (NumElts == 4) | ||||||
3512 | Opc = AArch64::FRINTZv4f16; | ||||||
3513 | else if (NumElts == 8) | ||||||
3514 | Opc = AArch64::FRINTZv8f16; | ||||||
3515 | break; | ||||||
3516 | case 32: | ||||||
3517 | if (NumElts == 2) | ||||||
3518 | Opc = AArch64::FRINTZv2f32; | ||||||
3519 | else if (NumElts == 4) | ||||||
3520 | Opc = AArch64::FRINTZv4f32; | ||||||
3521 | break; | ||||||
3522 | case 64: | ||||||
3523 | if (NumElts == 2) | ||||||
3524 | Opc = AArch64::FRINTZv2f64; | ||||||
3525 | break; | ||||||
3526 | } | ||||||
3527 | } | ||||||
3528 | |||||||
3529 | if (!Opc) { | ||||||
3530 | // Didn't get an opcode above, bail. | ||||||
3531 | LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n" ; } } while (false); | ||||||
3532 | return false; | ||||||
3533 | } | ||||||
3534 | |||||||
3535 | // Legalization would have set us up perfectly for this; we just need to | ||||||
3536 | // set the opcode and move on. | ||||||
3537 | I.setDesc(TII.get(Opc)); | ||||||
3538 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3539 | } | ||||||
3540 | |||||||
3541 | bool AArch64InstructionSelector::selectIntrinsicRound( | ||||||
3542 | MachineInstr &I, MachineRegisterInfo &MRI) const { | ||||||
3543 | const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
3544 | |||||||
3545 | // Select the correct opcode. | ||||||
3546 | unsigned Opc = 0; | ||||||
3547 | if (!SrcTy.isVector()) { | ||||||
3548 | switch (SrcTy.getSizeInBits()) { | ||||||
3549 | default: | ||||||
3550 | case 16: | ||||||
3551 | Opc = AArch64::FRINTAHr; | ||||||
3552 | break; | ||||||
3553 | case 32: | ||||||
3554 | Opc = AArch64::FRINTASr; | ||||||
3555 | break; | ||||||
3556 | case 64: | ||||||
3557 | Opc = AArch64::FRINTADr; | ||||||
3558 | break; | ||||||
3559 | } | ||||||
3560 | } else { | ||||||
3561 | unsigned NumElts = SrcTy.getNumElements(); | ||||||
3562 | switch (SrcTy.getElementType().getSizeInBits()) { | ||||||
3563 | default: | ||||||
3564 | break; | ||||||
3565 | case 16: | ||||||
3566 | if (NumElts == 4) | ||||||
3567 | Opc = AArch64::FRINTAv4f16; | ||||||
3568 | else if (NumElts == 8) | ||||||
3569 | Opc = AArch64::FRINTAv8f16; | ||||||
3570 | break; | ||||||
3571 | case 32: | ||||||
3572 | if (NumElts == 2) | ||||||
3573 | Opc = AArch64::FRINTAv2f32; | ||||||
3574 | else if (NumElts == 4) | ||||||
3575 | Opc = AArch64::FRINTAv4f32; | ||||||
3576 | break; | ||||||
3577 | case 64: | ||||||
3578 | if (NumElts == 2) | ||||||
3579 | Opc = AArch64::FRINTAv2f64; | ||||||
3580 | break; | ||||||
3581 | } | ||||||
3582 | } | ||||||
3583 | |||||||
3584 | if (!Opc) { | ||||||
3585 | // Didn't get an opcode above, bail. | ||||||
3586 | LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n" ; } } while (false); | ||||||
3587 | return false; | ||||||
3588 | } | ||||||
3589 | |||||||
3590 | // Legalization would have set us up perfectly for this; we just need to | ||||||
3591 | // set the opcode and move on. | ||||||
3592 | I.setDesc(TII.get(Opc)); | ||||||
3593 | return constrainSelectedInstRegOperands(I, TII, TRI, RBI); | ||||||
3594 | } | ||||||
3595 | |||||||
3596 | bool AArch64InstructionSelector::selectVectorICmp( | ||||||
3597 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
3598 | Register DstReg = I.getOperand(0).getReg(); | ||||||
3599 | LLT DstTy = MRI.getType(DstReg); | ||||||
3600 | Register SrcReg = I.getOperand(2).getReg(); | ||||||
3601 | Register Src2Reg = I.getOperand(3).getReg(); | ||||||
3602 | LLT SrcTy = MRI.getType(SrcReg); | ||||||
3603 | |||||||
3604 | unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); | ||||||
3605 | unsigned NumElts = DstTy.getNumElements(); | ||||||
3606 | |||||||
3607 | // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b | ||||||
3608 | // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 | ||||||
3609 | // Third index is cc opcode: | ||||||
3610 | // 0 == eq | ||||||
3611 | // 1 == ugt | ||||||
3612 | // 2 == uge | ||||||
3613 | // 3 == ult | ||||||
3614 | // 4 == ule | ||||||
3615 | // 5 == sgt | ||||||
3616 | // 6 == sge | ||||||
3617 | // 7 == slt | ||||||
3618 | // 8 == sle | ||||||
3619 | // ne is done by negating 'eq' result. | ||||||
3620 | |||||||
3621 | // This table below assumes that for some comparisons the operands will be | ||||||
3622 | // commuted. | ||||||
3623 | // ult op == commute + ugt op | ||||||
3624 | // ule op == commute + uge op | ||||||
3625 | // slt op == commute + sgt op | ||||||
3626 | // sle op == commute + sge op | ||||||
3627 | unsigned PredIdx = 0; | ||||||
3628 | bool SwapOperands = false; | ||||||
3629 | CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); | ||||||
3630 | switch (Pred) { | ||||||
3631 | case CmpInst::ICMP_NE: | ||||||
3632 | case CmpInst::ICMP_EQ: | ||||||
3633 | PredIdx = 0; | ||||||
3634 | break; | ||||||
3635 | case CmpInst::ICMP_UGT: | ||||||
3636 | PredIdx = 1; | ||||||
3637 | break; | ||||||
3638 | case CmpInst::ICMP_UGE: | ||||||
3639 | PredIdx = 2; | ||||||
3640 | break; | ||||||
3641 | case CmpInst::ICMP_ULT: | ||||||
3642 | PredIdx = 3; | ||||||
3643 | SwapOperands = true; | ||||||
3644 | break; | ||||||
3645 | case CmpInst::ICMP_ULE: | ||||||
3646 | PredIdx = 4; | ||||||
3647 | SwapOperands = true; | ||||||
3648 | break; | ||||||
3649 | case CmpInst::ICMP_SGT: | ||||||
3650 | PredIdx = 5; | ||||||
3651 | break; | ||||||
3652 | case CmpInst::ICMP_SGE: | ||||||
3653 | PredIdx = 6; | ||||||
3654 | break; | ||||||
3655 | case CmpInst::ICMP_SLT: | ||||||
3656 | PredIdx = 7; | ||||||
3657 | SwapOperands = true; | ||||||
3658 | break; | ||||||
3659 | case CmpInst::ICMP_SLE: | ||||||
3660 | PredIdx = 8; | ||||||
3661 | SwapOperands = true; | ||||||
3662 | break; | ||||||
3663 | default: | ||||||
3664 | llvm_unreachable("Unhandled icmp predicate")::llvm::llvm_unreachable_internal("Unhandled icmp predicate", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3664); | ||||||
3665 | return false; | ||||||
3666 | } | ||||||
3667 | |||||||
3668 | // This table obviously should be tablegen'd when we have our GISel native | ||||||
3669 | // tablegen selector. | ||||||
3670 | |||||||
3671 | static const unsigned OpcTable[4][4][9] = { | ||||||
3672 | { | ||||||
3673 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3674 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3675 | 0 /* invalid */}, | ||||||
3676 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3677 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3678 | 0 /* invalid */}, | ||||||
3679 | {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, | ||||||
3680 | AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, | ||||||
3681 | AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, | ||||||
3682 | {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, | ||||||
3683 | AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, | ||||||
3684 | AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} | ||||||
3685 | }, | ||||||
3686 | { | ||||||
3687 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3688 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3689 | 0 /* invalid */}, | ||||||
3690 | {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, | ||||||
3691 | AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, | ||||||
3692 | AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, | ||||||
3693 | {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, | ||||||
3694 | AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, | ||||||
3695 | AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, | ||||||
3696 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3697 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3698 | 0 /* invalid */} | ||||||
3699 | }, | ||||||
3700 | { | ||||||
3701 | {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, | ||||||
3702 | AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, | ||||||
3703 | AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, | ||||||
3704 | {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, | ||||||
3705 | AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, | ||||||
3706 | AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, | ||||||
3707 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3708 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3709 | 0 /* invalid */}, | ||||||
3710 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3711 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3712 | 0 /* invalid */} | ||||||
3713 | }, | ||||||
3714 | { | ||||||
3715 | {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, | ||||||
3716 | AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, | ||||||
3717 | AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, | ||||||
3718 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3719 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3720 | 0 /* invalid */}, | ||||||
3721 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3722 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3723 | 0 /* invalid */}, | ||||||
3724 | {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3725 | 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, | ||||||
3726 | 0 /* invalid */} | ||||||
3727 | }, | ||||||
3728 | }; | ||||||
3729 | unsigned EltIdx = Log2_32(SrcEltSize / 8); | ||||||
3730 | unsigned NumEltsIdx = Log2_32(NumElts / 2); | ||||||
3731 | unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; | ||||||
3732 | if (!Opc) { | ||||||
3733 | LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not map G_ICMP to cmp opcode" ; } } while (false); | ||||||
3734 | return false; | ||||||
3735 | } | ||||||
3736 | |||||||
3737 | const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); | ||||||
3738 | const TargetRegisterClass *SrcRC = | ||||||
3739 | getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); | ||||||
3740 | if (!SrcRC) { | ||||||
3741 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not determine source register class.\n" ; } } while (false); | ||||||
3742 | return false; | ||||||
3743 | } | ||||||
3744 | |||||||
3745 | unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; | ||||||
3746 | if (SrcTy.getSizeInBits() == 128) | ||||||
3747 | NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; | ||||||
3748 | |||||||
3749 | if (SwapOperands) | ||||||
3750 | std::swap(SrcReg, Src2Reg); | ||||||
3751 | |||||||
3752 | auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); | ||||||
3753 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); | ||||||
3754 | |||||||
3755 | // Invert if we had a 'ne' cc. | ||||||
3756 | if (NotOpc) { | ||||||
3757 | Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); | ||||||
3758 | constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); | ||||||
3759 | } else { | ||||||
3760 | MIB.buildCopy(DstReg, Cmp.getReg(0)); | ||||||
3761 | } | ||||||
3762 | RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); | ||||||
3763 | I.eraseFromParent(); | ||||||
3764 | return true; | ||||||
3765 | } | ||||||
3766 | |||||||
3767 | MachineInstr *AArch64InstructionSelector::emitScalarToVector( | ||||||
3768 | unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, | ||||||
3769 | MachineIRBuilder &MIRBuilder) const { | ||||||
3770 | auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); | ||||||
3771 | |||||||
3772 | auto BuildFn = [&](unsigned SubregIndex) { | ||||||
3773 | auto Ins = | ||||||
3774 | MIRBuilder | ||||||
3775 | .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) | ||||||
3776 | .addImm(SubregIndex); | ||||||
3777 | constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); | ||||||
3778 | constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); | ||||||
3779 | return &*Ins; | ||||||
3780 | }; | ||||||
3781 | |||||||
3782 | switch (EltSize) { | ||||||
3783 | case 16: | ||||||
3784 | return BuildFn(AArch64::hsub); | ||||||
3785 | case 32: | ||||||
3786 | return BuildFn(AArch64::ssub); | ||||||
3787 | case 64: | ||||||
3788 | return BuildFn(AArch64::dsub); | ||||||
3789 | default: | ||||||
3790 | return nullptr; | ||||||
3791 | } | ||||||
3792 | } | ||||||
3793 | |||||||
3794 | bool AArch64InstructionSelector::selectMergeValues( | ||||||
3795 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
3796 | assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_MERGE_VALUES && \"unexpected opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3796, __extension__ __PRETTY_FUNCTION__)); | ||||||
3797 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
3798 | const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); | ||||||
3799 | assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation")(static_cast <bool> (!DstTy.isVector() && !SrcTy .isVector() && "invalid merge operation") ? void (0) : __assert_fail ("!DstTy.isVector() && !SrcTy.isVector() && \"invalid merge operation\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3799, __extension__ __PRETTY_FUNCTION__)); | ||||||
3800 | const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); | ||||||
3801 | |||||||
3802 | if (I.getNumOperands() != 3) | ||||||
3803 | return false; | ||||||
3804 | |||||||
3805 | // Merging 2 s64s into an s128. | ||||||
3806 | if (DstTy == LLT::scalar(128)) { | ||||||
3807 | if (SrcTy.getSizeInBits() != 64) | ||||||
3808 | return false; | ||||||
3809 | Register DstReg = I.getOperand(0).getReg(); | ||||||
3810 | Register Src1Reg = I.getOperand(1).getReg(); | ||||||
3811 | Register Src2Reg = I.getOperand(2).getReg(); | ||||||
3812 | auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); | ||||||
3813 | MachineInstr *InsMI = | ||||||
3814 | emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); | ||||||
3815 | if (!InsMI) | ||||||
3816 | return false; | ||||||
3817 | MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), | ||||||
3818 | Src2Reg, /* LaneIdx */ 1, RB, MIB); | ||||||
3819 | if (!Ins2MI) | ||||||
3820 | return false; | ||||||
3821 | constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); | ||||||
3822 | constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); | ||||||
3823 | I.eraseFromParent(); | ||||||
3824 | return true; | ||||||
3825 | } | ||||||
3826 | |||||||
3827 | if (RB.getID() != AArch64::GPRRegBankID) | ||||||
3828 | return false; | ||||||
3829 | |||||||
3830 | if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) | ||||||
3831 | return false; | ||||||
3832 | |||||||
3833 | auto *DstRC = &AArch64::GPR64RegClass; | ||||||
3834 | Register SubToRegDef = MRI.createVirtualRegister(DstRC); | ||||||
3835 | MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), | ||||||
3836 | TII.get(TargetOpcode::SUBREG_TO_REG)) | ||||||
3837 | .addDef(SubToRegDef) | ||||||
3838 | .addImm(0) | ||||||
3839 | .addUse(I.getOperand(1).getReg()) | ||||||
3840 | .addImm(AArch64::sub_32); | ||||||
3841 | Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); | ||||||
3842 | // Need to anyext the second scalar before we can use bfm | ||||||
3843 | MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), | ||||||
3844 | TII.get(TargetOpcode::SUBREG_TO_REG)) | ||||||
3845 | .addDef(SubToRegDef2) | ||||||
3846 | .addImm(0) | ||||||
3847 | .addUse(I.getOperand(2).getReg()) | ||||||
3848 | .addImm(AArch64::sub_32); | ||||||
3849 | MachineInstr &BFM = | ||||||
3850 | *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) | ||||||
3851 | .addDef(I.getOperand(0).getReg()) | ||||||
3852 | .addUse(SubToRegDef) | ||||||
3853 | .addUse(SubToRegDef2) | ||||||
3854 | .addImm(32) | ||||||
3855 | .addImm(31); | ||||||
3856 | constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); | ||||||
3857 | constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); | ||||||
3858 | constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); | ||||||
3859 | I.eraseFromParent(); | ||||||
3860 | return true; | ||||||
3861 | } | ||||||
3862 | |||||||
3863 | static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, | ||||||
3864 | const unsigned EltSize) { | ||||||
3865 | // Choose a lane copy opcode and subregister based off of the size of the | ||||||
3866 | // vector's elements. | ||||||
3867 | switch (EltSize) { | ||||||
3868 | case 8: | ||||||
3869 | CopyOpc = AArch64::CPYi8; | ||||||
3870 | ExtractSubReg = AArch64::bsub; | ||||||
3871 | break; | ||||||
3872 | case 16: | ||||||
3873 | CopyOpc = AArch64::CPYi16; | ||||||
3874 | ExtractSubReg = AArch64::hsub; | ||||||
3875 | break; | ||||||
3876 | case 32: | ||||||
3877 | CopyOpc = AArch64::CPYi32; | ||||||
3878 | ExtractSubReg = AArch64::ssub; | ||||||
3879 | break; | ||||||
3880 | case 64: | ||||||
3881 | CopyOpc = AArch64::CPYi64; | ||||||
3882 | ExtractSubReg = AArch64::dsub; | ||||||
3883 | break; | ||||||
3884 | default: | ||||||
3885 | // Unknown size, bail out. | ||||||
3886 | LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Elt size '" << EltSize << "' unsupported.\n"; } } while (false); | ||||||
3887 | return false; | ||||||
3888 | } | ||||||
3889 | return true; | ||||||
3890 | } | ||||||
3891 | |||||||
3892 | MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( | ||||||
3893 | Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, | ||||||
3894 | Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { | ||||||
3895 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | ||||||
3896 | unsigned CopyOpc = 0; | ||||||
3897 | unsigned ExtractSubReg = 0; | ||||||
3898 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { | ||||||
3899 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n" ; } } while (false) | ||||||
3900 | dbgs() << "Couldn't determine lane copy opcode for instruction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n" ; } } while (false); | ||||||
3901 | return nullptr; | ||||||
3902 | } | ||||||
3903 | |||||||
3904 | const TargetRegisterClass *DstRC = | ||||||
3905 | getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); | ||||||
3906 | if (!DstRC) { | ||||||
3907 | LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not determine destination register class.\n" ; } } while (false); | ||||||
3908 | return nullptr; | ||||||
3909 | } | ||||||
3910 | |||||||
3911 | const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); | ||||||
3912 | const LLT &VecTy = MRI.getType(VecReg); | ||||||
3913 | const TargetRegisterClass *VecRC = | ||||||
3914 | getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); | ||||||
3915 | if (!VecRC) { | ||||||
3916 | LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not determine source register class.\n" ; } } while (false); | ||||||
3917 | return nullptr; | ||||||
3918 | } | ||||||
3919 | |||||||
3920 | // The register that we're going to copy into. | ||||||
3921 | Register InsertReg = VecReg; | ||||||
3922 | if (!DstReg) | ||||||
3923 | DstReg = MRI.createVirtualRegister(DstRC); | ||||||
3924 | // If the lane index is 0, we just use a subregister COPY. | ||||||
3925 | if (LaneIdx == 0) { | ||||||
3926 | auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) | ||||||
3927 | .addReg(VecReg, 0, ExtractSubReg); | ||||||
3928 | RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); | ||||||
3929 | return &*Copy; | ||||||
3930 | } | ||||||
3931 | |||||||
3932 | // Lane copies require 128-bit wide registers. If we're dealing with an | ||||||
3933 | // unpacked vector, then we need to move up to that width. Insert an implicit | ||||||
3934 | // def and a subregister insert to get us there. | ||||||
3935 | if (VecTy.getSizeInBits() != 128) { | ||||||
3936 | MachineInstr *ScalarToVector = emitScalarToVector( | ||||||
3937 | VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); | ||||||
3938 | if (!ScalarToVector) | ||||||
3939 | return nullptr; | ||||||
3940 | InsertReg = ScalarToVector->getOperand(0).getReg(); | ||||||
3941 | } | ||||||
3942 | |||||||
3943 | MachineInstr *LaneCopyMI = | ||||||
3944 | MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); | ||||||
3945 | constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); | ||||||
3946 | |||||||
3947 | // Make sure that we actually constrain the initial copy. | ||||||
3948 | RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); | ||||||
3949 | return LaneCopyMI; | ||||||
3950 | } | ||||||
3951 | |||||||
3952 | bool AArch64InstructionSelector::selectExtractElt( | ||||||
3953 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
3954 | assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && "unexpected opcode!") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3955, __extension__ __PRETTY_FUNCTION__)) | ||||||
3955 | "unexpected opcode!")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && "unexpected opcode!") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3955, __extension__ __PRETTY_FUNCTION__)); | ||||||
3956 | Register DstReg = I.getOperand(0).getReg(); | ||||||
3957 | const LLT NarrowTy = MRI.getType(DstReg); | ||||||
3958 | const Register SrcReg = I.getOperand(1).getReg(); | ||||||
3959 | const LLT WideTy = MRI.getType(SrcReg); | ||||||
3960 | (void)WideTy; | ||||||
3961 | assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&(static_cast <bool> (WideTy.getSizeInBits() >= NarrowTy .getSizeInBits() && "source register size too small!" ) ? void (0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3962, __extension__ __PRETTY_FUNCTION__)) | ||||||
3962 | "source register size too small!")(static_cast <bool> (WideTy.getSizeInBits() >= NarrowTy .getSizeInBits() && "source register size too small!" ) ? void (0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3962, __extension__ __PRETTY_FUNCTION__)); | ||||||
3963 | assert(!NarrowTy.isVector() && "cannot extract vector into vector!")(static_cast <bool> (!NarrowTy.isVector() && "cannot extract vector into vector!" ) ? void (0) : __assert_fail ("!NarrowTy.isVector() && \"cannot extract vector into vector!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3963, __extension__ __PRETTY_FUNCTION__)); | ||||||
3964 | |||||||
3965 | // Need the lane index to determine the correct copy opcode. | ||||||
3966 | MachineOperand &LaneIdxOp = I.getOperand(2); | ||||||
3967 | assert(LaneIdxOp.isReg() && "Lane index operand was not a register?")(static_cast <bool> (LaneIdxOp.isReg() && "Lane index operand was not a register?" ) ? void (0) : __assert_fail ("LaneIdxOp.isReg() && \"Lane index operand was not a register?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3967, __extension__ __PRETTY_FUNCTION__)); | ||||||
3968 | |||||||
3969 | if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { | ||||||
3970 | LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Cannot extract into GPR.\n" ; } } while (false); | ||||||
3971 | return false; | ||||||
3972 | } | ||||||
3973 | |||||||
3974 | // Find the index to extract from. | ||||||
3975 | auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); | ||||||
3976 | if (!VRegAndVal) | ||||||
3977 | return false; | ||||||
3978 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); | ||||||
3979 | |||||||
3980 | |||||||
3981 | const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); | ||||||
3982 | MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, | ||||||
3983 | LaneIdx, MIB); | ||||||
3984 | if (!Extract) | ||||||
3985 | return false; | ||||||
3986 | |||||||
3987 | I.eraseFromParent(); | ||||||
3988 | return true; | ||||||
3989 | } | ||||||
3990 | |||||||
3991 | bool AArch64InstructionSelector::selectSplitVectorUnmerge( | ||||||
3992 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
3993 | unsigned NumElts = I.getNumOperands() - 1; | ||||||
3994 | Register SrcReg = I.getOperand(NumElts).getReg(); | ||||||
3995 | const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
3996 | const LLT SrcTy = MRI.getType(SrcReg); | ||||||
3997 | |||||||
3998 | assert(NarrowTy.isVector() && "Expected an unmerge into vectors")(static_cast <bool> (NarrowTy.isVector() && "Expected an unmerge into vectors" ) ? void (0) : __assert_fail ("NarrowTy.isVector() && \"Expected an unmerge into vectors\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 3998, __extension__ __PRETTY_FUNCTION__)); | ||||||
3999 | if (SrcTy.getSizeInBits() > 128) { | ||||||
4000 | LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unexpected vector type for vec split unmerge" ; } } while (false); | ||||||
4001 | return false; | ||||||
4002 | } | ||||||
4003 | |||||||
4004 | // We implement a split vector operation by treating the sub-vectors as | ||||||
4005 | // scalars and extracting them. | ||||||
4006 | const RegisterBank &DstRB = | ||||||
4007 | *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); | ||||||
4008 | for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { | ||||||
4009 | Register Dst = I.getOperand(OpIdx).getReg(); | ||||||
4010 | MachineInstr *Extract = | ||||||
4011 | emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); | ||||||
4012 | if (!Extract) | ||||||
4013 | return false; | ||||||
4014 | } | ||||||
4015 | I.eraseFromParent(); | ||||||
4016 | return true; | ||||||
4017 | } | ||||||
4018 | |||||||
4019 | bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, | ||||||
4020 | MachineRegisterInfo &MRI) { | ||||||
4021 | assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "unexpected opcode") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4022, __extension__ __PRETTY_FUNCTION__)) | ||||||
4022 | "unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "unexpected opcode") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4022, __extension__ __PRETTY_FUNCTION__)); | ||||||
4023 | |||||||
4024 | // TODO: Handle unmerging into GPRs and from scalars to scalars. | ||||||
4025 | if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != | ||||||
4026 | AArch64::FPRRegBankID || | ||||||
4027 | RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != | ||||||
4028 | AArch64::FPRRegBankID) { | ||||||
4029 | LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " "currently unsupported.\n"; } } while (false) | ||||||
4030 | "currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " "currently unsupported.\n"; } } while (false); | ||||||
4031 | return false; | ||||||
4032 | } | ||||||
4033 | |||||||
4034 | // The last operand is the vector source register, and every other operand is | ||||||
4035 | // a register to unpack into. | ||||||
4036 | unsigned NumElts = I.getNumOperands() - 1; | ||||||
4037 | Register SrcReg = I.getOperand(NumElts).getReg(); | ||||||
4038 | const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
4039 | const LLT WideTy = MRI.getType(SrcReg); | ||||||
4040 | (void)WideTy; | ||||||
4041 | assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&(static_cast <bool> ((WideTy.isVector() || WideTy.getSizeInBits () == 128) && "can only unmerge from vector or s128 types!" ) ? void (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4042, __extension__ __PRETTY_FUNCTION__)) | ||||||
4042 | "can only unmerge from vector or s128 types!")(static_cast <bool> ((WideTy.isVector() || WideTy.getSizeInBits () == 128) && "can only unmerge from vector or s128 types!" ) ? void (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4042, __extension__ __PRETTY_FUNCTION__)); | ||||||
4043 | assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&(static_cast <bool> (WideTy.getSizeInBits() > NarrowTy .getSizeInBits() && "source register size too small!" ) ? void (0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4044, __extension__ __PRETTY_FUNCTION__)) | ||||||
4044 | "source register size too small!")(static_cast <bool> (WideTy.getSizeInBits() > NarrowTy .getSizeInBits() && "source register size too small!" ) ? void (0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4044, __extension__ __PRETTY_FUNCTION__)); | ||||||
4045 | |||||||
4046 | if (!NarrowTy.isScalar()) | ||||||
4047 | return selectSplitVectorUnmerge(I, MRI); | ||||||
4048 | |||||||
4049 | // Choose a lane copy opcode and subregister based off of the size of the | ||||||
4050 | // vector's elements. | ||||||
4051 | unsigned CopyOpc = 0; | ||||||
4052 | unsigned ExtractSubReg = 0; | ||||||
4053 | if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) | ||||||
4054 | return false; | ||||||
4055 | |||||||
4056 | // Set up for the lane copies. | ||||||
4057 | MachineBasicBlock &MBB = *I.getParent(); | ||||||
4058 | |||||||
4059 | // Stores the registers we'll be copying from. | ||||||
4060 | SmallVector<Register, 4> InsertRegs; | ||||||
4061 | |||||||
4062 | // We'll use the first register twice, so we only need NumElts-1 registers. | ||||||
4063 | unsigned NumInsertRegs = NumElts - 1; | ||||||
4064 | |||||||
4065 | // If our elements fit into exactly 128 bits, then we can copy from the source | ||||||
4066 | // directly. Otherwise, we need to do a bit of setup with some subregister | ||||||
4067 | // inserts. | ||||||
4068 | if (NarrowTy.getSizeInBits() * NumElts == 128) { | ||||||
4069 | InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); | ||||||
4070 | } else { | ||||||
4071 | // No. We have to perform subregister inserts. For each insert, create an | ||||||
4072 | // implicit def and a subregister insert, and save the register we create. | ||||||
4073 | const TargetRegisterClass *RC = | ||||||
4074 | getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI), | ||||||
4075 | WideTy.getScalarSizeInBits() * NumElts); | ||||||
4076 | unsigned SubReg = 0; | ||||||
4077 | bool Found = getSubRegForClass(RC, TRI, SubReg); | ||||||
4078 | (void)Found; | ||||||
4079 | assert(Found && "expected to find last operand's subeg idx")(static_cast <bool> (Found && "expected to find last operand's subeg idx" ) ? void (0) : __assert_fail ("Found && \"expected to find last operand's subeg idx\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4079, __extension__ __PRETTY_FUNCTION__)); | ||||||
4080 | for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { | ||||||
4081 | Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); | ||||||
4082 | MachineInstr &ImpDefMI = | ||||||
4083 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), | ||||||
4084 | ImpDefReg); | ||||||
4085 | |||||||
4086 | // Now, create the subregister insert from SrcReg. | ||||||
4087 | Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); | ||||||
4088 | MachineInstr &InsMI = | ||||||
4089 | *BuildMI(MBB, I, I.getDebugLoc(), | ||||||
4090 | TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) | ||||||
4091 | .addUse(ImpDefReg) | ||||||
4092 | .addUse(SrcReg) | ||||||
4093 | .addImm(SubReg); | ||||||
4094 | |||||||
4095 | constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); | ||||||
4096 | constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); | ||||||
4097 | |||||||
4098 | // Save the register so that we can copy from it after. | ||||||
4099 | InsertRegs.push_back(InsertReg); | ||||||
4100 | } | ||||||
4101 | } | ||||||
4102 | |||||||
4103 | // Now that we've created any necessary subregister inserts, we can | ||||||
4104 | // create the copies. | ||||||
4105 | // | ||||||
4106 | // Perform the first copy separately as a subregister copy. | ||||||
4107 | Register CopyTo = I.getOperand(0).getReg(); | ||||||
4108 | auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) | ||||||
4109 | .addReg(InsertRegs[0], 0, ExtractSubReg); | ||||||
4110 | constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); | ||||||
4111 | |||||||
4112 | // Now, perform the remaining copies as vector lane copies. | ||||||
4113 | unsigned LaneIdx = 1; | ||||||
4114 | for (Register InsReg : InsertRegs) { | ||||||
4115 | Register CopyTo = I.getOperand(LaneIdx).getReg(); | ||||||
4116 | MachineInstr &CopyInst = | ||||||
4117 | *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) | ||||||
4118 | .addUse(InsReg) | ||||||
4119 | .addImm(LaneIdx); | ||||||
4120 | constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); | ||||||
4121 | ++LaneIdx; | ||||||
4122 | } | ||||||
4123 | |||||||
4124 | // Separately constrain the first copy's destination. Because of the | ||||||
4125 | // limitation in constrainOperandRegClass, we can't guarantee that this will | ||||||
4126 | // actually be constrained. So, do it ourselves using the second operand. | ||||||
4127 | const TargetRegisterClass *RC = | ||||||
4128 | MRI.getRegClassOrNull(I.getOperand(1).getReg()); | ||||||
4129 | if (!RC) { | ||||||
4130 | LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Couldn't constrain copy destination.\n" ; } } while (false); | ||||||
4131 | return false; | ||||||
4132 | } | ||||||
4133 | |||||||
4134 | RBI.constrainGenericRegister(CopyTo, *RC, MRI); | ||||||
4135 | I.eraseFromParent(); | ||||||
4136 | return true; | ||||||
4137 | } | ||||||
4138 | |||||||
4139 | bool AArch64InstructionSelector::selectConcatVectors( | ||||||
4140 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
4141 | assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && "Unexpected opcode") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4142, __extension__ __PRETTY_FUNCTION__)) | ||||||
4142 | "Unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && "Unexpected opcode") ? void (0) : __assert_fail ( "I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4142, __extension__ __PRETTY_FUNCTION__)); | ||||||
4143 | Register Dst = I.getOperand(0).getReg(); | ||||||
4144 | Register Op1 = I.getOperand(1).getReg(); | ||||||
4145 | Register Op2 = I.getOperand(2).getReg(); | ||||||
4146 | MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB); | ||||||
4147 | if (!ConcatMI) | ||||||
4148 | return false; | ||||||
4149 | I.eraseFromParent(); | ||||||
4150 | return true; | ||||||
4151 | } | ||||||
4152 | |||||||
4153 | unsigned | ||||||
4154 | AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, | ||||||
4155 | MachineFunction &MF) const { | ||||||
4156 | Type *CPTy = CPVal->getType(); | ||||||
4157 | Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); | ||||||
4158 | |||||||
4159 | MachineConstantPool *MCP = MF.getConstantPool(); | ||||||
4160 | return MCP->getConstantPoolIndex(CPVal, Alignment); | ||||||
4161 | } | ||||||
4162 | |||||||
/// Materialize \p CPVal by spilling it to the constant pool and loading it
/// back into an FP register: an ADRP of the pool entry's page followed by an
/// unsigned-offset LDR of the appropriate width.
/// \returns the load instruction, or nullptr when the constant's store size
/// is not 4, 8, or 16 bytes (the only widths handled here).
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);

  // Page-aligned base address of the constant-pool entry.
  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  // Select load opcode and destination register class by store size (bytes).
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 4:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    // Unsupported width; caller must cope with the nullptr.
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }
  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
                                                    MachineMemOperand::MOLoad,
                                                    Size, Align(Size)));
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
4209 | |||||||
4210 | /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given | ||||||
4211 | /// size and RB. | ||||||
4212 | static std::pair<unsigned, unsigned> | ||||||
4213 | getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { | ||||||
4214 | unsigned Opc, SubregIdx; | ||||||
4215 | if (RB.getID() == AArch64::GPRRegBankID) { | ||||||
4216 | if (EltSize == 16) { | ||||||
4217 | Opc = AArch64::INSvi16gpr; | ||||||
4218 | SubregIdx = AArch64::ssub; | ||||||
4219 | } else if (EltSize == 32) { | ||||||
4220 | Opc = AArch64::INSvi32gpr; | ||||||
4221 | SubregIdx = AArch64::ssub; | ||||||
4222 | } else if (EltSize == 64) { | ||||||
4223 | Opc = AArch64::INSvi64gpr; | ||||||
4224 | SubregIdx = AArch64::dsub; | ||||||
4225 | } else { | ||||||
4226 | llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4226); | ||||||
4227 | } | ||||||
4228 | } else { | ||||||
4229 | if (EltSize == 8) { | ||||||
4230 | Opc = AArch64::INSvi8lane; | ||||||
4231 | SubregIdx = AArch64::bsub; | ||||||
4232 | } else if (EltSize == 16) { | ||||||
4233 | Opc = AArch64::INSvi16lane; | ||||||
4234 | SubregIdx = AArch64::hsub; | ||||||
4235 | } else if (EltSize == 32) { | ||||||
4236 | Opc = AArch64::INSvi32lane; | ||||||
4237 | SubregIdx = AArch64::ssub; | ||||||
4238 | } else if (EltSize == 64) { | ||||||
4239 | Opc = AArch64::INSvi64lane; | ||||||
4240 | SubregIdx = AArch64::dsub; | ||||||
4241 | } else { | ||||||
4242 | llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4242); | ||||||
4243 | } | ||||||
4244 | } | ||||||
4245 | return std::make_pair(Opc, SubregIdx); | ||||||
4246 | } | ||||||
4247 | |||||||
/// Build and constrain a fully-selected (non-generic) instruction \p Opcode
/// with the given destination and source operands.
/// \p RenderFns, when present, are the complex-renderer callbacks produced by
/// an addressing-mode matcher; each one appends its extra operands
/// (immediates, shift/extend amounts, ...) to the new instruction.
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
4262 | |||||||
/// Shared emitter for ADD/ADDS/SUB/SUBS. \p AddrModeAndSizeToOpcode is indexed
/// [addressing mode][is-32-bit] with the rows:
///   0: INSTRri  (positive arithmetic immediate)
///   1: INSTRrs  (shifted register)
///   2: INSTRrr  (plain register-register; the fallback)
///   3: inverse INSTRri (negative immediate folded into the opposite op)
///   4: INSTRrx  (extended register)
/// The addressing modes are tried in the order below; the first matcher that
/// accepts \p RHS decides the encoding.
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  // No addressing-mode fold applied: plain register-register form.
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
4297 | |||||||
4298 | MachineInstr * | ||||||
4299 | AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, | ||||||
4300 | MachineOperand &RHS, | ||||||
4301 | MachineIRBuilder &MIRBuilder) const { | ||||||
4302 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ | ||||||
4303 | {{AArch64::ADDXri, AArch64::ADDWri}, | ||||||
4304 | {AArch64::ADDXrs, AArch64::ADDWrs}, | ||||||
4305 | {AArch64::ADDXrr, AArch64::ADDWrr}, | ||||||
4306 | {AArch64::SUBXri, AArch64::SUBWri}, | ||||||
4307 | {AArch64::ADDXrx, AArch64::ADDWrx}}}; | ||||||
4308 | return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); | ||||||
4309 | } | ||||||
4310 | |||||||
4311 | MachineInstr * | ||||||
4312 | AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, | ||||||
4313 | MachineOperand &RHS, | ||||||
4314 | MachineIRBuilder &MIRBuilder) const { | ||||||
4315 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ | ||||||
4316 | {{AArch64::ADDSXri, AArch64::ADDSWri}, | ||||||
4317 | {AArch64::ADDSXrs, AArch64::ADDSWrs}, | ||||||
4318 | {AArch64::ADDSXrr, AArch64::ADDSWrr}, | ||||||
4319 | {AArch64::SUBSXri, AArch64::SUBSWri}, | ||||||
4320 | {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; | ||||||
4321 | return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); | ||||||
4322 | } | ||||||
4323 | |||||||
4324 | MachineInstr * | ||||||
4325 | AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, | ||||||
4326 | MachineOperand &RHS, | ||||||
4327 | MachineIRBuilder &MIRBuilder) const { | ||||||
4328 | const std::array<std::array<unsigned, 2>, 5> OpcTable{ | ||||||
4329 | {{AArch64::SUBSXri, AArch64::SUBSWri}, | ||||||
4330 | {AArch64::SUBSXrs, AArch64::SUBSWrs}, | ||||||
4331 | {AArch64::SUBSXrr, AArch64::SUBSWrr}, | ||||||
4332 | {AArch64::ADDSXri, AArch64::ADDSWri}, | ||||||
4333 | {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; | ||||||
4334 | return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); | ||||||
4335 | } | ||||||
4336 | |||||||
4337 | MachineInstr * | ||||||
4338 | AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, | ||||||
4339 | MachineIRBuilder &MIRBuilder) const { | ||||||
4340 | MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); | ||||||
4341 | bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); | ||||||
4342 | auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; | ||||||
4343 | return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); | ||||||
4344 | } | ||||||
4345 | |||||||
/// Emit a TST: a flag-setting ANDS used only for its NZCV effects.
/// Tries the immediate form (logical immediate), then the shifted-register
/// form, then falls back to register-register.
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  // Rows: 0 = ri (logical immediate), 1 = rs (shifted reg), 2 = rr.
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  // Next preference: fold a shifted register into the second operand.
  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
4374 | |||||||
/// Emit a scalar integer compare. Prefers folding into a CMN or TST (via
/// tryFoldIntegerCompare); otherwise emits a SUBS into a fresh register of
/// the same class as the LHS, so the caller consumes only NZCV.
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size; // Only used by the assertion below.
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  // Fallback: SUBS into a scratch destination cloned from the LHS's vreg.
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
4392 | |||||||
/// Materialize the boolean result of an already-emitted FCMP with predicate
/// \p Pred into the 32-bit register \p Dst using CSINC (i.e. CSET).
/// Predicates that map to two AArch64 condition codes are handled by emitting
/// two CSETs into scratch registers and ORing them together.
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZeroReg = AArch64::WZR;
  // CSINC dst, wzr, wzr, !CC is the canonical expansion of CSET dst, CC:
  // CSINC produces 1 when its condition is *false*, hence the inversion.
  auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
    auto CSet =
        MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
            .addImm(getInvertedCondCode(CC));
    constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
    return &*CSet;
  };

  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  // Single-condition predicate: one CSET straight into Dst.
  if (CC2 == AArch64CC::AL)
    return EmitCSet(Dst, CC1);

  // Two-condition predicate: CSET each condition and OR the results into Dst.
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  EmitCSet(Def1Reg, CC1);
  EmitCSet(Def2Reg, CC2);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
4424 | |||||||
4425 | MachineInstr * | ||||||
4426 | AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, | ||||||
4427 | MachineIRBuilder &MIRBuilder, | ||||||
4428 | Optional<CmpInst::Predicate> Pred) const { | ||||||
4429 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | ||||||
4430 | LLT Ty = MRI.getType(LHS); | ||||||
4431 | if (Ty.isVector()) | ||||||
4432 | return nullptr; | ||||||
4433 | unsigned OpSize = Ty.getSizeInBits(); | ||||||
4434 | if (OpSize != 32 && OpSize != 64) | ||||||
4435 | return nullptr; | ||||||
4436 | |||||||
4437 | // If this is a compare against +0.0, then we don't have | ||||||
4438 | // to explicitly materialize a constant. | ||||||
4439 | const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); | ||||||
4440 | bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); | ||||||
4441 | |||||||
4442 | auto IsEqualityPred = [](CmpInst::Predicate P) { | ||||||
4443 | return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || | ||||||
4444 | P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; | ||||||
4445 | }; | ||||||
4446 | if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { | ||||||
4447 | // Try commutating the operands. | ||||||
4448 | const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); | ||||||
4449 | if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { | ||||||
4450 | ShouldUseImm = true; | ||||||
4451 | std::swap(LHS, RHS); | ||||||
4452 | } | ||||||
4453 | } | ||||||
4454 | unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, | ||||||
4455 | {AArch64::FCMPSri, AArch64::FCMPDri}}; | ||||||
4456 | unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; | ||||||
4457 | |||||||
4458 | // Partially build the compare. Decide if we need to add a use for the | ||||||
4459 | // third operand based off whether or not we're comparing against 0.0. | ||||||
4460 | auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); | ||||||
4461 | if (!ShouldUseImm) | ||||||
4462 | CmpMI.addUse(RHS); | ||||||
4463 | constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); | ||||||
4464 | return &*CmpMI; | ||||||
4465 | } | ||||||
4466 | |||||||
/// Concatenate two identically-typed 64-bit vectors \p Op1 and \p Op2 into a
/// 128-bit result. If \p Dst is empty a fresh virtual register is created.
/// Returns the final insert instruction, or nullptr when the inputs are of
/// differing types, already full-size (>= 128 bits), or not 64 bits wide.
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
    return nullptr;
  }

  // Treat each whole 64-bit source vector as a single scalar "element" of the
  // 128-bit destination; the destination class is twice the source width.
  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  // Widen both operands into the low half of a 128-bit register.
  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  // INS <Dst>[1], <WidenedOp2>[0]: move Op2 into the upper 64 bits.
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}
4526 | |||||||
4527 | MachineInstr * | ||||||
4528 | AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, | ||||||
4529 | MachineIRBuilder &MIRBuilder, | ||||||
4530 | Register SrcReg) const { | ||||||
4531 | // CSINC increments the result when the predicate is false. Invert it. | ||||||
4532 | const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( | ||||||
4533 | CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); | ||||||
4534 | auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg}) | ||||||
4535 | .addImm(InvCC); | ||||||
4536 | constrainSelectedInstRegOperands(*I, TII, TRI, RBI); | ||||||
4537 | return &*I; | ||||||
4538 | } | ||||||
4539 | |||||||
4540 | std::pair<MachineInstr *, AArch64CC::CondCode> | ||||||
4541 | AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, | ||||||
4542 | MachineOperand &LHS, | ||||||
4543 | MachineOperand &RHS, | ||||||
4544 | MachineIRBuilder &MIRBuilder) const { | ||||||
4545 | switch (Opcode) { | ||||||
4546 | default: | ||||||
4547 | llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4547); | ||||||
4548 | case TargetOpcode::G_SADDO: | ||||||
4549 | return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); | ||||||
4550 | case TargetOpcode::G_UADDO: | ||||||
4551 | return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); | ||||||
4552 | case TargetOpcode::G_SSUBO: | ||||||
4553 | return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); | ||||||
4554 | case TargetOpcode::G_USUBO: | ||||||
4555 | return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); | ||||||
4556 | } | ||||||
4557 | } | ||||||
4558 | |||||||
/// Try to select a G_SELECT whose condition is (transitively, through
/// copies/truncs) defined by a G_ICMP/G_FCMP as a compare + conditional
/// select, instead of materializing the boolean condition first.
/// On success the G_SELECT is erased and true is returned.
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    Register CondDefReg = CondDef->getOperand(0).getReg();
    if (!MRI.hasOneNonDBGUse(CondDefReg)) {
      // Unless it's another select.
      for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
        if (CondDef == &UI)
          continue;
        if (UI.getOpcode() != TargetOpcode::G_SELECT)
          return false;
      }
    }

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    // Keep walking up the copy/trunc chain toward the defining compare.
    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    CondCode = changeICMPPredToAArch64CC(Pred);
    // Emit the compare; NZCV feeds the select emitted below.
    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                       CondDef->getOperand(1), MIB);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(CondDef->getOperand(2).getReg(),
                       CondDef->getOperand(3).getReg(), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  }

  // Emit the select.
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}
4649 | |||||||
/// Try to replace an integer compare with a cheaper flag-setting instruction:
/// a CMN when either side is the negation of a value (G_SUB 0, y), or a TST
/// when a G_AND is compared against zero with a non-unsigned predicate.
/// \returns the folded instruction, or nullptr when no fold applies.
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (isCMN(LHSDef, P, MRI))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (isCMN(RHSDef, P, MRI))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!CmpInst::isUnsigned(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    // Fold to a TST on the G_AND's own operands.
    return emitTST(LHSDef->getOperand(1),
                   LHSDef->getOperand(2), MIRBuilder);
  }

  return nullptr;
}
4714 | |||||||
4715 | bool AArch64InstructionSelector::selectShuffleVector( | ||||||
4716 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
4717 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
4718 | Register Src1Reg = I.getOperand(1).getReg(); | ||||||
4719 | const LLT Src1Ty = MRI.getType(Src1Reg); | ||||||
4720 | Register Src2Reg = I.getOperand(2).getReg(); | ||||||
4721 | const LLT Src2Ty = MRI.getType(Src2Reg); | ||||||
4722 | ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); | ||||||
4723 | |||||||
4724 | MachineBasicBlock &MBB = *I.getParent(); | ||||||
4725 | MachineFunction &MF = *MBB.getParent(); | ||||||
4726 | LLVMContext &Ctx = MF.getFunction().getContext(); | ||||||
4727 | |||||||
4728 | // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if | ||||||
4729 | // it's originated from a <1 x T> type. Those should have been lowered into | ||||||
4730 | // G_BUILD_VECTOR earlier. | ||||||
4731 | if (!Src1Ty.isVector() || !Src2Ty.isVector()) { | ||||||
4732 | LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n" ; } } while (false); | ||||||
4733 | return false; | ||||||
4734 | } | ||||||
4735 | |||||||
4736 | unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; | ||||||
4737 | |||||||
4738 | SmallVector<Constant *, 64> CstIdxs; | ||||||
4739 | for (int Val : Mask) { | ||||||
4740 | // For now, any undef indexes we'll just assume to be 0. This should be | ||||||
4741 | // optimized in future, e.g. to select DUP etc. | ||||||
4742 | Val = Val < 0 ? 0 : Val; | ||||||
4743 | for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { | ||||||
4744 | unsigned Offset = Byte + Val * BytesPerElt; | ||||||
4745 | CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); | ||||||
4746 | } | ||||||
4747 | } | ||||||
4748 | |||||||
4749 | // Use a constant pool to load the index vector for TBL. | ||||||
4750 | Constant *CPVal = ConstantVector::get(CstIdxs); | ||||||
4751 | MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB); | ||||||
4752 | if (!IndexLoad) { | ||||||
4753 | LLVM_DEBUG(dbgs() << "Could not load from a constant pool")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not load from a constant pool" ; } } while (false); | ||||||
4754 | return false; | ||||||
4755 | } | ||||||
4756 | |||||||
4757 | if (DstTy.getSizeInBits() != 128) { | ||||||
4758 | assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty")(static_cast <bool> (DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty") ? void (0) : __assert_fail ( "DstTy.getSizeInBits() == 64 && \"Unexpected shuffle result ty\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4758, __extension__ __PRETTY_FUNCTION__)); | ||||||
4759 | // This case can be done with TBL1. | ||||||
4760 | MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB); | ||||||
4761 | if (!Concat) { | ||||||
4762 | LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not do vector concat for tbl1" ; } } while (false); | ||||||
4763 | return false; | ||||||
4764 | } | ||||||
4765 | |||||||
4766 | // The constant pool load will be 64 bits, so need to convert to FPR128 reg. | ||||||
4767 | IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, | ||||||
4768 | IndexLoad->getOperand(0).getReg(), MIB); | ||||||
4769 | |||||||
4770 | auto TBL1 = MIB.buildInstr( | ||||||
4771 | AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, | ||||||
4772 | {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); | ||||||
4773 | constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); | ||||||
4774 | |||||||
4775 | auto Copy = | ||||||
4776 | MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) | ||||||
4777 | .addReg(TBL1.getReg(0), 0, AArch64::dsub); | ||||||
4778 | RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); | ||||||
4779 | I.eraseFromParent(); | ||||||
4780 | return true; | ||||||
4781 | } | ||||||
4782 | |||||||
4783 | // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive | ||||||
4784 | // Q registers for regalloc. | ||||||
4785 | SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; | ||||||
4786 | auto RegSeq = createQTuple(Regs, MIB); | ||||||
4787 | auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, | ||||||
4788 | {RegSeq, IndexLoad->getOperand(0)}); | ||||||
4789 | constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); | ||||||
4790 | I.eraseFromParent(); | ||||||
4791 | return true; | ||||||
4792 | } | ||||||
4793 | |||||||
4794 | MachineInstr *AArch64InstructionSelector::emitLaneInsert( | ||||||
4795 | Optional<Register> DstReg, Register SrcReg, Register EltReg, | ||||||
4796 | unsigned LaneIdx, const RegisterBank &RB, | ||||||
4797 | MachineIRBuilder &MIRBuilder) const { | ||||||
4798 | MachineInstr *InsElt = nullptr; | ||||||
4799 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; | ||||||
4800 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); | ||||||
4801 | |||||||
4802 | // Create a register to define with the insert if one wasn't passed in. | ||||||
4803 | if (!DstReg) | ||||||
4804 | DstReg = MRI.createVirtualRegister(DstRC); | ||||||
4805 | |||||||
4806 | unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); | ||||||
4807 | unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; | ||||||
4808 | |||||||
4809 | if (RB.getID() == AArch64::FPRRegBankID) { | ||||||
4810 | auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); | ||||||
4811 | InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) | ||||||
4812 | .addImm(LaneIdx) | ||||||
4813 | .addUse(InsSub->getOperand(0).getReg()) | ||||||
4814 | .addImm(0); | ||||||
4815 | } else { | ||||||
4816 | InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) | ||||||
4817 | .addImm(LaneIdx) | ||||||
4818 | .addUse(EltReg); | ||||||
4819 | } | ||||||
4820 | |||||||
4821 | constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); | ||||||
4822 | return InsElt; | ||||||
4823 | } | ||||||
4824 | |||||||
4825 | bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, | ||||||
4826 | MachineRegisterInfo &MRI) { | ||||||
4827 | assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4827, __extension__ __PRETTY_FUNCTION__)); | ||||||
4828 | |||||||
4829 | // Get information on the destination. | ||||||
4830 | Register DstReg = I.getOperand(0).getReg(); | ||||||
4831 | const LLT DstTy = MRI.getType(DstReg); | ||||||
4832 | unsigned VecSize = DstTy.getSizeInBits(); | ||||||
4833 | |||||||
4834 | // Get information on the element we want to insert into the destination. | ||||||
4835 | Register EltReg = I.getOperand(2).getReg(); | ||||||
4836 | const LLT EltTy = MRI.getType(EltReg); | ||||||
4837 | unsigned EltSize = EltTy.getSizeInBits(); | ||||||
4838 | if (EltSize < 16 || EltSize > 64) | ||||||
4839 | return false; // Don't support all element types yet. | ||||||
4840 | |||||||
4841 | // Find the definition of the index. Bail out if it's not defined by a | ||||||
4842 | // G_CONSTANT. | ||||||
4843 | Register IdxReg = I.getOperand(3).getReg(); | ||||||
4844 | auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); | ||||||
4845 | if (!VRegAndVal) | ||||||
4846 | return false; | ||||||
4847 | unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); | ||||||
4848 | |||||||
4849 | // Perform the lane insert. | ||||||
4850 | Register SrcReg = I.getOperand(1).getReg(); | ||||||
4851 | const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); | ||||||
4852 | |||||||
4853 | if (VecSize < 128) { | ||||||
4854 | // If the vector we're inserting into is smaller than 128 bits, widen it | ||||||
4855 | // to 128 to do the insert. | ||||||
4856 | MachineInstr *ScalarToVec = | ||||||
4857 | emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); | ||||||
4858 | if (!ScalarToVec) | ||||||
4859 | return false; | ||||||
4860 | SrcReg = ScalarToVec->getOperand(0).getReg(); | ||||||
4861 | } | ||||||
4862 | |||||||
4863 | // Create an insert into a new FPR128 register. | ||||||
4864 | // Note that if our vector is already 128 bits, we end up emitting an extra | ||||||
4865 | // register. | ||||||
4866 | MachineInstr *InsMI = | ||||||
4867 | emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB); | ||||||
4868 | |||||||
4869 | if (VecSize < 128) { | ||||||
4870 | // If we had to widen to perform the insert, then we have to demote back to | ||||||
4871 | // the original size to get the result we want. | ||||||
4872 | Register DemoteVec = InsMI->getOperand(0).getReg(); | ||||||
4873 | const TargetRegisterClass *RC = | ||||||
4874 | getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); | ||||||
4875 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { | ||||||
4876 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported register class!\n" ; } } while (false); | ||||||
4877 | return false; | ||||||
4878 | } | ||||||
4879 | unsigned SubReg = 0; | ||||||
4880 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||||
4881 | return false; | ||||||
4882 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { | ||||||
4883 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << VecSize << "\n"; } } while (false) | ||||||
4884 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << VecSize << "\n"; } } while (false); | ||||||
4885 | return false; | ||||||
4886 | } | ||||||
4887 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) | ||||||
4888 | .addReg(DemoteVec, 0, SubReg); | ||||||
4889 | RBI.constrainGenericRegister(DstReg, *RC, MRI); | ||||||
4890 | } else { | ||||||
4891 | // No widening needed. | ||||||
4892 | InsMI->getOperand(0).setReg(DstReg); | ||||||
4893 | constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); | ||||||
4894 | } | ||||||
4895 | |||||||
4896 | I.eraseFromParent(); | ||||||
4897 | return true; | ||||||
4898 | } | ||||||
4899 | |||||||
4900 | MachineInstr * | ||||||
4901 | AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, | ||||||
4902 | MachineIRBuilder &MIRBuilder, | ||||||
4903 | MachineRegisterInfo &MRI) { | ||||||
4904 | LLT DstTy = MRI.getType(Dst); | ||||||
4905 | unsigned DstSize = DstTy.getSizeInBits(); | ||||||
4906 | if (CV->isNullValue()) { | ||||||
4907 | if (DstSize == 128) { | ||||||
4908 | auto Mov = | ||||||
4909 | MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); | ||||||
4910 | constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); | ||||||
4911 | return &*Mov; | ||||||
4912 | } | ||||||
4913 | |||||||
4914 | if (DstSize == 64) { | ||||||
4915 | auto Mov = | ||||||
4916 | MIRBuilder | ||||||
4917 | .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) | ||||||
4918 | .addImm(0); | ||||||
4919 | auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) | ||||||
4920 | .addReg(Mov.getReg(0), 0, AArch64::dsub); | ||||||
4921 | RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); | ||||||
4922 | return &*Copy; | ||||||
4923 | } | ||||||
4924 | } | ||||||
4925 | |||||||
4926 | auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); | ||||||
4927 | if (!CPLoad) { | ||||||
4928 | LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Could not generate cp load for constant vector!" ; } } while (false); | ||||||
4929 | return nullptr; | ||||||
4930 | } | ||||||
4931 | |||||||
4932 | auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); | ||||||
4933 | RBI.constrainGenericRegister( | ||||||
4934 | Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); | ||||||
4935 | return &*Copy; | ||||||
4936 | } | ||||||
4937 | |||||||
4938 | bool AArch64InstructionSelector::tryOptConstantBuildVec( | ||||||
4939 | MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { | ||||||
4940 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BUILD_VECTOR ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4940, __extension__ __PRETTY_FUNCTION__)); | ||||||
4941 | unsigned DstSize = DstTy.getSizeInBits(); | ||||||
4942 | assert(DstSize <= 128 && "Unexpected build_vec type!")(static_cast <bool> (DstSize <= 128 && "Unexpected build_vec type!" ) ? void (0) : __assert_fail ("DstSize <= 128 && \"Unexpected build_vec type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 4942, __extension__ __PRETTY_FUNCTION__)); | ||||||
4943 | if (DstSize < 32) | ||||||
4944 | return false; | ||||||
4945 | // Check if we're building a constant vector, in which case we want to | ||||||
4946 | // generate a constant pool load instead of a vector insert sequence. | ||||||
4947 | SmallVector<Constant *, 16> Csts; | ||||||
4948 | for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { | ||||||
4949 | // Try to find G_CONSTANT or G_FCONSTANT | ||||||
4950 | auto *OpMI = | ||||||
4951 | getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); | ||||||
4952 | if (OpMI) | ||||||
4953 | Csts.emplace_back( | ||||||
4954 | const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); | ||||||
4955 | else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, | ||||||
4956 | I.getOperand(Idx).getReg(), MRI))) | ||||||
4957 | Csts.emplace_back( | ||||||
4958 | const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); | ||||||
4959 | else | ||||||
4960 | return false; | ||||||
4961 | } | ||||||
4962 | Constant *CV = ConstantVector::get(Csts); | ||||||
4963 | if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) | ||||||
4964 | return false; | ||||||
4965 | I.eraseFromParent(); | ||||||
4966 | return true; | ||||||
4967 | } | ||||||
4968 | |||||||
4969 | bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( | ||||||
4970 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
4971 | // Given: | ||||||
4972 | // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef | ||||||
4973 | // | ||||||
4974 | // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. | ||||||
4975 | Register Dst = I.getOperand(0).getReg(); | ||||||
4976 | Register EltReg = I.getOperand(1).getReg(); | ||||||
4977 | LLT EltTy = MRI.getType(EltReg); | ||||||
4978 | // If the index isn't on the same bank as its elements, then this can't be a | ||||||
4979 | // SUBREG_TO_REG. | ||||||
4980 | const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); | ||||||
4981 | const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI); | ||||||
4982 | if (EltRB != DstRB) | ||||||
4983 | return false; | ||||||
4984 | if (any_of(make_range(I.operands_begin() + 2, I.operands_end()), | ||||||
4985 | [&MRI](const MachineOperand &Op) { | ||||||
4986 | return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), | ||||||
4987 | MRI); | ||||||
4988 | })) | ||||||
4989 | return false; | ||||||
4990 | unsigned SubReg; | ||||||
4991 | const TargetRegisterClass *EltRC = | ||||||
4992 | getMinClassForRegBank(EltRB, EltTy.getSizeInBits()); | ||||||
4993 | if (!EltRC) | ||||||
4994 | return false; | ||||||
4995 | const TargetRegisterClass *DstRC = | ||||||
4996 | getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits()); | ||||||
4997 | if (!DstRC) | ||||||
4998 | return false; | ||||||
4999 | if (!getSubRegForClass(EltRC, TRI, SubReg)) | ||||||
5000 | return false; | ||||||
5001 | auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {}) | ||||||
5002 | .addImm(0) | ||||||
5003 | .addUse(EltReg) | ||||||
5004 | .addImm(SubReg); | ||||||
5005 | I.eraseFromParent(); | ||||||
5006 | constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI); | ||||||
5007 | return RBI.constrainGenericRegister(Dst, *DstRC, MRI); | ||||||
5008 | } | ||||||
5009 | |||||||
5010 | bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, | ||||||
5011 | MachineRegisterInfo &MRI) { | ||||||
5012 | assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BUILD_VECTOR ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5012, __extension__ __PRETTY_FUNCTION__)); | ||||||
5013 | // Until we port more of the optimized selections, for now just use a vector | ||||||
5014 | // insert sequence. | ||||||
5015 | const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); | ||||||
5016 | const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); | ||||||
5017 | unsigned EltSize = EltTy.getSizeInBits(); | ||||||
5018 | |||||||
5019 | if (tryOptConstantBuildVec(I, DstTy, MRI)) | ||||||
5020 | return true; | ||||||
5021 | if (tryOptBuildVecToSubregToReg(I, MRI)) | ||||||
5022 | return true; | ||||||
5023 | |||||||
5024 | if (EltSize < 16 || EltSize > 64) | ||||||
5025 | return false; // Don't support all element types yet. | ||||||
5026 | const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); | ||||||
5027 | |||||||
5028 | const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; | ||||||
5029 | MachineInstr *ScalarToVec = | ||||||
5030 | emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, | ||||||
5031 | I.getOperand(1).getReg(), MIB); | ||||||
5032 | if (!ScalarToVec) | ||||||
5033 | return false; | ||||||
5034 | |||||||
5035 | Register DstVec = ScalarToVec->getOperand(0).getReg(); | ||||||
5036 | unsigned DstSize = DstTy.getSizeInBits(); | ||||||
5037 | |||||||
5038 | // Keep track of the last MI we inserted. Later on, we might be able to save | ||||||
5039 | // a copy using it. | ||||||
5040 | MachineInstr *PrevMI = nullptr; | ||||||
5041 | for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { | ||||||
5042 | // Note that if we don't do a subregister copy, we can end up making an | ||||||
5043 | // extra register. | ||||||
5044 | PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, | ||||||
5045 | MIB); | ||||||
5046 | DstVec = PrevMI->getOperand(0).getReg(); | ||||||
5047 | } | ||||||
5048 | |||||||
5049 | // If DstTy's size in bits is less than 128, then emit a subregister copy | ||||||
5050 | // from DstVec to the last register we've defined. | ||||||
5051 | if (DstSize < 128) { | ||||||
5052 | // Force this to be FPR using the destination vector. | ||||||
5053 | const TargetRegisterClass *RC = | ||||||
5054 | getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); | ||||||
5055 | if (!RC) | ||||||
5056 | return false; | ||||||
5057 | if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { | ||||||
5058 | LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported register class!\n" ; } } while (false); | ||||||
5059 | return false; | ||||||
5060 | } | ||||||
5061 | |||||||
5062 | unsigned SubReg = 0; | ||||||
5063 | if (!getSubRegForClass(RC, TRI, SubReg)) | ||||||
5064 | return false; | ||||||
5065 | if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { | ||||||
5066 | LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << DstSize << "\n"; } } while (false) | ||||||
5067 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("aarch64-isel")) { dbgs() << "Unsupported destination size! (" << DstSize << "\n"; } } while (false); | ||||||
5068 | return false; | ||||||
5069 | } | ||||||
5070 | |||||||
5071 | Register Reg = MRI.createVirtualRegister(RC); | ||||||
5072 | Register DstReg = I.getOperand(0).getReg(); | ||||||
5073 | |||||||
5074 | MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg); | ||||||
5075 | MachineOperand &RegOp = I.getOperand(1); | ||||||
5076 | RegOp.setReg(Reg); | ||||||
5077 | RBI.constrainGenericRegister(DstReg, *RC, MRI); | ||||||
5078 | } else { | ||||||
5079 | // We don't need a subregister copy. Save a copy by re-using the | ||||||
5080 | // destination register on the final insert. | ||||||
5081 | assert(PrevMI && "PrevMI was null?")(static_cast <bool> (PrevMI && "PrevMI was null?" ) ? void (0) : __assert_fail ("PrevMI && \"PrevMI was null?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5081, __extension__ __PRETTY_FUNCTION__)); | ||||||
5082 | PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); | ||||||
5083 | constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); | ||||||
5084 | } | ||||||
5085 | |||||||
5086 | I.eraseFromParent(); | ||||||
5087 | return true; | ||||||
5088 | } | ||||||
5089 | |||||||
5090 | /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the | ||||||
5091 | /// ID if it exists, and 0 otherwise. | ||||||
5092 | static unsigned findIntrinsicID(MachineInstr &I) { | ||||||
5093 | auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) { | ||||||
5094 | return Op.isIntrinsicID(); | ||||||
5095 | }); | ||||||
5096 | if (IntrinOp == I.operands_end()) | ||||||
5097 | return 0; | ||||||
5098 | return IntrinOp->getIntrinsicID(); | ||||||
5099 | } | ||||||
5100 | |||||||
5101 | bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, | ||||||
5102 | unsigned NumVecs, | ||||||
5103 | MachineInstr &I) { | ||||||
5104 | assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5104, __extension__ __PRETTY_FUNCTION__)); | ||||||
5105 | assert(Opc && "Expected an opcode?")(static_cast <bool> (Opc && "Expected an opcode?" ) ? void (0) : __assert_fail ("Opc && \"Expected an opcode?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5105, __extension__ __PRETTY_FUNCTION__)); | ||||||
5106 | assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors")(static_cast <bool> (NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors") ? void (0) : __assert_fail ("NumVecs > 1 && NumVecs < 5 && \"Only support 2, 3, or 4 vectors\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5106, __extension__ __PRETTY_FUNCTION__)); | ||||||
5107 | auto &MRI = *MIB.getMRI(); | ||||||
5108 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||||
5109 | unsigned Size = Ty.getSizeInBits(); | ||||||
5110 | assert((Size == 64 || Size == 128) &&(static_cast <bool> ((Size == 64 || Size == 128) && "Destination must be 64 bits or 128 bits?") ? void (0) : __assert_fail ("(Size == 64 || Size == 128) && \"Destination must be 64 bits or 128 bits?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5111, __extension__ __PRETTY_FUNCTION__)) | ||||||
5111 | "Destination must be 64 bits or 128 bits?")(static_cast <bool> ((Size == 64 || Size == 128) && "Destination must be 64 bits or 128 bits?") ? void (0) : __assert_fail ("(Size == 64 || Size == 128) && \"Destination must be 64 bits or 128 bits?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5111, __extension__ __PRETTY_FUNCTION__)); | ||||||
5112 | unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; | ||||||
5113 | auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg(); | ||||||
5114 | assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?")(static_cast <bool> (MRI.getType(Ptr).isPointer() && "Expected a pointer type?") ? void (0) : __assert_fail ("MRI.getType(Ptr).isPointer() && \"Expected a pointer type?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5114, __extension__ __PRETTY_FUNCTION__)); | ||||||
5115 | auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr}); | ||||||
5116 | Load.cloneMemRefs(I); | ||||||
5117 | constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); | ||||||
5118 | Register SelectedLoadDst = Load->getOperand(0).getReg(); | ||||||
5119 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { | ||||||
5120 | auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {}) | ||||||
5121 | .addReg(SelectedLoadDst, 0, SubReg + Idx); | ||||||
5122 | // Emit the subreg copies and immediately select them. | ||||||
5123 | // FIXME: We should refactor our copy code into an emitCopy helper and | ||||||
5124 | // clean up uses of this pattern elsewhere in the selector. | ||||||
5125 | selectCopy(*Vec, TII, MRI, TRI, RBI); | ||||||
5126 | } | ||||||
5127 | return true; | ||||||
5128 | } | ||||||
5129 | |||||||
5130 | bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( | ||||||
5131 | MachineInstr &I, MachineRegisterInfo &MRI) { | ||||||
5132 | // Find the intrinsic ID. | ||||||
5133 | unsigned IntrinID = findIntrinsicID(I); | ||||||
5134 | if (!IntrinID) | ||||||
5135 | return false; | ||||||
5136 | |||||||
5137 | const LLT S8 = LLT::scalar(8); | ||||||
5138 | const LLT S16 = LLT::scalar(16); | ||||||
5139 | const LLT S32 = LLT::scalar(32); | ||||||
5140 | const LLT S64 = LLT::scalar(64); | ||||||
5141 | const LLT P0 = LLT::pointer(0, 64); | ||||||
5142 | // Select the instruction. | ||||||
5143 | switch (IntrinID) { | ||||||
5144 | default: | ||||||
5145 | return false; | ||||||
5146 | case Intrinsic::aarch64_ldxp: | ||||||
5147 | case Intrinsic::aarch64_ldaxp: { | ||||||
5148 | auto NewI = MIB.buildInstr( | ||||||
5149 | IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, | ||||||
5150 | {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, | ||||||
5151 | {I.getOperand(3)}); | ||||||
5152 | NewI.cloneMemRefs(I); | ||||||
5153 | constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); | ||||||
5154 | break; | ||||||
5155 | } | ||||||
5156 | case Intrinsic::trap: | ||||||
5157 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); | ||||||
5158 | break; | ||||||
5159 | case Intrinsic::debugtrap: | ||||||
5160 | MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); | ||||||
5161 | break; | ||||||
5162 | case Intrinsic::ubsantrap: | ||||||
5163 | MIB.buildInstr(AArch64::BRK, {}, {}) | ||||||
5164 | .addImm(I.getOperand(1).getImm() | ('U' << 8)); | ||||||
5165 | break; | ||||||
5166 | case Intrinsic::aarch64_neon_ld2: { | ||||||
5167 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||||
5168 | unsigned Opc = 0; | ||||||
5169 | if (Ty == LLT::fixed_vector(8, S8)) | ||||||
5170 | Opc = AArch64::LD2Twov8b; | ||||||
5171 | else if (Ty == LLT::fixed_vector(16, S8)) | ||||||
5172 | Opc = AArch64::LD2Twov16b; | ||||||
5173 | else if (Ty == LLT::fixed_vector(4, S16)) | ||||||
5174 | Opc = AArch64::LD2Twov4h; | ||||||
5175 | else if (Ty == LLT::fixed_vector(8, S16)) | ||||||
5176 | Opc = AArch64::LD2Twov8h; | ||||||
5177 | else if (Ty == LLT::fixed_vector(2, S32)) | ||||||
5178 | Opc = AArch64::LD2Twov2s; | ||||||
5179 | else if (Ty == LLT::fixed_vector(4, S32)) | ||||||
5180 | Opc = AArch64::LD2Twov4s; | ||||||
5181 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | ||||||
5182 | Opc = AArch64::LD2Twov2d; | ||||||
5183 | else if (Ty == S64 || Ty == P0) | ||||||
5184 | Opc = AArch64::LD1Twov1d; | ||||||
5185 | else | ||||||
5186 | llvm_unreachable("Unexpected type for ld2!")::llvm::llvm_unreachable_internal("Unexpected type for ld2!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5186); | ||||||
5187 | selectVectorLoadIntrinsic(Opc, 2, I); | ||||||
5188 | break; | ||||||
5189 | } | ||||||
5190 | case Intrinsic::aarch64_neon_ld4: { | ||||||
5191 | LLT Ty = MRI.getType(I.getOperand(0).getReg()); | ||||||
5192 | unsigned Opc = 0; | ||||||
5193 | if (Ty == LLT::fixed_vector(8, S8)) | ||||||
5194 | Opc = AArch64::LD4Fourv8b; | ||||||
5195 | else if (Ty == LLT::fixed_vector(16, S8)) | ||||||
5196 | Opc = AArch64::LD4Fourv16b; | ||||||
5197 | else if (Ty == LLT::fixed_vector(4, S16)) | ||||||
5198 | Opc = AArch64::LD4Fourv4h; | ||||||
5199 | else if (Ty == LLT::fixed_vector(8, S16)) | ||||||
5200 | Opc = AArch64::LD4Fourv8h; | ||||||
5201 | else if (Ty == LLT::fixed_vector(2, S32)) | ||||||
5202 | Opc = AArch64::LD4Fourv2s; | ||||||
5203 | else if (Ty == LLT::fixed_vector(4, S32)) | ||||||
5204 | Opc = AArch64::LD4Fourv4s; | ||||||
5205 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | ||||||
5206 | Opc = AArch64::LD4Fourv2d; | ||||||
5207 | else if (Ty == S64 || Ty == P0) | ||||||
5208 | Opc = AArch64::LD1Fourv1d; | ||||||
5209 | else | ||||||
5210 | llvm_unreachable("Unexpected type for ld4!")::llvm::llvm_unreachable_internal("Unexpected type for ld4!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5210); | ||||||
5211 | selectVectorLoadIntrinsic(Opc, 4, I); | ||||||
5212 | break; | ||||||
5213 | } | ||||||
5214 | case Intrinsic::aarch64_neon_st2: { | ||||||
5215 | Register Src1 = I.getOperand(1).getReg(); | ||||||
5216 | Register Src2 = I.getOperand(2).getReg(); | ||||||
5217 | Register Ptr = I.getOperand(3).getReg(); | ||||||
5218 | LLT Ty = MRI.getType(Src1); | ||||||
5219 | unsigned Opc; | ||||||
5220 | if (Ty == LLT::fixed_vector(8, S8)) | ||||||
5221 | Opc = AArch64::ST2Twov8b; | ||||||
5222 | else if (Ty == LLT::fixed_vector(16, S8)) | ||||||
5223 | Opc = AArch64::ST2Twov16b; | ||||||
5224 | else if (Ty == LLT::fixed_vector(4, S16)) | ||||||
5225 | Opc = AArch64::ST2Twov4h; | ||||||
5226 | else if (Ty == LLT::fixed_vector(8, S16)) | ||||||
5227 | Opc = AArch64::ST2Twov8h; | ||||||
5228 | else if (Ty == LLT::fixed_vector(2, S32)) | ||||||
5229 | Opc = AArch64::ST2Twov2s; | ||||||
5230 | else if (Ty == LLT::fixed_vector(4, S32)) | ||||||
5231 | Opc = AArch64::ST2Twov4s; | ||||||
5232 | else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) | ||||||
5233 | Opc = AArch64::ST2Twov2d; | ||||||
5234 | else if (Ty == S64 || Ty == P0) | ||||||
5235 | Opc = AArch64::ST1Twov1d; | ||||||
5236 | else | ||||||
5237 | llvm_unreachable("Unexpected type for st2!")::llvm::llvm_unreachable_internal("Unexpected type for st2!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5237); | ||||||
5238 | SmallVector<Register, 2> Regs = {Src1, Src2}; | ||||||
5239 | Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB) | ||||||
5240 | : createDTuple(Regs, MIB); | ||||||
5241 | auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr}); | ||||||
5242 | Store.cloneMemRefs(I); | ||||||
5243 | constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); | ||||||
5244 | break; | ||||||
5245 | } | ||||||
5246 | } | ||||||
5247 | |||||||
5248 | I.eraseFromParent(); | ||||||
5249 | return true; | ||||||
5250 | } | ||||||
5251 | |||||||
// Select G_INTRINSIC instructions (intrinsics with a return value) that need
// custom handling. Returns true (and erases I) on success; returns false to
// let the rest of the selector try the instruction.
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    // Operand 0 is the result; operand 1 is the intrinsic ID, so the source
    // value is operand 2.
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIB.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIB.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    // Operand 2 is the frame depth: 0 means the current frame.
    unsigned Depth = I.getOperand(2).getImm();
    Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    // Fast path: the return address of the current frame is just LR (cached
    // in MFReturnAddr), possibly with its pointer-authentication code
    // stripped.
    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
                                                AArch64::GPR64RegClass);
      }

      if (STI.hasPAuth()) {
        // XPACI strips the authentication code from an arbitrary register.
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
      } else {
        // Without FEAT_PAuth, only the hint-space XPACLRI is available; it
        // operates on LR in place, so round-trip the value through LR.
        MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }

      I.eraseFromParent();
      return true;
    }

    MFI.setFrameAddressIsTaken(true);
    // Walk up Depth frames by loading the saved frame pointer at [fp, #0]
    // each step.
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy({DstReg}, {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      // The saved return address sits one X-register slot above the frame
      // pointer (LDRXui imm 1 is a scaled offset of 8 bytes).
      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
      } else {
        // As above: XPACLRI only works on LR, so load into LR, strip, copy.
        MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
            .addImm(1);
        MIB.buildInstr(AArch64::XPACLRI);
        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
      }
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::swift_async_context_addr:
    // The Swift async context address is fp - 8 (SUBXri with immediate 8,
    // shift 0).
    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
                              {Register(AArch64::FP)})
                   .addImm(8)
                   .addImm(0);
    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);

    MF->getFrameInfo().setFrameAddressIsTaken(true);
    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
    I.eraseFromParent();
    return true;
  }
  return false;
}
5374 | |||||||
5375 | InstructionSelector::ComplexRendererFns | ||||||
5376 | AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { | ||||||
5377 | auto MaybeImmed = getImmedFromMO(Root); | ||||||
5378 | if (MaybeImmed == None || *MaybeImmed > 31) | ||||||
5379 | return None; | ||||||
5380 | uint64_t Enc = (32 - *MaybeImmed) & 0x1f; | ||||||
5381 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||||
5382 | } | ||||||
5383 | |||||||
5384 | InstructionSelector::ComplexRendererFns | ||||||
5385 | AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { | ||||||
5386 | auto MaybeImmed = getImmedFromMO(Root); | ||||||
5387 | if (MaybeImmed == None || *MaybeImmed > 31) | ||||||
5388 | return None; | ||||||
5389 | uint64_t Enc = 31 - *MaybeImmed; | ||||||
5390 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||||
5391 | } | ||||||
5392 | |||||||
5393 | InstructionSelector::ComplexRendererFns | ||||||
5394 | AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { | ||||||
5395 | auto MaybeImmed = getImmedFromMO(Root); | ||||||
5396 | if (MaybeImmed == None || *MaybeImmed > 63) | ||||||
5397 | return None; | ||||||
5398 | uint64_t Enc = (64 - *MaybeImmed) & 0x3f; | ||||||
5399 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||||
5400 | } | ||||||
5401 | |||||||
5402 | InstructionSelector::ComplexRendererFns | ||||||
5403 | AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { | ||||||
5404 | auto MaybeImmed = getImmedFromMO(Root); | ||||||
5405 | if (MaybeImmed == None || *MaybeImmed > 63) | ||||||
5406 | return None; | ||||||
5407 | uint64_t Enc = 63 - *MaybeImmed; | ||||||
5408 | return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; | ||||||
5409 | } | ||||||
5410 | |||||||
5411 | /// Helper to select an immediate value that can be represented as a 12-bit | ||||||
5412 | /// value shifted left by either 0 or 12. If it is possible to do so, return | ||||||
5413 | /// the immediate and shift value. If not, return None. | ||||||
5414 | /// | ||||||
5415 | /// Used by selectArithImmed and selectNegArithImmed. | ||||||
5416 | InstructionSelector::ComplexRendererFns | ||||||
5417 | AArch64InstructionSelector::select12BitValueWithLeftShift( | ||||||
5418 | uint64_t Immed) const { | ||||||
5419 | unsigned ShiftAmt; | ||||||
5420 | if (Immed >> 12 == 0) { | ||||||
5421 | ShiftAmt = 0; | ||||||
5422 | } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { | ||||||
5423 | ShiftAmt = 12; | ||||||
5424 | Immed = Immed >> 12; | ||||||
5425 | } else | ||||||
5426 | return None; | ||||||
5427 | |||||||
5428 | unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); | ||||||
5429 | return {{ | ||||||
5430 | [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, | ||||||
5431 | [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, | ||||||
5432 | }}; | ||||||
5433 | } | ||||||
5434 | |||||||
5435 | /// SelectArithImmed - Select an immediate value that can be represented as | ||||||
5436 | /// a 12-bit value shifted left by either 0 or 12. If so, return true with | ||||||
5437 | /// Val set to the 12-bit value and Shift set to the shifter operand. | ||||||
5438 | InstructionSelector::ComplexRendererFns | ||||||
5439 | AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { | ||||||
5440 | // This function is called from the addsub_shifted_imm ComplexPattern, | ||||||
5441 | // which lists [imm] as the list of opcode it's interested in, however | ||||||
5442 | // we still need to check whether the operand is actually an immediate | ||||||
5443 | // here because the ComplexPattern opcode list is only used in | ||||||
5444 | // root-level opcode matching. | ||||||
5445 | auto MaybeImmed = getImmedFromMO(Root); | ||||||
5446 | if (MaybeImmed == None) | ||||||
5447 | return None; | ||||||
5448 | return select12BitValueWithLeftShift(*MaybeImmed); | ||||||
5449 | } | ||||||
5450 | |||||||
5451 | /// SelectNegArithImmed - As above, but negates the value before trying to | ||||||
5452 | /// select it. | ||||||
5453 | InstructionSelector::ComplexRendererFns | ||||||
5454 | AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { | ||||||
5455 | // We need a register here, because we need to know if we have a 64 or 32 | ||||||
5456 | // bit immediate. | ||||||
5457 | if (!Root.isReg()) | ||||||
5458 | return None; | ||||||
5459 | auto MaybeImmed = getImmedFromMO(Root); | ||||||
5460 | if (MaybeImmed == None) | ||||||
5461 | return None; | ||||||
5462 | uint64_t Immed = *MaybeImmed; | ||||||
5463 | |||||||
5464 | // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" | ||||||
5465 | // have the opposite effect on the C flag, so this pattern mustn't match under | ||||||
5466 | // those circumstances. | ||||||
5467 | if (Immed == 0) | ||||||
5468 | return None; | ||||||
5469 | |||||||
5470 | // Check if we're dealing with a 32-bit type on the root or a 64-bit type on | ||||||
5471 | // the root. | ||||||
5472 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||||
5473 | if (MRI.getType(Root.getReg()).getSizeInBits() == 32) | ||||||
5474 | Immed = ~((uint32_t)Immed) + 1; | ||||||
5475 | else | ||||||
5476 | Immed = ~Immed + 1ULL; | ||||||
5477 | |||||||
5478 | if (Immed & 0xFFFFFFFFFF000000ULL) | ||||||
5479 | return None; | ||||||
5480 | |||||||
5481 | Immed &= 0xFFFFFFULL; | ||||||
5482 | return select12BitValueWithLeftShift(Immed); | ||||||
5483 | } | ||||||
5484 | |||||||
5485 | /// Return true if it is worth folding MI into an extended register. That is, | ||||||
5486 | /// if it's safe to pull it into the addressing mode of a load or store as a | ||||||
5487 | /// shift. | ||||||
5488 | bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( | ||||||
5489 | MachineInstr &MI, const MachineRegisterInfo &MRI) const { | ||||||
5490 | // Always fold if there is one use, or if we're optimizing for size. | ||||||
5491 | Register DefReg = MI.getOperand(0).getReg(); | ||||||
5492 | if (MRI.hasOneNonDBGUse(DefReg) || | ||||||
5493 | MI.getParent()->getParent()->getFunction().hasOptSize()) | ||||||
5494 | return true; | ||||||
5495 | |||||||
5496 | // It's better to avoid folding and recomputing shifts when we don't have a | ||||||
5497 | // fastpath. | ||||||
5498 | if (!STI.hasLSLFast()) | ||||||
5499 | return false; | ||||||
5500 | |||||||
5501 | // We have a fastpath, so folding a shift in and potentially computing it | ||||||
5502 | // many times may be beneficial. Check if this is only used in memory ops. | ||||||
5503 | // If it is, then we should fold. | ||||||
5504 | return all_of(MRI.use_nodbg_instructions(DefReg), | ||||||
5505 | [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); | ||||||
5506 | } | ||||||
5507 | |||||||
5508 | static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { | ||||||
5509 | switch (Type) { | ||||||
5510 | case AArch64_AM::SXTB: | ||||||
5511 | case AArch64_AM::SXTH: | ||||||
5512 | case AArch64_AM::SXTW: | ||||||
5513 | return true; | ||||||
5514 | default: | ||||||
5515 | return false; | ||||||
5516 | } | ||||||
5517 | } | ||||||
5518 | |||||||
// Shared worker for the shifted-register addressing modes. Given the base and
// offset operands of a G_PTR_ADD, try to prove that the offset is a shift (or
// equivalent multiply) by exactly log2(SizeInBytes), optionally underneath an
// extend when WantsExt is set. On success, return renderers for
// [base, offset {, extend} , shift #1]; otherwise return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand")(static_cast <bool> (Base.isReg() && "Expected base to be a register operand" ) ? void (0) : __assert_fail ("Base.isReg() && \"Expected base to be a register operand\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5523, __extension__ __PRETTY_FUNCTION__));
  assert(Offset.isReg() && "Expected offset to be a register operand")(static_cast <bool> (Offset.isReg() && "Expected offset to be a register operand" ) ? void (0) : __assert_fail ("Offset.isReg() && \"Expected offset to be a register operand\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 5524, __extension__ __PRETTY_FUNCTION__));

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  if (!OffsetInst)
    return None;

  // The offset must be produced by a shift or an equivalent multiply,
  // possibly underneath a zero-extend (only when the caller wants extends).
  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return None;

    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return None;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return None;

      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return None;
      // Fold the extend away: offset from the extend's input instead.
      OffsetReg = ExtInst->getOperand(1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}
5626 | |||||||
5627 | /// This is used for computing addresses like this: | ||||||
5628 | /// | ||||||
5629 | /// ldr x1, [x2, x3, lsl #3] | ||||||
5630 | /// | ||||||
5631 | /// Where x2 is the base register, and x3 is an offset register. The shift-left | ||||||
5632 | /// is a constant value specific to this load instruction. That is, we'll never | ||||||
5633 | /// see anything other than a 3 here (which corresponds to the size of the | ||||||
5634 | /// element being loaded.) | ||||||
5635 | InstructionSelector::ComplexRendererFns | ||||||
5636 | AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( | ||||||
5637 | MachineOperand &Root, unsigned SizeInBytes) const { | ||||||
5638 | if (!Root.isReg()) | ||||||
5639 | return None; | ||||||
5640 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||||
5641 | |||||||
5642 | // We want to find something like this: | ||||||
5643 | // | ||||||
5644 | // val = G_CONSTANT LegalShiftVal | ||||||
5645 | // shift = G_SHL off_reg val | ||||||
5646 | // ptr = G_PTR_ADD base_reg shift | ||||||
5647 | // x = G_LOAD ptr | ||||||
5648 | // | ||||||
5649 | // And fold it into this addressing mode: | ||||||
5650 | // | ||||||
5651 | // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] | ||||||
5652 | |||||||
5653 | // Check if we can find the G_PTR_ADD. | ||||||
5654 | MachineInstr *PtrAdd = | ||||||
5655 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | ||||||
5656 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) | ||||||
5657 | return None; | ||||||
5658 | |||||||
5659 | // Now, try to match an opcode which will match our specific offset. | ||||||
5660 | // We want a G_SHL or a G_MUL. | ||||||
5661 | MachineInstr *OffsetInst = | ||||||
5662 | getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); | ||||||
5663 | return selectExtendedSHL(Root, PtrAdd->getOperand(1), | ||||||
5664 | OffsetInst->getOperand(0), SizeInBytes, | ||||||
5665 | /*WantsExt=*/false); | ||||||
5666 | } | ||||||
5667 | |||||||
5668 | /// This is used for computing addresses like this: | ||||||
5669 | /// | ||||||
5670 | /// ldr x1, [x2, x3] | ||||||
5671 | /// | ||||||
5672 | /// Where x2 is the base register, and x3 is an offset register. | ||||||
5673 | /// | ||||||
5674 | /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, | ||||||
5675 | /// this will do so. Otherwise, it will return None. | ||||||
5676 | InstructionSelector::ComplexRendererFns | ||||||
5677 | AArch64InstructionSelector::selectAddrModeRegisterOffset( | ||||||
5678 | MachineOperand &Root) const { | ||||||
5679 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||||
5680 | |||||||
5681 | // We need a GEP. | ||||||
5682 | MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); | ||||||
5683 | if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) | ||||||
5684 | return None; | ||||||
5685 | |||||||
5686 | // If this is used more than once, let's not bother folding. | ||||||
5687 | // TODO: Check if they are memory ops. If they are, then we can still fold | ||||||
5688 | // without having to recompute anything. | ||||||
5689 | if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) | ||||||
5690 | return None; | ||||||
5691 | |||||||
5692 | // Base is the GEP's LHS, offset is its RHS. | ||||||
5693 | return {{[=](MachineInstrBuilder &MIB) { | ||||||
5694 | MIB.addUse(Gep->getOperand(1).getReg()); | ||||||
5695 | }, | ||||||
5696 | [=](MachineInstrBuilder &MIB) { | ||||||
5697 | MIB.addUse(Gep->getOperand(2).getReg()); | ||||||
5698 | }, | ||||||
5699 | [=](MachineInstrBuilder &MIB) { | ||||||
5700 | // Need to add both immediates here to make sure that they are both | ||||||
5701 | // added to the instruction. | ||||||
5702 | MIB.addImm(0); | ||||||
5703 | MIB.addImm(0); | ||||||
5704 | }}}; | ||||||
5705 | } | ||||||
5706 | |||||||
5707 | /// This is intended to be equivalent to selectAddrModeXRO in | ||||||
5708 | /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. | ||||||
5709 | InstructionSelector::ComplexRendererFns | ||||||
5710 | AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, | ||||||
5711 | unsigned SizeInBytes) const { | ||||||
5712 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||||
5713 | if (!Root.isReg()) | ||||||
5714 | return None; | ||||||
5715 | MachineInstr *PtrAdd = | ||||||
5716 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | ||||||
5717 | if (!PtrAdd) | ||||||
5718 | return None; | ||||||
5719 | |||||||
5720 | // Check for an immediates which cannot be encoded in the [base + imm] | ||||||
5721 | // addressing mode, and can't be encoded in an add/sub. If this happens, we'll | ||||||
5722 | // end up with code like: | ||||||
5723 | // | ||||||
5724 | // mov x0, wide | ||||||
5725 | // add x1 base, x0 | ||||||
5726 | // ldr x2, [x1, x0] | ||||||
5727 | // | ||||||
5728 | // In this situation, we can use the [base, xreg] addressing mode to save an | ||||||
5729 | // add/sub: | ||||||
5730 | // | ||||||
5731 | // mov x0, wide | ||||||
5732 | // ldr x2, [base, x0] | ||||||
5733 | auto ValAndVReg = | ||||||
5734 | getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); | ||||||
5735 | if (ValAndVReg) { | ||||||
5736 | unsigned Scale = Log2_32(SizeInBytes); | ||||||
5737 | int64_t ImmOff = ValAndVReg->Value.getSExtValue(); | ||||||
5738 | |||||||
5739 | // Skip immediates that can be selected in the load/store addresing | ||||||
5740 | // mode. | ||||||
5741 | if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && | ||||||
5742 | ImmOff < (0x1000 << Scale)) | ||||||
5743 | return None; | ||||||
5744 | |||||||
5745 | // Helper lambda to decide whether or not it is preferable to emit an add. | ||||||
5746 | auto isPreferredADD = [](int64_t ImmOff) { | ||||||
5747 | // Constants in [0x0, 0xfff] can be encoded in an add. | ||||||
5748 | if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) | ||||||
5749 | return true; | ||||||
5750 | |||||||
5751 | // Can it be encoded in an add lsl #12? | ||||||
5752 | if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) | ||||||
5753 | return false; | ||||||
5754 | |||||||
5755 | // It can be encoded in an add lsl #12, but we may not want to. If it is | ||||||
5756 | // possible to select this as a single movz, then prefer that. A single | ||||||
5757 | // movz is faster than an add with a shift. | ||||||
5758 | return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && | ||||||
5759 | (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; | ||||||
5760 | }; | ||||||
5761 | |||||||
5762 | // If the immediate can be encoded in a single add/sub, then bail out. | ||||||
5763 | if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) | ||||||
5764 | return None; | ||||||
5765 | } | ||||||
5766 | |||||||
5767 | // Try to fold shifts into the addressing mode. | ||||||
5768 | auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); | ||||||
5769 | if (AddrModeFns) | ||||||
5770 | return AddrModeFns; | ||||||
5771 | |||||||
5772 | // If that doesn't work, see if it's possible to fold in registers from | ||||||
5773 | // a GEP. | ||||||
5774 | return selectAddrModeRegisterOffset(Root); | ||||||
5775 | } | ||||||
5776 | |||||||
5777 | /// This is used for computing addresses like this: | ||||||
5778 | /// | ||||||
5779 | /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] | ||||||
5780 | /// | ||||||
5781 | /// Where we have a 64-bit base register, a 32-bit offset register, and an | ||||||
5782 | /// extend (which may or may not be signed). | ||||||
5783 | InstructionSelector::ComplexRendererFns | ||||||
5784 | AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, | ||||||
5785 | unsigned SizeInBytes) const { | ||||||
5786 | MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); | ||||||
5787 | |||||||
5788 | MachineInstr *PtrAdd = | ||||||
5789 | getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); | ||||||
5790 | if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) | ||||||
5791 | return None; | ||||||
5792 | |||||||
5793 | MachineOperand &LHS = PtrAdd->getOperand(1); | ||||||
5794 | MachineOperand &RHS = PtrAdd->getOperand(2); | ||||||
5795 | MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); | ||||||
5796 | |||||||
5797 | // The first case is the same as selectAddrModeXRO, except we need an extend. | ||||||
5798 | // In this case, we try to find a shift and extend, and fold them into the | ||||||
5799 | // addressing mode. | ||||||
5800 | // | ||||||
5801 | // E.g. | ||||||
5802 | // | ||||||
5803 | // off_reg = G_Z/S/ANYEXT ext_reg | ||||||
5804 | // val = G_CONSTANT LegalShiftVal | ||||||
5805 | // shift = G_SHL off_reg val | ||||||
5806 | // ptr = G_PTR_ADD base_reg shift | ||||||
5807 | // x = G_LOAD ptr | ||||||
5808 | // | ||||||
5809 | // In this case we can get a load like this: | ||||||
5810 | // | ||||||
5811 | // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] | ||||||
5812 | auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), | ||||||
5813 | SizeInBytes, /*WantsExt=*/true); | ||||||
5814 | if (ExtendedShl) | ||||||
5815 | return ExtendedShl; | ||||||
5816 | |||||||
5817 | // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. | ||||||
5818 | // | ||||||
5819 | // e.g. | ||||||
5820 | // ldr something, [base_reg, ext_reg, sxtw] | ||||||
5821 | if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) | ||||||
5822 | return None; | ||||||
5823 | |||||||
5824 | // Check if this is an extend. We'll get an extend type if it is. | ||||||
5825 | AArch64_AM::ShiftExtendType Ext = | ||||||
5826 | getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); | ||||||
5827 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||||
5828 | return None; | ||||||
5829 | |||||||
5830 | // Need a 32-bit wide register. | ||||||
5831 | MachineIRBuilder MIB(*PtrAdd); | ||||||
5832 | Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), | ||||||
5833 | AArch64::GPR32RegClass, MIB); | ||||||
5834 | unsigned SignExtend = Ext == AArch64_AM::SXTW; | ||||||
5835 | |||||||
5836 | // Base is LHS, offset is ExtReg. | ||||||
5837 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, | ||||||
5838 | [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, | ||||||
5839 | [=](MachineInstrBuilder &MIB) { | ||||||
5840 | MIB.addImm(SignExtend); | ||||||
5841 | MIB.addImm(0); | ||||||
5842 | }}}; | ||||||
5843 | } | ||||||
5844 | |||||||
5845 | /// Select a "register plus unscaled signed 9-bit immediate" address. This | ||||||
5846 | /// should only match when there is an offset that is not valid for a scaled | ||||||
5847 | /// immediate addressing mode. The "Size" argument is the size in bytes of the | ||||||
5848 | /// memory reference, which is needed here to know what is valid for a scaled | ||||||
5849 | /// immediate. | ||||||
5850 | InstructionSelector::ComplexRendererFns | ||||||
5851 | AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, | ||||||
5852 | unsigned Size) const { | ||||||
5853 | MachineRegisterInfo &MRI = | ||||||
5854 | Root.getParent()->getParent()->getParent()->getRegInfo(); | ||||||
5855 | |||||||
5856 | if (!Root.isReg()) | ||||||
5857 | return None; | ||||||
5858 | |||||||
5859 | if (!isBaseWithConstantOffset(Root, MRI)) | ||||||
5860 | return None; | ||||||
5861 | |||||||
5862 | MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); | ||||||
5863 | if (!RootDef) | ||||||
5864 | return None; | ||||||
5865 | |||||||
5866 | MachineOperand &OffImm = RootDef->getOperand(2); | ||||||
5867 | if (!OffImm.isReg()) | ||||||
5868 | return None; | ||||||
5869 | MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); | ||||||
5870 | if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) | ||||||
5871 | return None; | ||||||
5872 | int64_t RHSC; | ||||||
5873 | MachineOperand &RHSOp1 = RHS->getOperand(1); | ||||||
5874 | if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) | ||||||
5875 | return None; | ||||||
5876 | RHSC = RHSOp1.getCImm()->getSExtValue(); | ||||||
5877 | |||||||
5878 | // If the offset is valid as a scaled immediate, don't match here. | ||||||
5879 | if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) | ||||||
5880 | return None; | ||||||
5881 | if (RHSC >= -256 && RHSC < 256) { | ||||||
5882 | MachineOperand &Base = RootDef->getOperand(1); | ||||||
5883 | return {{ | ||||||
5884 | [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, | ||||||
5885 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, | ||||||
5886 | }}; | ||||||
5887 | } | ||||||
5888 | return None; | ||||||
5889 | } | ||||||
5890 | |||||||
5891 | InstructionSelector::ComplexRendererFns | ||||||
5892 | AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, | ||||||
5893 | unsigned Size, | ||||||
5894 | MachineRegisterInfo &MRI) const { | ||||||
5895 | if (RootDef.getOpcode() != AArch64::G_ADD_LOW) | ||||||
5896 | return None; | ||||||
5897 | MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); | ||||||
5898 | if (Adrp.getOpcode() != AArch64::ADRP) | ||||||
5899 | return None; | ||||||
5900 | |||||||
5901 | // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. | ||||||
5902 | auto Offset = Adrp.getOperand(1).getOffset(); | ||||||
5903 | if (Offset % Size != 0) | ||||||
5904 | return None; | ||||||
5905 | |||||||
5906 | auto GV = Adrp.getOperand(1).getGlobal(); | ||||||
5907 | if (GV->isThreadLocal()) | ||||||
5908 | return None; | ||||||
5909 | |||||||
5910 | auto &MF = *RootDef.getParent()->getParent(); | ||||||
5911 | if (GV->getPointerAlignment(MF.getDataLayout()) < Size) | ||||||
5912 | return None; | ||||||
5913 | |||||||
5914 | unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); | ||||||
5915 | MachineIRBuilder MIRBuilder(RootDef); | ||||||
5916 | Register AdrpReg = Adrp.getOperand(0).getReg(); | ||||||
5917 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, | ||||||
5918 | [=](MachineInstrBuilder &MIB) { | ||||||
5919 | MIB.addGlobalAddress(GV, Offset, | ||||||
5920 | OpFlags | AArch64II::MO_PAGEOFF | | ||||||
5921 | AArch64II::MO_NC); | ||||||
5922 | }}}; | ||||||
5923 | } | ||||||
5924 | |||||||
5925 | /// Select a "register plus scaled unsigned 12-bit immediate" address. The | ||||||
5926 | /// "Size" argument is the size in bytes of the memory reference, which | ||||||
5927 | /// determines the scale. | ||||||
5928 | InstructionSelector::ComplexRendererFns | ||||||
5929 | AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, | ||||||
5930 | unsigned Size) const { | ||||||
5931 | MachineFunction &MF = *Root.getParent()->getParent()->getParent(); | ||||||
5932 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||||
5933 | |||||||
5934 | if (!Root.isReg()) | ||||||
5935 | return None; | ||||||
5936 | |||||||
5937 | MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); | ||||||
5938 | if (!RootDef) | ||||||
5939 | return None; | ||||||
5940 | |||||||
5941 | if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { | ||||||
5942 | return {{ | ||||||
5943 | [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, | ||||||
5944 | [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, | ||||||
5945 | }}; | ||||||
5946 | } | ||||||
5947 | |||||||
5948 | CodeModel::Model CM = MF.getTarget().getCodeModel(); | ||||||
5949 | // Check if we can fold in the ADD of small code model ADRP + ADD address. | ||||||
5950 | if (CM == CodeModel::Small) { | ||||||
5951 | auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); | ||||||
5952 | if (OpFns) | ||||||
5953 | return OpFns; | ||||||
5954 | } | ||||||
5955 | |||||||
5956 | if (isBaseWithConstantOffset(Root, MRI)) { | ||||||
5957 | MachineOperand &LHS = RootDef->getOperand(1); | ||||||
5958 | MachineOperand &RHS = RootDef->getOperand(2); | ||||||
5959 | MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); | ||||||
5960 | MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); | ||||||
5961 | if (LHSDef && RHSDef) { | ||||||
5962 | int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); | ||||||
5963 | unsigned Scale = Log2_32(Size); | ||||||
5964 | if ((RHSC & (Size - 1)) == 0 && RHSC
| ||||||
| |||||||
5965 | if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) | ||||||
5966 | return {{ | ||||||
5967 | [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, | ||||||
5968 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, | ||||||
5969 | }}; | ||||||
5970 | |||||||
5971 | return {{ | ||||||
5972 | [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, | ||||||
5973 | [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, | ||||||
5974 | }}; | ||||||
5975 | } | ||||||
5976 | } | ||||||
5977 | } | ||||||
5978 | |||||||
5979 | // Before falling back to our general case, check if the unscaled | ||||||
5980 | // instructions can handle this. If so, that's preferable. | ||||||
5981 | if (selectAddrModeUnscaled(Root, Size).hasValue()) | ||||||
5982 | return None; | ||||||
5983 | |||||||
5984 | return {{ | ||||||
5985 | [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, | ||||||
5986 | [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, | ||||||
5987 | }}; | ||||||
5988 | } | ||||||
5989 | |||||||
5990 | /// Given a shift instruction, return the correct shift type for that | ||||||
5991 | /// instruction. | ||||||
5992 | static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { | ||||||
5993 | // TODO: Handle AArch64_AM::ROR | ||||||
5994 | switch (MI.getOpcode()) { | ||||||
5995 | default: | ||||||
5996 | return AArch64_AM::InvalidShiftExtend; | ||||||
5997 | case TargetOpcode::G_SHL: | ||||||
5998 | return AArch64_AM::LSL; | ||||||
5999 | case TargetOpcode::G_LSHR: | ||||||
6000 | return AArch64_AM::LSR; | ||||||
6001 | case TargetOpcode::G_ASHR: | ||||||
6002 | return AArch64_AM::ASR; | ||||||
6003 | } | ||||||
6004 | } | ||||||
6005 | |||||||
6006 | /// Select a "shifted register" operand. If the value is not shifted, set the | ||||||
6007 | /// shift operand to a default value of "lsl 0". | ||||||
6008 | /// | ||||||
6009 | /// TODO: Allow shifted register to be rotated in logical instructions. | ||||||
6010 | InstructionSelector::ComplexRendererFns | ||||||
6011 | AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { | ||||||
6012 | if (!Root.isReg()) | ||||||
6013 | return None; | ||||||
6014 | MachineRegisterInfo &MRI = | ||||||
6015 | Root.getParent()->getParent()->getParent()->getRegInfo(); | ||||||
6016 | |||||||
6017 | // Check if the operand is defined by an instruction which corresponds to | ||||||
6018 | // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. | ||||||
6019 | // | ||||||
6020 | // TODO: Handle AArch64_AM::ROR for logical instructions. | ||||||
6021 | MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); | ||||||
6022 | if (!ShiftInst) | ||||||
6023 | return None; | ||||||
6024 | AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); | ||||||
6025 | if (ShType == AArch64_AM::InvalidShiftExtend) | ||||||
6026 | return None; | ||||||
6027 | if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) | ||||||
6028 | return None; | ||||||
6029 | |||||||
6030 | // Need an immediate on the RHS. | ||||||
6031 | MachineOperand &ShiftRHS = ShiftInst->getOperand(2); | ||||||
6032 | auto Immed = getImmedFromMO(ShiftRHS); | ||||||
6033 | if (!Immed) | ||||||
6034 | return None; | ||||||
6035 | |||||||
6036 | // We have something that we can fold. Fold in the shift's LHS and RHS into | ||||||
6037 | // the instruction. | ||||||
6038 | MachineOperand &ShiftLHS = ShiftInst->getOperand(1); | ||||||
6039 | Register ShiftReg = ShiftLHS.getReg(); | ||||||
6040 | |||||||
6041 | unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); | ||||||
6042 | unsigned Val = *Immed & (NumBits - 1); | ||||||
6043 | unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); | ||||||
6044 | |||||||
6045 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, | ||||||
6046 | [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; | ||||||
6047 | } | ||||||
6048 | |||||||
6049 | AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( | ||||||
6050 | MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { | ||||||
6051 | unsigned Opc = MI.getOpcode(); | ||||||
6052 | |||||||
6053 | // Handle explicit extend instructions first. | ||||||
6054 | if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { | ||||||
6055 | unsigned Size; | ||||||
6056 | if (Opc == TargetOpcode::G_SEXT) | ||||||
6057 | Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); | ||||||
6058 | else | ||||||
6059 | Size = MI.getOperand(2).getImm(); | ||||||
6060 | assert(Size != 64 && "Extend from 64 bits?")(static_cast <bool> (Size != 64 && "Extend from 64 bits?" ) ? void (0) : __assert_fail ("Size != 64 && \"Extend from 64 bits?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6060, __extension__ __PRETTY_FUNCTION__)); | ||||||
6061 | switch (Size) { | ||||||
6062 | case 8: | ||||||
6063 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; | ||||||
6064 | case 16: | ||||||
6065 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; | ||||||
6066 | case 32: | ||||||
6067 | return AArch64_AM::SXTW; | ||||||
6068 | default: | ||||||
6069 | return AArch64_AM::InvalidShiftExtend; | ||||||
6070 | } | ||||||
6071 | } | ||||||
6072 | |||||||
6073 | if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { | ||||||
6074 | unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); | ||||||
6075 | assert(Size != 64 && "Extend from 64 bits?")(static_cast <bool> (Size != 64 && "Extend from 64 bits?" ) ? void (0) : __assert_fail ("Size != 64 && \"Extend from 64 bits?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6075, __extension__ __PRETTY_FUNCTION__)); | ||||||
6076 | switch (Size) { | ||||||
6077 | case 8: | ||||||
6078 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; | ||||||
6079 | case 16: | ||||||
6080 | return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; | ||||||
6081 | case 32: | ||||||
6082 | return AArch64_AM::UXTW; | ||||||
6083 | default: | ||||||
6084 | return AArch64_AM::InvalidShiftExtend; | ||||||
6085 | } | ||||||
6086 | } | ||||||
6087 | |||||||
6088 | // Don't have an explicit extend. Try to handle a G_AND with a constant mask | ||||||
6089 | // on the RHS. | ||||||
6090 | if (Opc != TargetOpcode::G_AND) | ||||||
6091 | return AArch64_AM::InvalidShiftExtend; | ||||||
6092 | |||||||
6093 | Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); | ||||||
6094 | if (!MaybeAndMask) | ||||||
6095 | return AArch64_AM::InvalidShiftExtend; | ||||||
6096 | uint64_t AndMask = *MaybeAndMask; | ||||||
6097 | switch (AndMask) { | ||||||
6098 | default: | ||||||
6099 | return AArch64_AM::InvalidShiftExtend; | ||||||
6100 | case 0xFF: | ||||||
6101 | return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; | ||||||
6102 | case 0xFFFF: | ||||||
6103 | return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; | ||||||
6104 | case 0xFFFFFFFF: | ||||||
6105 | return AArch64_AM::UXTW; | ||||||
6106 | } | ||||||
6107 | } | ||||||
6108 | |||||||
6109 | Register AArch64InstructionSelector::moveScalarRegClass( | ||||||
6110 | Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { | ||||||
6111 | MachineRegisterInfo &MRI = *MIB.getMRI(); | ||||||
6112 | auto Ty = MRI.getType(Reg); | ||||||
6113 | assert(!Ty.isVector() && "Expected scalars only!")(static_cast <bool> (!Ty.isVector() && "Expected scalars only!" ) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected scalars only!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6113, __extension__ __PRETTY_FUNCTION__)); | ||||||
6114 | if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) | ||||||
6115 | return Reg; | ||||||
6116 | |||||||
6117 | // Create a copy and immediately select it. | ||||||
6118 | // FIXME: We should have an emitCopy function? | ||||||
6119 | auto Copy = MIB.buildCopy({&RC}, {Reg}); | ||||||
6120 | selectCopy(*Copy, TII, MRI, TRI, RBI); | ||||||
6121 | return Copy.getReg(0); | ||||||
6122 | } | ||||||
6123 | |||||||
6124 | /// Select an "extended register" operand. This operand folds in an extend | ||||||
6125 | /// followed by an optional left shift. | ||||||
6126 | InstructionSelector::ComplexRendererFns | ||||||
6127 | AArch64InstructionSelector::selectArithExtendedRegister( | ||||||
6128 | MachineOperand &Root) const { | ||||||
6129 | if (!Root.isReg()) | ||||||
6130 | return None; | ||||||
6131 | MachineRegisterInfo &MRI = | ||||||
6132 | Root.getParent()->getParent()->getParent()->getRegInfo(); | ||||||
6133 | |||||||
6134 | uint64_t ShiftVal = 0; | ||||||
6135 | Register ExtReg; | ||||||
6136 | AArch64_AM::ShiftExtendType Ext; | ||||||
6137 | MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); | ||||||
6138 | if (!RootDef) | ||||||
6139 | return None; | ||||||
6140 | |||||||
6141 | if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) | ||||||
6142 | return None; | ||||||
6143 | |||||||
6144 | // Check if we can fold a shift and an extend. | ||||||
6145 | if (RootDef->getOpcode() == TargetOpcode::G_SHL) { | ||||||
6146 | // Look for a constant on the RHS of the shift. | ||||||
6147 | MachineOperand &RHS = RootDef->getOperand(2); | ||||||
6148 | Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); | ||||||
6149 | if (!MaybeShiftVal) | ||||||
6150 | return None; | ||||||
6151 | ShiftVal = *MaybeShiftVal; | ||||||
6152 | if (ShiftVal > 4) | ||||||
6153 | return None; | ||||||
6154 | // Look for a valid extend instruction on the LHS of the shift. | ||||||
6155 | MachineOperand &LHS = RootDef->getOperand(1); | ||||||
6156 | MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); | ||||||
6157 | if (!ExtDef) | ||||||
6158 | return None; | ||||||
6159 | Ext = getExtendTypeForInst(*ExtDef, MRI); | ||||||
6160 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||||
6161 | return None; | ||||||
6162 | ExtReg = ExtDef->getOperand(1).getReg(); | ||||||
6163 | } else { | ||||||
6164 | // Didn't get a shift. Try just folding an extend. | ||||||
6165 | Ext = getExtendTypeForInst(*RootDef, MRI); | ||||||
6166 | if (Ext == AArch64_AM::InvalidShiftExtend) | ||||||
6167 | return None; | ||||||
6168 | ExtReg = RootDef->getOperand(1).getReg(); | ||||||
6169 | |||||||
6170 | // If we have a 32 bit instruction which zeroes out the high half of a | ||||||
6171 | // register, we get an implicit zero extend for free. Check if we have one. | ||||||
6172 | // FIXME: We actually emit the extend right now even though we don't have | ||||||
6173 | // to. | ||||||
6174 | if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { | ||||||
6175 | MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); | ||||||
6176 | if (ExtInst && isDef32(*ExtInst)) | ||||||
6177 | return None; | ||||||
6178 | } | ||||||
6179 | } | ||||||
6180 | |||||||
6181 | // We require a GPR32 here. Narrow the ExtReg if needed using a subregister | ||||||
6182 | // copy. | ||||||
6183 | MachineIRBuilder MIB(*RootDef); | ||||||
6184 | ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); | ||||||
6185 | |||||||
6186 | return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, | ||||||
6187 | [=](MachineInstrBuilder &MIB) { | ||||||
6188 | MIB.addImm(getArithExtendImm(Ext, ShiftVal)); | ||||||
6189 | }}}; | ||||||
6190 | } | ||||||
6191 | |||||||
6192 | void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, | ||||||
6193 | const MachineInstr &MI, | ||||||
6194 | int OpIdx) const { | ||||||
6195 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | ||||||
6196 | assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && \"Expected G_CONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6197, __extension__ __PRETTY_FUNCTION__)) | ||||||
6197 | "Expected G_CONSTANT")(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && \"Expected G_CONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6197, __extension__ __PRETTY_FUNCTION__)); | ||||||
6198 | Optional<int64_t> CstVal = | ||||||
6199 | getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); | ||||||
6200 | assert(CstVal && "Expected constant value")(static_cast <bool> (CstVal && "Expected constant value" ) ? void (0) : __assert_fail ("CstVal && \"Expected constant value\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6200, __extension__ __PRETTY_FUNCTION__)); | ||||||
6201 | MIB.addImm(CstVal.getValue()); | ||||||
6202 | } | ||||||
6203 | |||||||
6204 | void AArch64InstructionSelector::renderLogicalImm32( | ||||||
6205 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { | ||||||
6206 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && \"Expected G_CONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6207, __extension__ __PRETTY_FUNCTION__)) | ||||||
6207 | "Expected G_CONSTANT")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && \"Expected G_CONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6207, __extension__ __PRETTY_FUNCTION__)); | ||||||
6208 | uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); | ||||||
6209 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); | ||||||
6210 | MIB.addImm(Enc); | ||||||
6211 | } | ||||||
6212 | |||||||
6213 | void AArch64InstructionSelector::renderLogicalImm64( | ||||||
6214 | MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { | ||||||
6215 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && \"Expected G_CONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6216, __extension__ __PRETTY_FUNCTION__)) | ||||||
6216 | "Expected G_CONSTANT")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && \"Expected G_CONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6216, __extension__ __PRETTY_FUNCTION__)); | ||||||
6217 | uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); | ||||||
6218 | uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); | ||||||
6219 | MIB.addImm(Enc); | ||||||
6220 | } | ||||||
6221 | |||||||
6222 | void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB, | ||||||
6223 | const MachineInstr &MI, | ||||||
6224 | int OpIdx) const { | ||||||
6225 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && "Expected G_FCONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && \"Expected G_FCONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6226, __extension__ __PRETTY_FUNCTION__)) | ||||||
6226 | "Expected G_FCONSTANT")(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && "Expected G_FCONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && \"Expected G_FCONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6226, __extension__ __PRETTY_FUNCTION__)); | ||||||
6227 | MIB.addImm( | ||||||
6228 | AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | ||||||
6229 | } | ||||||
6230 | |||||||
6231 | void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB, | ||||||
6232 | const MachineInstr &MI, | ||||||
6233 | int OpIdx) const { | ||||||
6234 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && "Expected G_FCONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && \"Expected G_FCONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6235, __extension__ __PRETTY_FUNCTION__)) | ||||||
6235 | "Expected G_FCONSTANT")(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && "Expected G_FCONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && \"Expected G_FCONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6235, __extension__ __PRETTY_FUNCTION__)); | ||||||
6236 | MIB.addImm( | ||||||
6237 | AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | ||||||
6238 | } | ||||||
6239 | |||||||
6240 | void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB, | ||||||
6241 | const MachineInstr &MI, | ||||||
6242 | int OpIdx) const { | ||||||
6243 | assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && "Expected G_FCONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && \"Expected G_FCONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6244, __extension__ __PRETTY_FUNCTION__)) | ||||||
6244 | "Expected G_FCONSTANT")(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && "Expected G_FCONSTANT") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && \"Expected G_FCONSTANT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6244, __extension__ __PRETTY_FUNCTION__)); | ||||||
6245 | MIB.addImm( | ||||||
6246 | AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF())); | ||||||
6247 | } | ||||||
6248 | |||||||
6249 | bool AArch64InstructionSelector::isLoadStoreOfNumBytes( | ||||||
6250 | const MachineInstr &MI, unsigned NumBytes) const { | ||||||
6251 | if (!MI.mayLoadOrStore()) | ||||||
6252 | return false; | ||||||
6253 | assert(MI.hasOneMemOperand() &&(static_cast <bool> (MI.hasOneMemOperand() && "Expected load/store to have only one mem op!" ) ? void (0) : __assert_fail ("MI.hasOneMemOperand() && \"Expected load/store to have only one mem op!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6254, __extension__ __PRETTY_FUNCTION__)) | ||||||
6254 | "Expected load/store to have only one mem op!")(static_cast <bool> (MI.hasOneMemOperand() && "Expected load/store to have only one mem op!" ) ? void (0) : __assert_fail ("MI.hasOneMemOperand() && \"Expected load/store to have only one mem op!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6254, __extension__ __PRETTY_FUNCTION__)); | ||||||
6255 | return (*MI.memoperands_begin())->getSize() == NumBytes; | ||||||
6256 | } | ||||||
6257 | |||||||
6258 | bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { | ||||||
6259 | const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | ||||||
6260 | if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) | ||||||
6261 | return false; | ||||||
6262 | |||||||
6263 | // Only return true if we know the operation will zero-out the high half of | ||||||
6264 | // the 64-bit register. Truncates can be subregister copies, which don't | ||||||
6265 | // zero out the high bits. Copies and other copy-like instructions can be | ||||||
6266 | // fed by truncates, or could be lowered as subregister copies. | ||||||
6267 | switch (MI.getOpcode()) { | ||||||
6268 | default: | ||||||
6269 | return true; | ||||||
6270 | case TargetOpcode::COPY: | ||||||
6271 | case TargetOpcode::G_BITCAST: | ||||||
6272 | case TargetOpcode::G_TRUNC: | ||||||
6273 | case TargetOpcode::G_PHI: | ||||||
6274 | return false; | ||||||
6275 | } | ||||||
6276 | } | ||||||
6277 | |||||||
6278 | |||||||
6279 | // Perform fixups on the given PHI instruction's operands to force them all | ||||||
6280 | // to be the same as the destination regbank. | ||||||
6281 | static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, | ||||||
6282 | const AArch64RegisterBankInfo &RBI) { | ||||||
6283 | assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI")(static_cast <bool> (MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI") ? void (0) : __assert_fail ("MI.getOpcode() == TargetOpcode::G_PHI && \"Expected a G_PHI\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6283, __extension__ __PRETTY_FUNCTION__)); | ||||||
6284 | Register DstReg = MI.getOperand(0).getReg(); | ||||||
6285 | const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg); | ||||||
6286 | assert(DstRB && "Expected PHI dst to have regbank assigned")(static_cast <bool> (DstRB && "Expected PHI dst to have regbank assigned" ) ? void (0) : __assert_fail ("DstRB && \"Expected PHI dst to have regbank assigned\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp" , 6286, __extension__ __PRETTY_FUNCTION__)); | ||||||
6287 | MachineIRBuilder MIB(MI); | ||||||
6288 | |||||||
6289 | // Go through each operand and ensure it has the same regbank. | ||||||
6290 | for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { | ||||||
6291 | MachineOperand &MO = MI.getOperand(OpIdx); | ||||||
6292 | if (!MO.isReg()) | ||||||
6293 | continue; | ||||||
6294 | Register OpReg = MO.getReg(); | ||||||
6295 | const RegisterBank *RB = MRI.getRegBankOrNull(OpReg); | ||||||
6296 | if (RB != DstRB) { | ||||||
6297 | // Insert a cross-bank copy. | ||||||
6298 | auto *OpDef = MRI.getVRegDef(OpReg); | ||||||
6299 | const LLT &Ty = MRI.getType(OpReg); | ||||||
6300 | MachineBasicBlock &OpDefBB = *OpDef->getParent(); | ||||||
6301 | |||||||
6302 | // Any instruction we insert must appear after all PHIs in the block | ||||||
6303 | // for the block to be valid MIR. | ||||||
6304 | MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator()); | ||||||
6305 | if (InsertPt != OpDefBB.end() && InsertPt->isPHI()) | ||||||
6306 | InsertPt = OpDefBB.getFirstNonPHI(); | ||||||
6307 | MIB.setInsertPt(*OpDef->getParent(), InsertPt); | ||||||
6308 | auto Copy = MIB.buildCopy(Ty, OpReg); | ||||||
6309 | MRI.setRegBank(Copy.getReg(0), *DstRB); | ||||||
6310 | MO.setReg(Copy.getReg(0)); | ||||||
6311 | } | ||||||
6312 | } | ||||||
6313 | } | ||||||
6314 | |||||||
6315 | void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { | ||||||
6316 | // We're looking for PHIs, build a list so we don't invalidate iterators. | ||||||
6317 | MachineRegisterInfo &MRI = MF.getRegInfo(); | ||||||
6318 | SmallVector<MachineInstr *, 32> Phis; | ||||||
6319 | for (auto &BB : MF) { | ||||||
6320 | for (auto &MI : BB) { | ||||||
6321 | if (MI.getOpcode() == TargetOpcode::G_PHI) | ||||||
6322 | Phis.emplace_back(&MI); | ||||||
6323 | } | ||||||
6324 | } | ||||||
6325 | |||||||
6326 | for (auto *MI : Phis) { | ||||||
6327 | // We need to do some work here if the operand types are < 16 bit and they | ||||||
6328 | // are split across fpr/gpr banks. Since all types <32b on gpr | ||||||
6329 | // end up being assigned gpr32 regclasses, we can end up with PHIs here | ||||||
6330 | // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't | ||||||
6331 | // be selecting heterogenous regbanks for operands if possible, but we | ||||||
6332 | // still need to be able to deal with it here. | ||||||
6333 | // | ||||||
6334 | // To fix this, if we have a gpr-bank operand < 32b in size and at least | ||||||
6335 | // one other operand is on the fpr bank, then we add cross-bank copies | ||||||
6336 | // to homogenize the operand banks. For simplicity the bank that we choose | ||||||
6337 | // to settle on is whatever bank the def operand has. For example: | ||||||
6338 | // | ||||||
6339 | // %endbb: | ||||||
6340 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 | ||||||
6341 | // => | ||||||
6342 | // %bb2: | ||||||
6343 | // ... | ||||||
6344 | // %in2_copy:gpr(s16) = COPY %in2:fpr(s16) | ||||||
6345 | // ... | ||||||
6346 | // %endbb: | ||||||
6347 | // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 | ||||||
6348 | bool HasGPROp = false, HasFPROp = false; | ||||||
6349 | for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) { | ||||||
6350 | const auto &MO = MI->getOperand(OpIdx); | ||||||
6351 | if (!MO.isReg()) | ||||||
6352 | continue; | ||||||
6353 | const LLT &Ty = MRI.getType(MO.getReg()); | ||||||
6354 | if (!Ty.isValid() || !Ty.isScalar()) | ||||||
6355 | break; | ||||||
6356 | if (Ty.getSizeInBits() >= 32) | ||||||
6357 | break; | ||||||
6358 | const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()); | ||||||
6359 | // If for some reason we don't have a regbank yet. Don't try anything. | ||||||
6360 | if (!RB) | ||||||
6361 | break; | ||||||
6362 | |||||||
6363 | if (RB->getID() == AArch64::GPRRegBankID) | ||||||
6364 | HasGPROp = true; | ||||||
6365 | else | ||||||
6366 | HasFPROp = true; | ||||||
6367 | } | ||||||
6368 | // We have heterogenous regbanks, need to fixup. | ||||||
6369 | if (HasGPROp && HasFPROp) | ||||||
6370 | fixupPHIOpBanks(*MI, MRI, RBI); | ||||||
6371 | } | ||||||
6372 | } | ||||||
6373 | |||||||
6374 | namespace llvm { | ||||||
6375 | InstructionSelector * | ||||||
6376 | createAArch64InstructionSelector(const AArch64TargetMachine &TM, | ||||||
6377 | AArch64Subtarget &Subtarget, | ||||||
6378 | AArch64RegisterBankInfo &RBI) { | ||||||
6379 | return new AArch64InstructionSelector(TM, Subtarget, RBI); | ||||||
6380 | } | ||||||
6381 | } |
1 | //===-- llvm/CodeGen/MachineOperand.h - MachineOperand class ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the declaration of the MachineOperand class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CODEGEN_MACHINEOPERAND_H |
14 | #define LLVM_CODEGEN_MACHINEOPERAND_H |
15 | |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/CodeGen/Register.h" |
18 | #include "llvm/IR/Intrinsics.h" |
19 | #include "llvm/Support/DataTypes.h" |
20 | #include "llvm/Support/LowLevelTypeImpl.h" |
21 | #include <cassert> |
22 | |
23 | namespace llvm { |
24 | |
25 | class BlockAddress; |
26 | class Constant; |
27 | class ConstantFP; |
28 | class ConstantInt; |
29 | class GlobalValue; |
30 | class MachineBasicBlock; |
31 | class MachineInstr; |
32 | class MachineRegisterInfo; |
33 | class MCCFIInstruction; |
34 | class MDNode; |
35 | class ModuleSlotTracker; |
36 | class TargetIntrinsicInfo; |
37 | class TargetRegisterInfo; |
38 | class hash_code; |
39 | class raw_ostream; |
40 | class MCSymbol; |
41 | |
42 | /// MachineOperand class - Representation of each machine instruction operand. |
43 | /// |
44 | /// This class isn't a POD type because it has a private constructor, but its |
45 | /// destructor must be trivial. Functions like MachineInstr::addOperand(), |
46 | /// MachineRegisterInfo::moveOperands(), and MF::DeleteMachineInstr() depend on |
47 | /// not having to call the MachineOperand destructor. |
48 | /// |
49 | class MachineOperand { |
50 | public: |
51 | enum MachineOperandType : unsigned char { |
52 | MO_Register, ///< Register operand. |
53 | MO_Immediate, ///< Immediate operand |
54 | MO_CImmediate, ///< Immediate >64bit operand |
55 | MO_FPImmediate, ///< Floating-point immediate operand |
56 | MO_MachineBasicBlock, ///< MachineBasicBlock reference |
57 | MO_FrameIndex, ///< Abstract Stack Frame Index |
58 | MO_ConstantPoolIndex, ///< Address of indexed Constant in Constant Pool |
59 | MO_TargetIndex, ///< Target-dependent index+offset operand. |
60 | MO_JumpTableIndex, ///< Address of indexed Jump Table for switch |
61 | MO_ExternalSymbol, ///< Name of external global symbol |
62 | MO_GlobalAddress, ///< Address of a global value |
63 | MO_BlockAddress, ///< Address of a basic block |
64 | MO_RegisterMask, ///< Mask of preserved registers. |
65 | MO_RegisterLiveOut, ///< Mask of live-out registers. |
66 | MO_Metadata, ///< Metadata reference (for debug info) |
67 | MO_MCSymbol, ///< MCSymbol reference (for debug/eh info) |
68 | MO_CFIIndex, ///< MCCFIInstruction index. |
69 | MO_IntrinsicID, ///< Intrinsic ID for ISel |
70 | MO_Predicate, ///< Generic predicate for ISel |
71 | MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks) |
72 | MO_Last = MO_ShuffleMask |
73 | }; |
74 | |
75 | private: |
76 | /// OpKind - Specify what kind of operand this is. This discriminates the |
77 | /// union. |
78 | unsigned OpKind : 8; |
79 | |
80 | /// Subregister number for MO_Register. A value of 0 indicates the |
81 | /// MO_Register has no subReg. |
82 | /// |
83 | /// For all other kinds of operands, this field holds target-specific flags. |
84 | unsigned SubReg_TargetFlags : 12; |
85 | |
86 | /// TiedTo - Non-zero when this register operand is tied to another register |
87 | /// operand. The encoding of this field is described in the block comment |
88 | /// before MachineInstr::tieOperands(). |
89 | unsigned TiedTo : 4; |
90 | |
91 | /// IsDef - True if this is a def, false if this is a use of the register. |
92 | /// This is only valid on register operands. |
93 | /// |
94 | unsigned IsDef : 1; |
95 | |
96 | /// IsImp - True if this is an implicit def or use, false if it is explicit. |
97 | /// This is only valid on register opderands. |
98 | /// |
99 | unsigned IsImp : 1; |
100 | |
101 | /// IsDeadOrKill |
102 | /// For uses: IsKill - Conservatively indicates the last use of a register |
103 | /// on this path through the function. A register operand with true value of |
104 | /// this flag must be the last use of the register, a register operand with |
105 | /// false value may or may not be the last use of the register. After regalloc |
106 | /// we can use recomputeLivenessFlags to get precise kill flags. |
107 | /// For defs: IsDead - True if this register is never used by a subsequent |
108 | /// instruction. |
109 | /// This is only valid on register operands. |
110 | unsigned IsDeadOrKill : 1; |
111 | |
112 | /// See isRenamable(). |
113 | unsigned IsRenamable : 1; |
114 | |
115 | /// IsUndef - True if this register operand reads an "undef" value, i.e. the |
116 | /// read value doesn't matter. This flag can be set on both use and def |
117 | /// operands. On a sub-register def operand, it refers to the part of the |
118 | /// register that isn't written. On a full-register def operand, it is a |
119 | /// noop. See readsReg(). |
120 | /// |
121 | /// This is only valid on registers. |
122 | /// |
123 | /// Note that an instruction may have multiple <undef> operands referring to |
124 | /// the same register. In that case, the instruction may depend on those |
125 | /// operands reading the same dont-care value. For example: |
126 | /// |
127 | /// %1 = XOR undef %2, undef %2 |
128 | /// |
129 | /// Any register can be used for %2, and its value doesn't matter, but |
130 | /// the two operands must be the same register. |
131 | /// |
132 | unsigned IsUndef : 1; |
133 | |
134 | /// IsInternalRead - True if this operand reads a value that was defined |
135 | /// inside the same instruction or bundle. This flag can be set on both use |
136 | /// and def operands. On a sub-register def operand, it refers to the part |
137 | /// of the register that isn't written. On a full-register def operand, it |
138 | /// is a noop. |
139 | /// |
140 | /// When this flag is set, the instruction bundle must contain at least one |
141 | /// other def of the register. If multiple instructions in the bundle define |
142 | /// the register, the meaning is target-defined. |
143 | unsigned IsInternalRead : 1; |
144 | |
145 | /// IsEarlyClobber - True if this MO_Register 'def' operand is written to |
146 | /// by the MachineInstr before all input registers are read. This is used to |
147 | /// model the GCC inline asm '&' constraint modifier. |
148 | unsigned IsEarlyClobber : 1; |
149 | |
150 | /// IsDebug - True if this MO_Register 'use' operand is in a debug pseudo, |
151 | /// not a real instruction. Such uses should be ignored during codegen. |
152 | unsigned IsDebug : 1; |
153 | |
154 | /// SmallContents - This really should be part of the Contents union, but |
155 | /// lives out here so we can get a better packed struct. |
156 | /// MO_Register: Register number. |
157 | /// OffsetedInfo: Low bits of offset. |
158 | union { |
159 | unsigned RegNo; // For MO_Register. |
160 | unsigned OffsetLo; // Matches Contents.OffsetedInfo.OffsetHi. |
161 | } SmallContents; |
162 | |
163 | /// ParentMI - This is the instruction that this operand is embedded into. |
164 | /// This is valid for all operand types, when the operand is in an instr. |
165 | MachineInstr *ParentMI; |
166 | |
167 | /// Contents union - This contains the payload for the various operand types. |
168 | union ContentsUnion { |
169 | ContentsUnion() {} |
170 | MachineBasicBlock *MBB; // For MO_MachineBasicBlock. |
171 | const ConstantFP *CFP; // For MO_FPImmediate. |
172 | const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit. |
173 | int64_t ImmVal; // For MO_Immediate. |
174 | const uint32_t *RegMask; // For MO_RegisterMask and MO_RegisterLiveOut. |
175 | const MDNode *MD; // For MO_Metadata. |
176 | MCSymbol *Sym; // For MO_MCSymbol. |
177 | unsigned CFIIndex; // For MO_CFI. |
178 | Intrinsic::ID IntrinsicID; // For MO_IntrinsicID. |
179 | unsigned Pred; // For MO_Predicate |
180 | ArrayRef<int> ShuffleMask; // For MO_ShuffleMask |
181 | |
182 | struct { // For MO_Register. |
183 | // Register number is in SmallContents.RegNo. |
184 | MachineOperand *Prev; // Access list for register. See MRI. |
185 | MachineOperand *Next; |
186 | } Reg; |
187 | |
188 | /// OffsetedInfo - This struct contains the offset and an object identifier. |
189 | /// this represent the object as with an optional offset from it. |
190 | struct { |
191 | union { |
192 | int Index; // For MO_*Index - The index itself. |
193 | const char *SymbolName; // For MO_ExternalSymbol. |
194 | const GlobalValue *GV; // For MO_GlobalAddress. |
195 | const BlockAddress *BA; // For MO_BlockAddress. |
196 | } Val; |
197 | // Low bits of offset are in SmallContents.OffsetLo. |
198 | int OffsetHi; // An offset from the object, high 32 bits. |
199 | } OffsetedInfo; |
200 | } Contents; |
201 | |
  /// Construct an operand of kind \p K with zero target flags and no parent.
  /// Note: the per-register flag bits (IsDef, IsImp, ...) are NOT initialized
  /// here; callers creating register operands must set them explicitly.
  explicit MachineOperand(MachineOperandType K)
      : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) {
    // Assert that the layout is what we expect. It's easy to grow this object.
    static_assert(alignof(MachineOperand) <= alignof(int64_t),
                  "MachineOperand shouldn't be more than 8 byte aligned");
    static_assert(sizeof(Contents) <= 2 * sizeof(void *),
                  "Contents should be at most two pointers");
    static_assert(sizeof(MachineOperand) <=
                      alignTo<alignof(int64_t)>(2 * sizeof(unsigned) +
                                                3 * sizeof(void *)),
                  "MachineOperand too big. Should be Kind, SmallContents, "
                  "ParentMI, and Contents");
  }
215 | |
216 | public: |
217 | /// getType - Returns the MachineOperandType for this operand. |
218 | /// |
219 | MachineOperandType getType() const { return (MachineOperandType)OpKind; } |
220 | |
221 | unsigned getTargetFlags() const { |
222 | return isReg() ? 0 : SubReg_TargetFlags; |
223 | } |
224 | void setTargetFlags(unsigned F) { |
225 | assert(!isReg() && "Register operands can't have target flags")(static_cast <bool> (!isReg() && "Register operands can't have target flags" ) ? void (0) : __assert_fail ("!isReg() && \"Register operands can't have target flags\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 225, __extension__ __PRETTY_FUNCTION__)); |
226 | SubReg_TargetFlags = F; |
227 | assert(SubReg_TargetFlags == F && "Target flags out of range")(static_cast <bool> (SubReg_TargetFlags == F && "Target flags out of range") ? void (0) : __assert_fail ("SubReg_TargetFlags == F && \"Target flags out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 227, __extension__ __PRETTY_FUNCTION__)); |
228 | } |
229 | void addTargetFlag(unsigned F) { |
230 | assert(!isReg() && "Register operands can't have target flags")(static_cast <bool> (!isReg() && "Register operands can't have target flags" ) ? void (0) : __assert_fail ("!isReg() && \"Register operands can't have target flags\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 230, __extension__ __PRETTY_FUNCTION__)); |
231 | SubReg_TargetFlags |= F; |
232 | assert((SubReg_TargetFlags & F) && "Target flags out of range")(static_cast <bool> ((SubReg_TargetFlags & F) && "Target flags out of range") ? void (0) : __assert_fail ("(SubReg_TargetFlags & F) && \"Target flags out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 232, __extension__ __PRETTY_FUNCTION__)); |
233 | } |
234 | |
235 | |
  /// getParent - Return the instruction that this operand belongs to.
  /// Null when the operand is not currently embedded in a MachineInstr.
  MachineInstr *getParent() { return ParentMI; }
  const MachineInstr *getParent() const { return ParentMI; }
240 | |
241 | /// clearParent - Reset the parent pointer. |
242 | /// |
243 | /// The MachineOperand copy constructor also copies ParentMI, expecting the |
244 | /// original to be deleted. If a MachineOperand is ever stored outside a |
245 | /// MachineInstr, the parent pointer must be cleared. |
246 | /// |
247 | /// Never call clearParent() on an operand in a MachineInstr. |
248 | /// |
249 | void clearParent() { ParentMI = nullptr; } |
250 | |
251 | /// Print a subreg index operand. |
252 | /// MO_Immediate operands can also be subreg idices. If it's the case, the |
253 | /// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be |
254 | /// called to check this. |
255 | static void printSubRegIdx(raw_ostream &OS, uint64_t Index, |
256 | const TargetRegisterInfo *TRI); |
257 | |
258 | /// Print operand target flags. |
259 | static void printTargetFlags(raw_ostream& OS, const MachineOperand &Op); |
260 | |
261 | /// Print a MCSymbol as an operand. |
262 | static void printSymbol(raw_ostream &OS, MCSymbol &Sym); |
263 | |
264 | /// Print a stack object reference. |
265 | static void printStackObjectReference(raw_ostream &OS, unsigned FrameIndex, |
266 | bool IsFixed, StringRef Name); |
267 | |
268 | /// Print the offset with explicit +/- signs. |
269 | static void printOperandOffset(raw_ostream &OS, int64_t Offset); |
270 | |
271 | /// Print an IRSlotNumber. |
272 | static void printIRSlotNumber(raw_ostream &OS, int Slot); |
273 | |
274 | /// Print the MachineOperand to \p os. |
275 | /// Providing a valid \p TRI and \p IntrinsicInfo results in a more |
276 | /// target-specific printing. If \p TRI and \p IntrinsicInfo are null, the |
277 | /// function will try to pick it up from the parent. |
278 | void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr, |
279 | const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; |
280 | |
281 | /// More complex way of printing a MachineOperand. |
282 | /// \param TypeToPrint specifies the generic type to be printed on uses and |
283 | /// defs. It can be determined using MachineInstr::getTypeToPrint. |
284 | /// \param OpIdx - specifies the index of the operand in machine instruction. |
285 | /// This will be used by target dependent MIR formatter. Could be None if the |
286 | /// index is unknown, e.g. called by dump(). |
287 | /// \param PrintDef - whether we want to print `def` on an operand which |
288 | /// isDef. Sometimes, if the operand is printed before '=', we don't print |
289 | /// `def`. |
290 | /// \param IsStandalone - whether we want a verbose output of the MO. This |
291 | /// prints extra information that can be easily inferred when printing the |
292 | /// whole function, but not when printing only a fragment of it. |
293 | /// \param ShouldPrintRegisterTies - whether we want to print register ties. |
294 | /// Sometimes they are easily determined by the instruction's descriptor |
295 | /// (MachineInstr::hasComplexRegiterTies can determine if it's needed). |
296 | /// \param TiedOperandIdx - if we need to print register ties this needs to |
297 | /// provide the index of the tied register. If not, it will be ignored. |
298 | /// \param TRI - provide more target-specific information to the printer. |
299 | /// Unlike the previous function, this one will not try and get the |
300 | /// information from it's parent. |
301 | /// \param IntrinsicInfo - same as \p TRI. |
302 | void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, |
303 | Optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone, |
304 | bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, |
305 | const TargetRegisterInfo *TRI, |
306 | const TargetIntrinsicInfo *IntrinsicInfo) const; |
307 | |
308 | /// Same as print(os, TRI, IntrinsicInfo), but allows to specify the low-level |
309 | /// type to be printed the same way the full version of print(...) does it. |
310 | void print(raw_ostream &os, LLT TypeToPrint, |
311 | const TargetRegisterInfo *TRI = nullptr, |
312 | const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; |
313 | |
314 | void dump() const; |
315 | |
316 | //===--------------------------------------------------------------------===// |
317 | // Accessors that tell you what kind of MachineOperand you're looking at. |
318 | //===--------------------------------------------------------------------===// |
319 | |
  /// isReg - Tests if this is a MO_Register operand.
  bool isReg() const { return OpKind == MO_Register; }
  /// isImm - Tests if this is a MO_Immediate operand.
  bool isImm() const { return OpKind == MO_Immediate; }
  /// isCImm - Test if this is a MO_CImmediate operand.
  bool isCImm() const { return OpKind == MO_CImmediate; }
  /// isFPImm - Tests if this is a MO_FPImmediate operand.
  bool isFPImm() const { return OpKind == MO_FPImmediate; }
  /// isMBB - Tests if this is a MO_MachineBasicBlock operand.
  bool isMBB() const { return OpKind == MO_MachineBasicBlock; }
  /// isFI - Tests if this is a MO_FrameIndex operand.
  bool isFI() const { return OpKind == MO_FrameIndex; }
  /// isCPI - Tests if this is a MO_ConstantPoolIndex operand.
  bool isCPI() const { return OpKind == MO_ConstantPoolIndex; }
  /// isTargetIndex - Tests if this is a MO_TargetIndex operand.
  bool isTargetIndex() const { return OpKind == MO_TargetIndex; }
  /// isJTI - Tests if this is a MO_JumpTableIndex operand.
  bool isJTI() const { return OpKind == MO_JumpTableIndex; }
  /// isGlobal - Tests if this is a MO_GlobalAddress operand.
  bool isGlobal() const { return OpKind == MO_GlobalAddress; }
  /// isSymbol - Tests if this is a MO_ExternalSymbol operand.
  bool isSymbol() const { return OpKind == MO_ExternalSymbol; }
  /// isBlockAddress - Tests if this is a MO_BlockAddress operand.
  bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
  /// isRegMask - Tests if this is a MO_RegisterMask operand.
  bool isRegMask() const { return OpKind == MO_RegisterMask; }
  /// isRegLiveOut - Tests if this is a MO_RegisterLiveOut operand.
  bool isRegLiveOut() const { return OpKind == MO_RegisterLiveOut; }
  /// isMetadata - Tests if this is a MO_Metadata operand.
  bool isMetadata() const { return OpKind == MO_Metadata; }
  /// isMCSymbol - Tests if this is a MO_MCSymbol operand.
  bool isMCSymbol() const { return OpKind == MO_MCSymbol; }
  /// isCFIIndex - Tests if this is a MO_CFIIndex operand.
  bool isCFIIndex() const { return OpKind == MO_CFIIndex; }
  /// isIntrinsicID - Tests if this is a MO_IntrinsicID operand.
  bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; }
  /// isPredicate - Tests if this is a MO_Predicate operand.
  bool isPredicate() const { return OpKind == MO_Predicate; }
  /// isShuffleMask - Tests if this is a MO_ShuffleMask operand.
  bool isShuffleMask() const { return OpKind == MO_ShuffleMask; }
355 | //===--------------------------------------------------------------------===// |
356 | // Accessors for Register Operands |
357 | //===--------------------------------------------------------------------===// |
358 | |
359 | /// getReg - Returns the register number. |
360 | Register getReg() const { |
361 | assert(isReg() && "This is not a register operand!")(static_cast <bool> (isReg() && "This is not a register operand!" ) ? void (0) : __assert_fail ("isReg() && \"This is not a register operand!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 361, __extension__ __PRETTY_FUNCTION__)); |
362 | return Register(SmallContents.RegNo); |
363 | } |
364 | |
365 | unsigned getSubReg() const { |
366 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 366, __extension__ __PRETTY_FUNCTION__)); |
367 | return SubReg_TargetFlags; |
368 | } |
369 | |
370 | bool isUse() const { |
371 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 371, __extension__ __PRETTY_FUNCTION__)); |
372 | return !IsDef; |
373 | } |
374 | |
375 | bool isDef() const { |
376 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 376, __extension__ __PRETTY_FUNCTION__)); |
377 | return IsDef; |
378 | } |
379 | |
380 | bool isImplicit() const { |
381 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 381, __extension__ __PRETTY_FUNCTION__)); |
382 | return IsImp; |
383 | } |
384 | |
385 | bool isDead() const { |
386 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 386, __extension__ __PRETTY_FUNCTION__)); |
387 | return IsDeadOrKill & IsDef; |
388 | } |
389 | |
390 | bool isKill() const { |
391 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 391, __extension__ __PRETTY_FUNCTION__)); |
392 | return IsDeadOrKill & !IsDef; |
393 | } |
394 | |
395 | bool isUndef() const { |
396 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 396, __extension__ __PRETTY_FUNCTION__)); |
397 | return IsUndef; |
398 | } |
399 | |
400 | /// isRenamable - Returns true if this register may be renamed, i.e. it does |
401 | /// not generate a value that is somehow read in a way that is not represented |
402 | /// by the Machine IR (e.g. to meet an ABI or ISA requirement). This is only |
403 | /// valid on physical register operands. Virtual registers are assumed to |
404 | /// always be renamable regardless of the value of this field. |
405 | /// |
406 | /// Operands that are renamable can freely be changed to any other register |
407 | /// that is a member of the register class returned by |
408 | /// MI->getRegClassConstraint(). |
409 | /// |
410 | /// isRenamable can return false for several different reasons: |
411 | /// |
412 | /// - ABI constraints (since liveness is not always precisely modeled). We |
413 | /// conservatively handle these cases by setting all physical register |
414 | /// operands that didn’t start out as virtual regs to not be renamable. |
415 | /// Also any physical register operands created after register allocation or |
416 | /// whose register is changed after register allocation will not be |
417 | /// renamable. This state is tracked in the MachineOperand::IsRenamable |
418 | /// bit. |
419 | /// |
420 | /// - Opcode/target constraints: for opcodes that have complex register class |
421 | /// requirements (e.g. that depend on other operands/instructions), we set |
422 | /// hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq in the machine opcode |
423 | /// description. Operands belonging to instructions with opcodes that are |
424 | /// marked hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq return false from |
425 | /// isRenamable(). Additionally, the AllowRegisterRenaming target property |
426 | /// prevents any operands from being marked renamable for targets that don't |
427 | /// have detailed opcode hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq |
428 | /// values. |
429 | bool isRenamable() const; |
430 | |
431 | bool isInternalRead() const { |
432 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 432, __extension__ __PRETTY_FUNCTION__)); |
433 | return IsInternalRead; |
434 | } |
435 | |
436 | bool isEarlyClobber() const { |
437 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 437, __extension__ __PRETTY_FUNCTION__)); |
438 | return IsEarlyClobber; |
439 | } |
440 | |
441 | bool isTied() const { |
442 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 442, __extension__ __PRETTY_FUNCTION__)); |
443 | return TiedTo; |
444 | } |
445 | |
446 | bool isDebug() const { |
447 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 447, __extension__ __PRETTY_FUNCTION__)); |
448 | return IsDebug; |
449 | } |
450 | |
451 | /// readsReg - Returns true if this operand reads the previous value of its |
452 | /// register. A use operand with the <undef> flag set doesn't read its |
453 | /// register. A sub-register def implicitly reads the other parts of the |
454 | /// register being redefined unless the <undef> flag is set. |
455 | /// |
456 | /// This refers to reading the register value from before the current |
457 | /// instruction or bundle. Internal bundle reads are not included. |
458 | bool readsReg() const { |
459 | assert(isReg() && "Wrong MachineOperand accessor")(static_cast <bool> (isReg() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 459, __extension__ __PRETTY_FUNCTION__)); |
460 | return !isUndef() && !isInternalRead() && (isUse() || getSubReg()); |
461 | } |
462 | |
463 | //===--------------------------------------------------------------------===// |
464 | // Mutators for Register Operands |
465 | //===--------------------------------------------------------------------===// |
466 | |
467 | /// Change the register this operand corresponds to. |
468 | /// |
469 | void setReg(Register Reg); |
470 | |
471 | void setSubReg(unsigned subReg) { |
472 | assert(isReg() && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 472, __extension__ __PRETTY_FUNCTION__)); |
473 | SubReg_TargetFlags = subReg; |
474 | assert(SubReg_TargetFlags == subReg && "SubReg out of range")(static_cast <bool> (SubReg_TargetFlags == subReg && "SubReg out of range") ? void (0) : __assert_fail ("SubReg_TargetFlags == subReg && \"SubReg out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 474, __extension__ __PRETTY_FUNCTION__)); |
475 | } |
476 | |
477 | /// substVirtReg - Substitute the current register with the virtual |
478 | /// subregister Reg:SubReg. Take any existing SubReg index into account, |
479 | /// using TargetRegisterInfo to compose the subreg indices if necessary. |
480 | /// Reg must be a virtual register, SubIdx can be 0. |
481 | /// |
482 | void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo&); |
483 | |
484 | /// substPhysReg - Substitute the current register with the physical register |
485 | /// Reg, taking any existing SubReg into account. For instance, |
486 | /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. |
487 | /// |
488 | void substPhysReg(MCRegister Reg, const TargetRegisterInfo&); |
489 | |
  /// Mark this operand as a use (Val=true) or a def (Val=false); forwards to
  /// setIsDef with the inverted value.
  void setIsUse(bool Val = true) { setIsDef(!Val); }
491 | |
492 | /// Change a def to a use, or a use to a def. |
493 | void setIsDef(bool Val = true); |
494 | |
495 | void setImplicit(bool Val = true) { |
496 | assert(isReg() && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 496, __extension__ __PRETTY_FUNCTION__)); |
497 | IsImp = Val; |
498 | } |
499 | |
500 | void setIsKill(bool Val = true) { |
501 | assert(isReg() && !IsDef && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && !IsDef && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "isReg() && !IsDef && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 501, __extension__ __PRETTY_FUNCTION__)); |
502 | assert((!Val || !isDebug()) && "Marking a debug operation as kill")(static_cast <bool> ((!Val || !isDebug()) && "Marking a debug operation as kill" ) ? void (0) : __assert_fail ("(!Val || !isDebug()) && \"Marking a debug operation as kill\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 502, __extension__ __PRETTY_FUNCTION__)); |
503 | IsDeadOrKill = Val; |
504 | } |
505 | |
506 | void setIsDead(bool Val = true) { |
507 | assert(isReg() && IsDef && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && IsDef && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "isReg() && IsDef && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 507, __extension__ __PRETTY_FUNCTION__)); |
508 | IsDeadOrKill = Val; |
509 | } |
510 | |
511 | void setIsUndef(bool Val = true) { |
512 | assert(isReg() && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 512, __extension__ __PRETTY_FUNCTION__)); |
513 | IsUndef = Val; |
514 | } |
515 | |
516 | void setIsRenamable(bool Val = true); |
517 | |
518 | void setIsInternalRead(bool Val = true) { |
519 | assert(isReg() && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isReg() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 519, __extension__ __PRETTY_FUNCTION__)); |
520 | IsInternalRead = Val; |
521 | } |
522 | |
523 | void setIsEarlyClobber(bool Val = true) { |
524 | assert(isReg() && IsDef && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && IsDef && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "isReg() && IsDef && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 524, __extension__ __PRETTY_FUNCTION__)); |
525 | IsEarlyClobber = Val; |
526 | } |
527 | |
528 | void setIsDebug(bool Val = true) { |
529 | assert(isReg() && !IsDef && "Wrong MachineOperand mutator")(static_cast <bool> (isReg() && !IsDef && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "isReg() && !IsDef && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 529, __extension__ __PRETTY_FUNCTION__)); |
530 | IsDebug = Val; |
531 | } |
532 | |
533 | //===--------------------------------------------------------------------===// |
534 | // Accessors for various operand types. |
535 | //===--------------------------------------------------------------------===// |
536 | |
537 | int64_t getImm() const { |
538 | assert(isImm() && "Wrong MachineOperand accessor")(static_cast <bool> (isImm() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isImm() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 538, __extension__ __PRETTY_FUNCTION__)); |
539 | return Contents.ImmVal; |
540 | } |
541 | |
542 | const ConstantInt *getCImm() const { |
543 | assert(isCImm() && "Wrong MachineOperand accessor")(static_cast <bool> (isCImm() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isCImm() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 543, __extension__ __PRETTY_FUNCTION__)); |
544 | return Contents.CI; |
545 | } |
546 | |
547 | const ConstantFP *getFPImm() const { |
548 | assert(isFPImm() && "Wrong MachineOperand accessor")(static_cast <bool> (isFPImm() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isFPImm() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 548, __extension__ __PRETTY_FUNCTION__)); |
549 | return Contents.CFP; |
550 | } |
551 | |
552 | MachineBasicBlock *getMBB() const { |
553 | assert(isMBB() && "Wrong MachineOperand accessor")(static_cast <bool> (isMBB() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isMBB() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 553, __extension__ __PRETTY_FUNCTION__)); |
554 | return Contents.MBB; |
555 | } |
556 | |
557 | int getIndex() const { |
558 | assert((isFI() || isCPI() || isTargetIndex() || isJTI()) &&(static_cast <bool> ((isFI() || isCPI() || isTargetIndex () || isJTI()) && "Wrong MachineOperand accessor") ? void (0) : __assert_fail ("(isFI() || isCPI() || isTargetIndex() || isJTI()) && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 559, __extension__ __PRETTY_FUNCTION__)) |
559 | "Wrong MachineOperand accessor")(static_cast <bool> ((isFI() || isCPI() || isTargetIndex () || isJTI()) && "Wrong MachineOperand accessor") ? void (0) : __assert_fail ("(isFI() || isCPI() || isTargetIndex() || isJTI()) && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 559, __extension__ __PRETTY_FUNCTION__)); |
560 | return Contents.OffsetedInfo.Val.Index; |
561 | } |
562 | |
563 | const GlobalValue *getGlobal() const { |
564 | assert(isGlobal() && "Wrong MachineOperand accessor")(static_cast <bool> (isGlobal() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isGlobal() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 564, __extension__ __PRETTY_FUNCTION__)); |
565 | return Contents.OffsetedInfo.Val.GV; |
566 | } |
567 | |
568 | const BlockAddress *getBlockAddress() const { |
569 | assert(isBlockAddress() && "Wrong MachineOperand accessor")(static_cast <bool> (isBlockAddress() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isBlockAddress() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 569, __extension__ __PRETTY_FUNCTION__)); |
570 | return Contents.OffsetedInfo.Val.BA; |
571 | } |
572 | |
573 | MCSymbol *getMCSymbol() const { |
574 | assert(isMCSymbol() && "Wrong MachineOperand accessor")(static_cast <bool> (isMCSymbol() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isMCSymbol() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 574, __extension__ __PRETTY_FUNCTION__)); |
575 | return Contents.Sym; |
576 | } |
577 | |
578 | unsigned getCFIIndex() const { |
579 | assert(isCFIIndex() && "Wrong MachineOperand accessor")(static_cast <bool> (isCFIIndex() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isCFIIndex() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 579, __extension__ __PRETTY_FUNCTION__)); |
580 | return Contents.CFIIndex; |
581 | } |
582 | |
583 | Intrinsic::ID getIntrinsicID() const { |
584 | assert(isIntrinsicID() && "Wrong MachineOperand accessor")(static_cast <bool> (isIntrinsicID() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isIntrinsicID() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 584, __extension__ __PRETTY_FUNCTION__)); |
585 | return Contents.IntrinsicID; |
586 | } |
587 | |
588 | unsigned getPredicate() const { |
589 | assert(isPredicate() && "Wrong MachineOperand accessor")(static_cast <bool> (isPredicate() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isPredicate() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 589, __extension__ __PRETTY_FUNCTION__)); |
590 | return Contents.Pred; |
591 | } |
592 | |
593 | ArrayRef<int> getShuffleMask() const { |
594 | assert(isShuffleMask() && "Wrong MachineOperand accessor")(static_cast <bool> (isShuffleMask() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isShuffleMask() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 594, __extension__ __PRETTY_FUNCTION__)); |
595 | return Contents.ShuffleMask; |
596 | } |
597 | |
598 | /// Return the offset from the symbol in this operand. This always returns 0 |
599 | /// for ExternalSymbol operands. |
600 | int64_t getOffset() const { |
601 | assert((isGlobal() || isSymbol() || isMCSymbol() || isCPI() ||(static_cast <bool> ((isGlobal() || isSymbol() || isMCSymbol () || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand accessor") ? void (0) : __assert_fail ( "(isGlobal() || isSymbol() || isMCSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 603, __extension__ __PRETTY_FUNCTION__)) |
602 | isTargetIndex() || isBlockAddress()) &&(static_cast <bool> ((isGlobal() || isSymbol() || isMCSymbol () || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand accessor") ? void (0) : __assert_fail ( "(isGlobal() || isSymbol() || isMCSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 603, __extension__ __PRETTY_FUNCTION__)) |
603 | "Wrong MachineOperand accessor")(static_cast <bool> ((isGlobal() || isSymbol() || isMCSymbol () || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand accessor") ? void (0) : __assert_fail ( "(isGlobal() || isSymbol() || isMCSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 603, __extension__ __PRETTY_FUNCTION__)); |
604 | return int64_t(uint64_t(Contents.OffsetedInfo.OffsetHi) << 32) | |
605 | SmallContents.OffsetLo; |
606 | } |
607 | |
608 | const char *getSymbolName() const { |
609 | assert(isSymbol() && "Wrong MachineOperand accessor")(static_cast <bool> (isSymbol() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isSymbol() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 609, __extension__ __PRETTY_FUNCTION__)); |
610 | return Contents.OffsetedInfo.Val.SymbolName; |
611 | } |
612 | |
613 | /// clobbersPhysReg - Returns true if this RegMask clobbers PhysReg. |
614 | /// It is sometimes necessary to detach the register mask pointer from its |
615 | /// machine operand. This static method can be used for such detached bit |
616 | /// mask pointers. |
617 | static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg) { |
618 | // See TargetRegisterInfo.h. |
619 | assert(PhysReg < (1u << 30) && "Not a physical register")(static_cast <bool> (PhysReg < (1u << 30) && "Not a physical register") ? void (0) : __assert_fail ("PhysReg < (1u << 30) && \"Not a physical register\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 619, __extension__ __PRETTY_FUNCTION__)); |
620 | return !(RegMask[PhysReg / 32] & (1u << PhysReg % 32)); |
621 | } |
622 | |
623 | /// clobbersPhysReg - Returns true if this RegMask operand clobbers PhysReg. |
624 | bool clobbersPhysReg(MCRegister PhysReg) const { |
625 | return clobbersPhysReg(getRegMask(), PhysReg); |
626 | } |
627 | |
628 | /// getRegMask - Returns a bit mask of registers preserved by this RegMask |
629 | /// operand. |
630 | const uint32_t *getRegMask() const { |
631 | assert(isRegMask() && "Wrong MachineOperand accessor")(static_cast <bool> (isRegMask() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isRegMask() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 631, __extension__ __PRETTY_FUNCTION__)); |
632 | return Contents.RegMask; |
633 | } |
634 | |
635 | /// Returns number of elements needed for a regmask array. |
636 | static unsigned getRegMaskSize(unsigned NumRegs) { |
637 | return (NumRegs + 31) / 32; |
638 | } |
639 | |
640 | /// getRegLiveOut - Returns a bit mask of live-out registers. |
641 | const uint32_t *getRegLiveOut() const { |
642 | assert(isRegLiveOut() && "Wrong MachineOperand accessor")(static_cast <bool> (isRegLiveOut() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isRegLiveOut() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 642, __extension__ __PRETTY_FUNCTION__)); |
643 | return Contents.RegMask; |
644 | } |
645 | |
646 | const MDNode *getMetadata() const { |
647 | assert(isMetadata() && "Wrong MachineOperand accessor")(static_cast <bool> (isMetadata() && "Wrong MachineOperand accessor" ) ? void (0) : __assert_fail ("isMetadata() && \"Wrong MachineOperand accessor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 647, __extension__ __PRETTY_FUNCTION__)); |
648 | return Contents.MD; |
649 | } |
650 | |
651 | //===--------------------------------------------------------------------===// |
652 | // Mutators for various operand types. |
653 | //===--------------------------------------------------------------------===// |
654 | |
655 | void setImm(int64_t immVal) { |
656 | assert(isImm() && "Wrong MachineOperand mutator")(static_cast <bool> (isImm() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isImm() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 656, __extension__ __PRETTY_FUNCTION__)); |
657 | Contents.ImmVal = immVal; |
658 | } |
659 | |
660 | void setCImm(const ConstantInt *CI) { |
661 | assert(isCImm() && "Wrong MachineOperand mutator")(static_cast <bool> (isCImm() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isCImm() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 661, __extension__ __PRETTY_FUNCTION__)); |
662 | Contents.CI = CI; |
663 | } |
664 | |
665 | void setFPImm(const ConstantFP *CFP) { |
666 | assert(isFPImm() && "Wrong MachineOperand mutator")(static_cast <bool> (isFPImm() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isFPImm() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 666, __extension__ __PRETTY_FUNCTION__)); |
667 | Contents.CFP = CFP; |
668 | } |
669 | |
670 | void setOffset(int64_t Offset) { |
671 | assert((isGlobal() || isSymbol() || isMCSymbol() || isCPI() ||(static_cast <bool> ((isGlobal() || isSymbol() || isMCSymbol () || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "(isGlobal() || isSymbol() || isMCSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 673, __extension__ __PRETTY_FUNCTION__)) |
672 | isTargetIndex() || isBlockAddress()) &&(static_cast <bool> ((isGlobal() || isSymbol() || isMCSymbol () || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "(isGlobal() || isSymbol() || isMCSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 673, __extension__ __PRETTY_FUNCTION__)) |
673 | "Wrong MachineOperand mutator")(static_cast <bool> ((isGlobal() || isSymbol() || isMCSymbol () || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ( "(isGlobal() || isSymbol() || isMCSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 673, __extension__ __PRETTY_FUNCTION__)); |
674 | SmallContents.OffsetLo = unsigned(Offset); |
675 | Contents.OffsetedInfo.OffsetHi = int(Offset >> 32); |
676 | } |
677 | |
678 | void setIndex(int Idx) { |
679 | assert((isFI() || isCPI() || isTargetIndex() || isJTI()) &&(static_cast <bool> ((isFI() || isCPI() || isTargetIndex () || isJTI()) && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ("(isFI() || isCPI() || isTargetIndex() || isJTI()) && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 680, __extension__ __PRETTY_FUNCTION__)) |
680 | "Wrong MachineOperand mutator")(static_cast <bool> ((isFI() || isCPI() || isTargetIndex () || isJTI()) && "Wrong MachineOperand mutator") ? void (0) : __assert_fail ("(isFI() || isCPI() || isTargetIndex() || isJTI()) && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 680, __extension__ __PRETTY_FUNCTION__)); |
681 | Contents.OffsetedInfo.Val.Index = Idx; |
682 | } |
683 | |
684 | void setMetadata(const MDNode *MD) { |
685 | assert(isMetadata() && "Wrong MachineOperand mutator")(static_cast <bool> (isMetadata() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isMetadata() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 685, __extension__ __PRETTY_FUNCTION__)); |
686 | Contents.MD = MD; |
687 | } |
688 | |
689 | void setMBB(MachineBasicBlock *MBB) { |
690 | assert(isMBB() && "Wrong MachineOperand mutator")(static_cast <bool> (isMBB() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isMBB() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 690, __extension__ __PRETTY_FUNCTION__)); |
691 | Contents.MBB = MBB; |
692 | } |
693 | |
694 | /// Sets value of register mask operand referencing Mask. The |
695 | /// operand does not take ownership of the memory referenced by Mask, it must |
696 | /// remain valid for the lifetime of the operand. See CreateRegMask(). |
697 | /// Any physreg with a 0 bit in the mask is clobbered by the instruction. |
698 | void setRegMask(const uint32_t *RegMaskPtr) { |
699 | assert(isRegMask() && "Wrong MachineOperand mutator")(static_cast <bool> (isRegMask() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isRegMask() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 699, __extension__ __PRETTY_FUNCTION__)); |
700 | Contents.RegMask = RegMaskPtr; |
701 | } |
702 | |
703 | void setIntrinsicID(Intrinsic::ID IID) { |
704 | assert(isIntrinsicID() && "Wrong MachineOperand mutator")(static_cast <bool> (isIntrinsicID() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isIntrinsicID() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 704, __extension__ __PRETTY_FUNCTION__)); |
705 | Contents.IntrinsicID = IID; |
706 | } |
707 | |
708 | void setPredicate(unsigned Predicate) { |
709 | assert(isPredicate() && "Wrong MachineOperand mutator")(static_cast <bool> (isPredicate() && "Wrong MachineOperand mutator" ) ? void (0) : __assert_fail ("isPredicate() && \"Wrong MachineOperand mutator\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 709, __extension__ __PRETTY_FUNCTION__)); |
710 | Contents.Pred = Predicate; |
711 | } |
712 | |
713 | //===--------------------------------------------------------------------===// |
714 | // Other methods. |
715 | //===--------------------------------------------------------------------===// |
716 | |
717 | /// Returns true if this operand is identical to the specified operand except |
718 | /// for liveness related flags (isKill, isUndef and isDead). Note that this |
719 | /// should stay in sync with the hash_value overload below. |
720 | bool isIdenticalTo(const MachineOperand &Other) const; |
721 | |
722 | /// MachineOperand hash_value overload. |
723 | /// |
724 | /// Note that this includes the same information in the hash that |
725 | /// isIdenticalTo uses for comparison. It is thus suited for use in hash |
726 | /// tables which use that function for equality comparisons only. This must |
727 | /// stay exactly in sync with isIdenticalTo above. |
728 | friend hash_code hash_value(const MachineOperand &MO); |
729 | |
730 | /// ChangeToImmediate - Replace this operand with a new immediate operand of |
731 | /// the specified value. If an operand is known to be an immediate already, |
732 | /// the setImm method should be used. |
733 | void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags = 0); |
734 | |
735 | /// ChangeToFPImmediate - Replace this operand with a new FP immediate operand |
736 | /// of the specified value. If an operand is known to be an FP immediate |
737 | /// already, the setFPImm method should be used. |
738 | void ChangeToFPImmediate(const ConstantFP *FPImm, unsigned TargetFlags = 0); |
739 | |
740 | /// ChangeToES - Replace this operand with a new external symbol operand. |
741 | void ChangeToES(const char *SymName, unsigned TargetFlags = 0); |
742 | |
743 | /// ChangeToGA - Replace this operand with a new global address operand. |
744 | void ChangeToGA(const GlobalValue *GV, int64_t Offset, |
745 | unsigned TargetFlags = 0); |
746 | |
747 | /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. |
748 | void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags = 0); |
749 | |
750 | /// Replace this operand with a frame index. |
751 | void ChangeToFrameIndex(int Idx, unsigned TargetFlags = 0); |
752 | |
753 | /// Replace this operand with a target index. |
754 | void ChangeToTargetIndex(unsigned Idx, int64_t Offset, |
755 | unsigned TargetFlags = 0); |
756 | |
757 | /// ChangeToRegister - Replace this operand with a new register operand of |
758 | /// the specified value. If an operand is known to be an register already, |
759 | /// the setReg method should be used. |
760 | void ChangeToRegister(Register Reg, bool isDef, bool isImp = false, |
761 | bool isKill = false, bool isDead = false, |
762 | bool isUndef = false, bool isDebug = false); |
763 | |
764 | /// getTargetIndexName - If this MachineOperand is a TargetIndex that has a |
765 | /// name, attempt to get the name. Returns nullptr if the TargetIndex does not |
766 | /// have a name. Asserts if MO is not a TargetIndex. |
767 | const char *getTargetIndexName() const; |
768 | |
769 | //===--------------------------------------------------------------------===// |
770 | // Construction methods. |
771 | //===--------------------------------------------------------------------===// |
772 | |
773 | static MachineOperand CreateImm(int64_t Val) { |
774 | MachineOperand Op(MachineOperand::MO_Immediate); |
775 | Op.setImm(Val); |
776 | return Op; |
777 | } |
778 | |
779 | static MachineOperand CreateCImm(const ConstantInt *CI) { |
780 | MachineOperand Op(MachineOperand::MO_CImmediate); |
781 | Op.Contents.CI = CI; |
782 | return Op; |
783 | } |
784 | |
785 | static MachineOperand CreateFPImm(const ConstantFP *CFP) { |
786 | MachineOperand Op(MachineOperand::MO_FPImmediate); |
787 | Op.Contents.CFP = CFP; |
788 | return Op; |
789 | } |
790 | |
791 | static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp = false, |
792 | bool isKill = false, bool isDead = false, |
793 | bool isUndef = false, |
794 | bool isEarlyClobber = false, |
795 | unsigned SubReg = 0, bool isDebug = false, |
796 | bool isInternalRead = false, |
797 | bool isRenamable = false) { |
798 | assert(!(isDead && !isDef) && "Dead flag on non-def")(static_cast <bool> (!(isDead && !isDef) && "Dead flag on non-def") ? void (0) : __assert_fail ("!(isDead && !isDef) && \"Dead flag on non-def\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 798, __extension__ __PRETTY_FUNCTION__)); |
799 | assert(!(isKill && isDef) && "Kill flag on def")(static_cast <bool> (!(isKill && isDef) && "Kill flag on def") ? void (0) : __assert_fail ("!(isKill && isDef) && \"Kill flag on def\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 799, __extension__ __PRETTY_FUNCTION__)); |
800 | MachineOperand Op(MachineOperand::MO_Register); |
801 | Op.IsDef = isDef; |
802 | Op.IsImp = isImp; |
803 | Op.IsDeadOrKill = isKill | isDead; |
804 | Op.IsRenamable = isRenamable; |
805 | Op.IsUndef = isUndef; |
806 | Op.IsInternalRead = isInternalRead; |
807 | Op.IsEarlyClobber = isEarlyClobber; |
808 | Op.TiedTo = 0; |
809 | Op.IsDebug = isDebug; |
810 | Op.SmallContents.RegNo = Reg; |
811 | Op.Contents.Reg.Prev = nullptr; |
812 | Op.Contents.Reg.Next = nullptr; |
813 | Op.setSubReg(SubReg); |
814 | return Op; |
815 | } |
816 | static MachineOperand CreateMBB(MachineBasicBlock *MBB, |
817 | unsigned TargetFlags = 0) { |
818 | MachineOperand Op(MachineOperand::MO_MachineBasicBlock); |
819 | Op.setMBB(MBB); |
820 | Op.setTargetFlags(TargetFlags); |
821 | return Op; |
822 | } |
823 | static MachineOperand CreateFI(int Idx) { |
824 | MachineOperand Op(MachineOperand::MO_FrameIndex); |
825 | Op.setIndex(Idx); |
826 | return Op; |
827 | } |
828 | static MachineOperand CreateCPI(unsigned Idx, int Offset, |
829 | unsigned TargetFlags = 0) { |
830 | MachineOperand Op(MachineOperand::MO_ConstantPoolIndex); |
831 | Op.setIndex(Idx); |
832 | Op.setOffset(Offset); |
833 | Op.setTargetFlags(TargetFlags); |
834 | return Op; |
835 | } |
836 | static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset, |
837 | unsigned TargetFlags = 0) { |
838 | MachineOperand Op(MachineOperand::MO_TargetIndex); |
839 | Op.setIndex(Idx); |
840 | Op.setOffset(Offset); |
841 | Op.setTargetFlags(TargetFlags); |
842 | return Op; |
843 | } |
844 | static MachineOperand CreateJTI(unsigned Idx, unsigned TargetFlags = 0) { |
845 | MachineOperand Op(MachineOperand::MO_JumpTableIndex); |
846 | Op.setIndex(Idx); |
847 | Op.setTargetFlags(TargetFlags); |
848 | return Op; |
849 | } |
850 | static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, |
851 | unsigned TargetFlags = 0) { |
852 | MachineOperand Op(MachineOperand::MO_GlobalAddress); |
853 | Op.Contents.OffsetedInfo.Val.GV = GV; |
854 | Op.setOffset(Offset); |
855 | Op.setTargetFlags(TargetFlags); |
856 | return Op; |
857 | } |
858 | static MachineOperand CreateES(const char *SymName, |
859 | unsigned TargetFlags = 0) { |
860 | MachineOperand Op(MachineOperand::MO_ExternalSymbol); |
861 | Op.Contents.OffsetedInfo.Val.SymbolName = SymName; |
862 | Op.setOffset(0); // Offset is always 0. |
863 | Op.setTargetFlags(TargetFlags); |
864 | return Op; |
865 | } |
866 | static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, |
867 | unsigned TargetFlags = 0) { |
868 | MachineOperand Op(MachineOperand::MO_BlockAddress); |
869 | Op.Contents.OffsetedInfo.Val.BA = BA; |
870 | Op.setOffset(Offset); |
871 | Op.setTargetFlags(TargetFlags); |
872 | return Op; |
873 | } |
874 | /// CreateRegMask - Creates a register mask operand referencing Mask. The |
875 | /// operand does not take ownership of the memory referenced by Mask, it |
876 | /// must remain valid for the lifetime of the operand. |
877 | /// |
878 | /// A RegMask operand represents a set of non-clobbered physical registers |
879 | /// on an instruction that clobbers many registers, typically a call. The |
880 | /// bit mask has a bit set for each physreg that is preserved by this |
881 | /// instruction, as described in the documentation for |
882 | /// TargetRegisterInfo::getCallPreservedMask(). |
883 | /// |
884 | /// Any physreg with a 0 bit in the mask is clobbered by the instruction. |
885 | /// |
886 | static MachineOperand CreateRegMask(const uint32_t *Mask) { |
887 | assert(Mask && "Missing register mask")(static_cast <bool> (Mask && "Missing register mask" ) ? void (0) : __assert_fail ("Mask && \"Missing register mask\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 887, __extension__ __PRETTY_FUNCTION__)); |
888 | MachineOperand Op(MachineOperand::MO_RegisterMask); |
889 | Op.Contents.RegMask = Mask; |
890 | return Op; |
891 | } |
892 | static MachineOperand CreateRegLiveOut(const uint32_t *Mask) { |
893 | assert(Mask && "Missing live-out register mask")(static_cast <bool> (Mask && "Missing live-out register mask" ) ? void (0) : __assert_fail ("Mask && \"Missing live-out register mask\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 893, __extension__ __PRETTY_FUNCTION__)); |
894 | MachineOperand Op(MachineOperand::MO_RegisterLiveOut); |
895 | Op.Contents.RegMask = Mask; |
896 | return Op; |
897 | } |
898 | static MachineOperand CreateMetadata(const MDNode *Meta) { |
899 | MachineOperand Op(MachineOperand::MO_Metadata); |
900 | Op.Contents.MD = Meta; |
901 | return Op; |
902 | } |
903 | |
904 | static MachineOperand CreateMCSymbol(MCSymbol *Sym, |
905 | unsigned TargetFlags = 0) { |
906 | MachineOperand Op(MachineOperand::MO_MCSymbol); |
907 | Op.Contents.Sym = Sym; |
908 | Op.setOffset(0); |
909 | Op.setTargetFlags(TargetFlags); |
910 | return Op; |
911 | } |
912 | |
913 | static MachineOperand CreateCFIIndex(unsigned CFIIndex) { |
914 | MachineOperand Op(MachineOperand::MO_CFIIndex); |
915 | Op.Contents.CFIIndex = CFIIndex; |
916 | return Op; |
917 | } |
918 | |
919 | static MachineOperand CreateIntrinsicID(Intrinsic::ID ID) { |
920 | MachineOperand Op(MachineOperand::MO_IntrinsicID); |
921 | Op.Contents.IntrinsicID = ID; |
922 | return Op; |
923 | } |
924 | |
925 | static MachineOperand CreatePredicate(unsigned Pred) { |
926 | MachineOperand Op(MachineOperand::MO_Predicate); |
927 | Op.Contents.Pred = Pred; |
928 | return Op; |
929 | } |
930 | |
931 | static MachineOperand CreateShuffleMask(ArrayRef<int> Mask) { |
932 | MachineOperand Op(MachineOperand::MO_ShuffleMask); |
933 | Op.Contents.ShuffleMask = Mask; |
934 | return Op; |
935 | } |
936 | |
937 | friend class MachineInstr; |
938 | friend class MachineRegisterInfo; |
939 | |
940 | private: |
941 | // If this operand is currently a register operand, and if this is in a |
942 | // function, deregister the operand from the register's use/def list. |
943 | void removeRegFromUses(); |
944 | |
945 | /// Artificial kinds for DenseMap usage. |
946 | enum : unsigned char { |
947 | MO_Empty = MO_Last + 1, |
948 | MO_Tombstone, |
949 | }; |
950 | |
951 | friend struct DenseMapInfo<MachineOperand>; |
952 | |
953 | //===--------------------------------------------------------------------===// |
954 | // Methods for handling register use/def lists. |
955 | //===--------------------------------------------------------------------===// |
956 | |
957 | /// isOnRegUseList - Return true if this operand is on a register use/def |
958 | /// list or false if not. This can only be called for register operands |
959 | /// that are part of a machine instruction. |
960 | bool isOnRegUseList() const { |
961 | assert(isReg() && "Can only add reg operand to use lists")(static_cast <bool> (isReg() && "Can only add reg operand to use lists" ) ? void (0) : __assert_fail ("isReg() && \"Can only add reg operand to use lists\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/MachineOperand.h" , 961, __extension__ __PRETTY_FUNCTION__)); |
962 | return Contents.Reg.Prev != nullptr; |
963 | } |
964 | }; |
965 | |
966 | template <> struct DenseMapInfo<MachineOperand> { |
967 | static MachineOperand getEmptyKey() { |
968 | return MachineOperand(static_cast<MachineOperand::MachineOperandType>( |
969 | MachineOperand::MO_Empty)); |
970 | } |
971 | static MachineOperand getTombstoneKey() { |
972 | return MachineOperand(static_cast<MachineOperand::MachineOperandType>( |
973 | MachineOperand::MO_Tombstone)); |
974 | } |
975 | static unsigned getHashValue(const MachineOperand &MO) { |
976 | return hash_value(MO); |
977 | } |
978 | static bool isEqual(const MachineOperand &LHS, const MachineOperand &RHS) { |
979 | if (LHS.getType() == static_cast<MachineOperand::MachineOperandType>( |
980 | MachineOperand::MO_Empty) || |
981 | LHS.getType() == static_cast<MachineOperand::MachineOperandType>( |
982 | MachineOperand::MO_Tombstone)) |
983 | return LHS.getType() == RHS.getType(); |
984 | return LHS.isIdenticalTo(RHS); |
985 | } |
986 | }; |
987 | |
988 | inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand &MO) { |
989 | MO.print(OS); |
990 | return OS; |
991 | } |
992 | |
993 | // See friend declaration above. This additional declaration is required in |
994 | // order to compile LLVM with IBM xlC compiler. |
995 | hash_code hash_value(const MachineOperand &MO); |
996 | } // namespace llvm |
997 | |
998 | #endif |
1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains some functions that are useful for math stuff. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H |
14 | #define LLVM_SUPPORT_MATHEXTRAS_H |
15 | |
16 | #include "llvm/Support/Compiler.h" |
17 | #include <cassert> |
18 | #include <climits> |
19 | #include <cmath> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <type_traits> |
24 | |
25 | #ifdef __ANDROID_NDK__ |
26 | #include <android/api-level.h> |
27 | #endif |
28 | |
29 | #ifdef _MSC_VER |
30 | // Declare these intrinsics manually rather including intrin.h. It's very |
31 | // expensive, and MathExtras.h is popular. |
32 | // #include <intrin.h> |
33 | extern "C" { |
34 | unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); |
35 | unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); |
36 | unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); |
37 | unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); |
38 | } |
39 | #endif |
40 | |
41 | namespace llvm { |
42 | |
/// The behavior an operation has on an input of 0.
/// Passed to the bit-scan helpers below (countTrailingZeros,
/// countLeadingZeros, findFirstSet, findLastSet) to select what a zero
/// input yields; each helper documents which members it accepts.
enum ZeroBehavior {
  /// The returned value is undefined.
  ZB_Undefined,
  /// The returned value is numeric_limits<T>::max()
  ZB_Max,
  /// The returned value is numeric_limits<T>::digits
  ZB_Width
};
52 | |
/// Mathematical constants.
namespace numbers {
// TODO: Track C++20 std::numbers.
// TODO: Favor using the hexadecimal FP constants (requires C++17).
constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
log2ef = 1.44269504F, // (0x1.715476P+0)
log10ef = .434294482F, // (0x1.bcb7b2P-2)
pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
} // namespace numbers
88 | |
89 | namespace detail { |
90 | template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { |
91 | static unsigned count(T Val, ZeroBehavior) { |
92 | if (!Val) |
93 | return std::numeric_limits<T>::digits; |
94 | if (Val & 0x1) |
95 | return 0; |
96 | |
97 | // Bisection method. |
98 | unsigned ZeroBits = 0; |
99 | T Shift = std::numeric_limits<T>::digits >> 1; |
100 | T Mask = std::numeric_limits<T>::max() >> Shift; |
101 | while (Shift) { |
102 | if ((Val & Mask) == 0) { |
103 | Val >>= Shift; |
104 | ZeroBits |= Shift; |
105 | } |
106 | Shift >>= 1; |
107 | Mask >>= Shift; |
108 | } |
109 | return ZeroBits; |
110 | } |
111 | }; |
112 | |
113 | #if defined(__GNUC__4) || defined(_MSC_VER) |
114 | template <typename T> struct TrailingZerosCounter<T, 4> { |
115 | static unsigned count(T Val, ZeroBehavior ZB) { |
116 | if (ZB != ZB_Undefined && Val == 0) |
117 | return 32; |
118 | |
119 | #if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4) |
120 | return __builtin_ctz(Val); |
121 | #elif defined(_MSC_VER) |
122 | unsigned long Index; |
123 | _BitScanForward(&Index, Val); |
124 | return Index; |
125 | #endif |
126 | } |
127 | }; |
128 | |
129 | #if !defined(_MSC_VER) || defined(_M_X64) |
130 | template <typename T> struct TrailingZerosCounter<T, 8> { |
131 | static unsigned count(T Val, ZeroBehavior ZB) { |
132 | if (ZB != ZB_Undefined && Val == 0) |
133 | return 64; |
134 | |
135 | #if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4) |
136 | return __builtin_ctzll(Val); |
137 | #elif defined(_MSC_VER) |
138 | unsigned long Index; |
139 | _BitScanForward64(&Index, Val); |
140 | return Index; |
141 | #endif |
142 | } |
143 | }; |
144 | #endif |
145 | #endif |
146 | } // namespace detail |
147 | |
148 | /// Count number of 0's from the least significant bit to the most |
149 | /// stopping at the first 1. |
150 | /// |
151 | /// Only unsigned integral types are allowed. |
152 | /// |
153 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
154 | /// valid arguments. |
155 | template <typename T> |
156 | unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
157 | static_assert(std::numeric_limits<T>::is_integer && |
158 | !std::numeric_limits<T>::is_signed, |
159 | "Only unsigned integral types are allowed."); |
160 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
161 | } |
162 | |
163 | namespace detail { |
164 | template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { |
165 | static unsigned count(T Val, ZeroBehavior) { |
166 | if (!Val) |
167 | return std::numeric_limits<T>::digits; |
168 | |
169 | // Bisection method. |
170 | unsigned ZeroBits = 0; |
171 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { |
172 | T Tmp = Val >> Shift; |
173 | if (Tmp) |
174 | Val = Tmp; |
175 | else |
176 | ZeroBits |= Shift; |
177 | } |
178 | return ZeroBits; |
179 | } |
180 | }; |
181 | |
182 | #if defined(__GNUC__4) || defined(_MSC_VER) |
183 | template <typename T> struct LeadingZerosCounter<T, 4> { |
184 | static unsigned count(T Val, ZeroBehavior ZB) { |
185 | if (ZB != ZB_Undefined && Val == 0) |
186 | return 32; |
187 | |
188 | #if __has_builtin(__builtin_clz)1 || defined(__GNUC__4) |
189 | return __builtin_clz(Val); |
190 | #elif defined(_MSC_VER) |
191 | unsigned long Index; |
192 | _BitScanReverse(&Index, Val); |
193 | return Index ^ 31; |
194 | #endif |
195 | } |
196 | }; |
197 | |
198 | #if !defined(_MSC_VER) || defined(_M_X64) |
199 | template <typename T> struct LeadingZerosCounter<T, 8> { |
200 | static unsigned count(T Val, ZeroBehavior ZB) { |
201 | if (ZB != ZB_Undefined && Val == 0) |
202 | return 64; |
203 | |
204 | #if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4) |
205 | return __builtin_clzll(Val); |
206 | #elif defined(_MSC_VER) |
207 | unsigned long Index; |
208 | _BitScanReverse64(&Index, Val); |
209 | return Index ^ 63; |
210 | #endif |
211 | } |
212 | }; |
213 | #endif |
214 | #endif |
215 | } // namespace detail |
216 | |
217 | /// Count number of 0's from the most significant bit to the least |
218 | /// stopping at the first 1. |
219 | /// |
220 | /// Only unsigned integral types are allowed. |
221 | /// |
222 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
223 | /// valid arguments. |
224 | template <typename T> |
225 | unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
226 | static_assert(std::numeric_limits<T>::is_integer && |
227 | !std::numeric_limits<T>::is_signed, |
228 | "Only unsigned integral types are allowed."); |
229 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
230 | } |
231 | |
232 | /// Get the index of the first set bit starting from the least |
233 | /// significant bit. |
234 | /// |
235 | /// Only unsigned integral types are allowed. |
236 | /// |
237 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
238 | /// valid arguments. |
239 | template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { |
240 | if (ZB == ZB_Max && Val == 0) |
241 | return std::numeric_limits<T>::max(); |
242 | |
243 | return countTrailingZeros(Val, ZB_Undefined); |
244 | } |
245 | |
/// Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0.  Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned<T>::value, "Invalid type!");
  // Restored CHAR_BIT and the plain assert; the dump fused macro values onto
  // the tokens ("CHAR_BIT8") and pasted the assert's expansion inline.
  const unsigned Bits = CHAR_BIT * sizeof(T);
  assert(N <= Bits && "Invalid bit index");
  // N == 0 is special-cased because shifting a value by its full width is UB.
  return N == 0 ? 0 : (T(-1) >> (Bits - N));
}

/// Create a bitmask with the N left-most bits set to 1, and all other
/// bits set to 0.  Only unsigned types are allowed.
template <typename T> T maskLeadingOnes(unsigned N) {
  return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}

/// Create a bitmask with the N right-most bits set to 0, and all other
/// bits set to 1.  Only unsigned types are allowed.
template <typename T> T maskTrailingZeros(unsigned N) {
  return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
}

/// Create a bitmask with the N left-most bits set to 0, and all other
/// bits set to 1.  Only unsigned types are allowed.
template <typename T> T maskLeadingZeros(unsigned N) {
  return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}
272 | |
273 | /// Get the index of the last set bit starting from the least |
274 | /// significant bit. |
275 | /// |
276 | /// Only unsigned integral types are allowed. |
277 | /// |
278 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
279 | /// valid arguments. |
280 | template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { |
281 | if (ZB == ZB_Max && Val == 0) |
282 | return std::numeric_limits<T>::max(); |
283 | |
284 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ |
285 | // in the __builtin_clz intrinsic on x86. |
286 | return countLeadingZeros(Val, ZB_Undefined) ^ |
287 | (std::numeric_limits<T>::digits - 1); |
288 | } |
289 | |
/// Macro compressed bit reversal table for 256 bits.
///
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
  R6(0), R6(2), R6(1), R6(3)
#undef R2
#undef R4
#undef R6
};

/// Reverse the bits in \p Val.
/// Each byte is reversed through the lookup table and the byte order is
/// flipped, which together reverse the whole value.
template <typename T>
T reverseBits(T Val) {
  unsigned char Bytes[sizeof(Val)];
  unsigned char Reversed[sizeof(Val)];
  std::memcpy(Bytes, &Val, sizeof(Val));
  for (unsigned I = 0; I < sizeof(Val); ++I)
    Reversed[sizeof(Val) - 1 - I] = BitReverseTable256[Bytes[I]];
  std::memcpy(&Val, Reversed, sizeof(Val));
  return Val;
}
314 | |
// Fast paths: use the compiler's bit-reverse intrinsics when available.
// Restored the __has_builtin conditions; the dump had their evaluated value
// fused onto the closing parenthesis ("...)1").
#if __has_builtin(__builtin_bitreverse8)
template<>
inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
  return __builtin_bitreverse8(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse16)
template<>
inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
  return __builtin_bitreverse16(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse32)
template<>
inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
  return __builtin_bitreverse32(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse64)
template<>
inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
  return __builtin_bitreverse64(Val);
}
#endif
342 | |
343 | // NOTE: The following support functions use the _32/_64 extensions instead of |
344 | // type overloading so that signed and unsigned integers can be used without |
345 | // ambiguity. |
346 | |
/// Return the high 32 bits of a 64 bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  return static_cast<uint32_t>(Value >> 32);
}

/// Return the low 32 bits of a 64 bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  // Truncation keeps exactly the low 32 bits.
  return static_cast<uint32_t>(Value);
}

/// Make a 64-bit integer from a high / low pair of 32-bit integers.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
  return (static_cast<uint64_t>(High) << 32) | static_cast<uint64_t>(Low);
}
361 | |
362 | /// Checks if an integer fits into the given bit width. |
363 | template <unsigned N> constexpr inline bool isInt(int64_t x) { |
364 | return N >= 64 || (-(INT64_C(1)1L<<(N-1)) <= x && x < (INT64_C(1)1L<<(N-1))); |
365 | } |
366 | // Template specializations to get better code for common cases. |
367 | template <> constexpr inline bool isInt<8>(int64_t x) { |
368 | return static_cast<int8_t>(x) == x; |
369 | } |
370 | template <> constexpr inline bool isInt<16>(int64_t x) { |
371 | return static_cast<int16_t>(x) == x; |
372 | } |
373 | template <> constexpr inline bool isInt<32>(int64_t x) { |
374 | return static_cast<int32_t>(x) == x; |
375 | } |
376 | |
377 | /// Checks if a signed integer is an N bit number shifted left by S. |
378 | template <unsigned N, unsigned S> |
379 | constexpr inline bool isShiftedInt(int64_t x) { |
380 | static_assert( |
381 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); |
382 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); |
383 | return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); |
384 | } |
385 | |
386 | /// Checks if an unsigned integer fits into the given bit width. |
387 | /// |
388 | /// This is written as two functions rather than as simply |
389 | /// |
390 | /// return N >= 64 || X < (UINT64_C(1) << N); |
391 | /// |
392 | /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting |
393 | /// left too many places. |
394 | template <unsigned N> |
395 | constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) { |
396 | static_assert(N > 0, "isUInt<0> doesn't make sense"); |
397 | return X < (UINT64_C(1)1UL << (N)); |
398 | } |
399 | template <unsigned N> |
400 | constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) { |
401 | return true; |
402 | } |
403 | |
404 | // Template specializations to get better code for common cases. |
405 | template <> constexpr inline bool isUInt<8>(uint64_t x) { |
406 | return static_cast<uint8_t>(x) == x; |
407 | } |
408 | template <> constexpr inline bool isUInt<16>(uint64_t x) { |
409 | return static_cast<uint16_t>(x) == x; |
410 | } |
411 | template <> constexpr inline bool isUInt<32>(uint64_t x) { |
412 | return static_cast<uint32_t>(x) == x; |
413 | } |
414 | |
415 | /// Checks if a unsigned integer is an N bit number shifted left by S. |
416 | template <unsigned N, unsigned S> |
417 | constexpr inline bool isShiftedUInt(uint64_t x) { |
418 | static_assert( |
419 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); |
420 | static_assert(N + S <= 64, |
421 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); |
422 | // Per the two static_asserts above, S must be strictly less than 64. So |
423 | // 1 << S is not undefined behavior. |
424 | return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); |
425 | } |
426 | |
/// Gets the maximum value for a N-bit unsigned integer.
/// Restored the plain asserts and the UINT64_MAX / UINT64_C spellings; the
/// dump pasted the macro expansions inline.
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // uint64_t(1) << 64 is undefined behavior, so we can't do
  //   (uint64_t(1) << N) - 1
  // without checking first that N != 64. But this works and doesn't have a
  // branch.
  return UINT64_MAX >> (64 - N);
}

/// Gets the minimum value for a N-bit signed integer.
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // 1 + ~x == -x in two's complement; stated this way to stay in unsigned
  // arithmetic (no signed-overflow UB) until the final conversion.
  return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
}

/// Gets the maximum value for a N-bit signed integer.
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // This relies on two's complement wraparound when N == 64, so we convert to
  // int64_t only at the very end to avoid UB.
  return (UINT64_C(1) << (N - 1)) - 1;
}

/// Checks if an unsigned integer fits into the given (dynamic) bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
  return N >= 64 || x <= maxUIntN(N);
}

/// Checks if an signed integer fits into the given (dynamic) bit width.
inline bool isIntN(unsigned N, int64_t x) {
  return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
}
463 | |
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (32 bit version).
/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // A mask plus one is a power of two, so it shares no bits with the mask.
  return Value != 0 && ((Value & (Value + 1)) == 0);
}

/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  return Value != 0 && ((Value & (Value + 1)) == 0);
}

/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
constexpr inline bool isShiftedMask_32(uint32_t Value) {
  // Filling in the trailing zeros of a shifted mask yields a plain mask.
  return Value != 0 && isMask_32((Value - 1) | Value);
}

/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (64 bit version.)
constexpr inline bool isShiftedMask_64(uint64_t Value) {
  return Value != 0 && isMask_64((Value - 1) | Value);
}

/// Return true if the argument is a power of two > 0.
/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // A power of two has exactly one set bit, so clearing the lowest set bit
  // leaves zero.
  return Value != 0 && ((Value & (Value - 1)) == 0);
}

/// Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  return Value != 0 && ((Value & (Value - 1)) == 0);
}
499 | |
500 | /// Count the number of ones from the most significant bit to the first |
501 | /// zero bit. |
502 | /// |
503 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |
504 | /// Only unsigned integral types are allowed. |
505 | /// |
506 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
507 | /// ZB_Undefined are valid arguments. |
508 | template <typename T> |
509 | unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
510 | static_assert(std::numeric_limits<T>::is_integer && |
511 | !std::numeric_limits<T>::is_signed, |
512 | "Only unsigned integral types are allowed."); |
513 | return countLeadingZeros<T>(~Value, ZB); |
514 | } |
515 | |
516 | /// Count the number of ones from the least significant bit to the first |
517 | /// zero bit. |
518 | /// |
519 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |
520 | /// Only unsigned integral types are allowed. |
521 | /// |
522 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
523 | /// ZB_Undefined are valid arguments. |
524 | template <typename T> |
525 | unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
526 | static_assert(std::numeric_limits<T>::is_integer && |
527 | !std::numeric_limits<T>::is_signed, |
528 | "Only unsigned integral types are allowed."); |
529 | return countTrailingZeros<T>(~Value, ZB); |
530 | } |
531 | |
namespace detail {
/// Popcount dispatcher. Restored the "#if defined(__GNUC__)" conditions that
/// the dump had garbled into "__GNUC__4", so GCC/Clang actually use the
/// builtin instead of silently falling through to the bit-twiddling path.
template <typename T, std::size_t SizeOfT> struct PopulationCounter {
  static unsigned count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__)
    return __builtin_popcount(Value);
#else
    // SWAR popcount: sum bits in 2-, then 4-bit groups, then accumulate the
    // per-byte counts with a multiply.
    uint32_t v = Value;
    v = v - ((v >> 1) & 0x55555555);
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
#endif
  }
};

template <typename T> struct PopulationCounter<T, 8> {
  static unsigned count(T Value) {
#if defined(__GNUC__)
    return __builtin_popcountll(Value);
#else
    uint64_t v = Value;
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
#endif
  }
};
} // namespace detail
562 | |
563 | /// Count the number of set bits in a value. |
564 | /// Ex. countPopulation(0xF000F000) = 8 |
565 | /// Returns 0 if the word is zero. |
566 | template <typename T> |
567 | inline unsigned countPopulation(T Value) { |
568 | static_assert(std::numeric_limits<T>::is_integer && |
569 | !std::numeric_limits<T>::is_signed, |
570 | "Only unsigned integral types are allowed."); |
571 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); |
572 | } |
573 | |
574 | /// Compile time Log2. |
575 | /// Valid only for positive powers of two. |
576 | template <size_t kValue> constexpr inline size_t CTLog2() { |
577 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), |
578 | "Value is not a valid power of 2"); |
579 | return 1 + CTLog2<kValue / 2>(); |
580 | } |
581 | |
582 | template <> constexpr inline size_t CTLog2<1>() { return 0; } |
583 | |
/// Return the log base 2 of the specified value.
inline double Log2(double Value) {
#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
  // Old Android NDKs lack log2; synthesize it from the natural log.
  return __builtin_log(Value) / __builtin_log(2.0);
#else
  return std::log2(Value);
#endif
}
592 | |
593 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
594 | /// (32 bit edition.) |
595 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 |
596 | inline unsigned Log2_32(uint32_t Value) { |
597 | return 31 - countLeadingZeros(Value); |
598 | } |
599 | |
600 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
601 | /// (64 bit edition.) |
602 | inline unsigned Log2_64(uint64_t Value) { |
603 | return 63 - countLeadingZeros(Value); |
604 | } |
605 | |
606 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. |
607 | /// (32 bit edition). |
608 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 |
609 | inline unsigned Log2_32_Ceil(uint32_t Value) { |
610 | return 32 - countLeadingZeros(Value - 1); |
611 | } |
612 | |
613 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. |
614 | /// (64 bit edition.) |
615 | inline unsigned Log2_64_Ceil(uint64_t Value) { |
616 | return 64 - countLeadingZeros(Value - 1); |
617 | } |
618 | |
/// Return the greatest common divisor of the values using Euclid's algorithm.
template <typename T>
inline T greatestCommonDivisor(T A, T B) {
  // Repeatedly replace (A, B) with (B, A mod B) until B reaches zero.
  while (B != 0) {
    T Remainder = A % B;
    A = B;
    B = Remainder;
  }
  return A;
}

/// 64-bit convenience wrapper around greatestCommonDivisor.
inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
  return greatestCommonDivisor<uint64_t>(A, B);
}
633 | |
/// This function takes a 64-bit integer and returns the bit equivalent double.
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  double Result;
  memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}

/// This function takes a 32-bit integer and returns the bit equivalent float.
inline float BitsToFloat(uint32_t Bits) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  float Result;
  memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}

/// This function takes a double and returns the bit equivalent 64-bit integer.
/// Note that copying doubles around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  uint64_t Result;
  memcpy(&Result, &Double, sizeof(Result));
  return Result;
}

/// This function takes a float and returns the bit equivalent 32-bit integer.
/// Note that copying floats around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  uint32_t Result;
  memcpy(&Result, &Float, sizeof(Result));
  return Result;
}
669 | |
/// A and B are either alignments or offsets. Return the minimum alignment that
/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  const uint64_t Combined = A | B;
  // The largest power of 2 dividing both is the lowest set bit of A | B,
  // isolated via x & -x — spelled with 1 + ~x to sidestep MSVC warning C4146
  // about negating an unsigned value.
  return Combined & (1 + ~Combined);
}
680 | |
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position (shifts 1,2,...,32),
  // turning A into a mask; adding one then yields the next power of two.
  for (unsigned Shift = 1; Shift < 64; Shift *= 2)
    A |= (A >> Shift);
  return A + 1;
}
692 | |
693 | /// Returns the power of two which is less than or equal to the given value. |
694 | /// Essentially, it is a floor operation across the domain of powers of two. |
695 | inline uint64_t PowerOf2Floor(uint64_t A) { |
696 | if (!A) return 0; |
697 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); |
698 | } |
699 | |
700 | /// Returns the power of two which is greater than or equal to the given value. |
701 | /// Essentially, it is a ceil operation across the domain of powers of two. |
702 | inline uint64_t PowerOf2Ceil(uint64_t A) { |
703 | if (!A) |
704 | return 0; |
705 | return NextPowerOf2(A - 1); |
706 | } |
707 | |
708 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
709 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. |
710 | /// |
711 | /// If non-zero \p Skew is specified, the return value will be a minimal |
712 | /// integer that is greater than or equal to \p Value and equal to |
713 | /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than |
714 | /// \p Align, its value is adjusted to '\p Skew mod \p Align'. |
715 | /// |
716 | /// Examples: |
717 | /// \code |
718 | /// alignTo(5, 8) = 8 |
719 | /// alignTo(17, 8) = 24 |
720 | /// alignTo(~0LL, 8) = 0 |
721 | /// alignTo(321, 255) = 510 |
722 | /// |
723 | /// alignTo(5, 8, 7) = 7 |
724 | /// alignTo(17, 8, 1) = 17 |
725 | /// alignTo(~0LL, 8, 3) = 3 |
726 | /// alignTo(321, 255, 42) = 552 |
727 | /// \endcode |
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0." ) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 729, __extension__ __PRETTY_FUNCTION__));
  // Reduce Skew into [0, Align) so the result is the minimal value of the
  // form Align * N + Skew that is >= Value (per the doc comment above).
  Skew %= Align;
  // Round (Value - Skew) up to a multiple of Align, then re-apply the skew.
  // All arithmetic is modulo 2^64, e.g. alignTo(~0LL, 8) == 0.
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}
733 | |
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  // Bump Value so that flooring lands on the next multiple, then drop the
  // remainder. Arithmetic is modulo 2^64, matching the runtime alignTo.
  const uint64_t Bumped = Value + Align - 1;
  return Bumped - Bumped % Align;
}
740 | |
741 | /// Returns the integer ceil(Numerator / Denominator). |
742 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { |
743 | return alignTo(Numerator, Denominator) / Denominator; |
744 | } |
745 | |
/// Returns the integer nearest(Numerator / Denominator).
inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
  // Adding half the denominator before truncating division rounds to
  // nearest (halves round up).
  const uint64_t HalfDenominator = Denominator / 2;
  return (Numerator + HalfDenominator) / Denominator;
}
750 | |
751 | /// Returns the largest uint64_t less than or equal to \p Value and is |
752 | /// \p Skew mod \p Align. \p Align must be non-zero |
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0." ) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 754, __extension__ __PRETTY_FUNCTION__));
  // Reduce Skew into [0, Align) so the result is the largest value of the
  // form Align * N + Skew that is <= Value.
  Skew %= Align;
  // Round (Value - Skew) down to a multiple of Align, then re-apply the skew.
  return (Value - Skew) / Align * Align + Skew;
}
758 | |
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Shift the B-bit field to the top of the word, then arithmetic-shift back
  // down so bit B-1 is replicated through the upper bits.
  constexpr unsigned Shift = 32 - B;
  return static_cast<int32_t>(X << Shift) >> Shift;
}
766 | |
767 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
768 | /// Requires 0 < B <= 32. |
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.")(static_cast <bool> (B > 0 && "Bit width can't be 0." ) ? void (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 770, __extension__ __PRETTY_FUNCTION__));
  assert(B <= 32 && "Bit width out of range.")(static_cast <bool> (B <= 32 && "Bit width out of range." ) ? void (0) : __assert_fail ("B <= 32 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 771, __extension__ __PRETTY_FUNCTION__));
  // Shift the field to the top, then arithmetic-shift back so bit B-1 fills
  // the upper bits. If B == 0 the shift count would be 32 (undefined
  // behavior) — the asserts above are the only guard, so callers must ensure
  // 0 < B <= 32 (cf. the reported "shift by 4294967295" analyzer warning
  // pattern in callers that pass a degenerate width).
  return int32_t(X << (32 - B)) >> (32 - B);
}
774 | |
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Shift the B-bit field to the top of the word, then arithmetic-shift back
  // down so bit B-1 is replicated through the upper bits.
  constexpr unsigned Shift = 64 - B;
  return static_cast<int64_t>(x << Shift) >> Shift;
}
782 | |
783 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
784 | /// Requires 0 < B <= 64. |
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.")(static_cast <bool> (B > 0 && "Bit width can't be 0." ) ? void (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 786, __extension__ __PRETTY_FUNCTION__));
  assert(B <= 64 && "Bit width out of range.")(static_cast <bool> (B <= 64 && "Bit width out of range." ) ? void (0) : __assert_fail ("B <= 64 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 787, __extension__ __PRETTY_FUNCTION__));
  // Shift the field to the top, then arithmetic-shift back so bit B-1 fills
  // the upper bits. A shift count of 64 (B == 0) would be undefined behavior;
  // only the asserts above guard against it, so callers must pass 0 < B <= 64.
  return int64_t(X << (64 - B)) >> (64 - B);
}
790 | |
/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
  // Subtract the smaller from the larger so the unsigned result never wraps.
  if (X > Y)
    return X - Y;
  return Y - X;
}
797 | |
/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Wrap-around addition; unsigned overflow is well defined.
  const T Sum = X + Y;
  // The sum wrapped iff it ended up smaller than either operand
  // (Hacker's Delight, p. 29).
  const bool DidOverflow = Sum < X || Sum < Y;
  if (ResultOverflowed)
    *ResultOverflowed = DidOverflow;
  return DidOverflow ? std::numeric_limits<T>::max() : Sum;
}
814 | |
815 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the |
816 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
817 | /// the result is larger than the maximum representable value of type T. |
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Route the overflow flag to the caller's bool when provided, otherwise to
  // a local dummy so the rest of the code can write unconditionally.
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  Overflowed = false;

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  const T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  if (Log2Z < Log2Max) {
    // Product provably fits in T: safe to multiply directly.
    return X * Y;
  }
  if (Log2Z > Log2Max) {
    // Product provably exceeds T: saturate.
    Overflowed = true;
    return Max;
  }

  // Log2Z == Log2Max: the ambiguous case — the product may or may not fit.
  // We're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    // The top bit of Z is set, so Z << 1 below would lose it: saturate.
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  if (X & 1)
    // Re-add the contribution of X's dropped low bit; SaturatingAdd also
    // writes the overflow flag through ResultOverflowed.
    return SaturatingAdd(Z, Y, ResultOverflowed);

  return Z;
}
859 | |
/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
/// the product. Clamp the result to the maximum representable value of T on
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
  // Route the overflow flag to the caller's bool when provided, otherwise to
  // a local so the helpers below always have somewhere to write.
  bool LocalOverflow;
  bool *OverflowOut = ResultOverflowed ? ResultOverflowed : &LocalOverflow;

  const T Product = SaturatingMultiply(X, Y, OverflowOut);
  // If the multiply already saturated, adding A cannot improve the result.
  if (*OverflowOut)
    return Product;

  return SaturatingAdd(A, Product, OverflowOut);
}
876 | |
877 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. |
878 | extern const float huge_valf; |
879 | |
880 | |
881 | /// Add two signed integers, computing the two's complement truncated result, |
/// returning true if overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
// NOTE(review): the trailing `1` after __has_builtin below looks like
// analyzer/preprocessor residue in this rendered view — verify against the
// pristine header before editing.
#if __has_builtin(__builtin_add_overflow)1
  return __builtin_add_overflow(X, Y, &Result);
#else
  // Perform the unsigned addition.
  // Unsigned wrap-around is well defined; the truncated bit pattern is the
  // two's complement sum.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX + UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Adding two positive numbers should result in a positive number.
  if (X > 0 && Y > 0)
    return Result <= 0;
  // Adding two negatives should result in a negative number.
  if (X < 0 && Y < 0)
    return Result >= 0;
  // Mixed signs (or a zero operand) can never overflow.
  return false;
#endif
}
906 | |
907 | /// Subtract two signed integers, computing the two's complement truncated |
/// result, returning true if an overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
// NOTE(review): the trailing `1` after __has_builtin below looks like
// analyzer/preprocessor residue in this rendered view — verify against the
// pristine header before editing.
#if __has_builtin(__builtin_sub_overflow)1
  return __builtin_sub_overflow(X, Y, &Result);
#else
  // Perform the unsigned subtraction.
  // Unsigned wrap-around is well defined; the truncated bit pattern is the
  // two's complement difference.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX - UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Subtracting a positive number from a negative results in a negative number.
  if (X <= 0 && Y > 0)
    return Result >= 0;
  // Subtracting a negative number from a positive results in a positive number.
  if (X >= 0 && Y < 0)
    return Result <= 0;
  // Same-sign operands can never overflow on subtraction.
  return false;
#endif
}
932 | |
933 | /// Multiply two signed integers, computing the two's complement truncated |
/// result, returning true if an overflow occurred.
/// Multiply two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
  // Work on magnitudes; `0 - static_cast<U>(x)` is a well-defined two's
  // complement negation even for the minimum value.
  using U = std::make_unsigned_t<T>;
  const U AbsX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
  const U AbsY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
  const U UnsignedProduct = AbsX * AbsY;

  // Truncated two's complement result, negated back when the signs differ.
  const bool IsNegative = (X < 0) != (Y < 0);
  Result = IsNegative ? (0 - UnsignedProduct) : UnsignedProduct;

  // A zero operand can never overflow.
  if (AbsX == 0 || AbsY == 0)
    return false;

  // AbsX and AbsY are in [1, 2^n], where n is the number of digits.
  // Overflow iff |X| exceeds the maximum allowed magnitude (2^n for a
  // negative result, 2^(n-1) for a positive one) divided by |Y|.
  const U MaxMagnitude = static_cast<U>(std::numeric_limits<T>::max());
  if (IsNegative)
    return AbsX > (MaxMagnitude + U(1)) / AbsY;
  return AbsX > MaxMagnitude / AbsY;
}
959 | |
960 | } // End llvm namespace |
961 | |
962 | #endif |