File: | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp |
Warning: | line 5968, column 5 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | ||||
9 | #include "AMDKernelCodeT.h" | |||
10 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | |||
11 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" | |||
12 | #include "SIDefines.h" | |||
13 | #include "SIInstrInfo.h" | |||
14 | #include "SIRegisterInfo.h" | |||
15 | #include "TargetInfo/AMDGPUTargetInfo.h" | |||
16 | #include "Utils/AMDGPUAsmUtils.h" | |||
17 | #include "Utils/AMDGPUBaseInfo.h" | |||
18 | #include "Utils/AMDKernelCodeTUtils.h" | |||
19 | #include "llvm/ADT/APFloat.h" | |||
20 | #include "llvm/ADT/SmallBitVector.h" | |||
21 | #include "llvm/ADT/StringSet.h" | |||
22 | #include "llvm/ADT/Twine.h" | |||
23 | #include "llvm/MC/MCAsmInfo.h" | |||
24 | #include "llvm/MC/MCContext.h" | |||
25 | #include "llvm/MC/MCExpr.h" | |||
26 | #include "llvm/MC/MCInst.h" | |||
27 | #include "llvm/MC/MCParser/MCAsmParser.h" | |||
28 | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" | |||
29 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" | |||
30 | #include "llvm/MC/MCSymbol.h" | |||
31 | #include "llvm/Support/AMDGPUMetadata.h" | |||
32 | #include "llvm/Support/AMDHSAKernelDescriptor.h" | |||
33 | #include "llvm/Support/Casting.h" | |||
34 | #include "llvm/Support/MachineValueType.h" | |||
35 | #include "llvm/Support/TargetParser.h" | |||
36 | #include "llvm/Support/TargetRegistry.h" | |||
37 | ||||
38 | using namespace llvm; | |||
39 | using namespace llvm::AMDGPU; | |||
40 | using namespace llvm::amdhsa; | |||
41 | ||||
42 | namespace { | |||
43 | ||||
44 | class AMDGPUAsmParser; | |||
45 | ||||
/// Register categories distinguished by the parser. The enumerators carry
/// implicit values, so their order is significant.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
47 | ||||
48 | //===----------------------------------------------------------------------===// | |||
49 | // Operand | |||
50 | //===----------------------------------------------------------------------===// | |||
51 | ||||
52 | class AMDGPUOperand : public MCParsedAsmOperand { | |||
/// Discriminator telling which payload of this operand is meaningful.
enum KindTy { Token, Immediate, Register, Expression };

/// Kind of this operand; fixed by the constructor.
KindTy Kind;
59 | ||||
60 | SMLoc StartLoc, EndLoc; | |||
61 | const AMDGPUAsmParser *AsmParser; | |||
62 | ||||
63 | public: | |||
64 | AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) | |||
65 | : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} | |||
66 | ||||
67 | using Ptr = std::unique_ptr<AMDGPUOperand>; | |||
68 | ||||
69 | struct Modifiers { | |||
70 | bool Abs = false; | |||
71 | bool Neg = false; | |||
72 | bool Sext = false; | |||
73 | ||||
74 | bool hasFPModifiers() const { return Abs || Neg; } | |||
75 | bool hasIntModifiers() const { return Sext; } | |||
76 | bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } | |||
77 | ||||
78 | int64_t getFPModifiersOperand() const { | |||
79 | int64_t Operand = 0; | |||
80 | Operand |= Abs ? SISrcMods::ABS : 0u; | |||
81 | Operand |= Neg ? SISrcMods::NEG : 0u; | |||
82 | return Operand; | |||
83 | } | |||
84 | ||||
85 | int64_t getIntModifiersOperand() const { | |||
86 | int64_t Operand = 0; | |||
87 | Operand |= Sext ? SISrcMods::SEXT : 0u; | |||
88 | return Operand; | |||
89 | } | |||
90 | ||||
91 | int64_t getModifiersOperand() const { | |||
92 | assert(!(hasFPModifiers() && hasIntModifiers())(static_cast<void> (0)) | |||
93 | && "fp and int modifiers should not be used simultaneously")(static_cast<void> (0)); | |||
94 | if (hasFPModifiers()) { | |||
95 | return getFPModifiersOperand(); | |||
96 | } else if (hasIntModifiers()) { | |||
97 | return getIntModifiersOperand(); | |||
98 | } else { | |||
99 | return 0; | |||
100 | } | |||
101 | } | |||
102 | ||||
103 | friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); | |||
104 | }; | |||
105 | ||||
/// Tag recording which named operand an Immediate stands for. ImmTyNone marks
/// a plain immediate; every other value is treated as a modifier (see
/// isImmModifier()). Values are implicit, so the enumerator order matters.
enum ImmTy {
  ImmTyNone,
  ImmTyGDS,
  ImmTyLDS,
  ImmTyOffen,
  ImmTyIdxen,
  ImmTyAddr64,
  ImmTyOffset,
  ImmTyInstOffset,
  ImmTyOffset0,
  ImmTyOffset1,
  ImmTyCPol,
  ImmTySWZ,
  ImmTyTFE,
  ImmTyD16,
  ImmTyClampSI,
  ImmTyOModSI,
  ImmTyDPP8,
  ImmTyDppCtrl,
  ImmTyDppRowMask,
  ImmTyDppBankMask,
  ImmTyDppBoundCtrl,
  ImmTyDppFi,
  ImmTySdwaDstSel,
  ImmTySdwaSrc0Sel,
  ImmTySdwaSrc1Sel,
  ImmTySdwaDstUnused,
  ImmTyDMask,
  ImmTyDim,
  ImmTyUNorm,
  ImmTyDA,
  ImmTyR128A16,
  ImmTyA16,
  ImmTyLWE,
  ImmTyExpTgt,
  ImmTyExpCompr,
  ImmTyExpVM,
  ImmTyFORMAT,
  ImmTyHwreg,
  ImmTyOff,
  ImmTySendMsg,
  ImmTyInterpSlot,
  ImmTyInterpAttr,
  ImmTyAttrChan,
  ImmTyOpSel,
  ImmTyOpSelHi,
  ImmTyNegLo,
  ImmTyNegHi,
  ImmTySwizzle,
  ImmTyGprIdxMode,
  ImmTyHigh,
  ImmTyBLGP,
  ImmTyCBSZ,
  ImmTyABID,
  ImmTyEndpgm,
};
162 | ||||
/// Classification stored in ImmOp::Kind by the setImmKind*() helpers.
enum ImmKindTy {
  ImmKindTyNone,    // Set by setImmKindNone().
  ImmKindTyLiteral, // Set by setImmKindLiteral().
  ImmKindTyConst,   // Set by setImmKindConst().
};
168 | ||||
169 | private: | |||
170 | struct TokOp { | |||
171 | const char *Data; | |||
172 | unsigned Length; | |||
173 | }; | |||
174 | ||||
175 | struct ImmOp { | |||
176 | int64_t Val; | |||
177 | ImmTy Type; | |||
178 | bool IsFPImm; | |||
179 | mutable ImmKindTy Kind; | |||
180 | Modifiers Mods; | |||
181 | }; | |||
182 | ||||
183 | struct RegOp { | |||
184 | unsigned RegNo; | |||
185 | Modifiers Mods; | |||
186 | }; | |||
187 | ||||
188 | union { | |||
189 | TokOp Tok; | |||
190 | ImmOp Imm; | |||
191 | RegOp Reg; | |||
192 | const MCExpr *Expr; | |||
193 | }; | |||
194 | ||||
195 | public: | |||
196 | bool isToken() const override { | |||
197 | if (Kind == Token) | |||
198 | return true; | |||
199 | ||||
200 | // When parsing operands, we can't always tell if something was meant to be | |||
201 | // a token, like 'gds', or an expression that references a global variable. | |||
202 | // In this case, we assume the string is an expression, and if we need to | |||
203 | // interpret is a token, then we treat the symbol name as the token. | |||
204 | return isSymbolRefExpr(); | |||
205 | } | |||
206 | ||||
207 | bool isSymbolRefExpr() const { | |||
208 | return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); | |||
209 | } | |||
210 | ||||
211 | bool isImm() const override { | |||
212 | return Kind == Immediate; | |||
213 | } | |||
214 | ||||
215 | void setImmKindNone() const { | |||
216 | assert(isImm())(static_cast<void> (0)); | |||
217 | Imm.Kind = ImmKindTyNone; | |||
218 | } | |||
219 | ||||
220 | void setImmKindLiteral() const { | |||
221 | assert(isImm())(static_cast<void> (0)); | |||
222 | Imm.Kind = ImmKindTyLiteral; | |||
223 | } | |||
224 | ||||
225 | void setImmKindConst() const { | |||
226 | assert(isImm())(static_cast<void> (0)); | |||
227 | Imm.Kind = ImmKindTyConst; | |||
228 | } | |||
229 | ||||
230 | bool IsImmKindLiteral() const { | |||
231 | return isImm() && Imm.Kind == ImmKindTyLiteral; | |||
232 | } | |||
233 | ||||
234 | bool isImmKindConst() const { | |||
235 | return isImm() && Imm.Kind == ImmKindTyConst; | |||
236 | } | |||
237 | ||||
238 | bool isInlinableImm(MVT type) const; | |||
239 | bool isLiteralImm(MVT type) const; | |||
240 | ||||
241 | bool isRegKind() const { | |||
242 | return Kind == Register; | |||
243 | } | |||
244 | ||||
245 | bool isReg() const override { | |||
246 | return isRegKind() && !hasModifiers(); | |||
247 | } | |||
248 | ||||
249 | bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { | |||
250 | return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); | |||
251 | } | |||
252 | ||||
253 | bool isRegOrImmWithInt16InputMods() const { | |||
254 | return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); | |||
255 | } | |||
256 | ||||
257 | bool isRegOrImmWithInt32InputMods() const { | |||
258 | return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); | |||
259 | } | |||
260 | ||||
261 | bool isRegOrImmWithInt64InputMods() const { | |||
262 | return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); | |||
263 | } | |||
264 | ||||
265 | bool isRegOrImmWithFP16InputMods() const { | |||
266 | return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); | |||
267 | } | |||
268 | ||||
269 | bool isRegOrImmWithFP32InputMods() const { | |||
270 | return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); | |||
271 | } | |||
272 | ||||
273 | bool isRegOrImmWithFP64InputMods() const { | |||
274 | return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); | |||
275 | } | |||
276 | ||||
277 | bool isVReg() const { | |||
278 | return isRegClass(AMDGPU::VGPR_32RegClassID) || | |||
279 | isRegClass(AMDGPU::VReg_64RegClassID) || | |||
280 | isRegClass(AMDGPU::VReg_96RegClassID) || | |||
281 | isRegClass(AMDGPU::VReg_128RegClassID) || | |||
282 | isRegClass(AMDGPU::VReg_160RegClassID) || | |||
283 | isRegClass(AMDGPU::VReg_192RegClassID) || | |||
284 | isRegClass(AMDGPU::VReg_256RegClassID) || | |||
285 | isRegClass(AMDGPU::VReg_512RegClassID) || | |||
286 | isRegClass(AMDGPU::VReg_1024RegClassID); | |||
287 | } | |||
288 | ||||
289 | bool isVReg32() const { | |||
290 | return isRegClass(AMDGPU::VGPR_32RegClassID); | |||
291 | } | |||
292 | ||||
293 | bool isVReg32OrOff() const { | |||
294 | return isOff() || isVReg32(); | |||
295 | } | |||
296 | ||||
297 | bool isNull() const { | |||
298 | return isRegKind() && getReg() == AMDGPU::SGPR_NULL; | |||
299 | } | |||
300 | ||||
301 | bool isVRegWithInputMods() const; | |||
302 | ||||
303 | bool isSDWAOperand(MVT type) const; | |||
304 | bool isSDWAFP16Operand() const; | |||
305 | bool isSDWAFP32Operand() const; | |||
306 | bool isSDWAInt16Operand() const; | |||
307 | bool isSDWAInt32Operand() const; | |||
308 | ||||
309 | bool isImmTy(ImmTy ImmT) const { | |||
310 | return isImm() && Imm.Type == ImmT; | |||
311 | } | |||
312 | ||||
313 | bool isImmModifier() const { | |||
314 | return isImm() && Imm.Type != ImmTyNone; | |||
315 | } | |||
316 | ||||
317 | bool isClampSI() const { return isImmTy(ImmTyClampSI); } | |||
318 | bool isOModSI() const { return isImmTy(ImmTyOModSI); } | |||
319 | bool isDMask() const { return isImmTy(ImmTyDMask); } | |||
320 | bool isDim() const { return isImmTy(ImmTyDim); } | |||
321 | bool isUNorm() const { return isImmTy(ImmTyUNorm); } | |||
322 | bool isDA() const { return isImmTy(ImmTyDA); } | |||
323 | bool isR128A16() const { return isImmTy(ImmTyR128A16); } | |||
324 | bool isGFX10A16() const { return isImmTy(ImmTyA16); } | |||
325 | bool isLWE() const { return isImmTy(ImmTyLWE); } | |||
326 | bool isOff() const { return isImmTy(ImmTyOff); } | |||
327 | bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } | |||
328 | bool isExpVM() const { return isImmTy(ImmTyExpVM); } | |||
329 | bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } | |||
330 | bool isOffen() const { return isImmTy(ImmTyOffen); } | |||
331 | bool isIdxen() const { return isImmTy(ImmTyIdxen); } | |||
332 | bool isAddr64() const { return isImmTy(ImmTyAddr64); } | |||
333 | bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } | |||
334 | bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } | |||
335 | bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } | |||
336 | ||||
337 | bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } | |||
338 | bool isGDS() const { return isImmTy(ImmTyGDS); } | |||
339 | bool isLDS() const { return isImmTy(ImmTyLDS); } | |||
340 | bool isCPol() const { return isImmTy(ImmTyCPol); } | |||
341 | bool isSWZ() const { return isImmTy(ImmTySWZ); } | |||
342 | bool isTFE() const { return isImmTy(ImmTyTFE); } | |||
343 | bool isD16() const { return isImmTy(ImmTyD16); } | |||
344 | bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } | |||
345 | bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } | |||
346 | bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } | |||
347 | bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } | |||
348 | bool isFI() const { return isImmTy(ImmTyDppFi); } | |||
349 | bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } | |||
350 | bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } | |||
351 | bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } | |||
352 | bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } | |||
353 | bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } | |||
354 | bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } | |||
355 | bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } | |||
356 | bool isOpSel() const { return isImmTy(ImmTyOpSel); } | |||
357 | bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } | |||
358 | bool isNegLo() const { return isImmTy(ImmTyNegLo); } | |||
359 | bool isNegHi() const { return isImmTy(ImmTyNegHi); } | |||
360 | bool isHigh() const { return isImmTy(ImmTyHigh); } | |||
361 | ||||
362 | bool isMod() const { | |||
363 | return isClampSI() || isOModSI(); | |||
364 | } | |||
365 | ||||
366 | bool isRegOrImm() const { | |||
367 | return isReg() || isImm(); | |||
368 | } | |||
369 | ||||
370 | bool isRegClass(unsigned RCID) const; | |||
371 | ||||
372 | bool isInlineValue() const; | |||
373 | ||||
374 | bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { | |||
375 | return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); | |||
376 | } | |||
377 | ||||
378 | bool isSCSrcB16() const { | |||
379 | return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); | |||
380 | } | |||
381 | ||||
382 | bool isSCSrcV2B16() const { | |||
383 | return isSCSrcB16(); | |||
384 | } | |||
385 | ||||
386 | bool isSCSrcB32() const { | |||
387 | return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); | |||
388 | } | |||
389 | ||||
390 | bool isSCSrcB64() const { | |||
391 | return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); | |||
392 | } | |||
393 | ||||
394 | bool isBoolReg() const; | |||
395 | ||||
396 | bool isSCSrcF16() const { | |||
397 | return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); | |||
398 | } | |||
399 | ||||
400 | bool isSCSrcV2F16() const { | |||
401 | return isSCSrcF16(); | |||
402 | } | |||
403 | ||||
404 | bool isSCSrcF32() const { | |||
405 | return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); | |||
406 | } | |||
407 | ||||
408 | bool isSCSrcF64() const { | |||
409 | return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); | |||
410 | } | |||
411 | ||||
412 | bool isSSrcB32() const { | |||
413 | return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); | |||
414 | } | |||
415 | ||||
416 | bool isSSrcB16() const { | |||
417 | return isSCSrcB16() || isLiteralImm(MVT::i16); | |||
418 | } | |||
419 | ||||
420 | bool isSSrcV2B16() const { | |||
421 | llvm_unreachable("cannot happen")__builtin_unreachable(); | |||
422 | return isSSrcB16(); | |||
423 | } | |||
424 | ||||
425 | bool isSSrcB64() const { | |||
426 | // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. | |||
427 | // See isVSrc64(). | |||
428 | return isSCSrcB64() || isLiteralImm(MVT::i64); | |||
429 | } | |||
430 | ||||
431 | bool isSSrcF32() const { | |||
432 | return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); | |||
433 | } | |||
434 | ||||
435 | bool isSSrcF64() const { | |||
436 | return isSCSrcB64() || isLiteralImm(MVT::f64); | |||
437 | } | |||
438 | ||||
439 | bool isSSrcF16() const { | |||
440 | return isSCSrcB16() || isLiteralImm(MVT::f16); | |||
441 | } | |||
442 | ||||
443 | bool isSSrcV2F16() const { | |||
444 | llvm_unreachable("cannot happen")__builtin_unreachable(); | |||
445 | return isSSrcF16(); | |||
446 | } | |||
447 | ||||
448 | bool isSSrcV2FP32() const { | |||
449 | llvm_unreachable("cannot happen")__builtin_unreachable(); | |||
450 | return isSSrcF32(); | |||
451 | } | |||
452 | ||||
453 | bool isSCSrcV2FP32() const { | |||
454 | llvm_unreachable("cannot happen")__builtin_unreachable(); | |||
455 | return isSCSrcF32(); | |||
456 | } | |||
457 | ||||
458 | bool isSSrcV2INT32() const { | |||
459 | llvm_unreachable("cannot happen")__builtin_unreachable(); | |||
460 | return isSSrcB32(); | |||
461 | } | |||
462 | ||||
463 | bool isSCSrcV2INT32() const { | |||
464 | llvm_unreachable("cannot happen")__builtin_unreachable(); | |||
465 | return isSCSrcB32(); | |||
466 | } | |||
467 | ||||
468 | bool isSSrcOrLdsB32() const { | |||
469 | return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || | |||
470 | isLiteralImm(MVT::i32) || isExpr(); | |||
471 | } | |||
472 | ||||
473 | bool isVCSrcB32() const { | |||
474 | return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); | |||
475 | } | |||
476 | ||||
477 | bool isVCSrcB64() const { | |||
478 | return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); | |||
479 | } | |||
480 | ||||
481 | bool isVCSrcB16() const { | |||
482 | return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); | |||
483 | } | |||
484 | ||||
485 | bool isVCSrcV2B16() const { | |||
486 | return isVCSrcB16(); | |||
487 | } | |||
488 | ||||
489 | bool isVCSrcF32() const { | |||
490 | return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); | |||
491 | } | |||
492 | ||||
493 | bool isVCSrcF64() const { | |||
494 | return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); | |||
495 | } | |||
496 | ||||
497 | bool isVCSrcF16() const { | |||
498 | return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); | |||
499 | } | |||
500 | ||||
501 | bool isVCSrcV2F16() const { | |||
502 | return isVCSrcF16(); | |||
503 | } | |||
504 | ||||
505 | bool isVSrcB32() const { | |||
506 | return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); | |||
507 | } | |||
508 | ||||
509 | bool isVSrcB64() const { | |||
510 | return isVCSrcF64() || isLiteralImm(MVT::i64); | |||
511 | } | |||
512 | ||||
513 | bool isVSrcB16() const { | |||
514 | return isVCSrcB16() || isLiteralImm(MVT::i16); | |||
515 | } | |||
516 | ||||
517 | bool isVSrcV2B16() const { | |||
518 | return isVSrcB16() || isLiteralImm(MVT::v2i16); | |||
519 | } | |||
520 | ||||
521 | bool isVCSrcV2FP32() const { | |||
522 | return isVCSrcF64(); | |||
523 | } | |||
524 | ||||
525 | bool isVSrcV2FP32() const { | |||
526 | return isVSrcF64() || isLiteralImm(MVT::v2f32); | |||
527 | } | |||
528 | ||||
529 | bool isVCSrcV2INT32() const { | |||
530 | return isVCSrcB64(); | |||
531 | } | |||
532 | ||||
533 | bool isVSrcV2INT32() const { | |||
534 | return isVSrcB64() || isLiteralImm(MVT::v2i32); | |||
535 | } | |||
536 | ||||
537 | bool isVSrcF32() const { | |||
538 | return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); | |||
539 | } | |||
540 | ||||
541 | bool isVSrcF64() const { | |||
542 | return isVCSrcF64() || isLiteralImm(MVT::f64); | |||
543 | } | |||
544 | ||||
545 | bool isVSrcF16() const { | |||
546 | return isVCSrcF16() || isLiteralImm(MVT::f16); | |||
547 | } | |||
548 | ||||
549 | bool isVSrcV2F16() const { | |||
550 | return isVSrcF16() || isLiteralImm(MVT::v2f16); | |||
551 | } | |||
552 | ||||
553 | bool isVISrcB32() const { | |||
554 | return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); | |||
555 | } | |||
556 | ||||
557 | bool isVISrcB16() const { | |||
558 | return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); | |||
559 | } | |||
560 | ||||
561 | bool isVISrcV2B16() const { | |||
562 | return isVISrcB16(); | |||
563 | } | |||
564 | ||||
565 | bool isVISrcF32() const { | |||
566 | return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); | |||
567 | } | |||
568 | ||||
569 | bool isVISrcF16() const { | |||
570 | return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); | |||
571 | } | |||
572 | ||||
573 | bool isVISrcV2F16() const { | |||
574 | return isVISrcF16() || isVISrcB32(); | |||
575 | } | |||
576 | ||||
577 | bool isVISrc_64B64() const { | |||
578 | return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); | |||
579 | } | |||
580 | ||||
581 | bool isVISrc_64F64() const { | |||
582 | return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); | |||
583 | } | |||
584 | ||||
585 | bool isVISrc_64V2FP32() const { | |||
586 | return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); | |||
587 | } | |||
588 | ||||
589 | bool isVISrc_64V2INT32() const { | |||
590 | return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); | |||
591 | } | |||
592 | ||||
593 | bool isVISrc_256B64() const { | |||
594 | return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); | |||
595 | } | |||
596 | ||||
597 | bool isVISrc_256F64() const { | |||
598 | return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); | |||
599 | } | |||
600 | ||||
601 | bool isVISrc_128B16() const { | |||
602 | return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); | |||
603 | } | |||
604 | ||||
605 | bool isVISrc_128V2B16() const { | |||
606 | return isVISrc_128B16(); | |||
607 | } | |||
608 | ||||
609 | bool isVISrc_128B32() const { | |||
610 | return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); | |||
611 | } | |||
612 | ||||
613 | bool isVISrc_128F32() const { | |||
614 | return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); | |||
615 | } | |||
616 | ||||
617 | bool isVISrc_256V2FP32() const { | |||
618 | return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); | |||
619 | } | |||
620 | ||||
621 | bool isVISrc_256V2INT32() const { | |||
622 | return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); | |||
623 | } | |||
624 | ||||
625 | bool isVISrc_512B32() const { | |||
626 | return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); | |||
627 | } | |||
628 | ||||
629 | bool isVISrc_512B16() const { | |||
630 | return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); | |||
631 | } | |||
632 | ||||
633 | bool isVISrc_512V2B16() const { | |||
634 | return isVISrc_512B16(); | |||
635 | } | |||
636 | ||||
637 | bool isVISrc_512F32() const { | |||
638 | return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); | |||
639 | } | |||
640 | ||||
641 | bool isVISrc_512F16() const { | |||
642 | return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); | |||
643 | } | |||
644 | ||||
645 | bool isVISrc_512V2F16() const { | |||
646 | return isVISrc_512F16() || isVISrc_512B32(); | |||
647 | } | |||
648 | ||||
649 | bool isVISrc_1024B32() const { | |||
650 | return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); | |||
651 | } | |||
652 | ||||
653 | bool isVISrc_1024B16() const { | |||
654 | return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); | |||
655 | } | |||
656 | ||||
657 | bool isVISrc_1024V2B16() const { | |||
658 | return isVISrc_1024B16(); | |||
659 | } | |||
660 | ||||
661 | bool isVISrc_1024F32() const { | |||
662 | return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); | |||
663 | } | |||
664 | ||||
665 | bool isVISrc_1024F16() const { | |||
666 | return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); | |||
667 | } | |||
668 | ||||
669 | bool isVISrc_1024V2F16() const { | |||
670 | return isVISrc_1024F16() || isVISrc_1024B32(); | |||
671 | } | |||
672 | ||||
673 | bool isAISrcB32() const { | |||
674 | return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); | |||
675 | } | |||
676 | ||||
677 | bool isAISrcB16() const { | |||
678 | return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); | |||
679 | } | |||
680 | ||||
681 | bool isAISrcV2B16() const { | |||
682 | return isAISrcB16(); | |||
683 | } | |||
684 | ||||
685 | bool isAISrcF32() const { | |||
686 | return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); | |||
687 | } | |||
688 | ||||
689 | bool isAISrcF16() const { | |||
690 | return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); | |||
691 | } | |||
692 | ||||
693 | bool isAISrcV2F16() const { | |||
694 | return isAISrcF16() || isAISrcB32(); | |||
695 | } | |||
696 | ||||
697 | bool isAISrc_64B64() const { | |||
698 | return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); | |||
699 | } | |||
700 | ||||
701 | bool isAISrc_64F64() const { | |||
702 | return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); | |||
703 | } | |||
704 | ||||
705 | bool isAISrc_128B32() const { | |||
706 | return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); | |||
707 | } | |||
708 | ||||
709 | bool isAISrc_128B16() const { | |||
710 | return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); | |||
711 | } | |||
712 | ||||
713 | bool isAISrc_128V2B16() const { | |||
714 | return isAISrc_128B16(); | |||
715 | } | |||
716 | ||||
717 | bool isAISrc_128F32() const { | |||
718 | return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); | |||
719 | } | |||
720 | ||||
721 | bool isAISrc_128F16() const { | |||
722 | return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); | |||
723 | } | |||
724 | ||||
725 | bool isAISrc_128V2F16() const { | |||
726 | return isAISrc_128F16() || isAISrc_128B32(); | |||
727 | } | |||
728 | ||||
729 | bool isVISrc_128F16() const { | |||
730 | return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); | |||
731 | } | |||
732 | ||||
733 | bool isVISrc_128V2F16() const { | |||
734 | return isVISrc_128F16() || isVISrc_128B32(); | |||
735 | } | |||
736 | ||||
737 | bool isAISrc_256B64() const { | |||
738 | return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); | |||
739 | } | |||
740 | ||||
741 | bool isAISrc_256F64() const { | |||
742 | return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); | |||
743 | } | |||
744 | ||||
745 | bool isAISrc_512B32() const { | |||
746 | return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); | |||
747 | } | |||
748 | ||||
749 | bool isAISrc_512B16() const { | |||
750 | return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); | |||
751 | } | |||
752 | ||||
753 | bool isAISrc_512V2B16() const { | |||
754 | return isAISrc_512B16(); | |||
755 | } | |||
756 | ||||
757 | bool isAISrc_512F32() const { | |||
758 | return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); | |||
759 | } | |||
760 | ||||
761 | bool isAISrc_512F16() const { | |||
762 | return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); | |||
763 | } | |||
764 | ||||
765 | bool isAISrc_512V2F16() const { | |||
766 | return isAISrc_512F16() || isAISrc_512B32(); | |||
767 | } | |||
768 | ||||
769 | bool isAISrc_1024B32() const { | |||
770 | return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); | |||
771 | } | |||
772 | ||||
773 | bool isAISrc_1024B16() const { | |||
774 | return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); | |||
775 | } | |||
776 | ||||
777 | bool isAISrc_1024V2B16() const { | |||
778 | return isAISrc_1024B16(); | |||
779 | } | |||
780 | ||||
781 | bool isAISrc_1024F32() const { | |||
782 | return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); | |||
783 | } | |||
784 | ||||
785 | bool isAISrc_1024F16() const { | |||
786 | return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); | |||
787 | } | |||
788 | ||||
789 | bool isAISrc_1024V2F16() const { | |||
790 | return isAISrc_1024F16() || isAISrc_1024B32(); | |||
791 | } | |||
792 | ||||
793 | bool isKImmFP32() const { | |||
794 | return isLiteralImm(MVT::f32); | |||
795 | } | |||
796 | ||||
797 | bool isKImmFP16() const { | |||
798 | return isLiteralImm(MVT::f16); | |||
799 | } | |||
800 | ||||
801 | bool isMem() const override { | |||
802 | return false; | |||
803 | } | |||
804 | ||||
805 | bool isExpr() const { | |||
806 | return Kind == Expression; | |||
807 | } | |||
808 | ||||
809 | bool isSoppBrTarget() const { | |||
810 | return isExpr() || isImm(); | |||
811 | } | |||
812 | ||||
813 | bool isSWaitCnt() const; | |||
814 | bool isHwreg() const; | |||
815 | bool isSendMsg() const; | |||
816 | bool isSwizzle() const; | |||
817 | bool isSMRDOffset8() const; | |||
818 | bool isSMEMOffset() const; | |||
819 | bool isSMRDLiteralOffset() const; | |||
820 | bool isDPP8() const; | |||
821 | bool isDPPCtrl() const; | |||
822 | bool isBLGP() const; | |||
823 | bool isCBSZ() const; | |||
824 | bool isABID() const; | |||
825 | bool isGPRIdxMode() const; | |||
826 | bool isS16Imm() const; | |||
827 | bool isU16Imm() const; | |||
828 | bool isEndpgm() const; | |||
829 | ||||
830 | StringRef getExpressionAsToken() const { | |||
831 | assert(isExpr())(static_cast<void> (0)); | |||
832 | const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); | |||
833 | return S->getSymbol().getName(); | |||
834 | } | |||
835 | ||||
836 | StringRef getToken() const { | |||
837 | assert(isToken())(static_cast<void> (0)); | |||
838 | ||||
839 | if (Kind == Expression) | |||
840 | return getExpressionAsToken(); | |||
841 | ||||
842 | return StringRef(Tok.Data, Tok.Length); | |||
843 | } | |||
844 | ||||
845 | int64_t getImm() const { | |||
846 | assert(isImm())(static_cast<void> (0)); | |||
847 | return Imm.Val; | |||
848 | } | |||
849 | ||||
850 | void setImm(int64_t Val) { | |||
851 | assert(isImm())(static_cast<void> (0)); | |||
852 | Imm.Val = Val; | |||
853 | } | |||
854 | ||||
855 | ImmTy getImmTy() const { | |||
856 | assert(isImm())(static_cast<void> (0)); | |||
857 | return Imm.Type; | |||
858 | } | |||
859 | ||||
860 | unsigned getReg() const override { | |||
861 | assert(isRegKind())(static_cast<void> (0)); | |||
862 | return Reg.RegNo; | |||
863 | } | |||
864 | ||||
865 | SMLoc getStartLoc() const override { | |||
866 | return StartLoc; | |||
867 | } | |||
868 | ||||
869 | SMLoc getEndLoc() const override { | |||
870 | return EndLoc; | |||
871 | } | |||
872 | ||||
873 | SMRange getLocRange() const { | |||
874 | return SMRange(StartLoc, EndLoc); | |||
875 | } | |||
876 | ||||
877 | Modifiers getModifiers() const { | |||
878 | assert(isRegKind() || isImmTy(ImmTyNone))(static_cast<void> (0)); | |||
879 | return isRegKind() ? Reg.Mods : Imm.Mods; | |||
880 | } | |||
881 | ||||
882 | void setModifiers(Modifiers Mods) { | |||
883 | assert(isRegKind() || isImmTy(ImmTyNone))(static_cast<void> (0)); | |||
884 | if (isRegKind()) | |||
885 | Reg.Mods = Mods; | |||
886 | else | |||
887 | Imm.Mods = Mods; | |||
888 | } | |||
889 | ||||
890 | bool hasModifiers() const { | |||
891 | return getModifiers().hasModifiers(); | |||
892 | } | |||
893 | ||||
894 | bool hasFPModifiers() const { | |||
895 | return getModifiers().hasFPModifiers(); | |||
896 | } | |||
897 | ||||
898 | bool hasIntModifiers() const { | |||
899 | return getModifiers().hasIntModifiers(); | |||
900 | } | |||
901 | ||||
902 | uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; | |||
903 | ||||
904 | void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; | |||
905 | ||||
906 | void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; | |||
907 | ||||
908 | template <unsigned Bitwidth> | |||
909 | void addKImmFPOperands(MCInst &Inst, unsigned N) const; | |||
910 | ||||
911 | void addKImmFP16Operands(MCInst &Inst, unsigned N) const { | |||
912 | addKImmFPOperands<16>(Inst, N); | |||
913 | } | |||
914 | ||||
915 | void addKImmFP32Operands(MCInst &Inst, unsigned N) const { | |||
916 | addKImmFPOperands<32>(Inst, N); | |||
917 | } | |||
918 | ||||
919 | void addRegOperands(MCInst &Inst, unsigned N) const; | |||
920 | ||||
921 | void addBoolRegOperands(MCInst &Inst, unsigned N) const { | |||
922 | addRegOperands(Inst, N); | |||
923 | } | |||
924 | ||||
925 | void addRegOrImmOperands(MCInst &Inst, unsigned N) const { | |||
926 | if (isRegKind()) | |||
927 | addRegOperands(Inst, N); | |||
928 | else if (isExpr()) | |||
929 | Inst.addOperand(MCOperand::createExpr(Expr)); | |||
930 | else | |||
931 | addImmOperands(Inst, N); | |||
932 | } | |||
933 | ||||
934 | void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { | |||
935 | Modifiers Mods = getModifiers(); | |||
936 | Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); | |||
937 | if (isRegKind()) { | |||
938 | addRegOperands(Inst, N); | |||
939 | } else { | |||
940 | addImmOperands(Inst, N, false); | |||
941 | } | |||
942 | } | |||
943 | ||||
944 | void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { | |||
945 | assert(!hasIntModifiers())(static_cast<void> (0)); | |||
946 | addRegOrImmWithInputModsOperands(Inst, N); | |||
947 | } | |||
948 | ||||
949 | void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { | |||
950 | assert(!hasFPModifiers())(static_cast<void> (0)); | |||
951 | addRegOrImmWithInputModsOperands(Inst, N); | |||
952 | } | |||
953 | ||||
954 | void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { | |||
955 | Modifiers Mods = getModifiers(); | |||
956 | Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); | |||
957 | assert(isRegKind())(static_cast<void> (0)); | |||
958 | addRegOperands(Inst, N); | |||
959 | } | |||
960 | ||||
961 | void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { | |||
962 | assert(!hasIntModifiers())(static_cast<void> (0)); | |||
963 | addRegWithInputModsOperands(Inst, N); | |||
964 | } | |||
965 | ||||
966 | void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { | |||
967 | assert(!hasFPModifiers())(static_cast<void> (0)); | |||
968 | addRegWithInputModsOperands(Inst, N); | |||
969 | } | |||
970 | ||||
971 | void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { | |||
972 | if (isImm()) | |||
973 | addImmOperands(Inst, N); | |||
974 | else { | |||
975 | assert(isExpr())(static_cast<void> (0)); | |||
976 | Inst.addOperand(MCOperand::createExpr(Expr)); | |||
977 | } | |||
978 | } | |||
979 | ||||
980 | static void printImmTy(raw_ostream& OS, ImmTy Type) { | |||
981 | switch (Type) { | |||
982 | case ImmTyNone: OS << "None"; break; | |||
983 | case ImmTyGDS: OS << "GDS"; break; | |||
984 | case ImmTyLDS: OS << "LDS"; break; | |||
985 | case ImmTyOffen: OS << "Offen"; break; | |||
986 | case ImmTyIdxen: OS << "Idxen"; break; | |||
987 | case ImmTyAddr64: OS << "Addr64"; break; | |||
988 | case ImmTyOffset: OS << "Offset"; break; | |||
989 | case ImmTyInstOffset: OS << "InstOffset"; break; | |||
990 | case ImmTyOffset0: OS << "Offset0"; break; | |||
991 | case ImmTyOffset1: OS << "Offset1"; break; | |||
992 | case ImmTyCPol: OS << "CPol"; break; | |||
993 | case ImmTySWZ: OS << "SWZ"; break; | |||
994 | case ImmTyTFE: OS << "TFE"; break; | |||
995 | case ImmTyD16: OS << "D16"; break; | |||
996 | case ImmTyFORMAT: OS << "FORMAT"; break; | |||
997 | case ImmTyClampSI: OS << "ClampSI"; break; | |||
998 | case ImmTyOModSI: OS << "OModSI"; break; | |||
999 | case ImmTyDPP8: OS << "DPP8"; break; | |||
1000 | case ImmTyDppCtrl: OS << "DppCtrl"; break; | |||
1001 | case ImmTyDppRowMask: OS << "DppRowMask"; break; | |||
1002 | case ImmTyDppBankMask: OS << "DppBankMask"; break; | |||
1003 | case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; | |||
1004 | case ImmTyDppFi: OS << "FI"; break; | |||
1005 | case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; | |||
1006 | case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; | |||
1007 | case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; | |||
1008 | case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; | |||
1009 | case ImmTyDMask: OS << "DMask"; break; | |||
1010 | case ImmTyDim: OS << "Dim"; break; | |||
1011 | case ImmTyUNorm: OS << "UNorm"; break; | |||
1012 | case ImmTyDA: OS << "DA"; break; | |||
1013 | case ImmTyR128A16: OS << "R128A16"; break; | |||
1014 | case ImmTyA16: OS << "A16"; break; | |||
1015 | case ImmTyLWE: OS << "LWE"; break; | |||
1016 | case ImmTyOff: OS << "Off"; break; | |||
1017 | case ImmTyExpTgt: OS << "ExpTgt"; break; | |||
1018 | case ImmTyExpCompr: OS << "ExpCompr"; break; | |||
1019 | case ImmTyExpVM: OS << "ExpVM"; break; | |||
1020 | case ImmTyHwreg: OS << "Hwreg"; break; | |||
1021 | case ImmTySendMsg: OS << "SendMsg"; break; | |||
1022 | case ImmTyInterpSlot: OS << "InterpSlot"; break; | |||
1023 | case ImmTyInterpAttr: OS << "InterpAttr"; break; | |||
1024 | case ImmTyAttrChan: OS << "AttrChan"; break; | |||
1025 | case ImmTyOpSel: OS << "OpSel"; break; | |||
1026 | case ImmTyOpSelHi: OS << "OpSelHi"; break; | |||
1027 | case ImmTyNegLo: OS << "NegLo"; break; | |||
1028 | case ImmTyNegHi: OS << "NegHi"; break; | |||
1029 | case ImmTySwizzle: OS << "Swizzle"; break; | |||
1030 | case ImmTyGprIdxMode: OS << "GprIdxMode"; break; | |||
1031 | case ImmTyHigh: OS << "High"; break; | |||
1032 | case ImmTyBLGP: OS << "BLGP"; break; | |||
1033 | case ImmTyCBSZ: OS << "CBSZ"; break; | |||
1034 | case ImmTyABID: OS << "ABID"; break; | |||
1035 | case ImmTyEndpgm: OS << "Endpgm"; break; | |||
1036 | } | |||
1037 | } | |||
1038 | ||||
1039 | void print(raw_ostream &OS) const override { | |||
1040 | switch (Kind) { | |||
1041 | case Register: | |||
1042 | OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; | |||
1043 | break; | |||
1044 | case Immediate: | |||
1045 | OS << '<' << getImm(); | |||
1046 | if (getImmTy() != ImmTyNone) { | |||
1047 | OS << " type: "; printImmTy(OS, getImmTy()); | |||
1048 | } | |||
1049 | OS << " mods: " << Imm.Mods << '>'; | |||
1050 | break; | |||
1051 | case Token: | |||
1052 | OS << '\'' << getToken() << '\''; | |||
1053 | break; | |||
1054 | case Expression: | |||
1055 | OS << "<expr " << *Expr << '>'; | |||
1056 | break; | |||
1057 | } | |||
1058 | } | |||
1059 | ||||
1060 | static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, | |||
1061 | int64_t Val, SMLoc Loc, | |||
1062 | ImmTy Type = ImmTyNone, | |||
1063 | bool IsFPImm = false) { | |||
1064 | auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); | |||
1065 | Op->Imm.Val = Val; | |||
1066 | Op->Imm.IsFPImm = IsFPImm; | |||
1067 | Op->Imm.Kind = ImmKindTyNone; | |||
1068 | Op->Imm.Type = Type; | |||
1069 | Op->Imm.Mods = Modifiers(); | |||
1070 | Op->StartLoc = Loc; | |||
1071 | Op->EndLoc = Loc; | |||
1072 | return Op; | |||
1073 | } | |||
1074 | ||||
1075 | static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, | |||
1076 | StringRef Str, SMLoc Loc, | |||
1077 | bool HasExplicitEncodingSize = true) { | |||
1078 | auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); | |||
1079 | Res->Tok.Data = Str.data(); | |||
1080 | Res->Tok.Length = Str.size(); | |||
1081 | Res->StartLoc = Loc; | |||
1082 | Res->EndLoc = Loc; | |||
1083 | return Res; | |||
1084 | } | |||
1085 | ||||
1086 | static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, | |||
1087 | unsigned RegNo, SMLoc S, | |||
1088 | SMLoc E) { | |||
1089 | auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); | |||
1090 | Op->Reg.RegNo = RegNo; | |||
1091 | Op->Reg.Mods = Modifiers(); | |||
1092 | Op->StartLoc = S; | |||
1093 | Op->EndLoc = E; | |||
1094 | return Op; | |||
1095 | } | |||
1096 | ||||
1097 | static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, | |||
1098 | const class MCExpr *Expr, SMLoc S) { | |||
1099 | auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); | |||
1100 | Op->Expr = Expr; | |||
1101 | Op->StartLoc = S; | |||
1102 | Op->EndLoc = S; | |||
1103 | return Op; | |||
1104 | } | |||
1105 | }; | |||
1106 | ||||
1107 | raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { | |||
1108 | OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; | |||
1109 | return OS; | |||
1110 | } | |||
1111 | ||||
1112 | //===----------------------------------------------------------------------===// | |||
1113 | // AsmParser | |||
1114 | //===----------------------------------------------------------------------===// | |||
1115 | ||||
1116 | // Holds info related to the current kernel, e.g. count of SGPRs used. | |||
1117 | // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next | |||
1118 | // .amdgpu_hsa_kernel or at EOF. | |||
1119 | class KernelScopeInfo { | |||
1120 | int SgprIndexUnusedMin = -1; | |||
1121 | int VgprIndexUnusedMin = -1; | |||
1122 | MCContext *Ctx = nullptr; | |||
1123 | ||||
1124 | void usesSgprAt(int i) { | |||
1125 | if (i >= SgprIndexUnusedMin) { | |||
1126 | SgprIndexUnusedMin = ++i; | |||
1127 | if (Ctx) { | |||
1128 | MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); | |||
1129 | Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); | |||
1130 | } | |||
1131 | } | |||
1132 | } | |||
1133 | ||||
1134 | void usesVgprAt(int i) { | |||
1135 | if (i >= VgprIndexUnusedMin) { | |||
1136 | VgprIndexUnusedMin = ++i; | |||
1137 | if (Ctx) { | |||
1138 | MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); | |||
1139 | Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); | |||
1140 | } | |||
1141 | } | |||
1142 | } | |||
1143 | ||||
1144 | public: | |||
1145 | KernelScopeInfo() = default; | |||
1146 | ||||
1147 | void initialize(MCContext &Context) { | |||
1148 | Ctx = &Context; | |||
1149 | usesSgprAt(SgprIndexUnusedMin = -1); | |||
1150 | usesVgprAt(VgprIndexUnusedMin = -1); | |||
1151 | } | |||
1152 | ||||
1153 | void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { | |||
1154 | switch (RegKind) { | |||
1155 | case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; | |||
1156 | case IS_AGPR: // fall through | |||
1157 | case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; | |||
1158 | default: break; | |||
1159 | } | |||
1160 | } | |||
1161 | }; | |||
1162 | ||||
1163 | class AMDGPUAsmParser : public MCTargetAsmParser { | |||
1164 | MCAsmParser &Parser; | |||
1165 | ||||
1166 | // Number of extra operands parsed after the first optional operand. | |||
1167 | // This may be necessary to skip hardcoded mandatory operands. | |||
1168 | static const unsigned MAX_OPR_LOOKAHEAD = 8; | |||
1169 | ||||
1170 | unsigned ForcedEncodingSize = 0; | |||
1171 | bool ForcedDPP = false; | |||
1172 | bool ForcedSDWA = false; | |||
1173 | KernelScopeInfo KernelScope; | |||
1174 | unsigned CPolSeen; | |||
1175 | ||||
1176 | /// @name Auto-generated Match Functions | |||
1177 | /// { | |||
1178 | ||||
1179 | #define GET_ASSEMBLER_HEADER | |||
1180 | #include "AMDGPUGenAsmMatcher.inc" | |||
1181 | ||||
1182 | /// } | |||
1183 | ||||
1184 | private: | |||
1185 | bool ParseAsAbsoluteExpression(uint32_t &Ret); | |||
1186 | bool OutOfRangeError(SMRange Range); | |||
1187 | /// Calculate VGPR/SGPR blocks required for given target, reserved | |||
1188 | /// registers, and user-specified NextFreeXGPR values. | |||
1189 | /// | |||
1190 | /// \param Features [in] Target features, used for bug corrections. | |||
1191 | /// \param VCCUsed [in] Whether VCC special SGPR is reserved. | |||
1192 | /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. | |||
1193 | /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. | |||
1194 | /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel | |||
1195 | /// descriptor field, if valid. | |||
1196 | /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. | |||
1197 | /// \param VGPRRange [in] Token range, used for VGPR diagnostics. | |||
1198 | /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. | |||
1199 | /// \param SGPRRange [in] Token range, used for SGPR diagnostics. | |||
1200 | /// \param VGPRBlocks [out] Result VGPR block count. | |||
1201 | /// \param SGPRBlocks [out] Result SGPR block count. | |||
1202 | bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, | |||
1203 | bool FlatScrUsed, bool XNACKUsed, | |||
1204 | Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, | |||
1205 | SMRange VGPRRange, unsigned NextFreeSGPR, | |||
1206 | SMRange SGPRRange, unsigned &VGPRBlocks, | |||
1207 | unsigned &SGPRBlocks); | |||
1208 | bool ParseDirectiveAMDGCNTarget(); | |||
1209 | bool ParseDirectiveAMDHSAKernel(); | |||
1210 | bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); | |||
1211 | bool ParseDirectiveHSACodeObjectVersion(); | |||
1212 | bool ParseDirectiveHSACodeObjectISA(); | |||
1213 | bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); | |||
1214 | bool ParseDirectiveAMDKernelCodeT(); | |||
1215 | // TODO: Possibly make subtargetHasRegister const. | |||
1216 | bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); | |||
1217 | bool ParseDirectiveAMDGPUHsaKernel(); | |||
1218 | ||||
1219 | bool ParseDirectiveISAVersion(); | |||
1220 | bool ParseDirectiveHSAMetadata(); | |||
1221 | bool ParseDirectivePALMetadataBegin(); | |||
1222 | bool ParseDirectivePALMetadata(); | |||
1223 | bool ParseDirectiveAMDGPULDS(); | |||
1224 | ||||
1225 | /// Common code to parse out a block of text (typically YAML) between start and | |||
1226 | /// end directives. | |||
1227 | bool ParseToEndDirective(const char *AssemblerDirectiveBegin, | |||
1228 | const char *AssemblerDirectiveEnd, | |||
1229 | std::string &CollectString); | |||
1230 | ||||
1231 | bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, | |||
1232 | RegisterKind RegKind, unsigned Reg1, SMLoc Loc); | |||
1233 | bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, | |||
1234 | unsigned &RegNum, unsigned &RegWidth, | |||
1235 | bool RestoreOnFailure = false); | |||
1236 | bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, | |||
1237 | unsigned &RegNum, unsigned &RegWidth, | |||
1238 | SmallVectorImpl<AsmToken> &Tokens); | |||
1239 | unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, | |||
1240 | unsigned &RegWidth, | |||
1241 | SmallVectorImpl<AsmToken> &Tokens); | |||
1242 | unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, | |||
1243 | unsigned &RegWidth, | |||
1244 | SmallVectorImpl<AsmToken> &Tokens); | |||
1245 | unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, | |||
1246 | unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); | |||
1247 | bool ParseRegRange(unsigned& Num, unsigned& Width); | |||
1248 | unsigned getRegularReg(RegisterKind RegKind, | |||
1249 | unsigned RegNum, | |||
1250 | unsigned RegWidth, | |||
1251 | SMLoc Loc); | |||
1252 | ||||
1253 | bool isRegister(); | |||
1254 | bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; | |||
1255 | Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); | |||
1256 | void initializeGprCountSymbol(RegisterKind RegKind); | |||
1257 | bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, | |||
1258 | unsigned RegWidth); | |||
1259 | void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, | |||
1260 | bool IsAtomic, bool IsLds = false); | |||
1261 | void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, | |||
1262 | bool IsGdsHardcoded); | |||
1263 | ||||
1264 | public: | |||
1265 | enum AMDGPUMatchResultTy { | |||
1266 | Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY | |||
1267 | }; | |||
1268 | enum OperandMode { | |||
1269 | OperandMode_Default, | |||
1270 | OperandMode_NSA, | |||
1271 | }; | |||
1272 | ||||
1273 | using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; | |||
1274 | ||||
1275 | AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, | |||
1276 | const MCInstrInfo &MII, | |||
1277 | const MCTargetOptions &Options) | |||
1278 | : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { | |||
1279 | MCAsmParserExtension::Initialize(Parser); | |||
1280 | ||||
1281 | if (getFeatureBits().none()) { | |||
1282 | // Set default features. | |||
1283 | copySTI().ToggleFeature("southern-islands"); | |||
1284 | } | |||
1285 | ||||
1286 | setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); | |||
1287 | ||||
1288 | { | |||
1289 | // TODO: make those pre-defined variables read-only. | |||
1290 | // Currently there is none suitable machinery in the core llvm-mc for this. | |||
1291 | // MCSymbol::isRedefinable is intended for another purpose, and | |||
1292 | // AsmParser::parseDirectiveSet() cannot be specialized for specific target. | |||
1293 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); | |||
1294 | MCContext &Ctx = getContext(); | |||
1295 | if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { | |||
1296 | MCSymbol *Sym = | |||
1297 | Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); | |||
1298 | Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); | |||
1299 | Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); | |||
1300 | Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); | |||
1301 | Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); | |||
1302 | Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); | |||
1303 | } else { | |||
1304 | MCSymbol *Sym = | |||
1305 | Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); | |||
1306 | Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); | |||
1307 | Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); | |||
1308 | Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); | |||
1309 | Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); | |||
1310 | Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); | |||
1311 | } | |||
1312 | if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { | |||
1313 | initializeGprCountSymbol(IS_VGPR); | |||
1314 | initializeGprCountSymbol(IS_SGPR); | |||
1315 | } else | |||
1316 | KernelScope.initialize(getContext()); | |||
1317 | } | |||
1318 | } | |||
1319 | ||||
1320 | bool hasMIMG_R128() const { | |||
1321 | return AMDGPU::hasMIMG_R128(getSTI()); | |||
1322 | } | |||
1323 | ||||
1324 | bool hasPackedD16() const { | |||
1325 | return AMDGPU::hasPackedD16(getSTI()); | |||
1326 | } | |||
1327 | ||||
1328 | bool hasGFX10A16() const { | |||
1329 | return AMDGPU::hasGFX10A16(getSTI()); | |||
1330 | } | |||
1331 | ||||
1332 | bool hasG16() const { return AMDGPU::hasG16(getSTI()); } | |||
1333 | ||||
1334 | bool isSI() const { | |||
1335 | return AMDGPU::isSI(getSTI()); | |||
1336 | } | |||
1337 | ||||
1338 | bool isCI() const { | |||
1339 | return AMDGPU::isCI(getSTI()); | |||
1340 | } | |||
1341 | ||||
1342 | bool isVI() const { | |||
1343 | return AMDGPU::isVI(getSTI()); | |||
1344 | } | |||
1345 | ||||
1346 | bool isGFX9() const { | |||
1347 | return AMDGPU::isGFX9(getSTI()); | |||
1348 | } | |||
1349 | ||||
1350 | bool isGFX90A() const { | |||
1351 | return AMDGPU::isGFX90A(getSTI()); | |||
1352 | } | |||
1353 | ||||
1354 | bool isGFX9Plus() const { | |||
1355 | return AMDGPU::isGFX9Plus(getSTI()); | |||
1356 | } | |||
1357 | ||||
1358 | bool isGFX10() const { | |||
1359 | return AMDGPU::isGFX10(getSTI()); | |||
1360 | } | |||
1361 | ||||
1362 | bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } | |||
1363 | ||||
1364 | bool isGFX10_BEncoding() const { | |||
1365 | return AMDGPU::isGFX10_BEncoding(getSTI()); | |||
1366 | } | |||
1367 | ||||
1368 | bool hasInv2PiInlineImm() const { | |||
1369 | return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; | |||
1370 | } | |||
1371 | ||||
1372 | bool hasFlatOffsets() const { | |||
1373 | return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; | |||
1374 | } | |||
1375 | ||||
1376 | bool hasArchitectedFlatScratch() const { | |||
1377 | return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; | |||
1378 | } | |||
1379 | ||||
1380 | bool hasSGPR102_SGPR103() const { | |||
1381 | return !isVI() && !isGFX9(); | |||
1382 | } | |||
1383 | ||||
1384 | bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } | |||
1385 | ||||
1386 | bool hasIntClamp() const { | |||
1387 | return getFeatureBits()[AMDGPU::FeatureIntClamp]; | |||
1388 | } | |||
1389 | ||||
1390 | AMDGPUTargetStreamer &getTargetStreamer() { | |||
1391 | MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); | |||
1392 | return static_cast<AMDGPUTargetStreamer &>(TS); | |||
1393 | } | |||
1394 | ||||
1395 | const MCRegisterInfo *getMRI() const { | |||
1396 | // We need this const_cast because for some reason getContext() is not const | |||
1397 | // in MCAsmParser. | |||
1398 | return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); | |||
1399 | } | |||
1400 | ||||
1401 | const MCInstrInfo *getMII() const { | |||
1402 | return &MII; | |||
1403 | } | |||
1404 | ||||
1405 | const FeatureBitset &getFeatureBits() const { | |||
1406 | return getSTI().getFeatureBits(); | |||
1407 | } | |||
1408 | ||||
1409 | void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } | |||
1410 | void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } | |||
1411 | void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } | |||
1412 | ||||
1413 | unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } | |||
1414 | bool isForcedVOP3() const { return ForcedEncodingSize == 64; } | |||
1415 | bool isForcedDPP() const { return ForcedDPP; } | |||
1416 | bool isForcedSDWA() const { return ForcedSDWA; } | |||
1417 | ArrayRef<unsigned> getMatchedVariants() const; | |||
1418 | StringRef getMatchedVariantName() const; | |||
1419 | ||||
1420 | std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); | |||
1421 | bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, | |||
1422 | bool RestoreOnFailure); | |||
1423 | bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; | |||
1424 | OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, | |||
1425 | SMLoc &EndLoc) override; | |||
1426 | unsigned checkTargetMatchPredicate(MCInst &Inst) override; | |||
1427 | unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, | |||
1428 | unsigned Kind) override; | |||
1429 | bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, | |||
1430 | OperandVector &Operands, MCStreamer &Out, | |||
1431 | uint64_t &ErrorInfo, | |||
1432 | bool MatchingInlineAsm) override; | |||
1433 | bool ParseDirective(AsmToken DirectiveID) override; | |||
1434 | OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, | |||
1435 | OperandMode Mode = OperandMode_Default); | |||
1436 | StringRef parseMnemonicSuffix(StringRef Name); | |||
1437 | bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, | |||
1438 | SMLoc NameLoc, OperandVector &Operands) override; | |||
1439 | //bool ProcessInstruction(MCInst &Inst); | |||
1440 | ||||
1441 | OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); | |||
1442 | ||||
1443 | OperandMatchResultTy | |||
1444 | parseIntWithPrefix(const char *Prefix, OperandVector &Operands, | |||
1445 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, | |||
1446 | bool (*ConvertResult)(int64_t &) = nullptr); | |||
1447 | ||||
1448 | OperandMatchResultTy | |||
1449 | parseOperandArrayWithPrefix(const char *Prefix, | |||
1450 | OperandVector &Operands, | |||
1451 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, | |||
1452 | bool (*ConvertResult)(int64_t&) = nullptr); | |||
1453 | ||||
1454 | OperandMatchResultTy | |||
1455 | parseNamedBit(StringRef Name, OperandVector &Operands, | |||
1456 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); | |||
1457 | OperandMatchResultTy parseCPol(OperandVector &Operands); | |||
1458 | OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, | |||
1459 | StringRef &Value, | |||
1460 | SMLoc &StringLoc); | |||
1461 | ||||
1462 | bool isModifier(); | |||
1463 | bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; | |||
1464 | bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; | |||
1465 | bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; | |||
1466 | bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; | |||
1467 | bool parseSP3NegModifier(); | |||
1468 | OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); | |||
1469 | OperandMatchResultTy parseReg(OperandVector &Operands); | |||
1470 | OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); | |||
1471 | OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); | |||
1472 | OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); | |||
1473 | OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); | |||
1474 | OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); | |||
1475 | OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); | |||
1476 | OperandMatchResultTy parseDfmtNfmt(int64_t &Format); | |||
1477 | OperandMatchResultTy parseUfmt(int64_t &Format); | |||
1478 | OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); | |||
1479 | OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); | |||
1480 | OperandMatchResultTy parseFORMAT(OperandVector &Operands); | |||
1481 | OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); | |||
1482 | OperandMatchResultTy parseNumericFormat(int64_t &Format); | |||
1483 | bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); | |||
1484 | bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); | |||
1485 | ||||
1486 | void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); | |||
1487 | void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } | |||
1488 | void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } | |||
1489 | void cvtExp(MCInst &Inst, const OperandVector &Operands); | |||
1490 | ||||
1491 | bool parseCnt(int64_t &IntVal); | |||
1492 | OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); | |||
1493 | OperandMatchResultTy parseHwreg(OperandVector &Operands); | |||
1494 | ||||
1495 | private: | |||
1496 | struct OperandInfoTy { | |||
1497 | SMLoc Loc; | |||
1498 | int64_t Id; | |||
1499 | bool IsSymbolic = false; | |||
1500 | bool IsDefined = false; | |||
1501 | ||||
1502 | OperandInfoTy(int64_t Id_) : Id(Id_) {} | |||
1503 | }; | |||
1504 | ||||
1505 | bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); | |||
1506 | bool validateSendMsg(const OperandInfoTy &Msg, | |||
1507 | const OperandInfoTy &Op, | |||
1508 | const OperandInfoTy &Stream); | |||
1509 | ||||
1510 | bool parseHwregBody(OperandInfoTy &HwReg, | |||
1511 | OperandInfoTy &Offset, | |||
1512 | OperandInfoTy &Width); | |||
1513 | bool validateHwreg(const OperandInfoTy &HwReg, | |||
1514 | const OperandInfoTy &Offset, | |||
1515 | const OperandInfoTy &Width); | |||
1516 | ||||
1517 | SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; | |||
1518 | SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; | |||
1519 | ||||
1520 | SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, | |||
1521 | const OperandVector &Operands) const; | |||
1522 | SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; | |||
1523 | SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; | |||
1524 | SMLoc getLitLoc(const OperandVector &Operands) const; | |||
1525 | SMLoc getConstLoc(const OperandVector &Operands) const; | |||
1526 | ||||
1527 | bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); | |||
1528 | bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); | |||
1529 | bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); | |||
1530 | bool validateSOPLiteral(const MCInst &Inst) const; | |||
1531 | bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); | |||
1532 | bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); | |||
1533 | bool validateIntClampSupported(const MCInst &Inst); | |||
1534 | bool validateMIMGAtomicDMask(const MCInst &Inst); | |||
1535 | bool validateMIMGGatherDMask(const MCInst &Inst); | |||
1536 | bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); | |||
1537 | bool validateMIMGDataSize(const MCInst &Inst); | |||
1538 | bool validateMIMGAddrSize(const MCInst &Inst); | |||
1539 | bool validateMIMGD16(const MCInst &Inst); | |||
1540 | bool validateMIMGDim(const MCInst &Inst); | |||
1541 | bool validateMIMGMSAA(const MCInst &Inst); | |||
1542 | bool validateOpSel(const MCInst &Inst); | |||
1543 | bool validateDPP(const MCInst &Inst, const OperandVector &Operands); | |||
1544 | bool validateVccOperand(unsigned Reg) const; | |||
1545 | bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); | |||
1546 | bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); | |||
1547 | bool validateAGPRLdSt(const MCInst &Inst) const; | |||
1548 | bool validateVGPRAlign(const MCInst &Inst) const; | |||
1549 | bool validateGWS(const MCInst &Inst, const OperandVector &Operands); | |||
1550 | bool validateDivScale(const MCInst &Inst); | |||
1551 | bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, | |||
1552 | const SMLoc &IDLoc); | |||
1553 | Optional<StringRef> validateLdsDirect(const MCInst &Inst); | |||
1554 | unsigned getConstantBusLimit(unsigned Opcode) const; | |||
1555 | bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); | |||
1556 | bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; | |||
1557 | unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; | |||
1558 | ||||
1559 | bool isSupportedMnemo(StringRef Mnemo, | |||
1560 | const FeatureBitset &FBS); | |||
1561 | bool isSupportedMnemo(StringRef Mnemo, | |||
1562 | const FeatureBitset &FBS, | |||
1563 | ArrayRef<unsigned> Variants); | |||
1564 | bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); | |||
1565 | ||||
1566 | bool isId(const StringRef Id) const; | |||
1567 | bool isId(const AsmToken &Token, const StringRef Id) const; | |||
1568 | bool isToken(const AsmToken::TokenKind Kind) const; | |||
1569 | bool trySkipId(const StringRef Id); | |||
1570 | bool trySkipId(const StringRef Pref, const StringRef Id); | |||
1571 | bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); | |||
1572 | bool trySkipToken(const AsmToken::TokenKind Kind); | |||
1573 | bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); | |||
1574 | bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); | |||
1575 | bool parseId(StringRef &Val, const StringRef ErrMsg = ""); | |||
1576 | ||||
1577 | void peekTokens(MutableArrayRef<AsmToken> Tokens); | |||
1578 | AsmToken::TokenKind getTokenKind() const; | |||
1579 | bool parseExpr(int64_t &Imm, StringRef Expected = ""); | |||
1580 | bool parseExpr(OperandVector &Operands); | |||
1581 | StringRef getTokenStr() const; | |||
1582 | AsmToken peekToken(); | |||
1583 | AsmToken getToken() const; | |||
1584 | SMLoc getLoc() const; | |||
1585 | void lex(); | |||
1586 | ||||
1587 | public: | |||
1588 | void onBeginOfFile() override; | |||
1589 | ||||
1590 | OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); | |||
1591 | OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); | |||
1592 | ||||
1593 | OperandMatchResultTy parseExpTgt(OperandVector &Operands); | |||
1594 | OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); | |||
1595 | OperandMatchResultTy parseInterpSlot(OperandVector &Operands); | |||
1596 | OperandMatchResultTy parseInterpAttr(OperandVector &Operands); | |||
1597 | OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); | |||
1598 | OperandMatchResultTy parseBoolReg(OperandVector &Operands); | |||
1599 | ||||
1600 | bool parseSwizzleOperand(int64_t &Op, | |||
1601 | const unsigned MinVal, | |||
1602 | const unsigned MaxVal, | |||
1603 | const StringRef ErrMsg, | |||
1604 | SMLoc &Loc); | |||
1605 | bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, | |||
1606 | const unsigned MinVal, | |||
1607 | const unsigned MaxVal, | |||
1608 | const StringRef ErrMsg); | |||
1609 | OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); | |||
1610 | bool parseSwizzleOffset(int64_t &Imm); | |||
1611 | bool parseSwizzleMacro(int64_t &Imm); | |||
1612 | bool parseSwizzleQuadPerm(int64_t &Imm); | |||
1613 | bool parseSwizzleBitmaskPerm(int64_t &Imm); | |||
1614 | bool parseSwizzleBroadcast(int64_t &Imm); | |||
1615 | bool parseSwizzleSwap(int64_t &Imm); | |||
1616 | bool parseSwizzleReverse(int64_t &Imm); | |||
1617 | ||||
1618 | OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); | |||
1619 | int64_t parseGPRIdxMacro(); | |||
1620 | ||||
1621 | void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } | |||
1622 | void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } | |||
1623 | void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } | |||
1624 | void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); | |||
1625 | ||||
1626 | AMDGPUOperand::Ptr defaultCPol() const; | |||
1627 | ||||
1628 | AMDGPUOperand::Ptr defaultSMRDOffset8() const; | |||
1629 | AMDGPUOperand::Ptr defaultSMEMOffset() const; | |||
1630 | AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; | |||
1631 | AMDGPUOperand::Ptr defaultFlatOffset() const; | |||
1632 | ||||
1633 | OperandMatchResultTy parseOModOperand(OperandVector &Operands); | |||
1634 | ||||
1635 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands, | |||
1636 | OptionalImmIndexMap &OptionalIdx); | |||
1637 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); | |||
1638 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands); | |||
1639 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); | |||
1640 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, | |||
1641 | OptionalImmIndexMap &OptionalIdx); | |||
1642 | ||||
1643 | void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); | |||
1644 | ||||
1645 | void cvtMIMG(MCInst &Inst, const OperandVector &Operands, | |||
1646 | bool IsAtomic = false); | |||
1647 | void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); | |||
1648 | void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); | |||
1649 | ||||
1650 | void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); | |||
1651 | ||||
1652 | bool parseDimId(unsigned &Encoding); | |||
1653 | OperandMatchResultTy parseDim(OperandVector &Operands); | |||
1654 | OperandMatchResultTy parseDPP8(OperandVector &Operands); | |||
1655 | OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); | |||
1656 | bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); | |||
1657 | int64_t parseDPPCtrlSel(StringRef Ctrl); | |||
1658 | int64_t parseDPPCtrlPerm(); | |||
1659 | AMDGPUOperand::Ptr defaultRowMask() const; | |||
1660 | AMDGPUOperand::Ptr defaultBankMask() const; | |||
1661 | AMDGPUOperand::Ptr defaultBoundCtrl() const; | |||
1662 | AMDGPUOperand::Ptr defaultFI() const; | |||
1663 | void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); | |||
1664 | void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } | |||
1665 | ||||
1666 | OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, | |||
1667 | AMDGPUOperand::ImmTy Type); | |||
1668 | OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); | |||
1669 | void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); | |||
1670 | void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); | |||
1671 | void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); | |||
1672 | void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); | |||
1673 | void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); | |||
1674 | void cvtSDWA(MCInst &Inst, const OperandVector &Operands, | |||
1675 | uint64_t BasicInstType, | |||
1676 | bool SkipDstVcc = false, | |||
1677 | bool SkipSrcVcc = false); | |||
1678 | ||||
1679 | AMDGPUOperand::Ptr defaultBLGP() const; | |||
1680 | AMDGPUOperand::Ptr defaultCBSZ() const; | |||
1681 | AMDGPUOperand::Ptr defaultABID() const; | |||
1682 | ||||
1683 | OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); | |||
1684 | AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; | |||
1685 | }; | |||
1686 | ||||
1687 | struct OptionalOperand { | |||
1688 | const char *Name; | |||
1689 | AMDGPUOperand::ImmTy Type; | |||
1690 | bool IsBit; | |||
1691 | bool (*ConvertResult)(int64_t&); | |||
1692 | }; | |||
1693 | ||||
1694 | } // end anonymous namespace | |||
1695 | ||||
1696 | // May be called with integer type with equivalent bitwidth. | |||
1697 | static const fltSemantics *getFltSemantics(unsigned Size) { | |||
1698 | switch (Size) { | |||
1699 | case 4: | |||
1700 | return &APFloat::IEEEsingle(); | |||
1701 | case 8: | |||
1702 | return &APFloat::IEEEdouble(); | |||
1703 | case 2: | |||
1704 | return &APFloat::IEEEhalf(); | |||
1705 | default: | |||
1706 | llvm_unreachable("unsupported fp type")__builtin_unreachable(); | |||
1707 | } | |||
1708 | } | |||
1709 | ||||
1710 | static const fltSemantics *getFltSemantics(MVT VT) { | |||
1711 | return getFltSemantics(VT.getSizeInBits() / 8); | |||
1712 | } | |||
1713 | ||||
1714 | static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { | |||
1715 | switch (OperandType) { | |||
1716 | case AMDGPU::OPERAND_REG_IMM_INT32: | |||
1717 | case AMDGPU::OPERAND_REG_IMM_FP32: | |||
1718 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: | |||
1719 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: | |||
1720 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: | |||
1721 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: | |||
1722 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: | |||
1723 | case AMDGPU::OPERAND_REG_IMM_V2FP32: | |||
1724 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: | |||
1725 | case AMDGPU::OPERAND_REG_IMM_V2INT32: | |||
1726 | return &APFloat::IEEEsingle(); | |||
1727 | case AMDGPU::OPERAND_REG_IMM_INT64: | |||
1728 | case AMDGPU::OPERAND_REG_IMM_FP64: | |||
1729 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: | |||
1730 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: | |||
1731 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: | |||
1732 | return &APFloat::IEEEdouble(); | |||
1733 | case AMDGPU::OPERAND_REG_IMM_INT16: | |||
1734 | case AMDGPU::OPERAND_REG_IMM_FP16: | |||
1735 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: | |||
1736 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: | |||
1737 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: | |||
1738 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: | |||
1739 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: | |||
1740 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: | |||
1741 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: | |||
1742 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: | |||
1743 | case AMDGPU::OPERAND_REG_IMM_V2INT16: | |||
1744 | case AMDGPU::OPERAND_REG_IMM_V2FP16: | |||
1745 | return &APFloat::IEEEhalf(); | |||
1746 | default: | |||
1747 | llvm_unreachable("unsupported fp type")__builtin_unreachable(); | |||
1748 | } | |||
1749 | } | |||
1750 | ||||
1751 | //===----------------------------------------------------------------------===// | |||
1752 | // Operand | |||
1753 | //===----------------------------------------------------------------------===// | |||
1754 | ||||
1755 | static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { | |||
1756 | bool Lost; | |||
1757 | ||||
1758 | // Convert literal to single precision | |||
1759 | APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), | |||
1760 | APFloat::rmNearestTiesToEven, | |||
1761 | &Lost); | |||
1762 | // We allow precision lost but not overflow or underflow | |||
1763 | if (Status != APFloat::opOK && | |||
1764 | Lost && | |||
1765 | ((Status & APFloat::opOverflow) != 0 || | |||
1766 | (Status & APFloat::opUnderflow) != 0)) { | |||
1767 | return false; | |||
1768 | } | |||
1769 | ||||
1770 | return true; | |||
1771 | } | |||
1772 | ||||
1773 | static bool isSafeTruncation(int64_t Val, unsigned Size) { | |||
1774 | return isUIntN(Size, Val) || isIntN(Size, Val); | |||
1775 | } | |||
1776 | ||||
1777 | static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { | |||
1778 | if (VT.getScalarType() == MVT::i16) { | |||
1779 | // FP immediate values are broken. | |||
1780 | return isInlinableIntLiteral(Val); | |||
1781 | } | |||
1782 | ||||
1783 | // f16/v2f16 operands work correctly for all values. | |||
1784 | return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); | |||
1785 | } | |||
1786 | ||||
1787 | bool AMDGPUOperand::isInlinableImm(MVT type) const { | |||
1788 | ||||
1789 | // This is a hack to enable named inline values like | |||
1790 | // shared_base with both 32-bit and 64-bit operands. | |||
1791 | // Note that these values are defined as | |||
1792 | // 32-bit operands only. | |||
1793 | if (isInlineValue()) { | |||
1794 | return true; | |||
1795 | } | |||
1796 | ||||
1797 | if (!isImmTy(ImmTyNone)) { | |||
1798 | // Only plain immediates are inlinable (e.g. "clamp" attribute is not) | |||
1799 | return false; | |||
1800 | } | |||
1801 | // TODO: We should avoid using host float here. It would be better to | |||
1802 | // check the float bit values which is what a few other places do. | |||
1803 | // We've had bot failures before due to weird NaN support on mips hosts. | |||
1804 | ||||
1805 | APInt Literal(64, Imm.Val); | |||
1806 | ||||
1807 | if (Imm.IsFPImm) { // We got fp literal token | |||
1808 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand | |||
1809 | return AMDGPU::isInlinableLiteral64(Imm.Val, | |||
1810 | AsmParser->hasInv2PiInlineImm()); | |||
1811 | } | |||
1812 | ||||
1813 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); | |||
1814 | if (!canLosslesslyConvertToFPType(FPLiteral, type)) | |||
1815 | return false; | |||
1816 | ||||
1817 | if (type.getScalarSizeInBits() == 16) { | |||
1818 | return isInlineableLiteralOp16( | |||
1819 | static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), | |||
1820 | type, AsmParser->hasInv2PiInlineImm()); | |||
1821 | } | |||
1822 | ||||
1823 | // Check if single precision literal is inlinable | |||
1824 | return AMDGPU::isInlinableLiteral32( | |||
1825 | static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), | |||
1826 | AsmParser->hasInv2PiInlineImm()); | |||
1827 | } | |||
1828 | ||||
1829 | // We got int literal token. | |||
1830 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand | |||
1831 | return AMDGPU::isInlinableLiteral64(Imm.Val, | |||
1832 | AsmParser->hasInv2PiInlineImm()); | |||
1833 | } | |||
1834 | ||||
1835 | if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { | |||
1836 | return false; | |||
1837 | } | |||
1838 | ||||
1839 | if (type.getScalarSizeInBits() == 16) { | |||
1840 | return isInlineableLiteralOp16( | |||
1841 | static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), | |||
1842 | type, AsmParser->hasInv2PiInlineImm()); | |||
1843 | } | |||
1844 | ||||
1845 | return AMDGPU::isInlinableLiteral32( | |||
1846 | static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), | |||
1847 | AsmParser->hasInv2PiInlineImm()); | |||
1848 | } | |||
1849 | ||||
1850 | bool AMDGPUOperand::isLiteralImm(MVT type) const { | |||
1851 | // Check that this immediate can be added as literal | |||
1852 | if (!isImmTy(ImmTyNone)) { | |||
1853 | return false; | |||
1854 | } | |||
1855 | ||||
1856 | if (!Imm.IsFPImm) { | |||
1857 | // We got int literal token. | |||
1858 | ||||
1859 | if (type == MVT::f64 && hasFPModifiers()) { | |||
1860 | // Cannot apply fp modifiers to int literals preserving the same semantics | |||
1861 | // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, | |||
1862 | // disable these cases. | |||
1863 | return false; | |||
1864 | } | |||
1865 | ||||
1866 | unsigned Size = type.getSizeInBits(); | |||
1867 | if (Size == 64) | |||
1868 | Size = 32; | |||
1869 | ||||
1870 | // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP | |||
1871 | // types. | |||
1872 | return isSafeTruncation(Imm.Val, Size); | |||
1873 | } | |||
1874 | ||||
1875 | // We got fp literal token | |||
1876 | if (type == MVT::f64) { // Expected 64-bit fp operand | |||
1877 | // We would set low 64-bits of literal to zeroes but we accept this literals | |||
1878 | return true; | |||
1879 | } | |||
1880 | ||||
1881 | if (type == MVT::i64) { // Expected 64-bit int operand | |||
1882 | // We don't allow fp literals in 64-bit integer instructions. It is | |||
1883 | // unclear how we should encode them. | |||
1884 | return false; | |||
1885 | } | |||
1886 | ||||
1887 | // We allow fp literals with f16x2 operands assuming that the specified | |||
1888 | // literal goes into the lower half and the upper half is zero. We also | |||
1889 | // require that the literal may be losslesly converted to f16. | |||
1890 | MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : | |||
1891 | (type == MVT::v2i16)? MVT::i16 : | |||
1892 | (type == MVT::v2f32)? MVT::f32 : type; | |||
1893 | ||||
1894 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); | |||
1895 | return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); | |||
1896 | } | |||
1897 | ||||
1898 | bool AMDGPUOperand::isRegClass(unsigned RCID) const { | |||
1899 | return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); | |||
1900 | } | |||
1901 | ||||
1902 | bool AMDGPUOperand::isVRegWithInputMods() const { | |||
1903 | return isRegClass(AMDGPU::VGPR_32RegClassID) || | |||
1904 | // GFX90A allows DPP on 64-bit operands. | |||
1905 | (isRegClass(AMDGPU::VReg_64RegClassID) && | |||
1906 | AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); | |||
1907 | } | |||
1908 | ||||
1909 | bool AMDGPUOperand::isSDWAOperand(MVT type) const { | |||
1910 | if (AsmParser->isVI()) | |||
1911 | return isVReg32(); | |||
1912 | else if (AsmParser->isGFX9Plus()) | |||
1913 | return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); | |||
1914 | else | |||
1915 | return false; | |||
1916 | } | |||
1917 | ||||
1918 | bool AMDGPUOperand::isSDWAFP16Operand() const { | |||
1919 | return isSDWAOperand(MVT::f16); | |||
1920 | } | |||
1921 | ||||
1922 | bool AMDGPUOperand::isSDWAFP32Operand() const { | |||
1923 | return isSDWAOperand(MVT::f32); | |||
1924 | } | |||
1925 | ||||
1926 | bool AMDGPUOperand::isSDWAInt16Operand() const { | |||
1927 | return isSDWAOperand(MVT::i16); | |||
1928 | } | |||
1929 | ||||
1930 | bool AMDGPUOperand::isSDWAInt32Operand() const { | |||
1931 | return isSDWAOperand(MVT::i32); | |||
1932 | } | |||
1933 | ||||
1934 | bool AMDGPUOperand::isBoolReg() const { | |||
1935 | auto FB = AsmParser->getFeatureBits(); | |||
1936 | return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || | |||
1937 | (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); | |||
1938 | } | |||
1939 | ||||
1940 | uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const | |||
1941 | { | |||
1942 | assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers())(static_cast<void> (0)); | |||
1943 | assert(Size == 2 || Size == 4 || Size == 8)(static_cast<void> (0)); | |||
1944 | ||||
1945 | const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); | |||
1946 | ||||
1947 | if (Imm.Mods.Abs) { | |||
1948 | Val &= ~FpSignMask; | |||
1949 | } | |||
1950 | if (Imm.Mods.Neg) { | |||
1951 | Val ^= FpSignMask; | |||
1952 | } | |||
1953 | ||||
1954 | return Val; | |||
1955 | } | |||
1956 | ||||
1957 | void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { | |||
1958 | if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), | |||
1959 | Inst.getNumOperands())) { | |||
1960 | addLiteralImmOperand(Inst, Imm.Val, | |||
1961 | ApplyModifiers & | |||
1962 | isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); | |||
1963 | } else { | |||
1964 | assert(!isImmTy(ImmTyNone) || !hasModifiers())(static_cast<void> (0)); | |||
1965 | Inst.addOperand(MCOperand::createImm(Imm.Val)); | |||
1966 | setImmKindNone(); | |||
1967 | } | |||
1968 | } | |||
1969 | ||||
1970 | void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { | |||
1971 | const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); | |||
1972 | auto OpNum = Inst.getNumOperands(); | |||
1973 | // Check that this operand accepts literals | |||
1974 | assert(AMDGPU::isSISrcOperand(InstDesc, OpNum))(static_cast<void> (0)); | |||
1975 | ||||
1976 | if (ApplyModifiers) { | |||
1977 | assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum))(static_cast<void> (0)); | |||
1978 | const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); | |||
1979 | Val = applyInputFPModifiers(Val, Size); | |||
1980 | } | |||
1981 | ||||
1982 | APInt Literal(64, Val); | |||
1983 | uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; | |||
1984 | ||||
1985 | if (Imm.IsFPImm) { // We got fp literal token | |||
1986 | switch (OpTy) { | |||
1987 | case AMDGPU::OPERAND_REG_IMM_INT64: | |||
1988 | case AMDGPU::OPERAND_REG_IMM_FP64: | |||
1989 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: | |||
1990 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: | |||
1991 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: | |||
1992 | if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), | |||
1993 | AsmParser->hasInv2PiInlineImm())) { | |||
1994 | Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); | |||
1995 | setImmKindConst(); | |||
1996 | return; | |||
1997 | } | |||
1998 | ||||
1999 | // Non-inlineable | |||
2000 | if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand | |||
2001 | // For fp operands we check if low 32 bits are zeros | |||
2002 | if (Literal.getLoBits(32) != 0) { | |||
2003 | const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), | |||
2004 | "Can't encode literal as exact 64-bit floating-point operand. " | |||
2005 | "Low 32-bits will be set to zero"); | |||
2006 | } | |||
2007 | ||||
2008 | Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); | |||
2009 | setImmKindLiteral(); | |||
2010 | return; | |||
2011 | } | |||
2012 | ||||
2013 | // We don't allow fp literals in 64-bit integer instructions. It is | |||
2014 | // unclear how we should encode them. This case should be checked earlier | |||
2015 | // in predicate methods (isLiteralImm()) | |||
2016 | llvm_unreachable("fp literal in 64-bit integer instruction.")__builtin_unreachable(); | |||
2017 | ||||
2018 | case AMDGPU::OPERAND_REG_IMM_INT32: | |||
2019 | case AMDGPU::OPERAND_REG_IMM_FP32: | |||
2020 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: | |||
2021 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: | |||
2022 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: | |||
2023 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: | |||
2024 | case AMDGPU::OPERAND_REG_IMM_INT16: | |||
2025 | case AMDGPU::OPERAND_REG_IMM_FP16: | |||
2026 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: | |||
2027 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: | |||
2028 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: | |||
2029 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: | |||
2030 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: | |||
2031 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: | |||
2032 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: | |||
2033 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: | |||
2034 | case AMDGPU::OPERAND_REG_IMM_V2INT16: | |||
2035 | case AMDGPU::OPERAND_REG_IMM_V2FP16: | |||
2036 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: | |||
2037 | case AMDGPU::OPERAND_REG_IMM_V2FP32: | |||
2038 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: | |||
2039 | case AMDGPU::OPERAND_REG_IMM_V2INT32: { | |||
2040 | bool lost; | |||
2041 | APFloat FPLiteral(APFloat::IEEEdouble(), Literal); | |||
2042 | // Convert literal to single precision | |||
2043 | FPLiteral.convert(*getOpFltSemantics(OpTy), | |||
2044 | APFloat::rmNearestTiesToEven, &lost); | |||
2045 | // We allow precision lost but not overflow or underflow. This should be | |||
2046 | // checked earlier in isLiteralImm() | |||
2047 | ||||
2048 | uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); | |||
2049 | Inst.addOperand(MCOperand::createImm(ImmVal)); | |||
2050 | setImmKindLiteral(); | |||
2051 | return; | |||
2052 | } | |||
2053 | default: | |||
2054 | llvm_unreachable("invalid operand size")__builtin_unreachable(); | |||
2055 | } | |||
2056 | ||||
2057 | return; | |||
2058 | } | |||
2059 | ||||
2060 | // We got int literal token. | |||
2061 | // Only sign extend inline immediates. | |||
2062 | switch (OpTy) { | |||
2063 | case AMDGPU::OPERAND_REG_IMM_INT32: | |||
2064 | case AMDGPU::OPERAND_REG_IMM_FP32: | |||
2065 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: | |||
2066 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: | |||
2067 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: | |||
2068 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: | |||
2069 | case AMDGPU::OPERAND_REG_IMM_V2INT16: | |||
2070 | case AMDGPU::OPERAND_REG_IMM_V2FP16: | |||
2071 | case AMDGPU::OPERAND_REG_IMM_V2FP32: | |||
2072 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: | |||
2073 | case AMDGPU::OPERAND_REG_IMM_V2INT32: | |||
2074 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: | |||
2075 | if (isSafeTruncation(Val, 32) && | |||
2076 | AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), | |||
2077 | AsmParser->hasInv2PiInlineImm())) { | |||
2078 | Inst.addOperand(MCOperand::createImm(Val)); | |||
2079 | setImmKindConst(); | |||
2080 | return; | |||
2081 | } | |||
2082 | ||||
2083 | Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); | |||
2084 | setImmKindLiteral(); | |||
2085 | return; | |||
2086 | ||||
2087 | case AMDGPU::OPERAND_REG_IMM_INT64: | |||
2088 | case AMDGPU::OPERAND_REG_IMM_FP64: | |||
2089 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: | |||
2090 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: | |||
2091 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: | |||
2092 | if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { | |||
2093 | Inst.addOperand(MCOperand::createImm(Val)); | |||
2094 | setImmKindConst(); | |||
2095 | return; | |||
2096 | } | |||
2097 | ||||
2098 | Inst.addOperand(MCOperand::createImm(Lo_32(Val))); | |||
2099 | setImmKindLiteral(); | |||
2100 | return; | |||
2101 | ||||
2102 | case AMDGPU::OPERAND_REG_IMM_INT16: | |||
2103 | case AMDGPU::OPERAND_REG_IMM_FP16: | |||
2104 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: | |||
2105 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: | |||
2106 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: | |||
2107 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: | |||
2108 | if (isSafeTruncation(Val, 16) && | |||
2109 | AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), | |||
2110 | AsmParser->hasInv2PiInlineImm())) { | |||
2111 | Inst.addOperand(MCOperand::createImm(Val)); | |||
2112 | setImmKindConst(); | |||
2113 | return; | |||
2114 | } | |||
2115 | ||||
2116 | Inst.addOperand(MCOperand::createImm(Val & 0xffff)); | |||
2117 | setImmKindLiteral(); | |||
2118 | return; | |||
2119 | ||||
2120 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: | |||
2121 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: | |||
2122 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: | |||
2123 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { | |||
2124 | assert(isSafeTruncation(Val, 16))(static_cast<void> (0)); | |||
2125 | assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),(static_cast<void> (0)) | |||
2126 | AsmParser->hasInv2PiInlineImm()))(static_cast<void> (0)); | |||
2127 | ||||
2128 | Inst.addOperand(MCOperand::createImm(Val)); | |||
2129 | return; | |||
2130 | } | |||
2131 | default: | |||
2132 | llvm_unreachable("invalid operand size")__builtin_unreachable(); | |||
2133 | } | |||
2134 | } | |||
2135 | ||||
2136 | template <unsigned Bitwidth> | |||
2137 | void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { | |||
2138 | APInt Literal(64, Imm.Val); | |||
2139 | setImmKindNone(); | |||
2140 | ||||
2141 | if (!Imm.IsFPImm) { | |||
2142 | // We got int literal token. | |||
2143 | Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); | |||
2144 | return; | |||
2145 | } | |||
2146 | ||||
2147 | bool Lost; | |||
2148 | APFloat FPLiteral(APFloat::IEEEdouble(), Literal); | |||
2149 | FPLiteral.convert(*getFltSemantics(Bitwidth / 8), | |||
2150 | APFloat::rmNearestTiesToEven, &Lost); | |||
2151 | Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); | |||
2152 | } | |||
2153 | ||||
2154 | void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { | |||
2155 | Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); | |||
2156 | } | |||
2157 | ||||
2158 | static bool isInlineValue(unsigned Reg) { | |||
2159 | switch (Reg) { | |||
2160 | case AMDGPU::SRC_SHARED_BASE: | |||
2161 | case AMDGPU::SRC_SHARED_LIMIT: | |||
2162 | case AMDGPU::SRC_PRIVATE_BASE: | |||
2163 | case AMDGPU::SRC_PRIVATE_LIMIT: | |||
2164 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: | |||
2165 | return true; | |||
2166 | case AMDGPU::SRC_VCCZ: | |||
2167 | case AMDGPU::SRC_EXECZ: | |||
2168 | case AMDGPU::SRC_SCC: | |||
2169 | return true; | |||
2170 | case AMDGPU::SGPR_NULL: | |||
2171 | return true; | |||
2172 | default: | |||
2173 | return false; | |||
2174 | } | |||
2175 | } | |||
2176 | ||||
2177 | bool AMDGPUOperand::isInlineValue() const { | |||
2178 | return isRegKind() && ::isInlineValue(getReg()); | |||
2179 | } | |||
2180 | ||||
2181 | //===----------------------------------------------------------------------===// | |||
2182 | // AsmParser | |||
2183 | //===----------------------------------------------------------------------===// | |||
2184 | ||||
2185 | static int getRegClass(RegisterKind Is, unsigned RegWidth) { | |||
2186 | if (Is == IS_VGPR) { | |||
2187 | switch (RegWidth) { | |||
2188 | default: return -1; | |||
2189 | case 1: return AMDGPU::VGPR_32RegClassID; | |||
2190 | case 2: return AMDGPU::VReg_64RegClassID; | |||
2191 | case 3: return AMDGPU::VReg_96RegClassID; | |||
2192 | case 4: return AMDGPU::VReg_128RegClassID; | |||
2193 | case 5: return AMDGPU::VReg_160RegClassID; | |||
2194 | case 6: return AMDGPU::VReg_192RegClassID; | |||
2195 | case 7: return AMDGPU::VReg_224RegClassID; | |||
2196 | case 8: return AMDGPU::VReg_256RegClassID; | |||
2197 | case 16: return AMDGPU::VReg_512RegClassID; | |||
2198 | case 32: return AMDGPU::VReg_1024RegClassID; | |||
2199 | } | |||
2200 | } else if (Is == IS_TTMP) { | |||
2201 | switch (RegWidth) { | |||
2202 | default: return -1; | |||
2203 | case 1: return AMDGPU::TTMP_32RegClassID; | |||
2204 | case 2: return AMDGPU::TTMP_64RegClassID; | |||
2205 | case 4: return AMDGPU::TTMP_128RegClassID; | |||
2206 | case 8: return AMDGPU::TTMP_256RegClassID; | |||
2207 | case 16: return AMDGPU::TTMP_512RegClassID; | |||
2208 | } | |||
2209 | } else if (Is == IS_SGPR) { | |||
2210 | switch (RegWidth) { | |||
2211 | default: return -1; | |||
2212 | case 1: return AMDGPU::SGPR_32RegClassID; | |||
2213 | case 2: return AMDGPU::SGPR_64RegClassID; | |||
2214 | case 3: return AMDGPU::SGPR_96RegClassID; | |||
2215 | case 4: return AMDGPU::SGPR_128RegClassID; | |||
2216 | case 5: return AMDGPU::SGPR_160RegClassID; | |||
2217 | case 6: return AMDGPU::SGPR_192RegClassID; | |||
2218 | case 7: return AMDGPU::SGPR_224RegClassID; | |||
2219 | case 8: return AMDGPU::SGPR_256RegClassID; | |||
2220 | case 16: return AMDGPU::SGPR_512RegClassID; | |||
2221 | } | |||
2222 | } else if (Is == IS_AGPR) { | |||
2223 | switch (RegWidth) { | |||
2224 | default: return -1; | |||
2225 | case 1: return AMDGPU::AGPR_32RegClassID; | |||
2226 | case 2: return AMDGPU::AReg_64RegClassID; | |||
2227 | case 3: return AMDGPU::AReg_96RegClassID; | |||
2228 | case 4: return AMDGPU::AReg_128RegClassID; | |||
2229 | case 5: return AMDGPU::AReg_160RegClassID; | |||
2230 | case 6: return AMDGPU::AReg_192RegClassID; | |||
2231 | case 7: return AMDGPU::AReg_224RegClassID; | |||
2232 | case 8: return AMDGPU::AReg_256RegClassID; | |||
2233 | case 16: return AMDGPU::AReg_512RegClassID; | |||
2234 | case 32: return AMDGPU::AReg_1024RegClassID; | |||
2235 | } | |||
2236 | } | |||
2237 | return -1; | |||
2238 | } | |||
2239 | ||||
2240 | static unsigned getSpecialRegForName(StringRef RegName) { | |||
2241 | return StringSwitch<unsigned>(RegName) | |||
2242 | .Case("exec", AMDGPU::EXEC) | |||
2243 | .Case("vcc", AMDGPU::VCC) | |||
2244 | .Case("flat_scratch", AMDGPU::FLAT_SCR) | |||
2245 | .Case("xnack_mask", AMDGPU::XNACK_MASK) | |||
2246 | .Case("shared_base", AMDGPU::SRC_SHARED_BASE) | |||
2247 | .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) | |||
2248 | .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) | |||
2249 | .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) | |||
2250 | .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) | |||
2251 | .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) | |||
2252 | .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) | |||
2253 | .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) | |||
2254 | .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) | |||
2255 | .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) | |||
2256 | .Case("lds_direct", AMDGPU::LDS_DIRECT) | |||
2257 | .Case("src_lds_direct", AMDGPU::LDS_DIRECT) | |||
2258 | .Case("m0", AMDGPU::M0) | |||
2259 | .Case("vccz", AMDGPU::SRC_VCCZ) | |||
2260 | .Case("src_vccz", AMDGPU::SRC_VCCZ) | |||
2261 | .Case("execz", AMDGPU::SRC_EXECZ) | |||
2262 | .Case("src_execz", AMDGPU::SRC_EXECZ) | |||
2263 | .Case("scc", AMDGPU::SRC_SCC) | |||
2264 | .Case("src_scc", AMDGPU::SRC_SCC) | |||
2265 | .Case("tba", AMDGPU::TBA) | |||
2266 | .Case("tma", AMDGPU::TMA) | |||
2267 | .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) | |||
2268 | .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) | |||
2269 | .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) | |||
2270 | .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) | |||
2271 | .Case("vcc_lo", AMDGPU::VCC_LO) | |||
2272 | .Case("vcc_hi", AMDGPU::VCC_HI) | |||
2273 | .Case("exec_lo", AMDGPU::EXEC_LO) | |||
2274 | .Case("exec_hi", AMDGPU::EXEC_HI) | |||
2275 | .Case("tma_lo", AMDGPU::TMA_LO) | |||
2276 | .Case("tma_hi", AMDGPU::TMA_HI) | |||
2277 | .Case("tba_lo", AMDGPU::TBA_LO) | |||
2278 | .Case("tba_hi", AMDGPU::TBA_HI) | |||
2279 | .Case("pc", AMDGPU::PC_REG) | |||
2280 | .Case("null", AMDGPU::SGPR_NULL) | |||
2281 | .Default(AMDGPU::NoRegister); | |||
2282 | } | |||
2283 | ||||
2284 | bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, | |||
2285 | SMLoc &EndLoc, bool RestoreOnFailure) { | |||
2286 | auto R = parseRegister(); | |||
2287 | if (!R) return true; | |||
2288 | assert(R->isReg())(static_cast<void> (0)); | |||
2289 | RegNo = R->getReg(); | |||
2290 | StartLoc = R->getStartLoc(); | |||
2291 | EndLoc = R->getEndLoc(); | |||
2292 | return false; | |||
2293 | } | |||
2294 | ||||
2295 | bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, | |||
2296 | SMLoc &EndLoc) { | |||
2297 | return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); | |||
2298 | } | |||
2299 | ||||
2300 | OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, | |||
2301 | SMLoc &StartLoc, | |||
2302 | SMLoc &EndLoc) { | |||
2303 | bool Result = | |||
2304 | ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); | |||
2305 | bool PendingErrors = getParser().hasPendingError(); | |||
2306 | getParser().clearPendingErrors(); | |||
2307 | if (PendingErrors) | |||
2308 | return MatchOperand_ParseFail; | |||
2309 | if (Result) | |||
2310 | return MatchOperand_NoMatch; | |||
2311 | return MatchOperand_Success; | |||
2312 | } | |||
2313 | ||||
2314 | bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, | |||
2315 | RegisterKind RegKind, unsigned Reg1, | |||
2316 | SMLoc Loc) { | |||
2317 | switch (RegKind) { | |||
2318 | case IS_SPECIAL: | |||
2319 | if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { | |||
2320 | Reg = AMDGPU::EXEC; | |||
2321 | RegWidth = 2; | |||
2322 | return true; | |||
2323 | } | |||
2324 | if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { | |||
2325 | Reg = AMDGPU::FLAT_SCR; | |||
2326 | RegWidth = 2; | |||
2327 | return true; | |||
2328 | } | |||
2329 | if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { | |||
2330 | Reg = AMDGPU::XNACK_MASK; | |||
2331 | RegWidth = 2; | |||
2332 | return true; | |||
2333 | } | |||
2334 | if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { | |||
2335 | Reg = AMDGPU::VCC; | |||
2336 | RegWidth = 2; | |||
2337 | return true; | |||
2338 | } | |||
2339 | if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { | |||
2340 | Reg = AMDGPU::TBA; | |||
2341 | RegWidth = 2; | |||
2342 | return true; | |||
2343 | } | |||
2344 | if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { | |||
2345 | Reg = AMDGPU::TMA; | |||
2346 | RegWidth = 2; | |||
2347 | return true; | |||
2348 | } | |||
2349 | Error(Loc, "register does not fit in the list"); | |||
2350 | return false; | |||
2351 | case IS_VGPR: | |||
2352 | case IS_SGPR: | |||
2353 | case IS_AGPR: | |||
2354 | case IS_TTMP: | |||
2355 | if (Reg1 != Reg + RegWidth) { | |||
2356 | Error(Loc, "registers in a list must have consecutive indices"); | |||
2357 | return false; | |||
2358 | } | |||
2359 | RegWidth++; | |||
2360 | return true; | |||
2361 | default: | |||
2362 | llvm_unreachable("unexpected register kind")__builtin_unreachable(); | |||
2363 | } | |||
2364 | } | |||
2365 | ||||
2366 | struct RegInfo { | |||
2367 | StringLiteral Name; | |||
2368 | RegisterKind Kind; | |||
2369 | }; | |||
2370 | ||||
2371 | static constexpr RegInfo RegularRegisters[] = { | |||
2372 | {{"v"}, IS_VGPR}, | |||
2373 | {{"s"}, IS_SGPR}, | |||
2374 | {{"ttmp"}, IS_TTMP}, | |||
2375 | {{"acc"}, IS_AGPR}, | |||
2376 | {{"a"}, IS_AGPR}, | |||
2377 | }; | |||
2378 | ||||
2379 | static bool isRegularReg(RegisterKind Kind) { | |||
2380 | return Kind == IS_VGPR || | |||
2381 | Kind == IS_SGPR || | |||
2382 | Kind == IS_TTMP || | |||
2383 | Kind == IS_AGPR; | |||
2384 | } | |||
2385 | ||||
2386 | static const RegInfo* getRegularRegInfo(StringRef Str) { | |||
2387 | for (const RegInfo &Reg : RegularRegisters) | |||
2388 | if (Str.startswith(Reg.Name)) | |||
2389 | return &Reg; | |||
2390 | return nullptr; | |||
2391 | } | |||
2392 | ||||
2393 | static bool getRegNum(StringRef Str, unsigned& Num) { | |||
2394 | return !Str.getAsInteger(10, Num); | |||
2395 | } | |||
2396 | ||||
2397 | bool | |||
2398 | AMDGPUAsmParser::isRegister(const AsmToken &Token, | |||
2399 | const AsmToken &NextToken) const { | |||
2400 | ||||
2401 | // A list of consecutive registers: [s0,s1,s2,s3] | |||
2402 | if (Token.is(AsmToken::LBrac)) | |||
2403 | return true; | |||
2404 | ||||
2405 | if (!Token.is(AsmToken::Identifier)) | |||
2406 | return false; | |||
2407 | ||||
2408 | // A single register like s0 or a range of registers like s[0:1] | |||
2409 | ||||
2410 | StringRef Str = Token.getString(); | |||
2411 | const RegInfo *Reg = getRegularRegInfo(Str); | |||
2412 | if (Reg) { | |||
2413 | StringRef RegName = Reg->Name; | |||
2414 | StringRef RegSuffix = Str.substr(RegName.size()); | |||
2415 | if (!RegSuffix.empty()) { | |||
2416 | unsigned Num; | |||
2417 | // A single register with an index: rXX | |||
2418 | if (getRegNum(RegSuffix, Num)) | |||
2419 | return true; | |||
2420 | } else { | |||
2421 | // A range of registers: r[XX:YY]. | |||
2422 | if (NextToken.is(AsmToken::LBrac)) | |||
2423 | return true; | |||
2424 | } | |||
2425 | } | |||
2426 | ||||
2427 | return getSpecialRegForName(Str) != AMDGPU::NoRegister; | |||
2428 | } | |||
2429 | ||||
2430 | bool | |||
2431 | AMDGPUAsmParser::isRegister() | |||
2432 | { | |||
2433 | return isRegister(getToken(), peekToken()); | |||
2434 | } | |||
2435 | ||||
2436 | unsigned | |||
2437 | AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, | |||
2438 | unsigned RegNum, | |||
2439 | unsigned RegWidth, | |||
2440 | SMLoc Loc) { | |||
2441 | ||||
2442 | assert(isRegularReg(RegKind))(static_cast<void> (0)); | |||
2443 | ||||
2444 | unsigned AlignSize = 1; | |||
2445 | if (RegKind == IS_SGPR || RegKind == IS_TTMP) { | |||
2446 | // SGPR and TTMP registers must be aligned. | |||
2447 | // Max required alignment is 4 dwords. | |||
2448 | AlignSize = std::min(RegWidth, 4u); | |||
2449 | } | |||
2450 | ||||
2451 | if (RegNum % AlignSize != 0) { | |||
2452 | Error(Loc, "invalid register alignment"); | |||
2453 | return AMDGPU::NoRegister; | |||
2454 | } | |||
2455 | ||||
2456 | unsigned RegIdx = RegNum / AlignSize; | |||
2457 | int RCID = getRegClass(RegKind, RegWidth); | |||
2458 | if (RCID == -1) { | |||
2459 | Error(Loc, "invalid or unsupported register size"); | |||
2460 | return AMDGPU::NoRegister; | |||
2461 | } | |||
2462 | ||||
2463 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | |||
2464 | const MCRegisterClass RC = TRI->getRegClass(RCID); | |||
2465 | if (RegIdx >= RC.getNumRegs()) { | |||
2466 | Error(Loc, "register index is out of range"); | |||
2467 | return AMDGPU::NoRegister; | |||
2468 | } | |||
2469 | ||||
2470 | return RC.getRegister(RegIdx); | |||
2471 | } | |||
2472 | ||||
2473 | bool | |||
2474 | AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { | |||
2475 | int64_t RegLo, RegHi; | |||
2476 | if (!skipToken(AsmToken::LBrac, "missing register index")) | |||
2477 | return false; | |||
2478 | ||||
2479 | SMLoc FirstIdxLoc = getLoc(); | |||
2480 | SMLoc SecondIdxLoc; | |||
2481 | ||||
2482 | if (!parseExpr(RegLo)) | |||
2483 | return false; | |||
2484 | ||||
2485 | if (trySkipToken(AsmToken::Colon)) { | |||
2486 | SecondIdxLoc = getLoc(); | |||
2487 | if (!parseExpr(RegHi)) | |||
2488 | return false; | |||
2489 | } else { | |||
2490 | RegHi = RegLo; | |||
2491 | } | |||
2492 | ||||
2493 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) | |||
2494 | return false; | |||
2495 | ||||
2496 | if (!isUInt<32>(RegLo)) { | |||
2497 | Error(FirstIdxLoc, "invalid register index"); | |||
2498 | return false; | |||
2499 | } | |||
2500 | ||||
2501 | if (!isUInt<32>(RegHi)) { | |||
2502 | Error(SecondIdxLoc, "invalid register index"); | |||
2503 | return false; | |||
2504 | } | |||
2505 | ||||
2506 | if (RegLo > RegHi) { | |||
2507 | Error(FirstIdxLoc, "first register index should not exceed second index"); | |||
2508 | return false; | |||
2509 | } | |||
2510 | ||||
2511 | Num = static_cast<unsigned>(RegLo); | |||
2512 | Width = (RegHi - RegLo) + 1; | |||
2513 | return true; | |||
2514 | } | |||
2515 | ||||
2516 | unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, | |||
2517 | unsigned &RegNum, unsigned &RegWidth, | |||
2518 | SmallVectorImpl<AsmToken> &Tokens) { | |||
2519 | assert(isToken(AsmToken::Identifier))(static_cast<void> (0)); | |||
2520 | unsigned Reg = getSpecialRegForName(getTokenStr()); | |||
2521 | if (Reg) { | |||
2522 | RegNum = 0; | |||
2523 | RegWidth = 1; | |||
2524 | RegKind = IS_SPECIAL; | |||
2525 | Tokens.push_back(getToken()); | |||
2526 | lex(); // skip register name | |||
2527 | } | |||
2528 | return Reg; | |||
2529 | } | |||
2530 | ||||
2531 | unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, | |||
2532 | unsigned &RegNum, unsigned &RegWidth, | |||
2533 | SmallVectorImpl<AsmToken> &Tokens) { | |||
2534 | assert(isToken(AsmToken::Identifier))(static_cast<void> (0)); | |||
2535 | StringRef RegName = getTokenStr(); | |||
2536 | auto Loc = getLoc(); | |||
2537 | ||||
2538 | const RegInfo *RI = getRegularRegInfo(RegName); | |||
2539 | if (!RI) { | |||
2540 | Error(Loc, "invalid register name"); | |||
2541 | return AMDGPU::NoRegister; | |||
2542 | } | |||
2543 | ||||
2544 | Tokens.push_back(getToken()); | |||
2545 | lex(); // skip register name | |||
2546 | ||||
2547 | RegKind = RI->Kind; | |||
2548 | StringRef RegSuffix = RegName.substr(RI->Name.size()); | |||
2549 | if (!RegSuffix.empty()) { | |||
2550 | // Single 32-bit register: vXX. | |||
2551 | if (!getRegNum(RegSuffix, RegNum)) { | |||
2552 | Error(Loc, "invalid register index"); | |||
2553 | return AMDGPU::NoRegister; | |||
2554 | } | |||
2555 | RegWidth = 1; | |||
2556 | } else { | |||
2557 | // Range of registers: v[XX:YY]. ":YY" is optional. | |||
2558 | if (!ParseRegRange(RegNum, RegWidth)) | |||
2559 | return AMDGPU::NoRegister; | |||
2560 | } | |||
2561 | ||||
2562 | return getRegularReg(RegKind, RegNum, RegWidth, Loc); | |||
2563 | } | |||
2564 | ||||
2565 | unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, | |||
2566 | unsigned &RegWidth, | |||
2567 | SmallVectorImpl<AsmToken> &Tokens) { | |||
2568 | unsigned Reg = AMDGPU::NoRegister; | |||
2569 | auto ListLoc = getLoc(); | |||
2570 | ||||
2571 | if (!skipToken(AsmToken::LBrac, | |||
2572 | "expected a register or a list of registers")) { | |||
2573 | return AMDGPU::NoRegister; | |||
2574 | } | |||
2575 | ||||
2576 | // List of consecutive registers, e.g.: [s0,s1,s2,s3] | |||
2577 | ||||
2578 | auto Loc = getLoc(); | |||
2579 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) | |||
2580 | return AMDGPU::NoRegister; | |||
2581 | if (RegWidth != 1) { | |||
2582 | Error(Loc, "expected a single 32-bit register"); | |||
2583 | return AMDGPU::NoRegister; | |||
2584 | } | |||
2585 | ||||
2586 | for (; trySkipToken(AsmToken::Comma); ) { | |||
2587 | RegisterKind NextRegKind; | |||
2588 | unsigned NextReg, NextRegNum, NextRegWidth; | |||
2589 | Loc = getLoc(); | |||
2590 | ||||
2591 | if (!ParseAMDGPURegister(NextRegKind, NextReg, | |||
2592 | NextRegNum, NextRegWidth, | |||
2593 | Tokens)) { | |||
2594 | return AMDGPU::NoRegister; | |||
2595 | } | |||
2596 | if (NextRegWidth != 1) { | |||
2597 | Error(Loc, "expected a single 32-bit register"); | |||
2598 | return AMDGPU::NoRegister; | |||
2599 | } | |||
2600 | if (NextRegKind != RegKind) { | |||
2601 | Error(Loc, "registers in a list must be of the same kind"); | |||
2602 | return AMDGPU::NoRegister; | |||
2603 | } | |||
2604 | if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) | |||
2605 | return AMDGPU::NoRegister; | |||
2606 | } | |||
2607 | ||||
2608 | if (!skipToken(AsmToken::RBrac, | |||
2609 | "expected a comma or a closing square bracket")) { | |||
2610 | return AMDGPU::NoRegister; | |||
2611 | } | |||
2612 | ||||
2613 | if (isRegularReg(RegKind)) | |||
2614 | Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); | |||
2615 | ||||
2616 | return Reg; | |||
2617 | } | |||
2618 | ||||
2619 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, | |||
2620 | unsigned &RegNum, unsigned &RegWidth, | |||
2621 | SmallVectorImpl<AsmToken> &Tokens) { | |||
2622 | auto Loc = getLoc(); | |||
2623 | Reg = AMDGPU::NoRegister; | |||
2624 | ||||
2625 | if (isToken(AsmToken::Identifier)) { | |||
2626 | Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); | |||
2627 | if (Reg == AMDGPU::NoRegister) | |||
2628 | Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); | |||
2629 | } else { | |||
2630 | Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); | |||
2631 | } | |||
2632 | ||||
2633 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | |||
2634 | if (Reg == AMDGPU::NoRegister) { | |||
2635 | assert(Parser.hasPendingError())(static_cast<void> (0)); | |||
2636 | return false; | |||
2637 | } | |||
2638 | ||||
2639 | if (!subtargetHasRegister(*TRI, Reg)) { | |||
2640 | if (Reg == AMDGPU::SGPR_NULL) { | |||
2641 | Error(Loc, "'null' operand is not supported on this GPU"); | |||
2642 | } else { | |||
2643 | Error(Loc, "register not available on this GPU"); | |||
2644 | } | |||
2645 | return false; | |||
2646 | } | |||
2647 | ||||
2648 | return true; | |||
2649 | } | |||
2650 | ||||
2651 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, | |||
2652 | unsigned &RegNum, unsigned &RegWidth, | |||
2653 | bool RestoreOnFailure /*=false*/) { | |||
2654 | Reg = AMDGPU::NoRegister; | |||
2655 | ||||
2656 | SmallVector<AsmToken, 1> Tokens; | |||
2657 | if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { | |||
2658 | if (RestoreOnFailure) { | |||
2659 | while (!Tokens.empty()) { | |||
2660 | getLexer().UnLex(Tokens.pop_back_val()); | |||
2661 | } | |||
2662 | } | |||
2663 | return true; | |||
2664 | } | |||
2665 | return false; | |||
2666 | } | |||
2667 | ||||
2668 | Optional<StringRef> | |||
2669 | AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { | |||
2670 | switch (RegKind) { | |||
2671 | case IS_VGPR: | |||
2672 | return StringRef(".amdgcn.next_free_vgpr"); | |||
2673 | case IS_SGPR: | |||
2674 | return StringRef(".amdgcn.next_free_sgpr"); | |||
2675 | default: | |||
2676 | return None; | |||
2677 | } | |||
2678 | } | |||
2679 | ||||
2680 | void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { | |||
2681 | auto SymbolName = getGprCountSymbolName(RegKind); | |||
2682 | assert(SymbolName && "initializing invalid register kind")(static_cast<void> (0)); | |||
2683 | MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); | |||
2684 | Sym->setVariableValue(MCConstantExpr::create(0, getContext())); | |||
2685 | } | |||
2686 | ||||
2687 | bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, | |||
2688 | unsigned DwordRegIndex, | |||
2689 | unsigned RegWidth) { | |||
2690 | // Symbols are only defined for GCN targets | |||
2691 | if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) | |||
2692 | return true; | |||
2693 | ||||
2694 | auto SymbolName = getGprCountSymbolName(RegKind); | |||
2695 | if (!SymbolName) | |||
2696 | return true; | |||
2697 | MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); | |||
2698 | ||||
2699 | int64_t NewMax = DwordRegIndex + RegWidth - 1; | |||
2700 | int64_t OldCount; | |||
2701 | ||||
2702 | if (!Sym->isVariable()) | |||
2703 | return !Error(getLoc(), | |||
2704 | ".amdgcn.next_free_{v,s}gpr symbols must be variable"); | |||
2705 | if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) | |||
2706 | return !Error( | |||
2707 | getLoc(), | |||
2708 | ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); | |||
2709 | ||||
2710 | if (OldCount <= NewMax) | |||
2711 | Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); | |||
2712 | ||||
2713 | return true; | |||
2714 | } | |||
2715 | ||||
2716 | std::unique_ptr<AMDGPUOperand> | |||
2717 | AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { | |||
2718 | const auto &Tok = getToken(); | |||
2719 | SMLoc StartLoc = Tok.getLoc(); | |||
2720 | SMLoc EndLoc = Tok.getEndLoc(); | |||
2721 | RegisterKind RegKind; | |||
2722 | unsigned Reg, RegNum, RegWidth; | |||
2723 | ||||
2724 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { | |||
2725 | return nullptr; | |||
2726 | } | |||
2727 | if (isHsaAbiVersion3Or4(&getSTI())) { | |||
2728 | if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) | |||
2729 | return nullptr; | |||
2730 | } else | |||
2731 | KernelScope.usesRegister(RegKind, RegNum, RegWidth); | |||
2732 | return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); | |||
2733 | } | |||
2734 | ||||
2735 | OperandMatchResultTy | |||
2736 | AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { | |||
2737 | // TODO: add syntactic sugar for 1/(2*PI) | |||
2738 | ||||
2739 | assert(!isRegister())(static_cast<void> (0)); | |||
2740 | assert(!isModifier())(static_cast<void> (0)); | |||
2741 | ||||
2742 | const auto& Tok = getToken(); | |||
2743 | const auto& NextTok = peekToken(); | |||
2744 | bool IsReal = Tok.is(AsmToken::Real); | |||
2745 | SMLoc S = getLoc(); | |||
2746 | bool Negate = false; | |||
2747 | ||||
2748 | if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { | |||
2749 | lex(); | |||
2750 | IsReal = true; | |||
2751 | Negate = true; | |||
2752 | } | |||
2753 | ||||
2754 | if (IsReal) { | |||
2755 | // Floating-point expressions are not supported. | |||
2756 | // Can only allow floating-point literals with an | |||
2757 | // optional sign. | |||
2758 | ||||
2759 | StringRef Num = getTokenStr(); | |||
2760 | lex(); | |||
2761 | ||||
2762 | APFloat RealVal(APFloat::IEEEdouble()); | |||
2763 | auto roundMode = APFloat::rmNearestTiesToEven; | |||
2764 | if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { | |||
2765 | return MatchOperand_ParseFail; | |||
2766 | } | |||
2767 | if (Negate) | |||
2768 | RealVal.changeSign(); | |||
2769 | ||||
2770 | Operands.push_back( | |||
2771 | AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, | |||
2772 | AMDGPUOperand::ImmTyNone, true)); | |||
2773 | ||||
2774 | return MatchOperand_Success; | |||
2775 | ||||
2776 | } else { | |||
2777 | int64_t IntVal; | |||
2778 | const MCExpr *Expr; | |||
2779 | SMLoc S = getLoc(); | |||
2780 | ||||
2781 | if (HasSP3AbsModifier) { | |||
2782 | // This is a workaround for handling expressions | |||
2783 | // as arguments of SP3 'abs' modifier, for example: | |||
2784 | // |1.0| | |||
2785 | // |-1| | |||
2786 | // |1+x| | |||
2787 | // This syntax is not compatible with syntax of standard | |||
2788 | // MC expressions (due to the trailing '|'). | |||
2789 | SMLoc EndLoc; | |||
2790 | if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) | |||
2791 | return MatchOperand_ParseFail; | |||
2792 | } else { | |||
2793 | if (Parser.parseExpression(Expr)) | |||
2794 | return MatchOperand_ParseFail; | |||
2795 | } | |||
2796 | ||||
2797 | if (Expr->evaluateAsAbsolute(IntVal)) { | |||
2798 | Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); | |||
2799 | } else { | |||
2800 | Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); | |||
2801 | } | |||
2802 | ||||
2803 | return MatchOperand_Success; | |||
2804 | } | |||
2805 | ||||
2806 | return MatchOperand_NoMatch; | |||
2807 | } | |||
2808 | ||||
2809 | OperandMatchResultTy | |||
2810 | AMDGPUAsmParser::parseReg(OperandVector &Operands) { | |||
2811 | if (!isRegister()) | |||
2812 | return MatchOperand_NoMatch; | |||
2813 | ||||
2814 | if (auto R = parseRegister()) { | |||
2815 | assert(R->isReg())(static_cast<void> (0)); | |||
2816 | Operands.push_back(std::move(R)); | |||
2817 | return MatchOperand_Success; | |||
2818 | } | |||
2819 | return MatchOperand_ParseFail; | |||
2820 | } | |||
2821 | ||||
2822 | OperandMatchResultTy | |||
2823 | AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { | |||
2824 | auto res = parseReg(Operands); | |||
2825 | if (res != MatchOperand_NoMatch) { | |||
2826 | return res; | |||
2827 | } else if (isModifier()) { | |||
2828 | return MatchOperand_NoMatch; | |||
2829 | } else { | |||
2830 | return parseImm(Operands, HasSP3AbsMod); | |||
2831 | } | |||
2832 | } | |||
2833 | ||||
2834 | bool | |||
2835 | AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { | |||
2836 | if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { | |||
2837 | const auto &str = Token.getString(); | |||
2838 | return str == "abs" || str == "neg" || str == "sext"; | |||
2839 | } | |||
2840 | return false; | |||
2841 | } | |||
2842 | ||||
2843 | bool | |||
2844 | AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { | |||
2845 | return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); | |||
2846 | } | |||
2847 | ||||
2848 | bool | |||
2849 | AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { | |||
2850 | return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); | |||
2851 | } | |||
2852 | ||||
2853 | bool | |||
2854 | AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { | |||
2855 | return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); | |||
2856 | } | |||
2857 | ||||
2858 | // Check if this is an operand modifier or an opcode modifier | |||
2859 | // which may look like an expression but it is not. We should | |||
2860 | // avoid parsing these modifiers as expressions. Currently | |||
2861 | // recognized sequences are: | |||
2862 | // |...| | |||
2863 | // abs(...) | |||
2864 | // neg(...) | |||
2865 | // sext(...) | |||
2866 | // -reg | |||
2867 | // -|...| | |||
2868 | // -abs(...) | |||
2869 | // name:... | |||
2870 | // Note that simple opcode modifiers like 'gds' may be parsed as | |||
2871 | // expressions; this is a special case. See getExpressionAsToken. | |||
2872 | // | |||
2873 | bool | |||
2874 | AMDGPUAsmParser::isModifier() { | |||
2875 | ||||
2876 | AsmToken Tok = getToken(); | |||
2877 | AsmToken NextToken[2]; | |||
2878 | peekTokens(NextToken); | |||
2879 | ||||
2880 | return isOperandModifier(Tok, NextToken[0]) || | |||
2881 | (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || | |||
2882 | isOpcodeModifierWithVal(Tok, NextToken[0]); | |||
2883 | } | |||
2884 | ||||
2885 | // Check if the current token is an SP3 'neg' modifier. | |||
2886 | // Currently this modifier is allowed in the following context: | |||
2887 | // | |||
2888 | // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". | |||
2889 | // 2. Before an 'abs' modifier: -abs(...) | |||
2890 | // 3. Before an SP3 'abs' modifier: -|...| | |||
2891 | // | |||
2892 | // In all other cases "-" is handled as a part | |||
2893 | // of an expression that follows the sign. | |||
2894 | // | |||
2895 | // Note: When "-" is followed by an integer literal, | |||
2896 | // this is interpreted as integer negation rather | |||
2897 | // than a floating-point NEG modifier applied to N. | |||
2898 | // Beside being contr-intuitive, such use of floating-point | |||
2899 | // NEG modifier would have resulted in different meaning | |||
2900 | // of integer literals used with VOP1/2/C and VOP3, | |||
2901 | // for example: | |||
2902 | // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF | |||
2903 | // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 | |||
2904 | // Negative fp literals with preceding "-" are | |||
2905 | // handled likewise for unifomtity | |||
2906 | // | |||
2907 | bool | |||
2908 | AMDGPUAsmParser::parseSP3NegModifier() { | |||
2909 | ||||
2910 | AsmToken NextToken[2]; | |||
2911 | peekTokens(NextToken); | |||
2912 | ||||
2913 | if (isToken(AsmToken::Minus) && | |||
2914 | (isRegister(NextToken[0], NextToken[1]) || | |||
2915 | NextToken[0].is(AsmToken::Pipe) || | |||
2916 | isId(NextToken[0], "abs"))) { | |||
2917 | lex(); | |||
2918 | return true; | |||
2919 | } | |||
2920 | ||||
2921 | return false; | |||
2922 | } | |||
2923 | ||||
2924 | OperandMatchResultTy | |||
2925 | AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, | |||
2926 | bool AllowImm) { | |||
2927 | bool Neg, SP3Neg; | |||
2928 | bool Abs, SP3Abs; | |||
2929 | SMLoc Loc; | |||
2930 | ||||
2931 | // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. | |||
2932 | if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { | |||
2933 | Error(getLoc(), "invalid syntax, expected 'neg' modifier"); | |||
2934 | return MatchOperand_ParseFail; | |||
2935 | } | |||
2936 | ||||
2937 | SP3Neg = parseSP3NegModifier(); | |||
2938 | ||||
2939 | Loc = getLoc(); | |||
2940 | Neg = trySkipId("neg"); | |||
2941 | if (Neg && SP3Neg) { | |||
2942 | Error(Loc, "expected register or immediate"); | |||
2943 | return MatchOperand_ParseFail; | |||
2944 | } | |||
2945 | if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) | |||
2946 | return MatchOperand_ParseFail; | |||
2947 | ||||
2948 | Abs = trySkipId("abs"); | |||
2949 | if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) | |||
2950 | return MatchOperand_ParseFail; | |||
2951 | ||||
2952 | Loc = getLoc(); | |||
2953 | SP3Abs = trySkipToken(AsmToken::Pipe); | |||
2954 | if (Abs && SP3Abs) { | |||
2955 | Error(Loc, "expected register or immediate"); | |||
2956 | return MatchOperand_ParseFail; | |||
2957 | } | |||
2958 | ||||
2959 | OperandMatchResultTy Res; | |||
2960 | if (AllowImm) { | |||
2961 | Res = parseRegOrImm(Operands, SP3Abs); | |||
2962 | } else { | |||
2963 | Res = parseReg(Operands); | |||
2964 | } | |||
2965 | if (Res != MatchOperand_Success) { | |||
2966 | return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res; | |||
2967 | } | |||
2968 | ||||
2969 | if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) | |||
2970 | return MatchOperand_ParseFail; | |||
2971 | if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) | |||
2972 | return MatchOperand_ParseFail; | |||
2973 | if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) | |||
2974 | return MatchOperand_ParseFail; | |||
2975 | ||||
2976 | AMDGPUOperand::Modifiers Mods; | |||
2977 | Mods.Abs = Abs || SP3Abs; | |||
2978 | Mods.Neg = Neg || SP3Neg; | |||
2979 | ||||
2980 | if (Mods.hasFPModifiers()) { | |||
2981 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); | |||
2982 | if (Op.isExpr()) { | |||
2983 | Error(Op.getStartLoc(), "expected an absolute expression"); | |||
2984 | return MatchOperand_ParseFail; | |||
2985 | } | |||
2986 | Op.setModifiers(Mods); | |||
2987 | } | |||
2988 | return MatchOperand_Success; | |||
2989 | } | |||
2990 | ||||
2991 | OperandMatchResultTy | |||
2992 | AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, | |||
2993 | bool AllowImm) { | |||
2994 | bool Sext = trySkipId("sext"); | |||
2995 | if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) | |||
2996 | return MatchOperand_ParseFail; | |||
2997 | ||||
2998 | OperandMatchResultTy Res; | |||
2999 | if (AllowImm) { | |||
3000 | Res = parseRegOrImm(Operands); | |||
3001 | } else { | |||
3002 | Res = parseReg(Operands); | |||
3003 | } | |||
3004 | if (Res != MatchOperand_Success) { | |||
3005 | return Sext? MatchOperand_ParseFail : Res; | |||
3006 | } | |||
3007 | ||||
3008 | if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) | |||
3009 | return MatchOperand_ParseFail; | |||
3010 | ||||
3011 | AMDGPUOperand::Modifiers Mods; | |||
3012 | Mods.Sext = Sext; | |||
3013 | ||||
3014 | if (Mods.hasIntModifiers()) { | |||
3015 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); | |||
3016 | if (Op.isExpr()) { | |||
3017 | Error(Op.getStartLoc(), "expected an absolute expression"); | |||
3018 | return MatchOperand_ParseFail; | |||
3019 | } | |||
3020 | Op.setModifiers(Mods); | |||
3021 | } | |||
3022 | ||||
3023 | return MatchOperand_Success; | |||
3024 | } | |||
3025 | ||||
3026 | OperandMatchResultTy | |||
3027 | AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { | |||
3028 | return parseRegOrImmWithFPInputMods(Operands, false); | |||
3029 | } | |||
3030 | ||||
3031 | OperandMatchResultTy | |||
3032 | AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { | |||
3033 | return parseRegOrImmWithIntInputMods(Operands, false); | |||
3034 | } | |||
3035 | ||||
3036 | OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { | |||
3037 | auto Loc = getLoc(); | |||
3038 | if (trySkipId("off")) { | |||
3039 | Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, | |||
3040 | AMDGPUOperand::ImmTyOff, false)); | |||
3041 | return MatchOperand_Success; | |||
3042 | } | |||
3043 | ||||
3044 | if (!isRegister()) | |||
3045 | return MatchOperand_NoMatch; | |||
3046 | ||||
3047 | std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); | |||
3048 | if (Reg) { | |||
3049 | Operands.push_back(std::move(Reg)); | |||
3050 | return MatchOperand_Success; | |||
3051 | } | |||
3052 | ||||
3053 | return MatchOperand_ParseFail; | |||
3054 | ||||
3055 | } | |||
3056 | ||||
3057 | unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { | |||
3058 | uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; | |||
3059 | ||||
3060 | if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || | |||
3061 | (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || | |||
3062 | (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || | |||
3063 | (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) | |||
3064 | return Match_InvalidOperand; | |||
3065 | ||||
3066 | if ((TSFlags & SIInstrFlags::VOP3) && | |||
3067 | (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && | |||
3068 | getForcedEncodingSize() != 64) | |||
3069 | return Match_PreferE32; | |||
3070 | ||||
3071 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || | |||
3072 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { | |||
3073 | // v_mac_f32/16 allow only dst_sel == DWORD; | |||
3074 | auto OpNum = | |||
3075 | AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); | |||
3076 | const auto &Op = Inst.getOperand(OpNum); | |||
3077 | if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { | |||
3078 | return Match_InvalidOperand; | |||
3079 | } | |||
3080 | } | |||
3081 | ||||
3082 | return Match_Success; | |||
3083 | } | |||
3084 | ||||
3085 | static ArrayRef<unsigned> getAllVariants() { | |||
3086 | static const unsigned Variants[] = { | |||
3087 | AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, | |||
3088 | AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP | |||
3089 | }; | |||
3090 | ||||
3091 | return makeArrayRef(Variants); | |||
3092 | } | |||
3093 | ||||
3094 | // What asm variants we should check | |||
3095 | ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { | |||
3096 | if (getForcedEncodingSize() == 32) { | |||
3097 | static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; | |||
3098 | return makeArrayRef(Variants); | |||
3099 | } | |||
3100 | ||||
3101 | if (isForcedVOP3()) { | |||
3102 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; | |||
3103 | return makeArrayRef(Variants); | |||
3104 | } | |||
3105 | ||||
3106 | if (isForcedSDWA()) { | |||
3107 | static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, | |||
3108 | AMDGPUAsmVariants::SDWA9}; | |||
3109 | return makeArrayRef(Variants); | |||
3110 | } | |||
3111 | ||||
3112 | if (isForcedDPP()) { | |||
3113 | static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; | |||
3114 | return makeArrayRef(Variants); | |||
3115 | } | |||
3116 | ||||
3117 | return getAllVariants(); | |||
3118 | } | |||
3119 | ||||
3120 | StringRef AMDGPUAsmParser::getMatchedVariantName() const { | |||
3121 | if (getForcedEncodingSize() == 32) | |||
3122 | return "e32"; | |||
3123 | ||||
3124 | if (isForcedVOP3()) | |||
3125 | return "e64"; | |||
3126 | ||||
3127 | if (isForcedSDWA()) | |||
3128 | return "sdwa"; | |||
3129 | ||||
3130 | if (isForcedDPP()) | |||
3131 | return "dpp"; | |||
3132 | ||||
3133 | return ""; | |||
3134 | } | |||
3135 | ||||
3136 | unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { | |||
3137 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
3138 | const unsigned Num = Desc.getNumImplicitUses(); | |||
3139 | for (unsigned i = 0; i < Num; ++i) { | |||
3140 | unsigned Reg = Desc.ImplicitUses[i]; | |||
3141 | switch (Reg) { | |||
3142 | case AMDGPU::FLAT_SCR: | |||
3143 | case AMDGPU::VCC: | |||
3144 | case AMDGPU::VCC_LO: | |||
3145 | case AMDGPU::VCC_HI: | |||
3146 | case AMDGPU::M0: | |||
3147 | return Reg; | |||
3148 | default: | |||
3149 | break; | |||
3150 | } | |||
3151 | } | |||
3152 | return AMDGPU::NoRegister; | |||
3153 | } | |||
3154 | ||||
3155 | // NB: This code is correct only when used to check constant | |||
3156 | // bus limitations because GFX7 support no f16 inline constants. | |||
3157 | // Note that there are no cases when a GFX7 opcode violates | |||
3158 | // constant bus limitations due to the use of an f16 constant. | |||
3159 | bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, | |||
3160 | unsigned OpIdx) const { | |||
3161 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
3162 | ||||
3163 | if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { | |||
3164 | return false; | |||
3165 | } | |||
3166 | ||||
3167 | const MCOperand &MO = Inst.getOperand(OpIdx); | |||
3168 | ||||
3169 | int64_t Val = MO.getImm(); | |||
3170 | auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); | |||
3171 | ||||
3172 | switch (OpSize) { // expected operand size | |||
3173 | case 8: | |||
3174 | return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); | |||
3175 | case 4: | |||
3176 | return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); | |||
3177 | case 2: { | |||
3178 | const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; | |||
3179 | if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || | |||
3180 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || | |||
3181 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) | |||
3182 | return AMDGPU::isInlinableIntLiteral(Val); | |||
3183 | ||||
3184 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || | |||
3185 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || | |||
3186 | OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) | |||
3187 | return AMDGPU::isInlinableIntLiteralV216(Val); | |||
3188 | ||||
3189 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || | |||
3190 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || | |||
3191 | OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) | |||
3192 | return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); | |||
3193 | ||||
3194 | return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); | |||
3195 | } | |||
3196 | default: | |||
3197 | llvm_unreachable("invalid operand size")__builtin_unreachable(); | |||
3198 | } | |||
3199 | } | |||
3200 | ||||
3201 | unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { | |||
3202 | if (!isGFX10Plus()) | |||
3203 | return 1; | |||
3204 | ||||
3205 | switch (Opcode) { | |||
3206 | // 64-bit shift instructions can use only one scalar value input | |||
3207 | case AMDGPU::V_LSHLREV_B64_e64: | |||
3208 | case AMDGPU::V_LSHLREV_B64_gfx10: | |||
3209 | case AMDGPU::V_LSHRREV_B64_e64: | |||
3210 | case AMDGPU::V_LSHRREV_B64_gfx10: | |||
3211 | case AMDGPU::V_ASHRREV_I64_e64: | |||
3212 | case AMDGPU::V_ASHRREV_I64_gfx10: | |||
3213 | case AMDGPU::V_LSHL_B64_e64: | |||
3214 | case AMDGPU::V_LSHR_B64_e64: | |||
3215 | case AMDGPU::V_ASHR_I64_e64: | |||
3216 | return 1; | |||
3217 | default: | |||
3218 | return 2; | |||
3219 | } | |||
3220 | } | |||
3221 | ||||
3222 | bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { | |||
3223 | const MCOperand &MO = Inst.getOperand(OpIdx); | |||
3224 | if (MO.isImm()) { | |||
3225 | return !isInlineConstant(Inst, OpIdx); | |||
3226 | } else if (MO.isReg()) { | |||
3227 | auto Reg = MO.getReg(); | |||
3228 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | |||
3229 | auto PReg = mc2PseudoReg(Reg); | |||
3230 | return isSGPR(PReg, TRI) && PReg != SGPR_NULL; | |||
3231 | } else { | |||
3232 | return true; | |||
3233 | } | |||
3234 | } | |||
3235 | ||||
3236 | bool | |||
3237 | AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, | |||
3238 | const OperandVector &Operands) { | |||
3239 | const unsigned Opcode = Inst.getOpcode(); | |||
3240 | const MCInstrDesc &Desc = MII.get(Opcode); | |||
3241 | unsigned LastSGPR = AMDGPU::NoRegister; | |||
3242 | unsigned ConstantBusUseCount = 0; | |||
3243 | unsigned NumLiterals = 0; | |||
3244 | unsigned LiteralSize; | |||
3245 | ||||
3246 | if (Desc.TSFlags & | |||
3247 | (SIInstrFlags::VOPC | | |||
3248 | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | | |||
3249 | SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | | |||
3250 | SIInstrFlags::SDWA)) { | |||
3251 | // Check special imm operands (used by madmk, etc) | |||
3252 | if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { | |||
3253 | ++ConstantBusUseCount; | |||
3254 | } | |||
3255 | ||||
3256 | SmallDenseSet<unsigned> SGPRsUsed; | |||
3257 | unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); | |||
3258 | if (SGPRUsed != AMDGPU::NoRegister) { | |||
3259 | SGPRsUsed.insert(SGPRUsed); | |||
3260 | ++ConstantBusUseCount; | |||
3261 | } | |||
3262 | ||||
3263 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); | |||
3264 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); | |||
3265 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); | |||
3266 | ||||
3267 | const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; | |||
3268 | ||||
3269 | for (int OpIdx : OpIndices) { | |||
3270 | if (OpIdx == -1) break; | |||
3271 | ||||
3272 | const MCOperand &MO = Inst.getOperand(OpIdx); | |||
3273 | if (usesConstantBus(Inst, OpIdx)) { | |||
3274 | if (MO.isReg()) { | |||
3275 | LastSGPR = mc2PseudoReg(MO.getReg()); | |||
3276 | // Pairs of registers with a partial intersections like these | |||
3277 | // s0, s[0:1] | |||
3278 | // flat_scratch_lo, flat_scratch | |||
3279 | // flat_scratch_lo, flat_scratch_hi | |||
3280 | // are theoretically valid but they are disabled anyway. | |||
3281 | // Note that this code mimics SIInstrInfo::verifyInstruction | |||
3282 | if (!SGPRsUsed.count(LastSGPR)) { | |||
3283 | SGPRsUsed.insert(LastSGPR); | |||
3284 | ++ConstantBusUseCount; | |||
3285 | } | |||
3286 | } else { // Expression or a literal | |||
3287 | ||||
3288 | if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) | |||
3289 | continue; // special operand like VINTERP attr_chan | |||
3290 | ||||
3291 | // An instruction may use only one literal. | |||
3292 | // This has been validated on the previous step. | |||
3293 | // See validateVOP3Literal. | |||
3294 | // This literal may be used as more than one operand. | |||
3295 | // If all these operands are of the same size, | |||
3296 | // this literal counts as one scalar value. | |||
3297 | // Otherwise it counts as 2 scalar values. | |||
3298 | // See "GFX10 Shader Programming", section 3.6.2.3. | |||
3299 | ||||
3300 | unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); | |||
3301 | if (Size < 4) Size = 4; | |||
3302 | ||||
3303 | if (NumLiterals == 0) { | |||
3304 | NumLiterals = 1; | |||
3305 | LiteralSize = Size; | |||
3306 | } else if (LiteralSize != Size) { | |||
3307 | NumLiterals = 2; | |||
3308 | } | |||
3309 | } | |||
3310 | } | |||
3311 | } | |||
3312 | } | |||
3313 | ConstantBusUseCount += NumLiterals; | |||
3314 | ||||
3315 | if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) | |||
3316 | return true; | |||
3317 | ||||
3318 | SMLoc LitLoc = getLitLoc(Operands); | |||
3319 | SMLoc RegLoc = getRegLoc(LastSGPR, Operands); | |||
3320 | SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; | |||
3321 | Error(Loc, "invalid operand (violates constant bus restrictions)"); | |||
3322 | return false; | |||
3323 | } | |||
3324 | ||||
3325 | bool | |||
3326 | AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, | |||
3327 | const OperandVector &Operands) { | |||
3328 | const unsigned Opcode = Inst.getOpcode(); | |||
3329 | const MCInstrDesc &Desc = MII.get(Opcode); | |||
3330 | ||||
3331 | const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); | |||
3332 | if (DstIdx == -1 || | |||
3333 | Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { | |||
3334 | return true; | |||
3335 | } | |||
3336 | ||||
3337 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | |||
3338 | ||||
3339 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); | |||
3340 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); | |||
3341 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); | |||
3342 | ||||
3343 | assert(DstIdx != -1)(static_cast<void> (0)); | |||
3344 | const MCOperand &Dst = Inst.getOperand(DstIdx); | |||
3345 | assert(Dst.isReg())(static_cast<void> (0)); | |||
3346 | const unsigned DstReg = mc2PseudoReg(Dst.getReg()); | |||
3347 | ||||
3348 | const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; | |||
3349 | ||||
3350 | for (int SrcIdx : SrcIndices) { | |||
3351 | if (SrcIdx == -1) break; | |||
3352 | const MCOperand &Src = Inst.getOperand(SrcIdx); | |||
3353 | if (Src.isReg()) { | |||
3354 | const unsigned SrcReg = mc2PseudoReg(Src.getReg()); | |||
3355 | if (isRegIntersect(DstReg, SrcReg, TRI)) { | |||
3356 | Error(getRegLoc(SrcReg, Operands), | |||
3357 | "destination must be different than all sources"); | |||
3358 | return false; | |||
3359 | } | |||
3360 | } | |||
3361 | } | |||
3362 | ||||
3363 | return true; | |||
3364 | } | |||
3365 | ||||
3366 | bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { | |||
3367 | ||||
3368 | const unsigned Opc = Inst.getOpcode(); | |||
3369 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3370 | ||||
3371 | if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { | |||
3372 | int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); | |||
3373 | assert(ClampIdx != -1)(static_cast<void> (0)); | |||
3374 | return Inst.getOperand(ClampIdx).getImm() == 0; | |||
3375 | } | |||
3376 | ||||
3377 | return true; | |||
3378 | } | |||
3379 | ||||
3380 | bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { | |||
3381 | ||||
3382 | const unsigned Opc = Inst.getOpcode(); | |||
3383 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3384 | ||||
3385 | if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) | |||
3386 | return true; | |||
3387 | ||||
3388 | int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); | |||
3389 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); | |||
3390 | int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); | |||
3391 | ||||
3392 | assert(VDataIdx != -1)(static_cast<void> (0)); | |||
3393 | ||||
3394 | if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray | |||
3395 | return true; | |||
3396 | ||||
3397 | unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); | |||
3398 | unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; | |||
3399 | unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; | |||
3400 | if (DMask == 0) | |||
3401 | DMask = 1; | |||
3402 | ||||
3403 | unsigned DataSize = | |||
3404 | (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); | |||
3405 | if (hasPackedD16()) { | |||
3406 | int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); | |||
3407 | if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) | |||
3408 | DataSize = (DataSize + 1) / 2; | |||
3409 | } | |||
3410 | ||||
3411 | return (VDataSize / 4) == DataSize + TFESize; | |||
3412 | } | |||
3413 | ||||
3414 | bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { | |||
3415 | const unsigned Opc = Inst.getOpcode(); | |||
3416 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3417 | ||||
3418 | if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) | |||
3419 | return true; | |||
3420 | ||||
3421 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); | |||
3422 | ||||
3423 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = | |||
3424 | AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); | |||
3425 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); | |||
3426 | int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); | |||
3427 | int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); | |||
3428 | int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); | |||
3429 | ||||
3430 | assert(VAddr0Idx != -1)(static_cast<void> (0)); | |||
3431 | assert(SrsrcIdx != -1)(static_cast<void> (0)); | |||
3432 | assert(SrsrcIdx > VAddr0Idx)(static_cast<void> (0)); | |||
3433 | ||||
3434 | if (DimIdx == -1) | |||
3435 | return true; // intersect_ray | |||
3436 | ||||
3437 | unsigned Dim = Inst.getOperand(DimIdx).getImm(); | |||
3438 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); | |||
3439 | bool IsNSA = SrsrcIdx - VAddr0Idx > 1; | |||
3440 | unsigned ActualAddrSize = | |||
3441 | IsNSA ? SrsrcIdx - VAddr0Idx | |||
3442 | : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; | |||
3443 | bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); | |||
3444 | ||||
3445 | unsigned ExpectedAddrSize = | |||
3446 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); | |||
3447 | ||||
3448 | if (!IsNSA) { | |||
3449 | if (ExpectedAddrSize > 8) | |||
3450 | ExpectedAddrSize = 16; | |||
3451 | ||||
3452 | // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. | |||
3453 | // This provides backward compatibility for assembly created | |||
3454 | // before 160b/192b/224b types were directly supported. | |||
3455 | if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) | |||
3456 | return true; | |||
3457 | } | |||
3458 | ||||
3459 | return ActualAddrSize == ExpectedAddrSize; | |||
3460 | } | |||
3461 | ||||
3462 | bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { | |||
3463 | ||||
3464 | const unsigned Opc = Inst.getOpcode(); | |||
3465 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3466 | ||||
3467 | if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) | |||
3468 | return true; | |||
3469 | if (!Desc.mayLoad() || !Desc.mayStore()) | |||
3470 | return true; // Not atomic | |||
3471 | ||||
3472 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); | |||
3473 | unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; | |||
3474 | ||||
3475 | // This is an incomplete check because image_atomic_cmpswap | |||
3476 | // may only use 0x3 and 0xf while other atomic operations | |||
3477 | // may use 0x1 and 0x3. However these limitations are | |||
3478 | // verified when we check that dmask matches dst size. | |||
3479 | return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; | |||
3480 | } | |||
3481 | ||||
3482 | bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { | |||
3483 | ||||
3484 | const unsigned Opc = Inst.getOpcode(); | |||
3485 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3486 | ||||
3487 | if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) | |||
3488 | return true; | |||
3489 | ||||
3490 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); | |||
3491 | unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; | |||
3492 | ||||
3493 | // GATHER4 instructions use dmask in a different fashion compared to | |||
3494 | // other MIMG instructions. The only useful DMASK values are | |||
3495 | // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns | |||
3496 | // (red,red,red,red) etc.) The ISA document doesn't mention | |||
3497 | // this. | |||
3498 | return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; | |||
3499 | } | |||
3500 | ||||
3501 | bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { | |||
3502 | const unsigned Opc = Inst.getOpcode(); | |||
3503 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3504 | ||||
3505 | if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) | |||
3506 | return true; | |||
3507 | ||||
3508 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); | |||
3509 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = | |||
3510 | AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); | |||
3511 | ||||
3512 | if (!BaseOpcode->MSAA) | |||
3513 | return true; | |||
3514 | ||||
3515 | int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); | |||
3516 | assert(DimIdx != -1)(static_cast<void> (0)); | |||
3517 | ||||
3518 | unsigned Dim = Inst.getOperand(DimIdx).getImm(); | |||
3519 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); | |||
3520 | ||||
3521 | return DimInfo->MSAA; | |||
3522 | } | |||
3523 | ||||
3524 | static bool IsMovrelsSDWAOpcode(const unsigned Opcode) | |||
3525 | { | |||
3526 | switch (Opcode) { | |||
3527 | case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: | |||
3528 | case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: | |||
3529 | case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: | |||
3530 | return true; | |||
3531 | default: | |||
3532 | return false; | |||
3533 | } | |||
3534 | } | |||
3535 | ||||
3536 | // movrels* opcodes should only allow VGPRS as src0. | |||
3537 | // This is specified in .td description for vop1/vop3, | |||
3538 | // but sdwa is handled differently. See isSDWAOperand. | |||
3539 | bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, | |||
3540 | const OperandVector &Operands) { | |||
3541 | ||||
3542 | const unsigned Opc = Inst.getOpcode(); | |||
3543 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3544 | ||||
3545 | if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) | |||
3546 | return true; | |||
3547 | ||||
3548 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); | |||
3549 | assert(Src0Idx != -1)(static_cast<void> (0)); | |||
3550 | ||||
3551 | SMLoc ErrLoc; | |||
3552 | const MCOperand &Src0 = Inst.getOperand(Src0Idx); | |||
3553 | if (Src0.isReg()) { | |||
3554 | auto Reg = mc2PseudoReg(Src0.getReg()); | |||
3555 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | |||
3556 | if (!isSGPR(Reg, TRI)) | |||
3557 | return true; | |||
3558 | ErrLoc = getRegLoc(Reg, Operands); | |||
3559 | } else { | |||
3560 | ErrLoc = getConstLoc(Operands); | |||
3561 | } | |||
3562 | ||||
3563 | Error(ErrLoc, "source operand must be a VGPR"); | |||
3564 | return false; | |||
3565 | } | |||
3566 | ||||
3567 | bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, | |||
3568 | const OperandVector &Operands) { | |||
3569 | ||||
3570 | const unsigned Opc = Inst.getOpcode(); | |||
3571 | ||||
3572 | if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) | |||
3573 | return true; | |||
3574 | ||||
3575 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); | |||
3576 | assert(Src0Idx != -1)(static_cast<void> (0)); | |||
3577 | ||||
3578 | const MCOperand &Src0 = Inst.getOperand(Src0Idx); | |||
3579 | if (!Src0.isReg()) | |||
3580 | return true; | |||
3581 | ||||
3582 | auto Reg = mc2PseudoReg(Src0.getReg()); | |||
3583 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | |||
3584 | if (isSGPR(Reg, TRI)) { | |||
3585 | Error(getRegLoc(Reg, Operands), | |||
3586 | "source operand must be either a VGPR or an inline constant"); | |||
3587 | return false; | |||
3588 | } | |||
3589 | ||||
3590 | return true; | |||
3591 | } | |||
3592 | ||||
3593 | bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { | |||
3594 | switch (Inst.getOpcode()) { | |||
3595 | default: | |||
3596 | return true; | |||
3597 | case V_DIV_SCALE_F32_gfx6_gfx7: | |||
3598 | case V_DIV_SCALE_F32_vi: | |||
3599 | case V_DIV_SCALE_F32_gfx10: | |||
3600 | case V_DIV_SCALE_F64_gfx6_gfx7: | |||
3601 | case V_DIV_SCALE_F64_vi: | |||
3602 | case V_DIV_SCALE_F64_gfx10: | |||
3603 | break; | |||
3604 | } | |||
3605 | ||||
3606 | // TODO: Check that src0 = src1 or src2. | |||
3607 | ||||
3608 | for (auto Name : {AMDGPU::OpName::src0_modifiers, | |||
3609 | AMDGPU::OpName::src2_modifiers, | |||
3610 | AMDGPU::OpName::src2_modifiers}) { | |||
3611 | if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) | |||
3612 | .getImm() & | |||
3613 | SISrcMods::ABS) { | |||
3614 | return false; | |||
3615 | } | |||
3616 | } | |||
3617 | ||||
3618 | return true; | |||
3619 | } | |||
3620 | ||||
3621 | bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { | |||
3622 | ||||
3623 | const unsigned Opc = Inst.getOpcode(); | |||
3624 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3625 | ||||
3626 | if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) | |||
3627 | return true; | |||
3628 | ||||
3629 | int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); | |||
3630 | if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { | |||
3631 | if (isCI() || isSI()) | |||
3632 | return false; | |||
3633 | } | |||
3634 | ||||
3635 | return true; | |||
3636 | } | |||
3637 | ||||
3638 | bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { | |||
3639 | const unsigned Opc = Inst.getOpcode(); | |||
3640 | const MCInstrDesc &Desc = MII.get(Opc); | |||
3641 | ||||
3642 | if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) | |||
3643 | return true; | |||
3644 | ||||
3645 | int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); | |||
3646 | if (DimIdx < 0) | |||
3647 | return true; | |||
3648 | ||||
3649 | long Imm = Inst.getOperand(DimIdx).getImm(); | |||
3650 | if (Imm < 0 || Imm >= 8) | |||
3651 | return false; | |||
3652 | ||||
3653 | return true; | |||
3654 | } | |||
3655 | ||||
3656 | static bool IsRevOpcode(const unsigned Opcode) | |||
3657 | { | |||
3658 | switch (Opcode) { | |||
3659 | case AMDGPU::V_SUBREV_F32_e32: | |||
3660 | case AMDGPU::V_SUBREV_F32_e64: | |||
3661 | case AMDGPU::V_SUBREV_F32_e32_gfx10: | |||
3662 | case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: | |||
3663 | case AMDGPU::V_SUBREV_F32_e32_vi: | |||
3664 | case AMDGPU::V_SUBREV_F32_e64_gfx10: | |||
3665 | case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: | |||
3666 | case AMDGPU::V_SUBREV_F32_e64_vi: | |||
3667 | ||||
3668 | case AMDGPU::V_SUBREV_CO_U32_e32: | |||
3669 | case AMDGPU::V_SUBREV_CO_U32_e64: | |||
3670 | case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: | |||
3671 | case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: | |||
3672 | ||||
3673 | case AMDGPU::V_SUBBREV_U32_e32: | |||
3674 | case AMDGPU::V_SUBBREV_U32_e64: | |||
3675 | case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: | |||
3676 | case AMDGPU::V_SUBBREV_U32_e32_vi: | |||
3677 | case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: | |||
3678 | case AMDGPU::V_SUBBREV_U32_e64_vi: | |||
3679 | ||||
3680 | case AMDGPU::V_SUBREV_U32_e32: | |||
3681 | case AMDGPU::V_SUBREV_U32_e64: | |||
3682 | case AMDGPU::V_SUBREV_U32_e32_gfx9: | |||
3683 | case AMDGPU::V_SUBREV_U32_e32_vi: | |||
3684 | case AMDGPU::V_SUBREV_U32_e64_gfx9: | |||
3685 | case AMDGPU::V_SUBREV_U32_e64_vi: | |||
3686 | ||||
3687 | case AMDGPU::V_SUBREV_F16_e32: | |||
3688 | case AMDGPU::V_SUBREV_F16_e64: | |||
3689 | case AMDGPU::V_SUBREV_F16_e32_gfx10: | |||
3690 | case AMDGPU::V_SUBREV_F16_e32_vi: | |||
3691 | case AMDGPU::V_SUBREV_F16_e64_gfx10: | |||
3692 | case AMDGPU::V_SUBREV_F16_e64_vi: | |||
3693 | ||||
3694 | case AMDGPU::V_SUBREV_U16_e32: | |||
3695 | case AMDGPU::V_SUBREV_U16_e64: | |||
3696 | case AMDGPU::V_SUBREV_U16_e32_vi: | |||
3697 | case AMDGPU::V_SUBREV_U16_e64_vi: | |||
3698 | ||||
3699 | case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: | |||
3700 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: | |||
3701 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: | |||
3702 | ||||
3703 | case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: | |||
3704 | case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: | |||
3705 | ||||
3706 | case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: | |||
3707 | case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: | |||
3708 | ||||
3709 | case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: | |||
3710 | case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: | |||
3711 | ||||
3712 | case AMDGPU::V_LSHRREV_B32_e32: | |||
3713 | case AMDGPU::V_LSHRREV_B32_e64: | |||
3714 | case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: | |||
3715 | case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: | |||
3716 | case AMDGPU::V_LSHRREV_B32_e32_vi: | |||
3717 | case AMDGPU::V_LSHRREV_B32_e64_vi: | |||
3718 | case AMDGPU::V_LSHRREV_B32_e32_gfx10: | |||
3719 | case AMDGPU::V_LSHRREV_B32_e64_gfx10: | |||
3720 | ||||
3721 | case AMDGPU::V_ASHRREV_I32_e32: | |||
3722 | case AMDGPU::V_ASHRREV_I32_e64: | |||
3723 | case AMDGPU::V_ASHRREV_I32_e32_gfx10: | |||
3724 | case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: | |||
3725 | case AMDGPU::V_ASHRREV_I32_e32_vi: | |||
3726 | case AMDGPU::V_ASHRREV_I32_e64_gfx10: | |||
3727 | case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: | |||
3728 | case AMDGPU::V_ASHRREV_I32_e64_vi: | |||
3729 | ||||
3730 | case AMDGPU::V_LSHLREV_B32_e32: | |||
3731 | case AMDGPU::V_LSHLREV_B32_e64: | |||
3732 | case AMDGPU::V_LSHLREV_B32_e32_gfx10: | |||
3733 | case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: | |||
3734 | case AMDGPU::V_LSHLREV_B32_e32_vi: | |||
3735 | case AMDGPU::V_LSHLREV_B32_e64_gfx10: | |||
3736 | case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: | |||
3737 | case AMDGPU::V_LSHLREV_B32_e64_vi: | |||
3738 | ||||
3739 | case AMDGPU::V_LSHLREV_B16_e32: | |||
3740 | case AMDGPU::V_LSHLREV_B16_e64: | |||
3741 | case AMDGPU::V_LSHLREV_B16_e32_vi: | |||
3742 | case AMDGPU::V_LSHLREV_B16_e64_vi: | |||
3743 | case AMDGPU::V_LSHLREV_B16_gfx10: | |||
3744 | ||||
3745 | case AMDGPU::V_LSHRREV_B16_e32: | |||
3746 | case AMDGPU::V_LSHRREV_B16_e64: | |||
3747 | case AMDGPU::V_LSHRREV_B16_e32_vi: | |||
3748 | case AMDGPU::V_LSHRREV_B16_e64_vi: | |||
3749 | case AMDGPU::V_LSHRREV_B16_gfx10: | |||
3750 | ||||
3751 | case AMDGPU::V_ASHRREV_I16_e32: | |||
3752 | case AMDGPU::V_ASHRREV_I16_e64: | |||
3753 | case AMDGPU::V_ASHRREV_I16_e32_vi: | |||
3754 | case AMDGPU::V_ASHRREV_I16_e64_vi: | |||
3755 | case AMDGPU::V_ASHRREV_I16_gfx10: | |||
3756 | ||||
3757 | case AMDGPU::V_LSHLREV_B64_e64: | |||
3758 | case AMDGPU::V_LSHLREV_B64_gfx10: | |||
3759 | case AMDGPU::V_LSHLREV_B64_vi: | |||
3760 | ||||
3761 | case AMDGPU::V_LSHRREV_B64_e64: | |||
3762 | case AMDGPU::V_LSHRREV_B64_gfx10: | |||
3763 | case AMDGPU::V_LSHRREV_B64_vi: | |||
3764 | ||||
3765 | case AMDGPU::V_ASHRREV_I64_e64: | |||
3766 | case AMDGPU::V_ASHRREV_I64_gfx10: | |||
3767 | case AMDGPU::V_ASHRREV_I64_vi: | |||
3768 | ||||
3769 | case AMDGPU::V_PK_LSHLREV_B16: | |||
3770 | case AMDGPU::V_PK_LSHLREV_B16_gfx10: | |||
3771 | case AMDGPU::V_PK_LSHLREV_B16_vi: | |||
3772 | ||||
3773 | case AMDGPU::V_PK_LSHRREV_B16: | |||
3774 | case AMDGPU::V_PK_LSHRREV_B16_gfx10: | |||
3775 | case AMDGPU::V_PK_LSHRREV_B16_vi: | |||
3776 | case AMDGPU::V_PK_ASHRREV_I16: | |||
3777 | case AMDGPU::V_PK_ASHRREV_I16_gfx10: | |||
3778 | case AMDGPU::V_PK_ASHRREV_I16_vi: | |||
3779 | return true; | |||
3780 | default: | |||
3781 | return false; | |||
3782 | } | |||
3783 | } | |||
3784 | ||||
3785 | Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { | |||
3786 | ||||
3787 | using namespace SIInstrFlags; | |||
3788 | const unsigned Opcode = Inst.getOpcode(); | |||
3789 | const MCInstrDesc &Desc = MII.get(Opcode); | |||
3790 | ||||
3791 | // lds_direct register is defined so that it can be used | |||
3792 | // with 9-bit operands only. Ignore encodings which do not accept these. | |||
3793 | const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; | |||
3794 | if ((Desc.TSFlags & Enc) == 0) | |||
3795 | return None; | |||
3796 | ||||
3797 | for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { | |||
3798 | auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); | |||
3799 | if (SrcIdx == -1) | |||
3800 | break; | |||
3801 | const auto &Src = Inst.getOperand(SrcIdx); | |||
3802 | if (Src.isReg() && Src.getReg() == LDS_DIRECT) { | |||
3803 | ||||
3804 | if (isGFX90A()) | |||
3805 | return StringRef("lds_direct is not supported on this GPU"); | |||
3806 | ||||
3807 | if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) | |||
3808 | return StringRef("lds_direct cannot be used with this instruction"); | |||
3809 | ||||
3810 | if (SrcName != OpName::src0) | |||
3811 | return StringRef("lds_direct may be used as src0 only"); | |||
3812 | } | |||
3813 | } | |||
3814 | ||||
3815 | return None; | |||
3816 | } | |||
3817 | ||||
3818 | SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { | |||
3819 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
3820 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
3821 | if (Op.isFlatOffset()) | |||
3822 | return Op.getStartLoc(); | |||
3823 | } | |||
3824 | return getLoc(); | |||
3825 | } | |||
3826 | ||||
3827 | bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, | |||
3828 | const OperandVector &Operands) { | |||
3829 | uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; | |||
3830 | if ((TSFlags & SIInstrFlags::FLAT) == 0) | |||
3831 | return true; | |||
3832 | ||||
3833 | auto Opcode = Inst.getOpcode(); | |||
3834 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); | |||
3835 | assert(OpNum != -1)(static_cast<void> (0)); | |||
3836 | ||||
3837 | const auto &Op = Inst.getOperand(OpNum); | |||
3838 | if (!hasFlatOffsets() && Op.getImm() != 0) { | |||
3839 | Error(getFlatOffsetLoc(Operands), | |||
3840 | "flat offset modifier is not supported on this GPU"); | |||
3841 | return false; | |||
3842 | } | |||
3843 | ||||
3844 | // For FLAT segment the offset must be positive; | |||
3845 | // MSB is ignored and forced to zero. | |||
3846 | if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { | |||
3847 | unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); | |||
3848 | if (!isIntN(OffsetSize, Op.getImm())) { | |||
3849 | Error(getFlatOffsetLoc(Operands), | |||
3850 | Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); | |||
3851 | return false; | |||
3852 | } | |||
3853 | } else { | |||
3854 | unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); | |||
3855 | if (!isUIntN(OffsetSize, Op.getImm())) { | |||
3856 | Error(getFlatOffsetLoc(Operands), | |||
3857 | Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); | |||
3858 | return false; | |||
3859 | } | |||
3860 | } | |||
3861 | ||||
3862 | return true; | |||
3863 | } | |||
3864 | ||||
3865 | SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { | |||
3866 | // Start with second operand because SMEM Offset cannot be dst or src0. | |||
3867 | for (unsigned i = 2, e = Operands.size(); i != e; ++i) { | |||
3868 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
3869 | if (Op.isSMEMOffset()) | |||
3870 | return Op.getStartLoc(); | |||
3871 | } | |||
3872 | return getLoc(); | |||
3873 | } | |||
3874 | ||||
3875 | bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, | |||
3876 | const OperandVector &Operands) { | |||
3877 | if (isCI() || isSI()) | |||
3878 | return true; | |||
3879 | ||||
3880 | uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; | |||
3881 | if ((TSFlags & SIInstrFlags::SMRD) == 0) | |||
3882 | return true; | |||
3883 | ||||
3884 | auto Opcode = Inst.getOpcode(); | |||
3885 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); | |||
3886 | if (OpNum == -1) | |||
3887 | return true; | |||
3888 | ||||
3889 | const auto &Op = Inst.getOperand(OpNum); | |||
3890 | if (!Op.isImm()) | |||
3891 | return true; | |||
3892 | ||||
3893 | uint64_t Offset = Op.getImm(); | |||
3894 | bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); | |||
3895 | if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || | |||
3896 | AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) | |||
3897 | return true; | |||
3898 | ||||
3899 | Error(getSMEMOffsetLoc(Operands), | |||
3900 | (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : | |||
3901 | "expected a 21-bit signed offset"); | |||
3902 | ||||
3903 | return false; | |||
3904 | } | |||
3905 | ||||
3906 | bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { | |||
3907 | unsigned Opcode = Inst.getOpcode(); | |||
3908 | const MCInstrDesc &Desc = MII.get(Opcode); | |||
3909 | if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) | |||
3910 | return true; | |||
3911 | ||||
3912 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); | |||
3913 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); | |||
3914 | ||||
3915 | const int OpIndices[] = { Src0Idx, Src1Idx }; | |||
3916 | ||||
3917 | unsigned NumExprs = 0; | |||
3918 | unsigned NumLiterals = 0; | |||
3919 | uint32_t LiteralValue; | |||
3920 | ||||
3921 | for (int OpIdx : OpIndices) { | |||
3922 | if (OpIdx == -1) break; | |||
3923 | ||||
3924 | const MCOperand &MO = Inst.getOperand(OpIdx); | |||
3925 | // Exclude special imm operands (like that used by s_set_gpr_idx_on) | |||
3926 | if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { | |||
3927 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { | |||
3928 | uint32_t Value = static_cast<uint32_t>(MO.getImm()); | |||
3929 | if (NumLiterals == 0 || LiteralValue != Value) { | |||
3930 | LiteralValue = Value; | |||
3931 | ++NumLiterals; | |||
3932 | } | |||
3933 | } else if (MO.isExpr()) { | |||
3934 | ++NumExprs; | |||
3935 | } | |||
3936 | } | |||
3937 | } | |||
3938 | ||||
3939 | return NumLiterals + NumExprs <= 1; | |||
3940 | } | |||
3941 | ||||
3942 | bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { | |||
3943 | const unsigned Opc = Inst.getOpcode(); | |||
3944 | if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || | |||
3945 | Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { | |||
3946 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); | |||
3947 | unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); | |||
3948 | ||||
3949 | if (OpSel & ~3) | |||
3950 | return false; | |||
3951 | } | |||
3952 | return true; | |||
3953 | } | |||
3954 | ||||
3955 | bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, | |||
3956 | const OperandVector &Operands) { | |||
3957 | const unsigned Opc = Inst.getOpcode(); | |||
3958 | int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); | |||
3959 | if (DppCtrlIdx < 0) | |||
3960 | return true; | |||
3961 | unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); | |||
3962 | ||||
3963 | if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { | |||
3964 | // DPP64 is supported for row_newbcast only. | |||
3965 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); | |||
3966 | if (Src0Idx >= 0 && | |||
3967 | getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { | |||
3968 | SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); | |||
3969 | Error(S, "64 bit dpp only supports row_newbcast"); | |||
3970 | return false; | |||
3971 | } | |||
3972 | } | |||
3973 | ||||
3974 | return true; | |||
3975 | } | |||
3976 | ||||
3977 | // Check if VCC register matches wavefront size | |||
3978 | bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { | |||
3979 | auto FB = getFeatureBits(); | |||
3980 | return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || | |||
3981 | (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); | |||
3982 | } | |||
3983 | ||||
3984 | // VOP3 literal is only allowed in GFX10+ and only one can be used | |||
3985 | bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, | |||
3986 | const OperandVector &Operands) { | |||
3987 | unsigned Opcode = Inst.getOpcode(); | |||
3988 | const MCInstrDesc &Desc = MII.get(Opcode); | |||
3989 | if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) | |||
3990 | return true; | |||
3991 | ||||
3992 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); | |||
3993 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); | |||
3994 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); | |||
3995 | ||||
3996 | const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; | |||
3997 | ||||
3998 | unsigned NumExprs = 0; | |||
3999 | unsigned NumLiterals = 0; | |||
4000 | uint32_t LiteralValue; | |||
4001 | ||||
4002 | for (int OpIdx : OpIndices) { | |||
4003 | if (OpIdx == -1) break; | |||
4004 | ||||
4005 | const MCOperand &MO = Inst.getOperand(OpIdx); | |||
4006 | if (!MO.isImm() && !MO.isExpr()) | |||
4007 | continue; | |||
4008 | if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) | |||
4009 | continue; | |||
4010 | ||||
4011 | if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && | |||
4012 | getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { | |||
4013 | Error(getConstLoc(Operands), | |||
4014 | "inline constants are not allowed for this operand"); | |||
4015 | return false; | |||
4016 | } | |||
4017 | ||||
4018 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { | |||
4019 | uint32_t Value = static_cast<uint32_t>(MO.getImm()); | |||
4020 | if (NumLiterals == 0 || LiteralValue != Value) { | |||
4021 | LiteralValue = Value; | |||
4022 | ++NumLiterals; | |||
4023 | } | |||
4024 | } else if (MO.isExpr()) { | |||
4025 | ++NumExprs; | |||
4026 | } | |||
4027 | } | |||
4028 | NumLiterals += NumExprs; | |||
4029 | ||||
4030 | if (!NumLiterals) | |||
4031 | return true; | |||
4032 | ||||
4033 | if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { | |||
4034 | Error(getLitLoc(Operands), "literal operands are not supported"); | |||
4035 | return false; | |||
4036 | } | |||
4037 | ||||
4038 | if (NumLiterals > 1) { | |||
4039 | Error(getLitLoc(Operands), "only one literal operand is allowed"); | |||
4040 | return false; | |||
4041 | } | |||
4042 | ||||
4043 | return true; | |||
4044 | } | |||
4045 | ||||
4046 | // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. | |||
4047 | static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, | |||
4048 | const MCRegisterInfo *MRI) { | |||
4049 | int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); | |||
4050 | if (OpIdx < 0) | |||
4051 | return -1; | |||
4052 | ||||
4053 | const MCOperand &Op = Inst.getOperand(OpIdx); | |||
4054 | if (!Op.isReg()) | |||
4055 | return -1; | |||
4056 | ||||
4057 | unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); | |||
4058 | auto Reg = Sub ? Sub : Op.getReg(); | |||
4059 | const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); | |||
4060 | return AGPR32.contains(Reg) ? 1 : 0; | |||
4061 | } | |||
4062 | ||||
4063 | bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { | |||
4064 | uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; | |||
4065 | if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | | |||
4066 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | | |||
4067 | SIInstrFlags::DS)) == 0) | |||
4068 | return true; | |||
4069 | ||||
4070 | uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 | |||
4071 | : AMDGPU::OpName::vdata; | |||
4072 | ||||
4073 | const MCRegisterInfo *MRI = getMRI(); | |||
4074 | int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); | |||
4075 | int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); | |||
4076 | ||||
4077 | if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { | |||
4078 | int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); | |||
4079 | if (Data2Areg >= 0 && Data2Areg != DataAreg) | |||
4080 | return false; | |||
4081 | } | |||
4082 | ||||
4083 | auto FB = getFeatureBits(); | |||
4084 | if (FB[AMDGPU::FeatureGFX90AInsts]) { | |||
4085 | if (DataAreg < 0 || DstAreg < 0) | |||
4086 | return true; | |||
4087 | return DstAreg == DataAreg; | |||
4088 | } | |||
4089 | ||||
4090 | return DstAreg < 1 && DataAreg < 1; | |||
4091 | } | |||
4092 | ||||
4093 | bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { | |||
4094 | auto FB = getFeatureBits(); | |||
4095 | if (!FB[AMDGPU::FeatureGFX90AInsts]) | |||
4096 | return true; | |||
4097 | ||||
4098 | const MCRegisterInfo *MRI = getMRI(); | |||
4099 | const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); | |||
4100 | const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); | |||
4101 | for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { | |||
4102 | const MCOperand &Op = Inst.getOperand(I); | |||
4103 | if (!Op.isReg()) | |||
4104 | continue; | |||
4105 | ||||
4106 | unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); | |||
4107 | if (!Sub) | |||
4108 | continue; | |||
4109 | ||||
4110 | if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) | |||
4111 | return false; | |||
4112 | if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) | |||
4113 | return false; | |||
4114 | } | |||
4115 | ||||
4116 | return true; | |||
4117 | } | |||
4118 | ||||
4119 | // gfx90a has an undocumented limitation: | |||
4120 | // DS_GWS opcodes must use even aligned registers. | |||
4121 | bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, | |||
4122 | const OperandVector &Operands) { | |||
4123 | if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) | |||
4124 | return true; | |||
4125 | ||||
4126 | int Opc = Inst.getOpcode(); | |||
4127 | if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && | |||
4128 | Opc != AMDGPU::DS_GWS_SEMA_BR_vi) | |||
4129 | return true; | |||
4130 | ||||
4131 | const MCRegisterInfo *MRI = getMRI(); | |||
4132 | const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); | |||
4133 | int Data0Pos = | |||
4134 | AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); | |||
4135 | assert(Data0Pos != -1)(static_cast<void> (0)); | |||
4136 | auto Reg = Inst.getOperand(Data0Pos).getReg(); | |||
4137 | auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); | |||
4138 | if (RegIdx & 1) { | |||
4139 | SMLoc RegLoc = getRegLoc(Reg, Operands); | |||
4140 | Error(RegLoc, "vgpr must be even aligned"); | |||
4141 | return false; | |||
4142 | } | |||
4143 | ||||
4144 | return true; | |||
4145 | } | |||
4146 | ||||
4147 | bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, | |||
4148 | const OperandVector &Operands, | |||
4149 | const SMLoc &IDLoc) { | |||
4150 | int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), | |||
4151 | AMDGPU::OpName::cpol); | |||
4152 | if (CPolPos == -1) | |||
4153 | return true; | |||
4154 | ||||
4155 | unsigned CPol = Inst.getOperand(CPolPos).getImm(); | |||
4156 | ||||
4157 | uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; | |||
4158 | if ((TSFlags & (SIInstrFlags::SMRD)) && | |||
4159 | (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { | |||
4160 | Error(IDLoc, "invalid cache policy for SMRD instruction"); | |||
4161 | return false; | |||
4162 | } | |||
4163 | ||||
4164 | if (isGFX90A() && (CPol & CPol::SCC)) { | |||
4165 | SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); | |||
4166 | StringRef CStr(S.getPointer()); | |||
4167 | S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); | |||
4168 | Error(S, "scc is not supported on this GPU"); | |||
4169 | return false; | |||
4170 | } | |||
4171 | ||||
4172 | if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) | |||
4173 | return true; | |||
4174 | ||||
4175 | if (TSFlags & SIInstrFlags::IsAtomicRet) { | |||
4176 | if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { | |||
4177 | Error(IDLoc, "instruction must use glc"); | |||
4178 | return false; | |||
4179 | } | |||
4180 | } else { | |||
4181 | if (CPol & CPol::GLC) { | |||
4182 | SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); | |||
4183 | StringRef CStr(S.getPointer()); | |||
4184 | S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); | |||
4185 | Error(S, "instruction must not use glc"); | |||
4186 | return false; | |||
4187 | } | |||
4188 | } | |||
4189 | ||||
4190 | return true; | |||
4191 | } | |||
4192 | ||||
4193 | bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, | |||
4194 | const SMLoc &IDLoc, | |||
4195 | const OperandVector &Operands) { | |||
4196 | if (auto ErrMsg = validateLdsDirect(Inst)) { | |||
4197 | Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); | |||
4198 | return false; | |||
4199 | } | |||
4200 | if (!validateSOPLiteral(Inst)) { | |||
4201 | Error(getLitLoc(Operands), | |||
4202 | "only one literal operand is allowed"); | |||
4203 | return false; | |||
4204 | } | |||
4205 | if (!validateVOP3Literal(Inst, Operands)) { | |||
4206 | return false; | |||
4207 | } | |||
4208 | if (!validateConstantBusLimitations(Inst, Operands)) { | |||
4209 | return false; | |||
4210 | } | |||
4211 | if (!validateEarlyClobberLimitations(Inst, Operands)) { | |||
4212 | return false; | |||
4213 | } | |||
4214 | if (!validateIntClampSupported(Inst)) { | |||
4215 | Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), | |||
4216 | "integer clamping is not supported on this GPU"); | |||
4217 | return false; | |||
4218 | } | |||
4219 | if (!validateOpSel(Inst)) { | |||
4220 | Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), | |||
4221 | "invalid op_sel operand"); | |||
4222 | return false; | |||
4223 | } | |||
4224 | if (!validateDPP(Inst, Operands)) { | |||
4225 | return false; | |||
4226 | } | |||
4227 | // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. | |||
4228 | if (!validateMIMGD16(Inst)) { | |||
4229 | Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), | |||
4230 | "d16 modifier is not supported on this GPU"); | |||
4231 | return false; | |||
4232 | } | |||
4233 | if (!validateMIMGDim(Inst)) { | |||
4234 | Error(IDLoc, "dim modifier is required on this GPU"); | |||
4235 | return false; | |||
4236 | } | |||
4237 | if (!validateMIMGMSAA(Inst)) { | |||
4238 | Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), | |||
4239 | "invalid dim; must be MSAA type"); | |||
4240 | return false; | |||
4241 | } | |||
4242 | if (!validateMIMGDataSize(Inst)) { | |||
4243 | Error(IDLoc, | |||
4244 | "image data size does not match dmask and tfe"); | |||
4245 | return false; | |||
4246 | } | |||
4247 | if (!validateMIMGAddrSize(Inst)) { | |||
4248 | Error(IDLoc, | |||
4249 | "image address size does not match dim and a16"); | |||
4250 | return false; | |||
4251 | } | |||
4252 | if (!validateMIMGAtomicDMask(Inst)) { | |||
4253 | Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), | |||
4254 | "invalid atomic image dmask"); | |||
4255 | return false; | |||
4256 | } | |||
4257 | if (!validateMIMGGatherDMask(Inst)) { | |||
4258 | Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), | |||
4259 | "invalid image_gather dmask: only one bit must be set"); | |||
4260 | return false; | |||
4261 | } | |||
4262 | if (!validateMovrels(Inst, Operands)) { | |||
4263 | return false; | |||
4264 | } | |||
4265 | if (!validateFlatOffset(Inst, Operands)) { | |||
4266 | return false; | |||
4267 | } | |||
4268 | if (!validateSMEMOffset(Inst, Operands)) { | |||
4269 | return false; | |||
4270 | } | |||
4271 | if (!validateMAIAccWrite(Inst, Operands)) { | |||
4272 | return false; | |||
4273 | } | |||
4274 | if (!validateCoherencyBits(Inst, Operands, IDLoc)) { | |||
4275 | return false; | |||
4276 | } | |||
4277 | ||||
4278 | if (!validateAGPRLdSt(Inst)) { | |||
4279 | Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] | |||
4280 | ? "invalid register class: data and dst should be all VGPR or AGPR" | |||
4281 | : "invalid register class: agpr loads and stores not supported on this GPU" | |||
4282 | ); | |||
4283 | return false; | |||
4284 | } | |||
4285 | if (!validateVGPRAlign(Inst)) { | |||
4286 | Error(IDLoc, | |||
4287 | "invalid register class: vgpr tuples must be 64 bit aligned"); | |||
4288 | return false; | |||
4289 | } | |||
4290 | if (!validateGWS(Inst, Operands)) { | |||
4291 | return false; | |||
4292 | } | |||
4293 | ||||
4294 | if (!validateDivScale(Inst)) { | |||
4295 | Error(IDLoc, "ABS not allowed in VOP3B instructions"); | |||
4296 | return false; | |||
4297 | } | |||
4298 | if (!validateCoherencyBits(Inst, Operands, IDLoc)) { | |||
4299 | return false; | |||
4300 | } | |||
4301 | ||||
4302 | return true; | |||
4303 | } | |||
4304 | ||||
4305 | static std::string AMDGPUMnemonicSpellCheck(StringRef S, | |||
4306 | const FeatureBitset &FBS, | |||
4307 | unsigned VariantID = 0); | |||
4308 | ||||
4309 | static bool AMDGPUCheckMnemonic(StringRef Mnemonic, | |||
4310 | const FeatureBitset &AvailableFeatures, | |||
4311 | unsigned VariantID); | |||
4312 | ||||
4313 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, | |||
4314 | const FeatureBitset &FBS) { | |||
4315 | return isSupportedMnemo(Mnemo, FBS, getAllVariants()); | |||
4316 | } | |||
4317 | ||||
4318 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, | |||
4319 | const FeatureBitset &FBS, | |||
4320 | ArrayRef<unsigned> Variants) { | |||
4321 | for (auto Variant : Variants) { | |||
4322 | if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) | |||
4323 | return true; | |||
4324 | } | |||
4325 | ||||
4326 | return false; | |||
4327 | } | |||
4328 | ||||
4329 | bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, | |||
4330 | const SMLoc &IDLoc) { | |||
4331 | FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); | |||
4332 | ||||
4333 | // Check if requested instruction variant is supported. | |||
4334 | if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) | |||
4335 | return false; | |||
4336 | ||||
4337 | // This instruction is not supported. | |||
4338 | // Clear any other pending errors because they are no longer relevant. | |||
4339 | getParser().clearPendingErrors(); | |||
4340 | ||||
4341 | // Requested instruction variant is not supported. | |||
4342 | // Check if any other variants are supported. | |||
4343 | StringRef VariantName = getMatchedVariantName(); | |||
4344 | if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { | |||
4345 | return Error(IDLoc, | |||
4346 | Twine(VariantName, | |||
4347 | " variant of this instruction is not supported")); | |||
4348 | } | |||
4349 | ||||
4350 | // Finally check if this instruction is supported on any other GPU. | |||
4351 | if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { | |||
4352 | return Error(IDLoc, "instruction not supported on this GPU"); | |||
4353 | } | |||
4354 | ||||
4355 | // Instruction not supported on any GPU. Probably a typo. | |||
4356 | std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); | |||
4357 | return Error(IDLoc, "invalid instruction" + Suggestion); | |||
4358 | } | |||
4359 | ||||
4360 | bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, | |||
4361 | OperandVector &Operands, | |||
4362 | MCStreamer &Out, | |||
4363 | uint64_t &ErrorInfo, | |||
4364 | bool MatchingInlineAsm) { | |||
4365 | MCInst Inst; | |||
4366 | unsigned Result = Match_Success; | |||
4367 | for (auto Variant : getMatchedVariants()) { | |||
4368 | uint64_t EI; | |||
4369 | auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, | |||
4370 | Variant); | |||
4371 | // We order match statuses from least to most specific. We use most specific | |||
4372 | // status as resulting | |||
4373 | // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 | |||
4374 | if ((R == Match_Success) || | |||
4375 | (R == Match_PreferE32) || | |||
4376 | (R == Match_MissingFeature && Result != Match_PreferE32) || | |||
4377 | (R == Match_InvalidOperand && Result != Match_MissingFeature | |||
4378 | && Result != Match_PreferE32) || | |||
4379 | (R == Match_MnemonicFail && Result != Match_InvalidOperand | |||
4380 | && Result != Match_MissingFeature | |||
4381 | && Result != Match_PreferE32)) { | |||
4382 | Result = R; | |||
4383 | ErrorInfo = EI; | |||
4384 | } | |||
4385 | if (R == Match_Success) | |||
4386 | break; | |||
4387 | } | |||
4388 | ||||
4389 | if (Result == Match_Success) { | |||
4390 | if (!validateInstruction(Inst, IDLoc, Operands)) { | |||
4391 | return true; | |||
4392 | } | |||
4393 | Inst.setLoc(IDLoc); | |||
4394 | Out.emitInstruction(Inst, getSTI()); | |||
4395 | return false; | |||
4396 | } | |||
4397 | ||||
4398 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); | |||
4399 | if (checkUnsupportedInstruction(Mnemo, IDLoc)) { | |||
4400 | return true; | |||
4401 | } | |||
4402 | ||||
4403 | switch (Result) { | |||
4404 | default: break; | |||
4405 | case Match_MissingFeature: | |||
4406 | // It has been verified that the specified instruction | |||
4407 | // mnemonic is valid. A match was found but it requires | |||
4408 | // features which are not supported on this GPU. | |||
4409 | return Error(IDLoc, "operands are not valid for this GPU or mode"); | |||
4410 | ||||
4411 | case Match_InvalidOperand: { | |||
4412 | SMLoc ErrorLoc = IDLoc; | |||
4413 | if (ErrorInfo != ~0ULL) { | |||
4414 | if (ErrorInfo >= Operands.size()) { | |||
4415 | return Error(IDLoc, "too few operands for instruction"); | |||
4416 | } | |||
4417 | ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); | |||
4418 | if (ErrorLoc == SMLoc()) | |||
4419 | ErrorLoc = IDLoc; | |||
4420 | } | |||
4421 | return Error(ErrorLoc, "invalid operand for instruction"); | |||
4422 | } | |||
4423 | ||||
4424 | case Match_PreferE32: | |||
4425 | return Error(IDLoc, "internal error: instruction without _e64 suffix " | |||
4426 | "should be encoded as e32"); | |||
4427 | case Match_MnemonicFail: | |||
4428 | llvm_unreachable("Invalid instructions should have been handled already")__builtin_unreachable(); | |||
4429 | } | |||
4430 | llvm_unreachable("Implement any new match types added!")__builtin_unreachable(); | |||
4431 | } | |||
4432 | ||||
4433 | bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { | |||
4434 | int64_t Tmp = -1; | |||
4435 | if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { | |||
4436 | return true; | |||
4437 | } | |||
4438 | if (getParser().parseAbsoluteExpression(Tmp)) { | |||
4439 | return true; | |||
4440 | } | |||
4441 | Ret = static_cast<uint32_t>(Tmp); | |||
4442 | return false; | |||
4443 | } | |||
4444 | ||||
4445 | bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, | |||
4446 | uint32_t &Minor) { | |||
4447 | if (ParseAsAbsoluteExpression(Major)) | |||
4448 | return TokError("invalid major version"); | |||
4449 | ||||
4450 | if (!trySkipToken(AsmToken::Comma)) | |||
4451 | return TokError("minor version number required, comma expected"); | |||
4452 | ||||
4453 | if (ParseAsAbsoluteExpression(Minor)) | |||
4454 | return TokError("invalid minor version"); | |||
4455 | ||||
4456 | return false; | |||
4457 | } | |||
4458 | ||||
4459 | bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { | |||
4460 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) | |||
4461 | return TokError("directive only supported for amdgcn architecture"); | |||
4462 | ||||
4463 | std::string TargetIDDirective; | |||
4464 | SMLoc TargetStart = getTok().getLoc(); | |||
4465 | if (getParser().parseEscapedString(TargetIDDirective)) | |||
4466 | return true; | |||
4467 | ||||
4468 | SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); | |||
4469 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) | |||
4470 | return getParser().Error(TargetRange.Start, | |||
4471 | (Twine(".amdgcn_target directive's target id ") + | |||
4472 | Twine(TargetIDDirective) + | |||
4473 | Twine(" does not match the specified target id ") + | |||
4474 | Twine(getTargetStreamer().getTargetID()->toString())).str()); | |||
4475 | ||||
4476 | return false; | |||
4477 | } | |||
4478 | ||||
4479 | bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { | |||
4480 | return Error(Range.Start, "value out of range", Range); | |||
4481 | } | |||
4482 | ||||
4483 | bool AMDGPUAsmParser::calculateGPRBlocks( | |||
4484 | const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, | |||
4485 | bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, | |||
4486 | SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, | |||
4487 | unsigned &VGPRBlocks, unsigned &SGPRBlocks) { | |||
4488 | // TODO(scott.linder): These calculations are duplicated from | |||
4489 | // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. | |||
4490 | IsaVersion Version = getIsaVersion(getSTI().getCPU()); | |||
4491 | ||||
4492 | unsigned NumVGPRs = NextFreeVGPR; | |||
4493 | unsigned NumSGPRs = NextFreeSGPR; | |||
4494 | ||||
4495 | if (Version.Major >= 10) | |||
4496 | NumSGPRs = 0; | |||
4497 | else { | |||
4498 | unsigned MaxAddressableNumSGPRs = | |||
4499 | IsaInfo::getAddressableNumSGPRs(&getSTI()); | |||
4500 | ||||
4501 | if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && | |||
4502 | NumSGPRs > MaxAddressableNumSGPRs) | |||
4503 | return OutOfRangeError(SGPRRange); | |||
4504 | ||||
4505 | NumSGPRs += | |||
4506 | IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); | |||
4507 | ||||
4508 | if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && | |||
4509 | NumSGPRs > MaxAddressableNumSGPRs) | |||
4510 | return OutOfRangeError(SGPRRange); | |||
4511 | ||||
4512 | if (Features.test(FeatureSGPRInitBug)) | |||
4513 | NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; | |||
4514 | } | |||
4515 | ||||
4516 | VGPRBlocks = | |||
4517 | IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); | |||
4518 | SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); | |||
4519 | ||||
4520 | return false; | |||
4521 | } | |||
4522 | ||||
4523 | bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { | |||
4524 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) | |||
4525 | return TokError("directive only supported for amdgcn architecture"); | |||
4526 | ||||
4527 | if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) | |||
4528 | return TokError("directive only supported for amdhsa OS"); | |||
4529 | ||||
4530 | StringRef KernelName; | |||
4531 | if (getParser().parseIdentifier(KernelName)) | |||
4532 | return true; | |||
4533 | ||||
4534 | kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); | |||
4535 | ||||
4536 | StringSet<> Seen; | |||
4537 | ||||
4538 | IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); | |||
4539 | ||||
4540 | SMRange VGPRRange; | |||
4541 | uint64_t NextFreeVGPR = 0; | |||
4542 | uint64_t AccumOffset = 0; | |||
4543 | SMRange SGPRRange; | |||
4544 | uint64_t NextFreeSGPR = 0; | |||
4545 | unsigned UserSGPRCount = 0; | |||
4546 | bool ReserveVCC = true; | |||
4547 | bool ReserveFlatScr = true; | |||
4548 | Optional<bool> EnableWavefrontSize32; | |||
4549 | ||||
4550 | while (true) { | |||
4551 | while (trySkipToken(AsmToken::EndOfStatement)); | |||
4552 | ||||
4553 | StringRef ID; | |||
4554 | SMRange IDRange = getTok().getLocRange(); | |||
4555 | if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) | |||
4556 | return true; | |||
4557 | ||||
4558 | if (ID == ".end_amdhsa_kernel") | |||
4559 | break; | |||
4560 | ||||
4561 | if (Seen.find(ID) != Seen.end()) | |||
4562 | return TokError(".amdhsa_ directives cannot be repeated"); | |||
4563 | Seen.insert(ID); | |||
4564 | ||||
4565 | SMLoc ValStart = getLoc(); | |||
4566 | int64_t IVal; | |||
4567 | if (getParser().parseAbsoluteExpression(IVal)) | |||
4568 | return true; | |||
4569 | SMLoc ValEnd = getLoc(); | |||
4570 | SMRange ValRange = SMRange(ValStart, ValEnd); | |||
4571 | ||||
4572 | if (IVal < 0) | |||
4573 | return OutOfRangeError(ValRange); | |||
4574 | ||||
4575 | uint64_t Val = IVal; | |||
4576 | ||||
4577 | #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ | |||
4578 | if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ | |||
4579 | return OutOfRangeError(RANGE); \ | |||
4580 | AMDHSA_BITS_SET(FIELD, ENTRY, VALUE)FIELD &= ~ENTRY; FIELD |= ((VALUE << ENTRY_SHIFT) & ENTRY); | |||
4581 | ||||
4582 | if (ID == ".amdhsa_group_segment_fixed_size") { | |||
4583 | if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT8>(Val)) | |||
4584 | return OutOfRangeError(ValRange); | |||
4585 | KD.group_segment_fixed_size = Val; | |||
4586 | } else if (ID == ".amdhsa_private_segment_fixed_size") { | |||
4587 | if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT8>(Val)) | |||
4588 | return OutOfRangeError(ValRange); | |||
4589 | KD.private_segment_fixed_size = Val; | |||
4590 | } else if (ID == ".amdhsa_kernarg_size") { | |||
4591 | if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT8>(Val)) | |||
4592 | return OutOfRangeError(ValRange); | |||
4593 | KD.kernarg_size = Val; | |||
4594 | } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { | |||
4595 | if (hasArchitectedFlatScratch()) | |||
4596 | return Error(IDRange.Start, | |||
4597 | "directive is not supported with architected flat scratch", | |||
4598 | IDRange); | |||
4599 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4600 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, | |||
4601 | Val, ValRange); | |||
4602 | if (Val) | |||
4603 | UserSGPRCount += 4; | |||
4604 | } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { | |||
4605 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4606 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, | |||
4607 | ValRange); | |||
4608 | if (Val) | |||
4609 | UserSGPRCount += 2; | |||
4610 | } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { | |||
4611 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4612 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, | |||
4613 | ValRange); | |||
4614 | if (Val) | |||
4615 | UserSGPRCount += 2; | |||
4616 | } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { | |||
4617 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4618 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, | |||
4619 | Val, ValRange); | |||
4620 | if (Val) | |||
4621 | UserSGPRCount += 2; | |||
4622 | } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { | |||
4623 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4624 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, | |||
4625 | ValRange); | |||
4626 | if (Val) | |||
4627 | UserSGPRCount += 2; | |||
4628 | } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { | |||
4629 | if (hasArchitectedFlatScratch()) | |||
4630 | return Error(IDRange.Start, | |||
4631 | "directive is not supported with architected flat scratch", | |||
4632 | IDRange); | |||
4633 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4634 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, | |||
4635 | ValRange); | |||
4636 | if (Val) | |||
4637 | UserSGPRCount += 2; | |||
4638 | } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { | |||
4639 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4640 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, | |||
4641 | Val, ValRange); | |||
4642 | if (Val) | |||
4643 | UserSGPRCount += 1; | |||
4644 | } else if (ID == ".amdhsa_wavefront_size32") { | |||
4645 | if (IVersion.Major < 10) | |||
4646 | return Error(IDRange.Start, "directive requires gfx10+", IDRange); | |||
4647 | EnableWavefrontSize32 = Val; | |||
4648 | PARSE_BITS_ENTRY(KD.kernel_code_properties, | |||
4649 | KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, | |||
4650 | Val, ValRange); | |||
4651 | } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { | |||
4652 | if (hasArchitectedFlatScratch()) | |||
4653 | return Error(IDRange.Start, | |||
4654 | "directive is not supported with architected flat scratch", | |||
4655 | IDRange); | |||
4656 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4657 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); | |||
4658 | } else if (ID == ".amdhsa_enable_private_segment") { | |||
4659 | if (!hasArchitectedFlatScratch()) | |||
4660 | return Error( | |||
4661 | IDRange.Start, | |||
4662 | "directive is not supported without architected flat scratch", | |||
4663 | IDRange); | |||
4664 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4665 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); | |||
4666 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { | |||
4667 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4668 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, | |||
4669 | ValRange); | |||
4670 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { | |||
4671 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4672 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, | |||
4673 | ValRange); | |||
4674 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { | |||
4675 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4676 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, | |||
4677 | ValRange); | |||
4678 | } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { | |||
4679 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4680 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, | |||
4681 | ValRange); | |||
4682 | } else if (ID == ".amdhsa_system_vgpr_workitem_id") { | |||
4683 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4684 | COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, | |||
4685 | ValRange); | |||
4686 | } else if (ID == ".amdhsa_next_free_vgpr") { | |||
4687 | VGPRRange = ValRange; | |||
4688 | NextFreeVGPR = Val; | |||
4689 | } else if (ID == ".amdhsa_next_free_sgpr") { | |||
4690 | SGPRRange = ValRange; | |||
4691 | NextFreeSGPR = Val; | |||
4692 | } else if (ID == ".amdhsa_accum_offset") { | |||
4693 | if (!isGFX90A()) | |||
4694 | return Error(IDRange.Start, "directive requires gfx90a+", IDRange); | |||
4695 | AccumOffset = Val; | |||
4696 | } else if (ID == ".amdhsa_reserve_vcc") { | |||
4697 | if (!isUInt<1>(Val)) | |||
4698 | return OutOfRangeError(ValRange); | |||
4699 | ReserveVCC = Val; | |||
4700 | } else if (ID == ".amdhsa_reserve_flat_scratch") { | |||
4701 | if (IVersion.Major < 7) | |||
4702 | return Error(IDRange.Start, "directive requires gfx7+", IDRange); | |||
4703 | if (hasArchitectedFlatScratch()) | |||
4704 | return Error(IDRange.Start, | |||
4705 | "directive is not supported with architected flat scratch", | |||
4706 | IDRange); | |||
4707 | if (!isUInt<1>(Val)) | |||
4708 | return OutOfRangeError(ValRange); | |||
4709 | ReserveFlatScr = Val; | |||
4710 | } else if (ID == ".amdhsa_reserve_xnack_mask") { | |||
4711 | if (IVersion.Major < 8) | |||
4712 | return Error(IDRange.Start, "directive requires gfx8+", IDRange); | |||
4713 | if (!isUInt<1>(Val)) | |||
4714 | return OutOfRangeError(ValRange); | |||
4715 | if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) | |||
4716 | return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", | |||
4717 | IDRange); | |||
4718 | } else if (ID == ".amdhsa_float_round_mode_32") { | |||
4719 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, | |||
4720 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); | |||
4721 | } else if (ID == ".amdhsa_float_round_mode_16_64") { | |||
4722 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, | |||
4723 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); | |||
4724 | } else if (ID == ".amdhsa_float_denorm_mode_32") { | |||
4725 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, | |||
4726 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); | |||
4727 | } else if (ID == ".amdhsa_float_denorm_mode_16_64") { | |||
4728 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, | |||
4729 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, | |||
4730 | ValRange); | |||
4731 | } else if (ID == ".amdhsa_dx10_clamp") { | |||
4732 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, | |||
4733 | COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); | |||
4734 | } else if (ID == ".amdhsa_ieee_mode") { | |||
4735 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, | |||
4736 | Val, ValRange); | |||
4737 | } else if (ID == ".amdhsa_fp16_overflow") { | |||
4738 | if (IVersion.Major < 9) | |||
4739 | return Error(IDRange.Start, "directive requires gfx9+", IDRange); | |||
4740 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, | |||
4741 | ValRange); | |||
4742 | } else if (ID == ".amdhsa_tg_split") { | |||
4743 | if (!isGFX90A()) | |||
4744 | return Error(IDRange.Start, "directive requires gfx90a+", IDRange); | |||
4745 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, | |||
4746 | ValRange); | |||
4747 | } else if (ID == ".amdhsa_workgroup_processor_mode") { | |||
4748 | if (IVersion.Major < 10) | |||
4749 | return Error(IDRange.Start, "directive requires gfx10+", IDRange); | |||
4750 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, | |||
4751 | ValRange); | |||
4752 | } else if (ID == ".amdhsa_memory_ordered") { | |||
4753 | if (IVersion.Major < 10) | |||
4754 | return Error(IDRange.Start, "directive requires gfx10+", IDRange); | |||
4755 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, | |||
4756 | ValRange); | |||
4757 | } else if (ID == ".amdhsa_forward_progress") { | |||
4758 | if (IVersion.Major < 10) | |||
4759 | return Error(IDRange.Start, "directive requires gfx10+", IDRange); | |||
4760 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, | |||
4761 | ValRange); | |||
4762 | } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { | |||
4763 | PARSE_BITS_ENTRY( | |||
4764 | KD.compute_pgm_rsrc2, | |||
4765 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, | |||
4766 | ValRange); | |||
4767 | } else if (ID == ".amdhsa_exception_fp_denorm_src") { | |||
4768 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4769 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, | |||
4770 | Val, ValRange); | |||
4771 | } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { | |||
4772 | PARSE_BITS_ENTRY( | |||
4773 | KD.compute_pgm_rsrc2, | |||
4774 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, | |||
4775 | ValRange); | |||
4776 | } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { | |||
4777 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4778 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, | |||
4779 | Val, ValRange); | |||
4780 | } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { | |||
4781 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4782 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, | |||
4783 | Val, ValRange); | |||
4784 | } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { | |||
4785 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4786 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, | |||
4787 | Val, ValRange); | |||
4788 | } else if (ID == ".amdhsa_exception_int_div_zero") { | |||
4789 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, | |||
4790 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, | |||
4791 | Val, ValRange); | |||
4792 | } else { | |||
4793 | return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); | |||
4794 | } | |||
4795 | ||||
4796 | #undef PARSE_BITS_ENTRY | |||
4797 | } | |||
4798 | ||||
4799 | if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) | |||
4800 | return TokError(".amdhsa_next_free_vgpr directive is required"); | |||
4801 | ||||
4802 | if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) | |||
4803 | return TokError(".amdhsa_next_free_sgpr directive is required"); | |||
4804 | ||||
4805 | unsigned VGPRBlocks; | |||
4806 | unsigned SGPRBlocks; | |||
4807 | if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, | |||
4808 | getTargetStreamer().getTargetID()->isXnackOnOrAny(), | |||
4809 | EnableWavefrontSize32, NextFreeVGPR, | |||
4810 | VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, | |||
4811 | SGPRBlocks)) | |||
4812 | return true; | |||
4813 | ||||
4814 | if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( | |||
4815 | VGPRBlocks)) | |||
4816 | return OutOfRangeError(VGPRRange); | |||
4817 | AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,KD.compute_pgm_rsrc1 &= ~COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT ; KD.compute_pgm_rsrc1 |= ((VGPRBlocks << COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) | |||
4818 | COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks)KD.compute_pgm_rsrc1 &= ~COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT ; KD.compute_pgm_rsrc1 |= ((VGPRBlocks << COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT); | |||
4819 | ||||
4820 | if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( | |||
4821 | SGPRBlocks)) | |||
4822 | return OutOfRangeError(SGPRRange); | |||
4823 | AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,KD.compute_pgm_rsrc1 &= ~COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT ; KD.compute_pgm_rsrc1 |= ((SGPRBlocks << COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) | |||
4824 | COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,KD.compute_pgm_rsrc1 &= ~COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT ; KD.compute_pgm_rsrc1 |= ((SGPRBlocks << COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) | |||
4825 | SGPRBlocks)KD.compute_pgm_rsrc1 &= ~COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT ; KD.compute_pgm_rsrc1 |= ((SGPRBlocks << COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT); | |||
4826 | ||||
4827 | if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) | |||
4828 | return TokError("too many user SGPRs enabled"); | |||
4829 | AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,KD.compute_pgm_rsrc2 &= ~COMPUTE_PGM_RSRC2_USER_SGPR_COUNT ; KD.compute_pgm_rsrc2 |= ((UserSGPRCount << COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC2_USER_SGPR_COUNT) | |||
4830 | UserSGPRCount)KD.compute_pgm_rsrc2 &= ~COMPUTE_PGM_RSRC2_USER_SGPR_COUNT ; KD.compute_pgm_rsrc2 |= ((UserSGPRCount << COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT ) & COMPUTE_PGM_RSRC2_USER_SGPR_COUNT); | |||
4831 | ||||
4832 | if (isGFX90A()) { | |||
4833 | if (Seen.find(".amdhsa_accum_offset") == Seen.end()) | |||
4834 | return TokError(".amdhsa_accum_offset directive is required"); | |||
4835 | if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) | |||
4836 | return TokError("accum_offset should be in range [4..256] in " | |||
4837 | "increments of 4"); | |||
4838 | if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) | |||
4839 | return TokError("accum_offset exceeds total VGPR allocation"); | |||
4840 | AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,KD.compute_pgm_rsrc3 &= ~COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET ; KD.compute_pgm_rsrc3 |= (((AccumOffset / 4 - 1) << COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT ) & COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) | |||
4841 | (AccumOffset / 4 - 1))KD.compute_pgm_rsrc3 &= ~COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET ; KD.compute_pgm_rsrc3 |= (((AccumOffset / 4 - 1) << COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT ) & COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET); | |||
4842 | } | |||
4843 | ||||
4844 | getTargetStreamer().EmitAmdhsaKernelDescriptor( | |||
4845 | getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, | |||
4846 | ReserveFlatScr); | |||
4847 | return false; | |||
4848 | } | |||
4849 | ||||
4850 | bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { | |||
4851 | uint32_t Major; | |||
4852 | uint32_t Minor; | |||
4853 | ||||
4854 | if (ParseDirectiveMajorMinor(Major, Minor)) | |||
4855 | return true; | |||
4856 | ||||
4857 | getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); | |||
4858 | return false; | |||
4859 | } | |||
4860 | ||||
4861 | bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { | |||
4862 | uint32_t Major; | |||
4863 | uint32_t Minor; | |||
4864 | uint32_t Stepping; | |||
4865 | StringRef VendorName; | |||
4866 | StringRef ArchName; | |||
4867 | ||||
4868 | // If this directive has no arguments, then use the ISA version for the | |||
4869 | // targeted GPU. | |||
4870 | if (isToken(AsmToken::EndOfStatement)) { | |||
4871 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); | |||
4872 | getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, | |||
4873 | ISA.Stepping, | |||
4874 | "AMD", "AMDGPU"); | |||
4875 | return false; | |||
4876 | } | |||
4877 | ||||
4878 | if (ParseDirectiveMajorMinor(Major, Minor)) | |||
4879 | return true; | |||
4880 | ||||
4881 | if (!trySkipToken(AsmToken::Comma)) | |||
4882 | return TokError("stepping version number required, comma expected"); | |||
4883 | ||||
4884 | if (ParseAsAbsoluteExpression(Stepping)) | |||
4885 | return TokError("invalid stepping version"); | |||
4886 | ||||
4887 | if (!trySkipToken(AsmToken::Comma)) | |||
4888 | return TokError("vendor name required, comma expected"); | |||
4889 | ||||
4890 | if (!parseString(VendorName, "invalid vendor name")) | |||
4891 | return true; | |||
4892 | ||||
4893 | if (!trySkipToken(AsmToken::Comma)) | |||
4894 | return TokError("arch name required, comma expected"); | |||
4895 | ||||
4896 | if (!parseString(ArchName, "invalid arch name")) | |||
4897 | return true; | |||
4898 | ||||
4899 | getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, | |||
4900 | VendorName, ArchName); | |||
4901 | return false; | |||
4902 | } | |||
4903 | ||||
4904 | bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, | |||
4905 | amd_kernel_code_t &Header) { | |||
4906 | // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing | |||
4907 | // assembly for backwards compatibility. | |||
4908 | if (ID == "max_scratch_backing_memory_byte_size") { | |||
4909 | Parser.eatToEndOfStatement(); | |||
4910 | return false; | |||
4911 | } | |||
4912 | ||||
4913 | SmallString<40> ErrStr; | |||
4914 | raw_svector_ostream Err(ErrStr); | |||
4915 | if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { | |||
4916 | return TokError(Err.str()); | |||
4917 | } | |||
4918 | Lex(); | |||
4919 | ||||
4920 | if (ID == "enable_wavefront_size32") { | |||
4921 | if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { | |||
4922 | if (!isGFX10Plus()) | |||
4923 | return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); | |||
4924 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) | |||
4925 | return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); | |||
4926 | } else { | |||
4927 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) | |||
4928 | return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); | |||
4929 | } | |||
4930 | } | |||
4931 | ||||
4932 | if (ID == "wavefront_size") { | |||
4933 | if (Header.wavefront_size == 5) { | |||
4934 | if (!isGFX10Plus()) | |||
4935 | return TokError("wavefront_size=5 is only allowed on GFX10+"); | |||
4936 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) | |||
4937 | return TokError("wavefront_size=5 requires +WavefrontSize32"); | |||
4938 | } else if (Header.wavefront_size == 6) { | |||
4939 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) | |||
4940 | return TokError("wavefront_size=6 requires +WavefrontSize64"); | |||
4941 | } | |||
4942 | } | |||
4943 | ||||
4944 | if (ID == "enable_wgp_mode") { | |||
4945 | if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers)(((Header.compute_pgm_resource_registers) >> 29) & 0x1 ) && | |||
4946 | !isGFX10Plus()) | |||
4947 | return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); | |||
4948 | } | |||
4949 | ||||
4950 | if (ID == "enable_mem_ordered") { | |||
4951 | if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers)(((Header.compute_pgm_resource_registers) >> 30) & 0x1 ) && | |||
4952 | !isGFX10Plus()) | |||
4953 | return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); | |||
4954 | } | |||
4955 | ||||
4956 | if (ID == "enable_fwd_progress") { | |||
4957 | if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers)(((Header.compute_pgm_resource_registers) >> 31) & 0x1 ) && | |||
4958 | !isGFX10Plus()) | |||
4959 | return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); | |||
4960 | } | |||
4961 | ||||
4962 | return false; | |||
4963 | } | |||
4964 | ||||
4965 | bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { | |||
4966 | amd_kernel_code_t Header; | |||
4967 | AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); | |||
4968 | ||||
4969 | while (true) { | |||
4970 | // Lex EndOfStatement. This is in a while loop, because lexing a comment | |||
4971 | // will set the current token to EndOfStatement. | |||
4972 | while(trySkipToken(AsmToken::EndOfStatement)); | |||
4973 | ||||
4974 | StringRef ID; | |||
4975 | if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) | |||
4976 | return true; | |||
4977 | ||||
4978 | if (ID == ".end_amd_kernel_code_t") | |||
4979 | break; | |||
4980 | ||||
4981 | if (ParseAMDKernelCodeTValue(ID, Header)) | |||
4982 | return true; | |||
4983 | } | |||
4984 | ||||
4985 | getTargetStreamer().EmitAMDKernelCodeT(Header); | |||
4986 | ||||
4987 | return false; | |||
4988 | } | |||
4989 | ||||
4990 | bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { | |||
4991 | StringRef KernelName; | |||
4992 | if (!parseId(KernelName, "expected symbol name")) | |||
4993 | return true; | |||
4994 | ||||
4995 | getTargetStreamer().EmitAMDGPUSymbolType(KernelName, | |||
4996 | ELF::STT_AMDGPU_HSA_KERNEL); | |||
4997 | ||||
4998 | KernelScope.initialize(getContext()); | |||
4999 | return false; | |||
5000 | } | |||
5001 | ||||
5002 | bool AMDGPUAsmParser::ParseDirectiveISAVersion() { | |||
5003 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { | |||
5004 | return Error(getLoc(), | |||
5005 | ".amd_amdgpu_isa directive is not available on non-amdgcn " | |||
5006 | "architectures"); | |||
5007 | } | |||
5008 | ||||
5009 | auto TargetIDDirective = getLexer().getTok().getStringContents(); | |||
5010 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) | |||
5011 | return Error(getParser().getTok().getLoc(), "target id must match options"); | |||
5012 | ||||
5013 | getTargetStreamer().EmitISAVersion(); | |||
5014 | Lex(); | |||
5015 | ||||
5016 | return false; | |||
5017 | } | |||
5018 | ||||
5019 | bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { | |||
5020 | const char *AssemblerDirectiveBegin; | |||
5021 | const char *AssemblerDirectiveEnd; | |||
5022 | std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = | |||
5023 | isHsaAbiVersion3Or4(&getSTI()) | |||
5024 | ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, | |||
5025 | HSAMD::V3::AssemblerDirectiveEnd) | |||
5026 | : std::make_tuple(HSAMD::AssemblerDirectiveBegin, | |||
5027 | HSAMD::AssemblerDirectiveEnd); | |||
5028 | ||||
5029 | if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { | |||
5030 | return Error(getLoc(), | |||
5031 | (Twine(AssemblerDirectiveBegin) + Twine(" directive is " | |||
5032 | "not available on non-amdhsa OSes")).str()); | |||
5033 | } | |||
5034 | ||||
5035 | std::string HSAMetadataString; | |||
5036 | if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, | |||
5037 | HSAMetadataString)) | |||
5038 | return true; | |||
5039 | ||||
5040 | if (isHsaAbiVersion3Or4(&getSTI())) { | |||
5041 | if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) | |||
5042 | return Error(getLoc(), "invalid HSA metadata"); | |||
5043 | } else { | |||
5044 | if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) | |||
5045 | return Error(getLoc(), "invalid HSA metadata"); | |||
5046 | } | |||
5047 | ||||
5048 | return false; | |||
5049 | } | |||
5050 | ||||
5051 | /// Common code to parse out a block of text (typically YAML) between start and | |||
5052 | /// end directives. | |||
5053 | bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, | |||
5054 | const char *AssemblerDirectiveEnd, | |||
5055 | std::string &CollectString) { | |||
5056 | ||||
5057 | raw_string_ostream CollectStream(CollectString); | |||
5058 | ||||
5059 | getLexer().setSkipSpace(false); | |||
5060 | ||||
5061 | bool FoundEnd = false; | |||
5062 | while (!isToken(AsmToken::Eof)) { | |||
5063 | while (isToken(AsmToken::Space)) { | |||
5064 | CollectStream << getTokenStr(); | |||
5065 | Lex(); | |||
5066 | } | |||
5067 | ||||
5068 | if (trySkipId(AssemblerDirectiveEnd)) { | |||
5069 | FoundEnd = true; | |||
5070 | break; | |||
5071 | } | |||
5072 | ||||
5073 | CollectStream << Parser.parseStringToEndOfStatement() | |||
5074 | << getContext().getAsmInfo()->getSeparatorString(); | |||
5075 | ||||
5076 | Parser.eatToEndOfStatement(); | |||
5077 | } | |||
5078 | ||||
5079 | getLexer().setSkipSpace(true); | |||
5080 | ||||
5081 | if (isToken(AsmToken::Eof) && !FoundEnd) { | |||
5082 | return TokError(Twine("expected directive ") + | |||
5083 | Twine(AssemblerDirectiveEnd) + Twine(" not found")); | |||
5084 | } | |||
5085 | ||||
5086 | CollectStream.flush(); | |||
5087 | return false; | |||
5088 | } | |||
5089 | ||||
5090 | /// Parse the assembler directive for new MsgPack-format PAL metadata. | |||
5091 | bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { | |||
5092 | std::string String; | |||
5093 | if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, | |||
5094 | AMDGPU::PALMD::AssemblerDirectiveEnd, String)) | |||
5095 | return true; | |||
5096 | ||||
5097 | auto PALMetadata = getTargetStreamer().getPALMetadata(); | |||
5098 | if (!PALMetadata->setFromString(String)) | |||
5099 | return Error(getLoc(), "invalid PAL metadata"); | |||
5100 | return false; | |||
5101 | } | |||
5102 | ||||
5103 | /// Parse the assembler directive for old linear-format PAL metadata. | |||
5104 | bool AMDGPUAsmParser::ParseDirectivePALMetadata() { | |||
5105 | if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { | |||
5106 | return Error(getLoc(), | |||
5107 | (Twine(PALMD::AssemblerDirective) + Twine(" directive is " | |||
5108 | "not available on non-amdpal OSes")).str()); | |||
5109 | } | |||
5110 | ||||
5111 | auto PALMetadata = getTargetStreamer().getPALMetadata(); | |||
5112 | PALMetadata->setLegacy(); | |||
5113 | for (;;) { | |||
5114 | uint32_t Key, Value; | |||
5115 | if (ParseAsAbsoluteExpression(Key)) { | |||
5116 | return TokError(Twine("invalid value in ") + | |||
5117 | Twine(PALMD::AssemblerDirective)); | |||
5118 | } | |||
5119 | if (!trySkipToken(AsmToken::Comma)) { | |||
5120 | return TokError(Twine("expected an even number of values in ") + | |||
5121 | Twine(PALMD::AssemblerDirective)); | |||
5122 | } | |||
5123 | if (ParseAsAbsoluteExpression(Value)) { | |||
5124 | return TokError(Twine("invalid value in ") + | |||
5125 | Twine(PALMD::AssemblerDirective)); | |||
5126 | } | |||
5127 | PALMetadata->setRegister(Key, Value); | |||
5128 | if (!trySkipToken(AsmToken::Comma)) | |||
5129 | break; | |||
5130 | } | |||
5131 | return false; | |||
5132 | } | |||
5133 | ||||
5134 | /// ParseDirectiveAMDGPULDS | |||
5135 | /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] | |||
5136 | bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { | |||
5137 | if (getParser().checkForValidSection()) | |||
5138 | return true; | |||
5139 | ||||
5140 | StringRef Name; | |||
5141 | SMLoc NameLoc = getLoc(); | |||
5142 | if (getParser().parseIdentifier(Name)) | |||
5143 | return TokError("expected identifier in directive"); | |||
5144 | ||||
5145 | MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); | |||
5146 | if (parseToken(AsmToken::Comma, "expected ','")) | |||
5147 | return true; | |||
5148 | ||||
5149 | unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); | |||
5150 | ||||
5151 | int64_t Size; | |||
5152 | SMLoc SizeLoc = getLoc(); | |||
5153 | if (getParser().parseAbsoluteExpression(Size)) | |||
5154 | return true; | |||
5155 | if (Size < 0) | |||
5156 | return Error(SizeLoc, "size must be non-negative"); | |||
5157 | if (Size > LocalMemorySize) | |||
5158 | return Error(SizeLoc, "size is too large"); | |||
5159 | ||||
5160 | int64_t Alignment = 4; | |||
5161 | if (trySkipToken(AsmToken::Comma)) { | |||
5162 | SMLoc AlignLoc = getLoc(); | |||
5163 | if (getParser().parseAbsoluteExpression(Alignment)) | |||
5164 | return true; | |||
5165 | if (Alignment < 0 || !isPowerOf2_64(Alignment)) | |||
5166 | return Error(AlignLoc, "alignment must be a power of two"); | |||
5167 | ||||
5168 | // Alignment larger than the size of LDS is possible in theory, as long | |||
5169 | // as the linker manages to place to symbol at address 0, but we do want | |||
5170 | // to make sure the alignment fits nicely into a 32-bit integer. | |||
5171 | if (Alignment >= 1u << 31) | |||
5172 | return Error(AlignLoc, "alignment is too large"); | |||
5173 | } | |||
5174 | ||||
5175 | if (parseToken(AsmToken::EndOfStatement, | |||
5176 | "unexpected token in '.amdgpu_lds' directive")) | |||
5177 | return true; | |||
5178 | ||||
5179 | Symbol->redefineIfPossible(); | |||
5180 | if (!Symbol->isUndefined()) | |||
5181 | return Error(NameLoc, "invalid symbol redefinition"); | |||
5182 | ||||
5183 | getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); | |||
5184 | return false; | |||
5185 | } | |||
5186 | ||||
5187 | bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { | |||
5188 | StringRef IDVal = DirectiveID.getString(); | |||
5189 | ||||
5190 | if (isHsaAbiVersion3Or4(&getSTI())) { | |||
5191 | if (IDVal == ".amdhsa_kernel") | |||
5192 | return ParseDirectiveAMDHSAKernel(); | |||
5193 | ||||
5194 | // TODO: Restructure/combine with PAL metadata directive. | |||
5195 | if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) | |||
5196 | return ParseDirectiveHSAMetadata(); | |||
5197 | } else { | |||
5198 | if (IDVal == ".hsa_code_object_version") | |||
5199 | return ParseDirectiveHSACodeObjectVersion(); | |||
5200 | ||||
5201 | if (IDVal == ".hsa_code_object_isa") | |||
5202 | return ParseDirectiveHSACodeObjectISA(); | |||
5203 | ||||
5204 | if (IDVal == ".amd_kernel_code_t") | |||
5205 | return ParseDirectiveAMDKernelCodeT(); | |||
5206 | ||||
5207 | if (IDVal == ".amdgpu_hsa_kernel") | |||
5208 | return ParseDirectiveAMDGPUHsaKernel(); | |||
5209 | ||||
5210 | if (IDVal == ".amd_amdgpu_isa") | |||
5211 | return ParseDirectiveISAVersion(); | |||
5212 | ||||
5213 | if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) | |||
5214 | return ParseDirectiveHSAMetadata(); | |||
5215 | } | |||
5216 | ||||
5217 | if (IDVal == ".amdgcn_target") | |||
5218 | return ParseDirectiveAMDGCNTarget(); | |||
5219 | ||||
5220 | if (IDVal == ".amdgpu_lds") | |||
5221 | return ParseDirectiveAMDGPULDS(); | |||
5222 | ||||
5223 | if (IDVal == PALMD::AssemblerDirectiveBegin) | |||
5224 | return ParseDirectivePALMetadataBegin(); | |||
5225 | ||||
5226 | if (IDVal == PALMD::AssemblerDirective) | |||
5227 | return ParseDirectivePALMetadata(); | |||
5228 | ||||
5229 | return true; | |||
5230 | } | |||
5231 | ||||
5232 | bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, | |||
5233 | unsigned RegNo) { | |||
5234 | ||||
5235 | for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); | |||
5236 | R.isValid(); ++R) { | |||
5237 | if (*R == RegNo) | |||
5238 | return isGFX9Plus(); | |||
5239 | } | |||
5240 | ||||
5241 | // GFX10 has 2 more SGPRs 104 and 105. | |||
5242 | for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); | |||
5243 | R.isValid(); ++R) { | |||
5244 | if (*R == RegNo) | |||
5245 | return hasSGPR104_SGPR105(); | |||
5246 | } | |||
5247 | ||||
5248 | switch (RegNo) { | |||
5249 | case AMDGPU::SRC_SHARED_BASE: | |||
5250 | case AMDGPU::SRC_SHARED_LIMIT: | |||
5251 | case AMDGPU::SRC_PRIVATE_BASE: | |||
5252 | case AMDGPU::SRC_PRIVATE_LIMIT: | |||
5253 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: | |||
5254 | return isGFX9Plus(); | |||
5255 | case AMDGPU::TBA: | |||
5256 | case AMDGPU::TBA_LO: | |||
5257 | case AMDGPU::TBA_HI: | |||
5258 | case AMDGPU::TMA: | |||
5259 | case AMDGPU::TMA_LO: | |||
5260 | case AMDGPU::TMA_HI: | |||
5261 | return !isGFX9Plus(); | |||
5262 | case AMDGPU::XNACK_MASK: | |||
5263 | case AMDGPU::XNACK_MASK_LO: | |||
5264 | case AMDGPU::XNACK_MASK_HI: | |||
5265 | return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); | |||
5266 | case AMDGPU::SGPR_NULL: | |||
5267 | return isGFX10Plus(); | |||
5268 | default: | |||
5269 | break; | |||
5270 | } | |||
5271 | ||||
5272 | if (isCI()) | |||
5273 | return true; | |||
5274 | ||||
5275 | if (isSI() || isGFX10Plus()) { | |||
5276 | // No flat_scr on SI. | |||
5277 | // On GFX10 flat scratch is not a valid register operand and can only be | |||
5278 | // accessed with s_setreg/s_getreg. | |||
5279 | switch (RegNo) { | |||
5280 | case AMDGPU::FLAT_SCR: | |||
5281 | case AMDGPU::FLAT_SCR_LO: | |||
5282 | case AMDGPU::FLAT_SCR_HI: | |||
5283 | return false; | |||
5284 | default: | |||
5285 | return true; | |||
5286 | } | |||
5287 | } | |||
5288 | ||||
5289 | // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that | |||
5290 | // SI/CI have. | |||
5291 | for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); | |||
5292 | R.isValid(); ++R) { | |||
5293 | if (*R == RegNo) | |||
5294 | return hasSGPR102_SGPR103(); | |||
5295 | } | |||
5296 | ||||
5297 | return true; | |||
5298 | } | |||
5299 | ||||
5300 | OperandMatchResultTy | |||
5301 | AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, | |||
5302 | OperandMode Mode) { | |||
5303 | // Try to parse with a custom parser | |||
5304 | OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); | |||
5305 | ||||
5306 | // If we successfully parsed the operand or if there as an error parsing, | |||
5307 | // we are done. | |||
5308 | // | |||
5309 | // If we are parsing after we reach EndOfStatement then this means we | |||
5310 | // are appending default values to the Operands list. This is only done | |||
5311 | // by custom parser, so we shouldn't continue on to the generic parsing. | |||
5312 | if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || | |||
5313 | isToken(AsmToken::EndOfStatement)) | |||
5314 | return ResTy; | |||
5315 | ||||
5316 | SMLoc RBraceLoc; | |||
5317 | SMLoc LBraceLoc = getLoc(); | |||
5318 | if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { | |||
5319 | unsigned Prefix = Operands.size(); | |||
5320 | ||||
5321 | for (;;) { | |||
5322 | auto Loc = getLoc(); | |||
5323 | ResTy = parseReg(Operands); | |||
5324 | if (ResTy == MatchOperand_NoMatch) | |||
5325 | Error(Loc, "expected a register"); | |||
5326 | if (ResTy != MatchOperand_Success) | |||
5327 | return MatchOperand_ParseFail; | |||
5328 | ||||
5329 | RBraceLoc = getLoc(); | |||
5330 | if (trySkipToken(AsmToken::RBrac)) | |||
5331 | break; | |||
5332 | ||||
5333 | if (!skipToken(AsmToken::Comma, | |||
5334 | "expected a comma or a closing square bracket")) { | |||
5335 | return MatchOperand_ParseFail; | |||
5336 | } | |||
5337 | } | |||
5338 | ||||
5339 | if (Operands.size() - Prefix > 1) { | |||
5340 | Operands.insert(Operands.begin() + Prefix, | |||
5341 | AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); | |||
5342 | Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); | |||
5343 | } | |||
5344 | ||||
5345 | return MatchOperand_Success; | |||
5346 | } | |||
5347 | ||||
5348 | return parseRegOrImm(Operands); | |||
5349 | } | |||
5350 | ||||
5351 | StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { | |||
5352 | // Clear any forced encodings from the previous instruction. | |||
5353 | setForcedEncodingSize(0); | |||
5354 | setForcedDPP(false); | |||
5355 | setForcedSDWA(false); | |||
5356 | ||||
5357 | if (Name.endswith("_e64")) { | |||
5358 | setForcedEncodingSize(64); | |||
5359 | return Name.substr(0, Name.size() - 4); | |||
5360 | } else if (Name.endswith("_e32")) { | |||
5361 | setForcedEncodingSize(32); | |||
5362 | return Name.substr(0, Name.size() - 4); | |||
5363 | } else if (Name.endswith("_dpp")) { | |||
5364 | setForcedDPP(true); | |||
5365 | return Name.substr(0, Name.size() - 4); | |||
5366 | } else if (Name.endswith("_sdwa")) { | |||
5367 | setForcedSDWA(true); | |||
5368 | return Name.substr(0, Name.size() - 5); | |||
5369 | } | |||
5370 | return Name; | |||
5371 | } | |||
5372 | ||||
5373 | bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, | |||
5374 | StringRef Name, | |||
5375 | SMLoc NameLoc, OperandVector &Operands) { | |||
5376 | // Add the instruction mnemonic | |||
5377 | Name = parseMnemonicSuffix(Name); | |||
5378 | Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); | |||
5379 | ||||
5380 | bool IsMIMG = Name.startswith("image_"); | |||
5381 | ||||
5382 | while (!trySkipToken(AsmToken::EndOfStatement)) { | |||
5383 | OperandMode Mode = OperandMode_Default; | |||
5384 | if (IsMIMG && isGFX10Plus() && Operands.size() == 2) | |||
5385 | Mode = OperandMode_NSA; | |||
5386 | CPolSeen = 0; | |||
5387 | OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); | |||
5388 | ||||
5389 | if (Res != MatchOperand_Success) { | |||
5390 | checkUnsupportedInstruction(Name, NameLoc); | |||
5391 | if (!Parser.hasPendingError()) { | |||
5392 | // FIXME: use real operand location rather than the current location. | |||
5393 | StringRef Msg = | |||
5394 | (Res == MatchOperand_ParseFail) ? "failed parsing operand." : | |||
5395 | "not a valid operand."; | |||
5396 | Error(getLoc(), Msg); | |||
5397 | } | |||
5398 | while (!trySkipToken(AsmToken::EndOfStatement)) { | |||
5399 | lex(); | |||
5400 | } | |||
5401 | return true; | |||
5402 | } | |||
5403 | ||||
5404 | // Eat the comma or space if there is one. | |||
5405 | trySkipToken(AsmToken::Comma); | |||
5406 | } | |||
5407 | ||||
5408 | return false; | |||
5409 | } | |||
5410 | ||||
5411 | //===----------------------------------------------------------------------===// | |||
5412 | // Utility functions | |||
5413 | //===----------------------------------------------------------------------===// | |||
5414 | ||||
5415 | OperandMatchResultTy | |||
5416 | AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { | |||
5417 | ||||
5418 | if (!trySkipId(Prefix, AsmToken::Colon)) | |||
5419 | return MatchOperand_NoMatch; | |||
5420 | ||||
5421 | return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; | |||
5422 | } | |||
5423 | ||||
5424 | OperandMatchResultTy | |||
5425 | AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, | |||
5426 | AMDGPUOperand::ImmTy ImmTy, | |||
5427 | bool (*ConvertResult)(int64_t&)) { | |||
5428 | SMLoc S = getLoc(); | |||
5429 | int64_t Value = 0; | |||
5430 | ||||
5431 | OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); | |||
5432 | if (Res != MatchOperand_Success) | |||
5433 | return Res; | |||
5434 | ||||
5435 | if (ConvertResult && !ConvertResult(Value)) { | |||
5436 | Error(S, "invalid " + StringRef(Prefix) + " value."); | |||
5437 | } | |||
5438 | ||||
5439 | Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); | |||
5440 | return MatchOperand_Success; | |||
5441 | } | |||
5442 | ||||
5443 | OperandMatchResultTy | |||
5444 | AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, | |||
5445 | OperandVector &Operands, | |||
5446 | AMDGPUOperand::ImmTy ImmTy, | |||
5447 | bool (*ConvertResult)(int64_t&)) { | |||
5448 | SMLoc S = getLoc(); | |||
5449 | if (!trySkipId(Prefix, AsmToken::Colon)) | |||
5450 | return MatchOperand_NoMatch; | |||
5451 | ||||
5452 | if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) | |||
5453 | return MatchOperand_ParseFail; | |||
5454 | ||||
5455 | unsigned Val = 0; | |||
5456 | const unsigned MaxSize = 4; | |||
5457 | ||||
5458 | // FIXME: How to verify the number of elements matches the number of src | |||
5459 | // operands? | |||
5460 | for (int I = 0; ; ++I) { | |||
5461 | int64_t Op; | |||
5462 | SMLoc Loc = getLoc(); | |||
5463 | if (!parseExpr(Op)) | |||
5464 | return MatchOperand_ParseFail; | |||
5465 | ||||
5466 | if (Op != 0 && Op != 1) { | |||
5467 | Error(Loc, "invalid " + StringRef(Prefix) + " value."); | |||
5468 | return MatchOperand_ParseFail; | |||
5469 | } | |||
5470 | ||||
5471 | Val |= (Op << I); | |||
5472 | ||||
5473 | if (trySkipToken(AsmToken::RBrac)) | |||
5474 | break; | |||
5475 | ||||
5476 | if (I + 1 == MaxSize) { | |||
5477 | Error(getLoc(), "expected a closing square bracket"); | |||
5478 | return MatchOperand_ParseFail; | |||
5479 | } | |||
5480 | ||||
5481 | if (!skipToken(AsmToken::Comma, "expected a comma")) | |||
5482 | return MatchOperand_ParseFail; | |||
5483 | } | |||
5484 | ||||
5485 | Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); | |||
5486 | return MatchOperand_Success; | |||
5487 | } | |||
5488 | ||||
5489 | OperandMatchResultTy | |||
5490 | AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, | |||
5491 | AMDGPUOperand::ImmTy ImmTy) { | |||
5492 | int64_t Bit; | |||
5493 | SMLoc S = getLoc(); | |||
5494 | ||||
5495 | if (trySkipId(Name)) { | |||
5496 | Bit = 1; | |||
5497 | } else if (trySkipId("no", Name)) { | |||
5498 | Bit = 0; | |||
5499 | } else { | |||
5500 | return MatchOperand_NoMatch; | |||
5501 | } | |||
5502 | ||||
5503 | if (Name == "r128" && !hasMIMG_R128()) { | |||
5504 | Error(S, "r128 modifier is not supported on this GPU"); | |||
5505 | return MatchOperand_ParseFail; | |||
5506 | } | |||
5507 | if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { | |||
5508 | Error(S, "a16 modifier is not supported on this GPU"); | |||
5509 | return MatchOperand_ParseFail; | |||
5510 | } | |||
5511 | ||||
5512 | if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) | |||
5513 | ImmTy = AMDGPUOperand::ImmTyR128A16; | |||
5514 | ||||
5515 | Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); | |||
5516 | return MatchOperand_Success; | |||
5517 | } | |||
5518 | ||||
5519 | OperandMatchResultTy | |||
5520 | AMDGPUAsmParser::parseCPol(OperandVector &Operands) { | |||
5521 | unsigned CPolOn = 0; | |||
5522 | unsigned CPolOff = 0; | |||
5523 | SMLoc S = getLoc(); | |||
5524 | ||||
5525 | if (trySkipId("glc")) | |||
5526 | CPolOn = AMDGPU::CPol::GLC; | |||
5527 | else if (trySkipId("noglc")) | |||
5528 | CPolOff = AMDGPU::CPol::GLC; | |||
5529 | else if (trySkipId("slc")) | |||
5530 | CPolOn = AMDGPU::CPol::SLC; | |||
5531 | else if (trySkipId("noslc")) | |||
5532 | CPolOff = AMDGPU::CPol::SLC; | |||
5533 | else if (trySkipId("dlc")) | |||
5534 | CPolOn = AMDGPU::CPol::DLC; | |||
5535 | else if (trySkipId("nodlc")) | |||
5536 | CPolOff = AMDGPU::CPol::DLC; | |||
5537 | else if (trySkipId("scc")) | |||
5538 | CPolOn = AMDGPU::CPol::SCC; | |||
5539 | else if (trySkipId("noscc")) | |||
5540 | CPolOff = AMDGPU::CPol::SCC; | |||
5541 | else | |||
5542 | return MatchOperand_NoMatch; | |||
5543 | ||||
5544 | if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { | |||
5545 | Error(S, "dlc modifier is not supported on this GPU"); | |||
5546 | return MatchOperand_ParseFail; | |||
5547 | } | |||
5548 | ||||
5549 | if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { | |||
5550 | Error(S, "scc modifier is not supported on this GPU"); | |||
5551 | return MatchOperand_ParseFail; | |||
5552 | } | |||
5553 | ||||
5554 | if (CPolSeen & (CPolOn | CPolOff)) { | |||
5555 | Error(S, "duplicate cache policy modifier"); | |||
5556 | return MatchOperand_ParseFail; | |||
5557 | } | |||
5558 | ||||
5559 | CPolSeen |= (CPolOn | CPolOff); | |||
5560 | ||||
5561 | for (unsigned I = 1; I != Operands.size(); ++I) { | |||
5562 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
5563 | if (Op.isCPol()) { | |||
5564 | Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); | |||
5565 | return MatchOperand_Success; | |||
5566 | } | |||
5567 | } | |||
5568 | ||||
5569 | Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, | |||
5570 | AMDGPUOperand::ImmTyCPol)); | |||
5571 | ||||
5572 | return MatchOperand_Success; | |||
5573 | } | |||
5574 | ||||
5575 | static void addOptionalImmOperand( | |||
5576 | MCInst& Inst, const OperandVector& Operands, | |||
5577 | AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, | |||
5578 | AMDGPUOperand::ImmTy ImmT, | |||
5579 | int64_t Default = 0) { | |||
5580 | auto i = OptionalIdx.find(ImmT); | |||
5581 | if (i != OptionalIdx.end()) { | |||
5582 | unsigned Idx = i->second; | |||
5583 | ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); | |||
5584 | } else { | |||
5585 | Inst.addOperand(MCOperand::createImm(Default)); | |||
5586 | } | |||
5587 | } | |||
5588 | ||||
5589 | OperandMatchResultTy | |||
5590 | AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, | |||
5591 | StringRef &Value, | |||
5592 | SMLoc &StringLoc) { | |||
5593 | if (!trySkipId(Prefix, AsmToken::Colon)) | |||
5594 | return MatchOperand_NoMatch; | |||
5595 | ||||
5596 | StringLoc = getLoc(); | |||
5597 | return parseId(Value, "expected an identifier") ? MatchOperand_Success | |||
5598 | : MatchOperand_ParseFail; | |||
5599 | } | |||
5600 | ||||
5601 | //===----------------------------------------------------------------------===// | |||
5602 | // MTBUF format | |||
5603 | //===----------------------------------------------------------------------===// | |||
5604 | ||||
5605 | bool AMDGPUAsmParser::tryParseFmt(const char *Pref, | |||
5606 | int64_t MaxVal, | |||
5607 | int64_t &Fmt) { | |||
5608 | int64_t Val; | |||
5609 | SMLoc Loc = getLoc(); | |||
5610 | ||||
5611 | auto Res = parseIntWithPrefix(Pref, Val); | |||
5612 | if (Res == MatchOperand_ParseFail) | |||
5613 | return false; | |||
5614 | if (Res == MatchOperand_NoMatch) | |||
5615 | return true; | |||
5616 | ||||
5617 | if (Val < 0 || Val > MaxVal) { | |||
5618 | Error(Loc, Twine("out of range ", StringRef(Pref))); | |||
5619 | return false; | |||
5620 | } | |||
5621 | ||||
5622 | Fmt = Val; | |||
5623 | return true; | |||
5624 | } | |||
5625 | ||||
5626 | // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their | |||
5627 | // values to live in a joint format operand in the MCInst encoding. | |||
5628 | OperandMatchResultTy | |||
5629 | AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { | |||
5630 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5631 | ||||
5632 | int64_t Dfmt = DFMT_UNDEF; | |||
5633 | int64_t Nfmt = NFMT_UNDEF; | |||
5634 | ||||
5635 | // dfmt and nfmt can appear in either order, and each is optional. | |||
5636 | for (int I = 0; I < 2; ++I) { | |||
5637 | if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) | |||
5638 | return MatchOperand_ParseFail; | |||
5639 | ||||
5640 | if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { | |||
5641 | return MatchOperand_ParseFail; | |||
5642 | } | |||
5643 | // Skip optional comma between dfmt/nfmt | |||
5644 | // but guard against 2 commas following each other. | |||
5645 | if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && | |||
5646 | !peekToken().is(AsmToken::Comma)) { | |||
5647 | trySkipToken(AsmToken::Comma); | |||
5648 | } | |||
5649 | } | |||
5650 | ||||
5651 | if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) | |||
5652 | return MatchOperand_NoMatch; | |||
5653 | ||||
5654 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; | |||
5655 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; | |||
5656 | ||||
5657 | Format = encodeDfmtNfmt(Dfmt, Nfmt); | |||
5658 | return MatchOperand_Success; | |||
5659 | } | |||
5660 | ||||
5661 | OperandMatchResultTy | |||
5662 | AMDGPUAsmParser::parseUfmt(int64_t &Format) { | |||
5663 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5664 | ||||
5665 | int64_t Fmt = UFMT_UNDEF; | |||
5666 | ||||
5667 | if (!tryParseFmt("format", UFMT_MAX, Fmt)) | |||
5668 | return MatchOperand_ParseFail; | |||
5669 | ||||
5670 | if (Fmt == UFMT_UNDEF) | |||
5671 | return MatchOperand_NoMatch; | |||
5672 | ||||
5673 | Format = Fmt; | |||
5674 | return MatchOperand_Success; | |||
5675 | } | |||
5676 | ||||
5677 | bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, | |||
5678 | int64_t &Nfmt, | |||
5679 | StringRef FormatStr, | |||
5680 | SMLoc Loc) { | |||
5681 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5682 | int64_t Format; | |||
5683 | ||||
5684 | Format = getDfmt(FormatStr); | |||
5685 | if (Format != DFMT_UNDEF) { | |||
5686 | Dfmt = Format; | |||
5687 | return true; | |||
5688 | } | |||
5689 | ||||
5690 | Format = getNfmt(FormatStr, getSTI()); | |||
5691 | if (Format != NFMT_UNDEF) { | |||
5692 | Nfmt = Format; | |||
5693 | return true; | |||
5694 | } | |||
5695 | ||||
5696 | Error(Loc, "unsupported format"); | |||
5697 | return false; | |||
5698 | } | |||
5699 | ||||
5700 | OperandMatchResultTy | |||
5701 | AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, | |||
5702 | SMLoc FormatLoc, | |||
5703 | int64_t &Format) { | |||
5704 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5705 | ||||
5706 | int64_t Dfmt = DFMT_UNDEF; | |||
5707 | int64_t Nfmt = NFMT_UNDEF; | |||
5708 | if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) | |||
5709 | return MatchOperand_ParseFail; | |||
5710 | ||||
5711 | if (trySkipToken(AsmToken::Comma)) { | |||
5712 | StringRef Str; | |||
5713 | SMLoc Loc = getLoc(); | |||
5714 | if (!parseId(Str, "expected a format string") || | |||
5715 | !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { | |||
5716 | return MatchOperand_ParseFail; | |||
5717 | } | |||
5718 | if (Dfmt == DFMT_UNDEF) { | |||
5719 | Error(Loc, "duplicate numeric format"); | |||
5720 | return MatchOperand_ParseFail; | |||
5721 | } else if (Nfmt == NFMT_UNDEF) { | |||
5722 | Error(Loc, "duplicate data format"); | |||
5723 | return MatchOperand_ParseFail; | |||
5724 | } | |||
5725 | } | |||
5726 | ||||
5727 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; | |||
5728 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; | |||
5729 | ||||
5730 | if (isGFX10Plus()) { | |||
5731 | auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); | |||
5732 | if (Ufmt == UFMT_UNDEF) { | |||
5733 | Error(FormatLoc, "unsupported format"); | |||
5734 | return MatchOperand_ParseFail; | |||
5735 | } | |||
5736 | Format = Ufmt; | |||
5737 | } else { | |||
5738 | Format = encodeDfmtNfmt(Dfmt, Nfmt); | |||
5739 | } | |||
5740 | ||||
5741 | return MatchOperand_Success; | |||
5742 | } | |||
5743 | ||||
5744 | OperandMatchResultTy | |||
5745 | AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, | |||
5746 | SMLoc Loc, | |||
5747 | int64_t &Format) { | |||
5748 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5749 | ||||
5750 | auto Id = getUnifiedFormat(FormatStr); | |||
5751 | if (Id == UFMT_UNDEF) | |||
5752 | return MatchOperand_NoMatch; | |||
5753 | ||||
5754 | if (!isGFX10Plus()) { | |||
5755 | Error(Loc, "unified format is not supported on this GPU"); | |||
5756 | return MatchOperand_ParseFail; | |||
5757 | } | |||
5758 | ||||
5759 | Format = Id; | |||
5760 | return MatchOperand_Success; | |||
5761 | } | |||
5762 | ||||
5763 | OperandMatchResultTy | |||
5764 | AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { | |||
5765 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5766 | SMLoc Loc = getLoc(); | |||
5767 | ||||
5768 | if (!parseExpr(Format)) | |||
5769 | return MatchOperand_ParseFail; | |||
5770 | if (!isValidFormatEncoding(Format, getSTI())) { | |||
5771 | Error(Loc, "out of range format"); | |||
5772 | return MatchOperand_ParseFail; | |||
5773 | } | |||
5774 | ||||
5775 | return MatchOperand_Success; | |||
5776 | } | |||
5777 | ||||
5778 | OperandMatchResultTy | |||
5779 | AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { | |||
5780 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5781 | ||||
5782 | if (!trySkipId("format", AsmToken::Colon)) | |||
5783 | return MatchOperand_NoMatch; | |||
5784 | ||||
5785 | if (trySkipToken(AsmToken::LBrac)) { | |||
5786 | StringRef FormatStr; | |||
5787 | SMLoc Loc = getLoc(); | |||
5788 | if (!parseId(FormatStr, "expected a format string")) | |||
5789 | return MatchOperand_ParseFail; | |||
5790 | ||||
5791 | auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); | |||
5792 | if (Res == MatchOperand_NoMatch) | |||
5793 | Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); | |||
5794 | if (Res != MatchOperand_Success) | |||
5795 | return Res; | |||
5796 | ||||
5797 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) | |||
5798 | return MatchOperand_ParseFail; | |||
5799 | ||||
5800 | return MatchOperand_Success; | |||
5801 | } | |||
5802 | ||||
5803 | return parseNumericFormat(Format); | |||
5804 | } | |||
5805 | ||||
5806 | OperandMatchResultTy | |||
5807 | AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { | |||
5808 | using namespace llvm::AMDGPU::MTBUFFormat; | |||
5809 | ||||
5810 | int64_t Format = getDefaultFormatEncoding(getSTI()); | |||
5811 | OperandMatchResultTy Res; | |||
5812 | SMLoc Loc = getLoc(); | |||
5813 | ||||
5814 | // Parse legacy format syntax. | |||
5815 | Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); | |||
5816 | if (Res == MatchOperand_ParseFail) | |||
5817 | return Res; | |||
5818 | ||||
5819 | bool FormatFound = (Res == MatchOperand_Success); | |||
5820 | ||||
5821 | Operands.push_back( | |||
5822 | AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); | |||
5823 | ||||
5824 | if (FormatFound) | |||
5825 | trySkipToken(AsmToken::Comma); | |||
5826 | ||||
5827 | if (isToken(AsmToken::EndOfStatement)) { | |||
5828 | // We are expecting an soffset operand, | |||
5829 | // but let matcher handle the error. | |||
5830 | return MatchOperand_Success; | |||
5831 | } | |||
5832 | ||||
5833 | // Parse soffset. | |||
5834 | Res = parseRegOrImm(Operands); | |||
5835 | if (Res != MatchOperand_Success) | |||
5836 | return Res; | |||
5837 | ||||
5838 | trySkipToken(AsmToken::Comma); | |||
5839 | ||||
5840 | if (!FormatFound) { | |||
5841 | Res = parseSymbolicOrNumericFormat(Format); | |||
5842 | if (Res == MatchOperand_ParseFail) | |||
5843 | return Res; | |||
5844 | if (Res == MatchOperand_Success) { | |||
5845 | auto Size = Operands.size(); | |||
5846 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); | |||
5847 | assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT)(static_cast<void> (0)); | |||
5848 | Op.setImm(Format); | |||
5849 | } | |||
5850 | return MatchOperand_Success; | |||
5851 | } | |||
5852 | ||||
5853 | if (isId("format") && peekToken().is(AsmToken::Colon)) { | |||
5854 | Error(getLoc(), "duplicate format"); | |||
5855 | return MatchOperand_ParseFail; | |||
5856 | } | |||
5857 | return MatchOperand_Success; | |||
5858 | } | |||
5859 | ||||
5860 | //===----------------------------------------------------------------------===// | |||
5861 | // ds | |||
5862 | //===----------------------------------------------------------------------===// | |||
5863 | ||||
5864 | void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, | |||
5865 | const OperandVector &Operands) { | |||
5866 | OptionalImmIndexMap OptionalIdx; | |||
5867 | ||||
5868 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
5869 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
5870 | ||||
5871 | // Add the register arguments | |||
5872 | if (Op.isReg()) { | |||
5873 | Op.addRegOperands(Inst, 1); | |||
5874 | continue; | |||
5875 | } | |||
5876 | ||||
5877 | // Handle optional arguments | |||
5878 | OptionalIdx[Op.getImmTy()] = i; | |||
5879 | } | |||
5880 | ||||
5881 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); | |||
5882 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); | |||
5883 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); | |||
5884 | ||||
5885 | Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 | |||
5886 | } | |||
5887 | ||||
5888 | void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, | |||
5889 | bool IsGdsHardcoded) { | |||
5890 | OptionalImmIndexMap OptionalIdx; | |||
5891 | ||||
5892 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
5893 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
5894 | ||||
5895 | // Add the register arguments | |||
5896 | if (Op.isReg()) { | |||
5897 | Op.addRegOperands(Inst, 1); | |||
5898 | continue; | |||
5899 | } | |||
5900 | ||||
5901 | if (Op.isToken() && Op.getToken() == "gds") { | |||
5902 | IsGdsHardcoded = true; | |||
5903 | continue; | |||
5904 | } | |||
5905 | ||||
5906 | // Handle optional arguments | |||
5907 | OptionalIdx[Op.getImmTy()] = i; | |||
5908 | } | |||
5909 | ||||
5910 | AMDGPUOperand::ImmTy OffsetType = | |||
5911 | (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || | |||
5912 | Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || | |||
5913 | Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : | |||
5914 | AMDGPUOperand::ImmTyOffset; | |||
5915 | ||||
5916 | addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); | |||
5917 | ||||
5918 | if (!IsGdsHardcoded) { | |||
5919 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); | |||
5920 | } | |||
5921 | Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 | |||
5922 | } | |||
5923 | ||||
5924 | void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { | |||
5925 | OptionalImmIndexMap OptionalIdx; | |||
5926 | ||||
5927 | unsigned OperandIdx[4]; | |||
5928 | unsigned EnMask = 0; | |||
5929 | int SrcIdx = 0; | |||
5930 | ||||
5931 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
| ||||
5932 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
5933 | ||||
5934 | // Add the register arguments | |||
5935 | if (Op.isReg()) { | |||
5936 | assert(SrcIdx < 4)(static_cast<void> (0)); | |||
5937 | OperandIdx[SrcIdx] = Inst.size(); | |||
5938 | Op.addRegOperands(Inst, 1); | |||
5939 | ++SrcIdx; | |||
5940 | continue; | |||
5941 | } | |||
5942 | ||||
5943 | if (Op.isOff()) { | |||
5944 | assert(SrcIdx < 4)(static_cast<void> (0)); | |||
5945 | OperandIdx[SrcIdx] = Inst.size(); | |||
5946 | Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); | |||
5947 | ++SrcIdx; | |||
5948 | continue; | |||
5949 | } | |||
5950 | ||||
5951 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { | |||
5952 | Op.addImmOperands(Inst, 1); | |||
5953 | continue; | |||
5954 | } | |||
5955 | ||||
5956 | if (Op.isToken() && Op.getToken() == "done") | |||
5957 | continue; | |||
5958 | ||||
5959 | // Handle optional arguments | |||
5960 | OptionalIdx[Op.getImmTy()] = i; | |||
5961 | } | |||
5962 | ||||
5963 | assert(SrcIdx == 4)(static_cast<void> (0)); | |||
5964 | ||||
5965 | bool Compr = false; | |||
5966 | if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { | |||
5967 | Compr = true; | |||
5968 | Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); | |||
| ||||
5969 | Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); | |||
5970 | Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); | |||
5971 | } | |||
5972 | ||||
5973 | for (auto i = 0; i < SrcIdx; ++i) { | |||
5974 | if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { | |||
5975 | EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); | |||
5976 | } | |||
5977 | } | |||
5978 | ||||
5979 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); | |||
5980 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); | |||
5981 | ||||
5982 | Inst.addOperand(MCOperand::createImm(EnMask)); | |||
5983 | } | |||
5984 | ||||
5985 | //===----------------------------------------------------------------------===// | |||
5986 | // s_waitcnt | |||
5987 | //===----------------------------------------------------------------------===// | |||
5988 | ||||
5989 | static bool | |||
5990 | encodeCnt( | |||
5991 | const AMDGPU::IsaVersion ISA, | |||
5992 | int64_t &IntVal, | |||
5993 | int64_t CntVal, | |||
5994 | bool Saturate, | |||
5995 | unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), | |||
5996 | unsigned (*decode)(const IsaVersion &Version, unsigned)) | |||
5997 | { | |||
5998 | bool Failed = false; | |||
5999 | ||||
6000 | IntVal = encode(ISA, IntVal, CntVal); | |||
6001 | if (CntVal != decode(ISA, IntVal)) { | |||
6002 | if (Saturate) { | |||
6003 | IntVal = encode(ISA, IntVal, -1); | |||
6004 | } else { | |||
6005 | Failed = true; | |||
6006 | } | |||
6007 | } | |||
6008 | return Failed; | |||
6009 | } | |||
6010 | ||||
6011 | bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { | |||
6012 | ||||
6013 | SMLoc CntLoc = getLoc(); | |||
6014 | StringRef CntName = getTokenStr(); | |||
6015 | ||||
6016 | if (!skipToken(AsmToken::Identifier, "expected a counter name") || | |||
6017 | !skipToken(AsmToken::LParen, "expected a left parenthesis")) | |||
6018 | return false; | |||
6019 | ||||
6020 | int64_t CntVal; | |||
6021 | SMLoc ValLoc = getLoc(); | |||
6022 | if (!parseExpr(CntVal)) | |||
6023 | return false; | |||
6024 | ||||
6025 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); | |||
6026 | ||||
6027 | bool Failed = true; | |||
6028 | bool Sat = CntName.endswith("_sat"); | |||
6029 | ||||
6030 | if (CntName == "vmcnt" || CntName == "vmcnt_sat") { | |||
6031 | Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); | |||
6032 | } else if (CntName == "expcnt" || CntName == "expcnt_sat") { | |||
6033 | Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); | |||
6034 | } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { | |||
6035 | Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); | |||
6036 | } else { | |||
6037 | Error(CntLoc, "invalid counter name " + CntName); | |||
6038 | return false; | |||
6039 | } | |||
6040 | ||||
6041 | if (Failed) { | |||
6042 | Error(ValLoc, "too large value for " + CntName); | |||
6043 | return false; | |||
6044 | } | |||
6045 | ||||
6046 | if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) | |||
6047 | return false; | |||
6048 | ||||
6049 | if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { | |||
6050 | if (isToken(AsmToken::EndOfStatement)) { | |||
6051 | Error(getLoc(), "expected a counter name"); | |||
6052 | return false; | |||
6053 | } | |||
6054 | } | |||
6055 | ||||
6056 | return true; | |||
6057 | } | |||
6058 | ||||
6059 | OperandMatchResultTy | |||
6060 | AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { | |||
6061 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); | |||
6062 | int64_t Waitcnt = getWaitcntBitMask(ISA); | |||
6063 | SMLoc S = getLoc(); | |||
6064 | ||||
6065 | if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { | |||
6066 | while (!isToken(AsmToken::EndOfStatement)) { | |||
6067 | if (!parseCnt(Waitcnt)) | |||
6068 | return MatchOperand_ParseFail; | |||
6069 | } | |||
6070 | } else { | |||
6071 | if (!parseExpr(Waitcnt)) | |||
6072 | return MatchOperand_ParseFail; | |||
6073 | } | |||
6074 | ||||
6075 | Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); | |||
6076 | return MatchOperand_Success; | |||
6077 | } | |||
6078 | ||||
6079 | bool | |||
6080 | AMDGPUOperand::isSWaitCnt() const { | |||
6081 | return isImm(); | |||
6082 | } | |||
6083 | ||||
6084 | //===----------------------------------------------------------------------===// | |||
6085 | // hwreg | |||
6086 | //===----------------------------------------------------------------------===// | |||
6087 | ||||
6088 | bool | |||
6089 | AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, | |||
6090 | OperandInfoTy &Offset, | |||
6091 | OperandInfoTy &Width) { | |||
6092 | using namespace llvm::AMDGPU::Hwreg; | |||
6093 | ||||
6094 | // The register may be specified by name or using a numeric code | |||
6095 | HwReg.Loc = getLoc(); | |||
6096 | if (isToken(AsmToken::Identifier) && | |||
6097 | (HwReg.Id = getHwregId(getTokenStr())) >= 0) { | |||
6098 | HwReg.IsSymbolic = true; | |||
6099 | lex(); // skip register name | |||
6100 | } else if (!parseExpr(HwReg.Id, "a register name")) { | |||
6101 | return false; | |||
6102 | } | |||
6103 | ||||
6104 | if (trySkipToken(AsmToken::RParen)) | |||
6105 | return true; | |||
6106 | ||||
6107 | // parse optional params | |||
6108 | if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) | |||
6109 | return false; | |||
6110 | ||||
6111 | Offset.Loc = getLoc(); | |||
6112 | if (!parseExpr(Offset.Id)) | |||
6113 | return false; | |||
6114 | ||||
6115 | if (!skipToken(AsmToken::Comma, "expected a comma")) | |||
6116 | return false; | |||
6117 | ||||
6118 | Width.Loc = getLoc(); | |||
6119 | return parseExpr(Width.Id) && | |||
6120 | skipToken(AsmToken::RParen, "expected a closing parenthesis"); | |||
6121 | } | |||
6122 | ||||
6123 | bool | |||
6124 | AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, | |||
6125 | const OperandInfoTy &Offset, | |||
6126 | const OperandInfoTy &Width) { | |||
6127 | ||||
6128 | using namespace llvm::AMDGPU::Hwreg; | |||
6129 | ||||
6130 | if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { | |||
6131 | Error(HwReg.Loc, | |||
6132 | "specified hardware register is not supported on this GPU"); | |||
6133 | return false; | |||
6134 | } | |||
6135 | if (!isValidHwreg(HwReg.Id)) { | |||
6136 | Error(HwReg.Loc, | |||
6137 | "invalid code of hardware register: only 6-bit values are legal"); | |||
6138 | return false; | |||
6139 | } | |||
6140 | if (!isValidHwregOffset(Offset.Id)) { | |||
6141 | Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); | |||
6142 | return false; | |||
6143 | } | |||
6144 | if (!isValidHwregWidth(Width.Id)) { | |||
6145 | Error(Width.Loc, | |||
6146 | "invalid bitfield width: only values from 1 to 32 are legal"); | |||
6147 | return false; | |||
6148 | } | |||
6149 | return true; | |||
6150 | } | |||
6151 | ||||
6152 | OperandMatchResultTy | |||
6153 | AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { | |||
6154 | using namespace llvm::AMDGPU::Hwreg; | |||
6155 | ||||
6156 | int64_t ImmVal = 0; | |||
6157 | SMLoc Loc = getLoc(); | |||
6158 | ||||
6159 | if (trySkipId("hwreg", AsmToken::LParen)) { | |||
6160 | OperandInfoTy HwReg(ID_UNKNOWN_); | |||
6161 | OperandInfoTy Offset(OFFSET_DEFAULT_); | |||
6162 | OperandInfoTy Width(WIDTH_DEFAULT_); | |||
6163 | if (parseHwregBody(HwReg, Offset, Width) && | |||
6164 | validateHwreg(HwReg, Offset, Width)) { | |||
6165 | ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); | |||
6166 | } else { | |||
6167 | return MatchOperand_ParseFail; | |||
6168 | } | |||
6169 | } else if (parseExpr(ImmVal, "a hwreg macro")) { | |||
6170 | if (ImmVal < 0 || !isUInt<16>(ImmVal)) { | |||
6171 | Error(Loc, "invalid immediate: only 16-bit values are legal"); | |||
6172 | return MatchOperand_ParseFail; | |||
6173 | } | |||
6174 | } else { | |||
6175 | return MatchOperand_ParseFail; | |||
6176 | } | |||
6177 | ||||
6178 | Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); | |||
6179 | return MatchOperand_Success; | |||
6180 | } | |||
6181 | ||||
6182 | bool AMDGPUOperand::isHwreg() const { | |||
6183 | return isImmTy(ImmTyHwreg); | |||
6184 | } | |||
6185 | ||||
6186 | //===----------------------------------------------------------------------===// | |||
6187 | // sendmsg | |||
6188 | //===----------------------------------------------------------------------===// | |||
6189 | ||||
6190 | bool | |||
6191 | AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, | |||
6192 | OperandInfoTy &Op, | |||
6193 | OperandInfoTy &Stream) { | |||
6194 | using namespace llvm::AMDGPU::SendMsg; | |||
6195 | ||||
6196 | Msg.Loc = getLoc(); | |||
6197 | if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { | |||
6198 | Msg.IsSymbolic = true; | |||
6199 | lex(); // skip message name | |||
6200 | } else if (!parseExpr(Msg.Id, "a message name")) { | |||
6201 | return false; | |||
6202 | } | |||
6203 | ||||
6204 | if (trySkipToken(AsmToken::Comma)) { | |||
6205 | Op.IsDefined = true; | |||
6206 | Op.Loc = getLoc(); | |||
6207 | if (isToken(AsmToken::Identifier) && | |||
6208 | (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { | |||
6209 | lex(); // skip operation name | |||
6210 | } else if (!parseExpr(Op.Id, "an operation name")) { | |||
6211 | return false; | |||
6212 | } | |||
6213 | ||||
6214 | if (trySkipToken(AsmToken::Comma)) { | |||
6215 | Stream.IsDefined = true; | |||
6216 | Stream.Loc = getLoc(); | |||
6217 | if (!parseExpr(Stream.Id)) | |||
6218 | return false; | |||
6219 | } | |||
6220 | } | |||
6221 | ||||
6222 | return skipToken(AsmToken::RParen, "expected a closing parenthesis"); | |||
6223 | } | |||
6224 | ||||
6225 | bool | |||
6226 | AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, | |||
6227 | const OperandInfoTy &Op, | |||
6228 | const OperandInfoTy &Stream) { | |||
6229 | using namespace llvm::AMDGPU::SendMsg; | |||
6230 | ||||
6231 | // Validation strictness depends on whether message is specified | |||
6232 | // in a symbolc or in a numeric form. In the latter case | |||
6233 | // only encoding possibility is checked. | |||
6234 | bool Strict = Msg.IsSymbolic; | |||
6235 | ||||
6236 | if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { | |||
6237 | Error(Msg.Loc, "invalid message id"); | |||
6238 | return false; | |||
6239 | } | |||
6240 | if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { | |||
6241 | if (Op.IsDefined) { | |||
6242 | Error(Op.Loc, "message does not support operations"); | |||
6243 | } else { | |||
6244 | Error(Msg.Loc, "missing message operation"); | |||
6245 | } | |||
6246 | return false; | |||
6247 | } | |||
6248 | if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { | |||
6249 | Error(Op.Loc, "invalid operation id"); | |||
6250 | return false; | |||
6251 | } | |||
6252 | if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { | |||
6253 | Error(Stream.Loc, "message operation does not support streams"); | |||
6254 | return false; | |||
6255 | } | |||
6256 | if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { | |||
6257 | Error(Stream.Loc, "invalid message stream id"); | |||
6258 | return false; | |||
6259 | } | |||
6260 | return true; | |||
6261 | } | |||
6262 | ||||
6263 | OperandMatchResultTy | |||
6264 | AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { | |||
6265 | using namespace llvm::AMDGPU::SendMsg; | |||
6266 | ||||
6267 | int64_t ImmVal = 0; | |||
6268 | SMLoc Loc = getLoc(); | |||
6269 | ||||
6270 | if (trySkipId("sendmsg", AsmToken::LParen)) { | |||
6271 | OperandInfoTy Msg(ID_UNKNOWN_); | |||
6272 | OperandInfoTy Op(OP_NONE_); | |||
6273 | OperandInfoTy Stream(STREAM_ID_NONE_); | |||
6274 | if (parseSendMsgBody(Msg, Op, Stream) && | |||
6275 | validateSendMsg(Msg, Op, Stream)) { | |||
6276 | ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); | |||
6277 | } else { | |||
6278 | return MatchOperand_ParseFail; | |||
6279 | } | |||
6280 | } else if (parseExpr(ImmVal, "a sendmsg macro")) { | |||
6281 | if (ImmVal < 0 || !isUInt<16>(ImmVal)) { | |||
6282 | Error(Loc, "invalid immediate: only 16-bit values are legal"); | |||
6283 | return MatchOperand_ParseFail; | |||
6284 | } | |||
6285 | } else { | |||
6286 | return MatchOperand_ParseFail; | |||
6287 | } | |||
6288 | ||||
6289 | Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); | |||
6290 | return MatchOperand_Success; | |||
6291 | } | |||
6292 | ||||
6293 | bool AMDGPUOperand::isSendMsg() const { | |||
6294 | return isImmTy(ImmTySendMsg); | |||
6295 | } | |||
6296 | ||||
6297 | //===----------------------------------------------------------------------===// | |||
6298 | // v_interp | |||
6299 | //===----------------------------------------------------------------------===// | |||
6300 | ||||
6301 | OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { | |||
6302 | StringRef Str; | |||
6303 | SMLoc S = getLoc(); | |||
6304 | ||||
6305 | if (!parseId(Str)) | |||
6306 | return MatchOperand_NoMatch; | |||
6307 | ||||
6308 | int Slot = StringSwitch<int>(Str) | |||
6309 | .Case("p10", 0) | |||
6310 | .Case("p20", 1) | |||
6311 | .Case("p0", 2) | |||
6312 | .Default(-1); | |||
6313 | ||||
6314 | if (Slot == -1) { | |||
6315 | Error(S, "invalid interpolation slot"); | |||
6316 | return MatchOperand_ParseFail; | |||
6317 | } | |||
6318 | ||||
6319 | Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, | |||
6320 | AMDGPUOperand::ImmTyInterpSlot)); | |||
6321 | return MatchOperand_Success; | |||
6322 | } | |||
6323 | ||||
6324 | OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { | |||
6325 | StringRef Str; | |||
6326 | SMLoc S = getLoc(); | |||
6327 | ||||
6328 | if (!parseId(Str)) | |||
6329 | return MatchOperand_NoMatch; | |||
6330 | ||||
6331 | if (!Str.startswith("attr")) { | |||
6332 | Error(S, "invalid interpolation attribute"); | |||
6333 | return MatchOperand_ParseFail; | |||
6334 | } | |||
6335 | ||||
6336 | StringRef Chan = Str.take_back(2); | |||
6337 | int AttrChan = StringSwitch<int>(Chan) | |||
6338 | .Case(".x", 0) | |||
6339 | .Case(".y", 1) | |||
6340 | .Case(".z", 2) | |||
6341 | .Case(".w", 3) | |||
6342 | .Default(-1); | |||
6343 | if (AttrChan == -1) { | |||
6344 | Error(S, "invalid or missing interpolation attribute channel"); | |||
6345 | return MatchOperand_ParseFail; | |||
6346 | } | |||
6347 | ||||
6348 | Str = Str.drop_back(2).drop_front(4); | |||
6349 | ||||
6350 | uint8_t Attr; | |||
6351 | if (Str.getAsInteger(10, Attr)) { | |||
6352 | Error(S, "invalid or missing interpolation attribute number"); | |||
6353 | return MatchOperand_ParseFail; | |||
6354 | } | |||
6355 | ||||
6356 | if (Attr > 63) { | |||
6357 | Error(S, "out of bounds interpolation attribute number"); | |||
6358 | return MatchOperand_ParseFail; | |||
6359 | } | |||
6360 | ||||
6361 | SMLoc SChan = SMLoc::getFromPointer(Chan.data()); | |||
6362 | ||||
6363 | Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, | |||
6364 | AMDGPUOperand::ImmTyInterpAttr)); | |||
6365 | Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, | |||
6366 | AMDGPUOperand::ImmTyAttrChan)); | |||
6367 | return MatchOperand_Success; | |||
6368 | } | |||
6369 | ||||
6370 | //===----------------------------------------------------------------------===// | |||
6371 | // exp | |||
6372 | //===----------------------------------------------------------------------===// | |||
6373 | ||||
6374 | OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { | |||
6375 | using namespace llvm::AMDGPU::Exp; | |||
6376 | ||||
6377 | StringRef Str; | |||
6378 | SMLoc S = getLoc(); | |||
6379 | ||||
6380 | if (!parseId(Str)) | |||
6381 | return MatchOperand_NoMatch; | |||
6382 | ||||
6383 | unsigned Id = getTgtId(Str); | |||
6384 | if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { | |||
6385 | Error(S, (Id == ET_INVALID) ? | |||
6386 | "invalid exp target" : | |||
6387 | "exp target is not supported on this GPU"); | |||
6388 | return MatchOperand_ParseFail; | |||
6389 | } | |||
6390 | ||||
6391 | Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, | |||
6392 | AMDGPUOperand::ImmTyExpTgt)); | |||
6393 | return MatchOperand_Success; | |||
6394 | } | |||
6395 | ||||
6396 | //===----------------------------------------------------------------------===// | |||
6397 | // parser helpers | |||
6398 | //===----------------------------------------------------------------------===// | |||
6399 | ||||
6400 | bool | |||
6401 | AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { | |||
6402 | return Token.is(AsmToken::Identifier) && Token.getString() == Id; | |||
6403 | } | |||
6404 | ||||
6405 | bool | |||
6406 | AMDGPUAsmParser::isId(const StringRef Id) const { | |||
6407 | return isId(getToken(), Id); | |||
6408 | } | |||
6409 | ||||
6410 | bool | |||
6411 | AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { | |||
6412 | return getTokenKind() == Kind; | |||
6413 | } | |||
6414 | ||||
6415 | bool | |||
6416 | AMDGPUAsmParser::trySkipId(const StringRef Id) { | |||
6417 | if (isId(Id)) { | |||
6418 | lex(); | |||
6419 | return true; | |||
6420 | } | |||
6421 | return false; | |||
6422 | } | |||
6423 | ||||
6424 | bool | |||
6425 | AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { | |||
6426 | if (isToken(AsmToken::Identifier)) { | |||
6427 | StringRef Tok = getTokenStr(); | |||
6428 | if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { | |||
6429 | lex(); | |||
6430 | return true; | |||
6431 | } | |||
6432 | } | |||
6433 | return false; | |||
6434 | } | |||
6435 | ||||
6436 | bool | |||
6437 | AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { | |||
6438 | if (isId(Id) && peekToken().is(Kind)) { | |||
6439 | lex(); | |||
6440 | lex(); | |||
6441 | return true; | |||
6442 | } | |||
6443 | return false; | |||
6444 | } | |||
6445 | ||||
6446 | bool | |||
6447 | AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { | |||
6448 | if (isToken(Kind)) { | |||
6449 | lex(); | |||
6450 | return true; | |||
6451 | } | |||
6452 | return false; | |||
6453 | } | |||
6454 | ||||
6455 | bool | |||
6456 | AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, | |||
6457 | const StringRef ErrMsg) { | |||
6458 | if (!trySkipToken(Kind)) { | |||
6459 | Error(getLoc(), ErrMsg); | |||
6460 | return false; | |||
6461 | } | |||
6462 | return true; | |||
6463 | } | |||
6464 | ||||
6465 | bool | |||
6466 | AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { | |||
6467 | SMLoc S = getLoc(); | |||
6468 | ||||
6469 | const MCExpr *Expr; | |||
6470 | if (Parser.parseExpression(Expr)) | |||
6471 | return false; | |||
6472 | ||||
6473 | if (Expr->evaluateAsAbsolute(Imm)) | |||
6474 | return true; | |||
6475 | ||||
6476 | if (Expected.empty()) { | |||
6477 | Error(S, "expected absolute expression"); | |||
6478 | } else { | |||
6479 | Error(S, Twine("expected ", Expected) + | |||
6480 | Twine(" or an absolute expression")); | |||
6481 | } | |||
6482 | return false; | |||
6483 | } | |||
6484 | ||||
6485 | bool | |||
6486 | AMDGPUAsmParser::parseExpr(OperandVector &Operands) { | |||
6487 | SMLoc S = getLoc(); | |||
6488 | ||||
6489 | const MCExpr *Expr; | |||
6490 | if (Parser.parseExpression(Expr)) | |||
6491 | return false; | |||
6492 | ||||
6493 | int64_t IntVal; | |||
6494 | if (Expr->evaluateAsAbsolute(IntVal)) { | |||
6495 | Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); | |||
6496 | } else { | |||
6497 | Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); | |||
6498 | } | |||
6499 | return true; | |||
6500 | } | |||
6501 | ||||
6502 | bool | |||
6503 | AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { | |||
6504 | if (isToken(AsmToken::String)) { | |||
6505 | Val = getToken().getStringContents(); | |||
6506 | lex(); | |||
6507 | return true; | |||
6508 | } else { | |||
6509 | Error(getLoc(), ErrMsg); | |||
6510 | return false; | |||
6511 | } | |||
6512 | } | |||
6513 | ||||
6514 | bool | |||
6515 | AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { | |||
6516 | if (isToken(AsmToken::Identifier)) { | |||
6517 | Val = getTokenStr(); | |||
6518 | lex(); | |||
6519 | return true; | |||
6520 | } else { | |||
6521 | if (!ErrMsg.empty()) | |||
6522 | Error(getLoc(), ErrMsg); | |||
6523 | return false; | |||
6524 | } | |||
6525 | } | |||
6526 | ||||
6527 | AsmToken | |||
6528 | AMDGPUAsmParser::getToken() const { | |||
6529 | return Parser.getTok(); | |||
6530 | } | |||
6531 | ||||
6532 | AsmToken | |||
6533 | AMDGPUAsmParser::peekToken() { | |||
6534 | return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); | |||
6535 | } | |||
6536 | ||||
6537 | void | |||
6538 | AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { | |||
6539 | auto TokCount = getLexer().peekTokens(Tokens); | |||
6540 | ||||
6541 | for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) | |||
6542 | Tokens[Idx] = AsmToken(AsmToken::Error, ""); | |||
6543 | } | |||
6544 | ||||
6545 | AsmToken::TokenKind | |||
6546 | AMDGPUAsmParser::getTokenKind() const { | |||
6547 | return getLexer().getKind(); | |||
6548 | } | |||
6549 | ||||
6550 | SMLoc | |||
6551 | AMDGPUAsmParser::getLoc() const { | |||
6552 | return getToken().getLoc(); | |||
6553 | } | |||
6554 | ||||
6555 | StringRef | |||
6556 | AMDGPUAsmParser::getTokenStr() const { | |||
6557 | return getToken().getString(); | |||
6558 | } | |||
6559 | ||||
6560 | void | |||
6561 | AMDGPUAsmParser::lex() { | |||
6562 | Parser.Lex(); | |||
6563 | } | |||
6564 | ||||
6565 | SMLoc | |||
6566 | AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, | |||
6567 | const OperandVector &Operands) const { | |||
6568 | for (unsigned i = Operands.size() - 1; i > 0; --i) { | |||
6569 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
6570 | if (Test(Op)) | |||
6571 | return Op.getStartLoc(); | |||
6572 | } | |||
6573 | return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); | |||
6574 | } | |||
6575 | ||||
6576 | SMLoc | |||
6577 | AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, | |||
6578 | const OperandVector &Operands) const { | |||
6579 | auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; | |||
6580 | return getOperandLoc(Test, Operands); | |||
6581 | } | |||
6582 | ||||
6583 | SMLoc | |||
6584 | AMDGPUAsmParser::getRegLoc(unsigned Reg, | |||
6585 | const OperandVector &Operands) const { | |||
6586 | auto Test = [=](const AMDGPUOperand& Op) { | |||
6587 | return Op.isRegKind() && Op.getReg() == Reg; | |||
6588 | }; | |||
6589 | return getOperandLoc(Test, Operands); | |||
6590 | } | |||
6591 | ||||
6592 | SMLoc | |||
6593 | AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { | |||
6594 | auto Test = [](const AMDGPUOperand& Op) { | |||
6595 | return Op.IsImmKindLiteral() || Op.isExpr(); | |||
6596 | }; | |||
6597 | return getOperandLoc(Test, Operands); | |||
6598 | } | |||
6599 | ||||
6600 | SMLoc | |||
6601 | AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { | |||
6602 | auto Test = [](const AMDGPUOperand& Op) { | |||
6603 | return Op.isImmKindConst(); | |||
6604 | }; | |||
6605 | return getOperandLoc(Test, Operands); | |||
6606 | } | |||
6607 | ||||
6608 | //===----------------------------------------------------------------------===// | |||
6609 | // swizzle | |||
6610 | //===----------------------------------------------------------------------===// | |||
6611 | ||||
6612 | LLVM_READNONE__attribute__((__const__)) | |||
6613 | static unsigned | |||
6614 | encodeBitmaskPerm(const unsigned AndMask, | |||
6615 | const unsigned OrMask, | |||
6616 | const unsigned XorMask) { | |||
6617 | using namespace llvm::AMDGPU::Swizzle; | |||
6618 | ||||
6619 | return BITMASK_PERM_ENC | | |||
6620 | (AndMask << BITMASK_AND_SHIFT) | | |||
6621 | (OrMask << BITMASK_OR_SHIFT) | | |||
6622 | (XorMask << BITMASK_XOR_SHIFT); | |||
6623 | } | |||
6624 | ||||
6625 | bool | |||
6626 | AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, | |||
6627 | const unsigned MinVal, | |||
6628 | const unsigned MaxVal, | |||
6629 | const StringRef ErrMsg, | |||
6630 | SMLoc &Loc) { | |||
6631 | if (!skipToken(AsmToken::Comma, "expected a comma")) { | |||
6632 | return false; | |||
6633 | } | |||
6634 | Loc = getLoc(); | |||
6635 | if (!parseExpr(Op)) { | |||
6636 | return false; | |||
6637 | } | |||
6638 | if (Op < MinVal || Op > MaxVal) { | |||
6639 | Error(Loc, ErrMsg); | |||
6640 | return false; | |||
6641 | } | |||
6642 | ||||
6643 | return true; | |||
6644 | } | |||
6645 | ||||
6646 | bool | |||
6647 | AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, | |||
6648 | const unsigned MinVal, | |||
6649 | const unsigned MaxVal, | |||
6650 | const StringRef ErrMsg) { | |||
6651 | SMLoc Loc; | |||
6652 | for (unsigned i = 0; i < OpNum; ++i) { | |||
6653 | if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) | |||
6654 | return false; | |||
6655 | } | |||
6656 | ||||
6657 | return true; | |||
6658 | } | |||
6659 | ||||
6660 | bool | |||
6661 | AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { | |||
6662 | using namespace llvm::AMDGPU::Swizzle; | |||
6663 | ||||
6664 | int64_t Lane[LANE_NUM]; | |||
6665 | if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, | |||
6666 | "expected a 2-bit lane id")) { | |||
6667 | Imm = QUAD_PERM_ENC; | |||
6668 | for (unsigned I = 0; I < LANE_NUM; ++I) { | |||
6669 | Imm |= Lane[I] << (LANE_SHIFT * I); | |||
6670 | } | |||
6671 | return true; | |||
6672 | } | |||
6673 | return false; | |||
6674 | } | |||
6675 | ||||
6676 | bool | |||
6677 | AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { | |||
6678 | using namespace llvm::AMDGPU::Swizzle; | |||
6679 | ||||
6680 | SMLoc Loc; | |||
6681 | int64_t GroupSize; | |||
6682 | int64_t LaneIdx; | |||
6683 | ||||
6684 | if (!parseSwizzleOperand(GroupSize, | |||
6685 | 2, 32, | |||
6686 | "group size must be in the interval [2,32]", | |||
6687 | Loc)) { | |||
6688 | return false; | |||
6689 | } | |||
6690 | if (!isPowerOf2_64(GroupSize)) { | |||
6691 | Error(Loc, "group size must be a power of two"); | |||
6692 | return false; | |||
6693 | } | |||
6694 | if (parseSwizzleOperand(LaneIdx, | |||
6695 | 0, GroupSize - 1, | |||
6696 | "lane id must be in the interval [0,group size - 1]", | |||
6697 | Loc)) { | |||
6698 | Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); | |||
6699 | return true; | |||
6700 | } | |||
6701 | return false; | |||
6702 | } | |||
6703 | ||||
6704 | bool | |||
6705 | AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { | |||
6706 | using namespace llvm::AMDGPU::Swizzle; | |||
6707 | ||||
6708 | SMLoc Loc; | |||
6709 | int64_t GroupSize; | |||
6710 | ||||
6711 | if (!parseSwizzleOperand(GroupSize, | |||
6712 | 2, 32, | |||
6713 | "group size must be in the interval [2,32]", | |||
6714 | Loc)) { | |||
6715 | return false; | |||
6716 | } | |||
6717 | if (!isPowerOf2_64(GroupSize)) { | |||
6718 | Error(Loc, "group size must be a power of two"); | |||
6719 | return false; | |||
6720 | } | |||
6721 | ||||
6722 | Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); | |||
6723 | return true; | |||
6724 | } | |||
6725 | ||||
6726 | bool | |||
6727 | AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { | |||
6728 | using namespace llvm::AMDGPU::Swizzle; | |||
6729 | ||||
6730 | SMLoc Loc; | |||
6731 | int64_t GroupSize; | |||
6732 | ||||
6733 | if (!parseSwizzleOperand(GroupSize, | |||
6734 | 1, 16, | |||
6735 | "group size must be in the interval [1,16]", | |||
6736 | Loc)) { | |||
6737 | return false; | |||
6738 | } | |||
6739 | if (!isPowerOf2_64(GroupSize)) { | |||
6740 | Error(Loc, "group size must be a power of two"); | |||
6741 | return false; | |||
6742 | } | |||
6743 | ||||
6744 | Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); | |||
6745 | return true; | |||
6746 | } | |||
6747 | ||||
6748 | bool | |||
6749 | AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { | |||
6750 | using namespace llvm::AMDGPU::Swizzle; | |||
6751 | ||||
6752 | if (!skipToken(AsmToken::Comma, "expected a comma")) { | |||
6753 | return false; | |||
6754 | } | |||
6755 | ||||
6756 | StringRef Ctl; | |||
6757 | SMLoc StrLoc = getLoc(); | |||
6758 | if (!parseString(Ctl)) { | |||
6759 | return false; | |||
6760 | } | |||
6761 | if (Ctl.size() != BITMASK_WIDTH) { | |||
6762 | Error(StrLoc, "expected a 5-character mask"); | |||
6763 | return false; | |||
6764 | } | |||
6765 | ||||
6766 | unsigned AndMask = 0; | |||
6767 | unsigned OrMask = 0; | |||
6768 | unsigned XorMask = 0; | |||
6769 | ||||
6770 | for (size_t i = 0; i < Ctl.size(); ++i) { | |||
6771 | unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); | |||
6772 | switch(Ctl[i]) { | |||
6773 | default: | |||
6774 | Error(StrLoc, "invalid mask"); | |||
6775 | return false; | |||
6776 | case '0': | |||
6777 | break; | |||
6778 | case '1': | |||
6779 | OrMask |= Mask; | |||
6780 | break; | |||
6781 | case 'p': | |||
6782 | AndMask |= Mask; | |||
6783 | break; | |||
6784 | case 'i': | |||
6785 | AndMask |= Mask; | |||
6786 | XorMask |= Mask; | |||
6787 | break; | |||
6788 | } | |||
6789 | } | |||
6790 | ||||
6791 | Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); | |||
6792 | return true; | |||
6793 | } | |||
6794 | ||||
6795 | bool | |||
6796 | AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { | |||
6797 | ||||
6798 | SMLoc OffsetLoc = getLoc(); | |||
6799 | ||||
6800 | if (!parseExpr(Imm, "a swizzle macro")) { | |||
6801 | return false; | |||
6802 | } | |||
6803 | if (!isUInt<16>(Imm)) { | |||
6804 | Error(OffsetLoc, "expected a 16-bit offset"); | |||
6805 | return false; | |||
6806 | } | |||
6807 | return true; | |||
6808 | } | |||
6809 | ||||
6810 | bool | |||
6811 | AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { | |||
6812 | using namespace llvm::AMDGPU::Swizzle; | |||
6813 | ||||
6814 | if (skipToken(AsmToken::LParen, "expected a left parentheses")) { | |||
6815 | ||||
6816 | SMLoc ModeLoc = getLoc(); | |||
6817 | bool Ok = false; | |||
6818 | ||||
6819 | if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { | |||
6820 | Ok = parseSwizzleQuadPerm(Imm); | |||
6821 | } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { | |||
6822 | Ok = parseSwizzleBitmaskPerm(Imm); | |||
6823 | } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { | |||
6824 | Ok = parseSwizzleBroadcast(Imm); | |||
6825 | } else if (trySkipId(IdSymbolic[ID_SWAP])) { | |||
6826 | Ok = parseSwizzleSwap(Imm); | |||
6827 | } else if (trySkipId(IdSymbolic[ID_REVERSE])) { | |||
6828 | Ok = parseSwizzleReverse(Imm); | |||
6829 | } else { | |||
6830 | Error(ModeLoc, "expected a swizzle mode"); | |||
6831 | } | |||
6832 | ||||
6833 | return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); | |||
6834 | } | |||
6835 | ||||
6836 | return false; | |||
6837 | } | |||
6838 | ||||
6839 | OperandMatchResultTy | |||
6840 | AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { | |||
6841 | SMLoc S = getLoc(); | |||
6842 | int64_t Imm = 0; | |||
6843 | ||||
6844 | if (trySkipId("offset")) { | |||
6845 | ||||
6846 | bool Ok = false; | |||
6847 | if (skipToken(AsmToken::Colon, "expected a colon")) { | |||
6848 | if (trySkipId("swizzle")) { | |||
6849 | Ok = parseSwizzleMacro(Imm); | |||
6850 | } else { | |||
6851 | Ok = parseSwizzleOffset(Imm); | |||
6852 | } | |||
6853 | } | |||
6854 | ||||
6855 | Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); | |||
6856 | ||||
6857 | return Ok? MatchOperand_Success : MatchOperand_ParseFail; | |||
6858 | } else { | |||
6859 | // Swizzle "offset" operand is optional. | |||
6860 | // If it is omitted, try parsing other optional operands. | |||
6861 | return parseOptionalOpr(Operands); | |||
6862 | } | |||
6863 | } | |||
6864 | ||||
6865 | bool | |||
6866 | AMDGPUOperand::isSwizzle() const { | |||
6867 | return isImmTy(ImmTySwizzle); | |||
6868 | } | |||
6869 | ||||
6870 | //===----------------------------------------------------------------------===// | |||
6871 | // VGPR Index Mode | |||
6872 | //===----------------------------------------------------------------------===// | |||
6873 | ||||
6874 | int64_t AMDGPUAsmParser::parseGPRIdxMacro() { | |||
6875 | ||||
6876 | using namespace llvm::AMDGPU::VGPRIndexMode; | |||
6877 | ||||
6878 | if (trySkipToken(AsmToken::RParen)) { | |||
6879 | return OFF; | |||
6880 | } | |||
6881 | ||||
6882 | int64_t Imm = 0; | |||
6883 | ||||
6884 | while (true) { | |||
6885 | unsigned Mode = 0; | |||
6886 | SMLoc S = getLoc(); | |||
6887 | ||||
6888 | for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { | |||
6889 | if (trySkipId(IdSymbolic[ModeId])) { | |||
6890 | Mode = 1 << ModeId; | |||
6891 | break; | |||
6892 | } | |||
6893 | } | |||
6894 | ||||
6895 | if (Mode == 0) { | |||
6896 | Error(S, (Imm == 0)? | |||
6897 | "expected a VGPR index mode or a closing parenthesis" : | |||
6898 | "expected a VGPR index mode"); | |||
6899 | return UNDEF; | |||
6900 | } | |||
6901 | ||||
6902 | if (Imm & Mode) { | |||
6903 | Error(S, "duplicate VGPR index mode"); | |||
6904 | return UNDEF; | |||
6905 | } | |||
6906 | Imm |= Mode; | |||
6907 | ||||
6908 | if (trySkipToken(AsmToken::RParen)) | |||
6909 | break; | |||
6910 | if (!skipToken(AsmToken::Comma, | |||
6911 | "expected a comma or a closing parenthesis")) | |||
6912 | return UNDEF; | |||
6913 | } | |||
6914 | ||||
6915 | return Imm; | |||
6916 | } | |||
6917 | ||||
6918 | OperandMatchResultTy | |||
6919 | AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { | |||
6920 | ||||
6921 | using namespace llvm::AMDGPU::VGPRIndexMode; | |||
6922 | ||||
6923 | int64_t Imm = 0; | |||
6924 | SMLoc S = getLoc(); | |||
6925 | ||||
6926 | if (trySkipId("gpr_idx", AsmToken::LParen)) { | |||
6927 | Imm = parseGPRIdxMacro(); | |||
6928 | if (Imm == UNDEF) | |||
6929 | return MatchOperand_ParseFail; | |||
6930 | } else { | |||
6931 | if (getParser().parseAbsoluteExpression(Imm)) | |||
6932 | return MatchOperand_ParseFail; | |||
6933 | if (Imm < 0 || !isUInt<4>(Imm)) { | |||
6934 | Error(S, "invalid immediate: only 4-bit values are legal"); | |||
6935 | return MatchOperand_ParseFail; | |||
6936 | } | |||
6937 | } | |||
6938 | ||||
6939 | Operands.push_back( | |||
6940 | AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); | |||
6941 | return MatchOperand_Success; | |||
6942 | } | |||
6943 | ||||
6944 | bool AMDGPUOperand::isGPRIdxMode() const { | |||
6945 | return isImmTy(ImmTyGprIdxMode); | |||
6946 | } | |||
6947 | ||||
6948 | //===----------------------------------------------------------------------===// | |||
6949 | // sopp branch targets | |||
6950 | //===----------------------------------------------------------------------===// | |||
6951 | ||||
6952 | OperandMatchResultTy | |||
6953 | AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { | |||
6954 | ||||
6955 | // Make sure we are not parsing something | |||
6956 | // that looks like a label or an expression but is not. | |||
6957 | // This will improve error messages. | |||
6958 | if (isRegister() || isModifier()) | |||
6959 | return MatchOperand_NoMatch; | |||
6960 | ||||
6961 | if (!parseExpr(Operands)) | |||
6962 | return MatchOperand_ParseFail; | |||
6963 | ||||
6964 | AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); | |||
6965 | assert(Opr.isImm() || Opr.isExpr())(static_cast<void> (0)); | |||
6966 | SMLoc Loc = Opr.getStartLoc(); | |||
6967 | ||||
6968 | // Currently we do not support arbitrary expressions as branch targets. | |||
6969 | // Only labels and absolute expressions are accepted. | |||
6970 | if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { | |||
6971 | Error(Loc, "expected an absolute expression or a label"); | |||
6972 | } else if (Opr.isImm() && !Opr.isS16Imm()) { | |||
6973 | Error(Loc, "expected a 16-bit signed jump offset"); | |||
6974 | } | |||
6975 | ||||
6976 | return MatchOperand_Success; | |||
6977 | } | |||
6978 | ||||
6979 | //===----------------------------------------------------------------------===// | |||
6980 | // Boolean holding registers | |||
6981 | //===----------------------------------------------------------------------===// | |||
6982 | ||||
6983 | OperandMatchResultTy | |||
6984 | AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { | |||
6985 | return parseReg(Operands); | |||
6986 | } | |||
6987 | ||||
6988 | //===----------------------------------------------------------------------===// | |||
6989 | // mubuf | |||
6990 | //===----------------------------------------------------------------------===// | |||
6991 | ||||
6992 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { | |||
6993 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); | |||
6994 | } | |||
6995 | ||||
6996 | void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, | |||
6997 | const OperandVector &Operands, | |||
6998 | bool IsAtomic, | |||
6999 | bool IsLds) { | |||
7000 | bool IsLdsOpcode = IsLds; | |||
7001 | bool HasLdsModifier = false; | |||
7002 | OptionalImmIndexMap OptionalIdx; | |||
7003 | unsigned FirstOperandIdx = 1; | |||
7004 | bool IsAtomicReturn = false; | |||
7005 | ||||
7006 | if (IsAtomic) { | |||
7007 | for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { | |||
7008 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
7009 | if (!Op.isCPol()) | |||
7010 | continue; | |||
7011 | IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; | |||
7012 | break; | |||
7013 | } | |||
7014 | ||||
7015 | if (!IsAtomicReturn) { | |||
7016 | int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); | |||
7017 | if (NewOpc != -1) | |||
7018 | Inst.setOpcode(NewOpc); | |||
7019 | } | |||
7020 | ||||
7021 | IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & | |||
7022 | SIInstrFlags::IsAtomicRet; | |||
7023 | } | |||
7024 | ||||
7025 | for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { | |||
7026 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
7027 | ||||
7028 | // Add the register arguments | |||
7029 | if (Op.isReg()) { | |||
7030 | Op.addRegOperands(Inst, 1); | |||
7031 | // Insert a tied src for atomic return dst. | |||
7032 | // This cannot be postponed as subsequent calls to | |||
7033 | // addImmOperands rely on correct number of MC operands. | |||
7034 | if (IsAtomicReturn && i == FirstOperandIdx) | |||
7035 | Op.addRegOperands(Inst, 1); | |||
7036 | continue; | |||
7037 | } | |||
7038 | ||||
7039 | // Handle the case where soffset is an immediate | |||
7040 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { | |||
7041 | Op.addImmOperands(Inst, 1); | |||
7042 | continue; | |||
7043 | } | |||
7044 | ||||
7045 | HasLdsModifier |= Op.isLDS(); | |||
7046 | ||||
7047 | // Handle tokens like 'offen' which are sometimes hard-coded into the | |||
7048 | // asm string. There are no MCInst operands for these. | |||
7049 | if (Op.isToken()) { | |||
7050 | continue; | |||
7051 | } | |||
7052 | assert(Op.isImm())(static_cast<void> (0)); | |||
7053 | ||||
7054 | // Handle optional arguments | |||
7055 | OptionalIdx[Op.getImmTy()] = i; | |||
7056 | } | |||
7057 | ||||
7058 | // This is a workaround for an llvm quirk which may result in an | |||
7059 | // incorrect instruction selection. Lds and non-lds versions of | |||
7060 | // MUBUF instructions are identical except that lds versions | |||
7061 | // have mandatory 'lds' modifier. However this modifier follows | |||
7062 | // optional modifiers and llvm asm matcher regards this 'lds' | |||
7063 | // modifier as an optional one. As a result, an lds version | |||
7064 | // of opcode may be selected even if it has no 'lds' modifier. | |||
7065 | if (IsLdsOpcode && !HasLdsModifier) { | |||
7066 | int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); | |||
7067 | if (NoLdsOpcode != -1) { // Got lds version - correct it. | |||
7068 | Inst.setOpcode(NoLdsOpcode); | |||
7069 | IsLdsOpcode = false; | |||
7070 | } | |||
7071 | } | |||
7072 | ||||
7073 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); | |||
7074 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); | |||
7075 | ||||
7076 | if (!IsLdsOpcode) { // tfe is not legal with lds opcodes | |||
7077 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); | |||
7078 | } | |||
7079 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); | |||
7080 | } | |||
7081 | ||||
7082 | void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { | |||
7083 | OptionalImmIndexMap OptionalIdx; | |||
7084 | ||||
7085 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
7086 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
7087 | ||||
7088 | // Add the register arguments | |||
7089 | if (Op.isReg()) { | |||
7090 | Op.addRegOperands(Inst, 1); | |||
7091 | continue; | |||
7092 | } | |||
7093 | ||||
7094 | // Handle the case where soffset is an immediate | |||
7095 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { | |||
7096 | Op.addImmOperands(Inst, 1); | |||
7097 | continue; | |||
7098 | } | |||
7099 | ||||
7100 | // Handle tokens like 'offen' which are sometimes hard-coded into the | |||
7101 | // asm string. There are no MCInst operands for these. | |||
7102 | if (Op.isToken()) { | |||
7103 | continue; | |||
7104 | } | |||
7105 | assert(Op.isImm())(static_cast<void> (0)); | |||
7106 | ||||
7107 | // Handle optional arguments | |||
7108 | OptionalIdx[Op.getImmTy()] = i; | |||
7109 | } | |||
7110 | ||||
7111 | addOptionalImmOperand(Inst, Operands, OptionalIdx, | |||
7112 | AMDGPUOperand::ImmTyOffset); | |||
7113 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); | |||
7114 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); | |||
7115 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); | |||
7116 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); | |||
7117 | } | |||
7118 | ||||
7119 | //===----------------------------------------------------------------------===// | |||
7120 | // mimg | |||
7121 | //===----------------------------------------------------------------------===// | |||
7122 | ||||
7123 | void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, | |||
7124 | bool IsAtomic) { | |||
7125 | unsigned I = 1; | |||
7126 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
7127 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { | |||
7128 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | |||
7129 | } | |||
7130 | ||||
7131 | if (IsAtomic) { | |||
7132 | // Add src, same as dst | |||
7133 | assert(Desc.getNumDefs() == 1)(static_cast<void> (0)); | |||
7134 | ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); | |||
7135 | } | |||
7136 | ||||
7137 | OptionalImmIndexMap OptionalIdx; | |||
7138 | ||||
7139 | for (unsigned E = Operands.size(); I != E; ++I) { | |||
7140 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
7141 | ||||
7142 | // Add the register arguments | |||
7143 | if (Op.isReg()) { | |||
7144 | Op.addRegOperands(Inst, 1); | |||
7145 | } else if (Op.isImmModifier()) { | |||
7146 | OptionalIdx[Op.getImmTy()] = I; | |||
7147 | } else if (!Op.isToken()) { | |||
7148 | llvm_unreachable("unexpected operand type")__builtin_unreachable(); | |||
7149 | } | |||
7150 | } | |||
7151 | ||||
7152 | bool IsGFX10Plus = isGFX10Plus(); | |||
7153 | ||||
7154 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); | |||
7155 | if (IsGFX10Plus) | |||
7156 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); | |||
7157 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); | |||
7158 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); | |||
7159 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); | |||
7160 | if (IsGFX10Plus) | |||
7161 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); | |||
7162 | if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) | |||
7163 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); | |||
7164 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); | |||
7165 | if (!IsGFX10Plus) | |||
7166 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); | |||
7167 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); | |||
7168 | } | |||
7169 | ||||
7170 | void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { | |||
7171 | cvtMIMG(Inst, Operands, true); | |||
7172 | } | |||
7173 | ||||
7174 | void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { | |||
7175 | OptionalImmIndexMap OptionalIdx; | |||
7176 | bool IsAtomicReturn = false; | |||
7177 | ||||
7178 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
7179 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
7180 | if (!Op.isCPol()) | |||
7181 | continue; | |||
7182 | IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; | |||
7183 | break; | |||
7184 | } | |||
7185 | ||||
7186 | if (!IsAtomicReturn) { | |||
7187 | int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); | |||
7188 | if (NewOpc != -1) | |||
7189 | Inst.setOpcode(NewOpc); | |||
7190 | } | |||
7191 | ||||
7192 | IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & | |||
7193 | SIInstrFlags::IsAtomicRet; | |||
7194 | ||||
7195 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { | |||
7196 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); | |||
7197 | ||||
7198 | // Add the register arguments | |||
7199 | if (Op.isReg()) { | |||
7200 | Op.addRegOperands(Inst, 1); | |||
7201 | if (IsAtomicReturn && i == 1) | |||
7202 | Op.addRegOperands(Inst, 1); | |||
7203 | continue; | |||
7204 | } | |||
7205 | ||||
7206 | // Handle the case where soffset is an immediate | |||
7207 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { | |||
7208 | Op.addImmOperands(Inst, 1); | |||
7209 | continue; | |||
7210 | } | |||
7211 | ||||
7212 | // Handle tokens like 'offen' which are sometimes hard-coded into the | |||
7213 | // asm string. There are no MCInst operands for these. | |||
7214 | if (Op.isToken()) { | |||
7215 | continue; | |||
7216 | } | |||
7217 | assert(Op.isImm())(static_cast<void> (0)); | |||
7218 | ||||
7219 | // Handle optional arguments | |||
7220 | OptionalIdx[Op.getImmTy()] = i; | |||
7221 | } | |||
7222 | ||||
7223 | if ((int)Inst.getNumOperands() <= | |||
7224 | AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) | |||
7225 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); | |||
7226 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); | |||
7227 | } | |||
7228 | ||||
7229 | void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, | |||
7230 | const OperandVector &Operands) { | |||
7231 | for (unsigned I = 1; I < Operands.size(); ++I) { | |||
7232 | auto &Operand = (AMDGPUOperand &)*Operands[I]; | |||
7233 | if (Operand.isReg()) | |||
7234 | Operand.addRegOperands(Inst, 1); | |||
7235 | } | |||
7236 | ||||
7237 | Inst.addOperand(MCOperand::createImm(1)); // a16 | |||
7238 | } | |||
7239 | ||||
7240 | //===----------------------------------------------------------------------===// | |||
7241 | // smrd | |||
7242 | //===----------------------------------------------------------------------===// | |||
7243 | ||||
7244 | bool AMDGPUOperand::isSMRDOffset8() const { | |||
7245 | return isImm() && isUInt<8>(getImm()); | |||
7246 | } | |||
7247 | ||||
7248 | bool AMDGPUOperand::isSMEMOffset() const { | |||
7249 | return isImm(); // Offset range is checked later by validator. | |||
7250 | } | |||
7251 | ||||
7252 | bool AMDGPUOperand::isSMRDLiteralOffset() const { | |||
7253 | // 32-bit literals are only supported on CI and we only want to use them | |||
7254 | // when the offset is > 8-bits. | |||
7255 | return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); | |||
7256 | } | |||
7257 | ||||
7258 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { | |||
7259 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); | |||
7260 | } | |||
7261 | ||||
7262 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { | |||
7263 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); | |||
7264 | } | |||
7265 | ||||
7266 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { | |||
7267 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); | |||
7268 | } | |||
7269 | ||||
7270 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { | |||
7271 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); | |||
7272 | } | |||
7273 | ||||
7274 | //===----------------------------------------------------------------------===// | |||
7275 | // vop3 | |||
7276 | //===----------------------------------------------------------------------===// | |||
7277 | ||||
/// Convert an omod multiplier in place: 1 -> 0, 2 -> 1, 4 -> 2.
/// \returns false, leaving \p Mul untouched, for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
7285 | ||||
/// Convert an omod divisor in place: 1 -> 0, 2 -> 3.
/// \returns false, leaving \p Div untouched, for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
7299 | ||||
// bound_ctrl:0 and bound_ctrl:1 are both encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
// Any other value is rejected and left unchanged.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;

  BoundCtrl = 1;
  return true;
}
7310 | ||||
7311 | // Note: the order in this table matches the order of operands in AsmString. | |||
7312 | static const OptionalOperand AMDGPUOptionalOperandTable[] = { | |||
7313 | {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, | |||
7314 | {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, | |||
7315 | {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, | |||
7316 | {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, | |||
7317 | {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, | |||
7318 | {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, | |||
7319 | {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, | |||
7320 | {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, | |||
7321 | {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, | |||
7322 | {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, | |||
7323 | {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, | |||
7324 | {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, | |||
7325 | {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, | |||
7326 | {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, | |||
7327 | {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, | |||
7328 | {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, | |||
7329 | {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, | |||
7330 | {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, | |||
7331 | {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, | |||
7332 | {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, | |||
7333 | {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, | |||
7334 | {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, | |||
7335 | {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, | |||
7336 | {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, | |||
7337 | {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, | |||
7338 | {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, | |||
7339 | {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, | |||
7340 | {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, | |||
7341 | {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, | |||
7342 | {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, | |||
7343 | {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, | |||
7344 | {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, | |||
7345 | {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, | |||
7346 | {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, | |||
7347 | {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, | |||
7348 | {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, | |||
7349 | {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, | |||
7350 | {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, | |||
7351 | {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, | |||
7352 | {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, | |||
7353 | {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} | |||
7354 | }; | |||
7355 | ||||
7356 | void AMDGPUAsmParser::onBeginOfFile() { | |||
7357 | if (!getParser().getStreamer().getTargetStreamer() || | |||
7358 | getSTI().getTargetTriple().getArch() == Triple::r600) | |||
7359 | return; | |||
7360 | ||||
7361 | if (!getTargetStreamer().getTargetID()) | |||
7362 | getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); | |||
7363 | ||||
7364 | if (isHsaAbiVersion3Or4(&getSTI())) | |||
7365 | getTargetStreamer().EmitDirectiveAMDGCNTarget(); | |||
7366 | } | |||
7367 | ||||
7368 | OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { | |||
7369 | ||||
7370 | OperandMatchResultTy res = parseOptionalOpr(Operands); | |||
7371 | ||||
7372 | // This is a hack to enable hardcoded mandatory operands which follow | |||
7373 | // optional operands. | |||
7374 | // | |||
7375 | // Current design assumes that all operands after the first optional operand | |||
7376 | // are also optional. However implementation of some instructions violates | |||
7377 | // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). | |||
7378 | // | |||
7379 | // To alleviate this problem, we have to (implicitly) parse extra operands | |||
7380 | // to make sure autogenerated parser of custom operands never hit hardcoded | |||
7381 | // mandatory operands. | |||
7382 | ||||
7383 | for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { | |||
7384 | if (res != MatchOperand_Success || | |||
7385 | isToken(AsmToken::EndOfStatement)) | |||
7386 | break; | |||
7387 | ||||
7388 | trySkipToken(AsmToken::Comma); | |||
7389 | res = parseOptionalOpr(Operands); | |||
7390 | } | |||
7391 | ||||
7392 | return res; | |||
7393 | } | |||
7394 | ||||
7395 | OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { | |||
7396 | OperandMatchResultTy res; | |||
7397 | for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { | |||
7398 | // try to parse any optional operand here | |||
7399 | if (Op.IsBit) { | |||
7400 | res = parseNamedBit(Op.Name, Operands, Op.Type); | |||
7401 | } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { | |||
7402 | res = parseOModOperand(Operands); | |||
7403 | } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || | |||
7404 | Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || | |||
7405 | Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { | |||
7406 | res = parseSDWASel(Operands, Op.Name, Op.Type); | |||
7407 | } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { | |||
7408 | res = parseSDWADstUnused(Operands); | |||
7409 | } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || | |||
7410 | Op.Type == AMDGPUOperand::ImmTyOpSelHi || | |||
7411 | Op.Type == AMDGPUOperand::ImmTyNegLo || | |||
7412 | Op.Type == AMDGPUOperand::ImmTyNegHi) { | |||
7413 | res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, | |||
7414 | Op.ConvertResult); | |||
7415 | } else if (Op.Type == AMDGPUOperand::ImmTyDim) { | |||
7416 | res = parseDim(Operands); | |||
7417 | } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { | |||
7418 | res = parseCPol(Operands); | |||
7419 | } else { | |||
7420 | res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); | |||
7421 | } | |||
7422 | if (res != MatchOperand_NoMatch) { | |||
7423 | return res; | |||
7424 | } | |||
7425 | } | |||
7426 | return MatchOperand_NoMatch; | |||
7427 | } | |||
7428 | ||||
7429 | OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { | |||
7430 | StringRef Name = getTokenStr(); | |||
7431 | if (Name == "mul") { | |||
7432 | return parseIntWithPrefix("mul", Operands, | |||
7433 | AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); | |||
7434 | } | |||
7435 | ||||
7436 | if (Name == "div") { | |||
7437 | return parseIntWithPrefix("div", Operands, | |||
7438 | AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); | |||
7439 | } | |||
7440 | ||||
7441 | return MatchOperand_NoMatch; | |||
7442 | } | |||
7443 | ||||
7444 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { | |||
7445 | cvtVOP3P(Inst, Operands); | |||
7446 | ||||
7447 | int Opc = Inst.getOpcode(); | |||
7448 | ||||
7449 | int SrcNum; | |||
7450 | const int Ops[] = { AMDGPU::OpName::src0, | |||
7451 | AMDGPU::OpName::src1, | |||
7452 | AMDGPU::OpName::src2 }; | |||
7453 | for (SrcNum = 0; | |||
7454 | SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; | |||
7455 | ++SrcNum); | |||
7456 | assert(SrcNum > 0)(static_cast<void> (0)); | |||
7457 | ||||
7458 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); | |||
7459 | unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); | |||
7460 | ||||
7461 | if ((OpSel & (1 << SrcNum)) != 0) { | |||
7462 | int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); | |||
7463 | uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); | |||
7464 | Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); | |||
7465 | } | |||
7466 | } | |||
7467 | ||||
7468 | static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { | |||
7469 | // 1. This operand is input modifiers | |||
7470 | return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS | |||
7471 | // 2. This is not last operand | |||
7472 | && Desc.NumOperands > (OpNum + 1) | |||
7473 | // 3. Next operand is register class | |||
7474 | && Desc.OpInfo[OpNum + 1].RegClass != -1 | |||
7475 | // 4. Next register is not tied to any other operand | |||
7476 | && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; | |||
7477 | } | |||
7478 | ||||
7479 | void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) | |||
7480 | { | |||
7481 | OptionalImmIndexMap OptionalIdx; | |||
7482 | unsigned Opc = Inst.getOpcode(); | |||
7483 | ||||
7484 | unsigned I = 1; | |||
7485 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
7486 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { | |||
7487 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | |||
7488 | } | |||
7489 | ||||
7490 | for (unsigned E = Operands.size(); I != E; ++I) { | |||
7491 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
7492 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | |||
7493 | Op.addRegOrImmWithFPInputModsOperands(Inst, 2); | |||
7494 | } else if (Op.isInterpSlot() || | |||
7495 | Op.isInterpAttr() || | |||
7496 | Op.isAttrChan()) { | |||
7497 | Inst.addOperand(MCOperand::createImm(Op.getImm())); | |||
7498 | } else if (Op.isImmModifier()) { | |||
7499 | OptionalIdx[Op.getImmTy()] = I; | |||
7500 | } else { | |||
7501 | llvm_unreachable("unhandled operand type")__builtin_unreachable(); | |||
7502 | } | |||
7503 | } | |||
7504 | ||||
7505 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { | |||
7506 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); | |||
7507 | } | |||
7508 | ||||
7509 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { | |||
7510 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); | |||
7511 | } | |||
7512 | ||||
7513 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { | |||
7514 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); | |||
7515 | } | |||
7516 | } | |||
7517 | ||||
7518 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, | |||
7519 | OptionalImmIndexMap &OptionalIdx) { | |||
7520 | unsigned Opc = Inst.getOpcode(); | |||
7521 | ||||
7522 | unsigned I = 1; | |||
7523 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
7524 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { | |||
7525 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | |||
7526 | } | |||
7527 | ||||
7528 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { | |||
7529 | // This instruction has src modifiers | |||
7530 | for (unsigned E = Operands.size(); I != E; ++I) { | |||
7531 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
7532 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | |||
7533 | Op.addRegOrImmWithFPInputModsOperands(Inst, 2); | |||
7534 | } else if (Op.isImmModifier()) { | |||
7535 | OptionalIdx[Op.getImmTy()] = I; | |||
7536 | } else if (Op.isRegOrImm()) { | |||
7537 | Op.addRegOrImmOperands(Inst, 1); | |||
7538 | } else { | |||
7539 | llvm_unreachable("unhandled operand type")__builtin_unreachable(); | |||
7540 | } | |||
7541 | } | |||
7542 | } else { | |||
7543 | // No src modifiers | |||
7544 | for (unsigned E = Operands.size(); I != E; ++I) { | |||
7545 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
7546 | if (Op.isMod()) { | |||
7547 | OptionalIdx[Op.getImmTy()] = I; | |||
7548 | } else { | |||
7549 | Op.addRegOrImmOperands(Inst, 1); | |||
7550 | } | |||
7551 | } | |||
7552 | } | |||
7553 | ||||
7554 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { | |||
7555 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); | |||
7556 | } | |||
7557 | ||||
7558 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { | |||
7559 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); | |||
7560 | } | |||
7561 | ||||
7562 | // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): | |||
7563 | // it has src2 register operand that is tied to dst operand | |||
7564 | // we don't allow modifiers for this operand in assembler so src2_modifiers | |||
7565 | // should be 0. | |||
7566 | if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || | |||
7567 | Opc == AMDGPU::V_MAC_F32_e64_gfx10 || | |||
7568 | Opc == AMDGPU::V_MAC_F32_e64_vi || | |||
7569 | Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || | |||
7570 | Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || | |||
7571 | Opc == AMDGPU::V_MAC_F16_e64_vi || | |||
7572 | Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || | |||
7573 | Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || | |||
7574 | Opc == AMDGPU::V_FMAC_F32_e64_vi || | |||
7575 | Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || | |||
7576 | Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { | |||
7577 | auto it = Inst.begin(); | |||
7578 | std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); | |||
7579 | it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 | |||
7580 | ++it; | |||
7581 | // Copy the operand to ensure it's not invalidated when Inst grows. | |||
7582 | Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst | |||
7583 | } | |||
7584 | } | |||
7585 | ||||
7586 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { | |||
7587 | OptionalImmIndexMap OptionalIdx; | |||
7588 | cvtVOP3(Inst, Operands, OptionalIdx); | |||
7589 | } | |||
7590 | ||||
7591 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, | |||
7592 | OptionalImmIndexMap &OptIdx) { | |||
7593 | const int Opc = Inst.getOpcode(); | |||
7594 | const MCInstrDesc &Desc = MII.get(Opc); | |||
7595 | ||||
7596 | const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; | |||
7597 | ||||
7598 | if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { | |||
7599 | assert(!IsPacked)(static_cast<void> (0)); | |||
7600 | Inst.addOperand(Inst.getOperand(0)); | |||
7601 | } | |||
7602 | ||||
7603 | // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 | |||
7604 | // instruction, and then figure out where to actually put the modifiers | |||
7605 | ||||
7606 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); | |||
7607 | if (OpSelIdx != -1) { | |||
7608 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); | |||
7609 | } | |||
7610 | ||||
7611 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); | |||
7612 | if (OpSelHiIdx != -1) { | |||
7613 | int DefaultVal = IsPacked ? -1 : 0; | |||
7614 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, | |||
7615 | DefaultVal); | |||
7616 | } | |||
7617 | ||||
7618 | int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); | |||
7619 | if (NegLoIdx != -1) { | |||
7620 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); | |||
7621 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); | |||
7622 | } | |||
7623 | ||||
7624 | const int Ops[] = { AMDGPU::OpName::src0, | |||
7625 | AMDGPU::OpName::src1, | |||
7626 | AMDGPU::OpName::src2 }; | |||
7627 | const int ModOps[] = { AMDGPU::OpName::src0_modifiers, | |||
7628 | AMDGPU::OpName::src1_modifiers, | |||
7629 | AMDGPU::OpName::src2_modifiers }; | |||
7630 | ||||
7631 | unsigned OpSel = 0; | |||
7632 | unsigned OpSelHi = 0; | |||
7633 | unsigned NegLo = 0; | |||
7634 | unsigned NegHi = 0; | |||
7635 | ||||
7636 | if (OpSelIdx != -1) | |||
7637 | OpSel = Inst.getOperand(OpSelIdx).getImm(); | |||
7638 | ||||
7639 | if (OpSelHiIdx != -1) | |||
7640 | OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); | |||
7641 | ||||
7642 | if (NegLoIdx != -1) { | |||
7643 | int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); | |||
7644 | NegLo = Inst.getOperand(NegLoIdx).getImm(); | |||
7645 | NegHi = Inst.getOperand(NegHiIdx).getImm(); | |||
7646 | } | |||
7647 | ||||
7648 | for (int J = 0; J < 3; ++J) { | |||
7649 | int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); | |||
7650 | if (OpIdx == -1) | |||
7651 | break; | |||
7652 | ||||
7653 | uint32_t ModVal = 0; | |||
7654 | ||||
7655 | if ((OpSel & (1 << J)) != 0) | |||
7656 | ModVal |= SISrcMods::OP_SEL_0; | |||
7657 | ||||
7658 | if ((OpSelHi & (1 << J)) != 0) | |||
7659 | ModVal |= SISrcMods::OP_SEL_1; | |||
7660 | ||||
7661 | if ((NegLo & (1 << J)) != 0) | |||
7662 | ModVal |= SISrcMods::NEG; | |||
7663 | ||||
7664 | if ((NegHi & (1 << J)) != 0) | |||
7665 | ModVal |= SISrcMods::NEG_HI; | |||
7666 | ||||
7667 | int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); | |||
7668 | ||||
7669 | Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); | |||
7670 | } | |||
7671 | } | |||
7672 | ||||
7673 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { | |||
7674 | OptionalImmIndexMap OptIdx; | |||
7675 | cvtVOP3(Inst, Operands, OptIdx); | |||
7676 | cvtVOP3P(Inst, Operands, OptIdx); | |||
7677 | } | |||
7678 | ||||
7679 | //===----------------------------------------------------------------------===// | |||
7680 | // dpp | |||
7681 | //===----------------------------------------------------------------------===// | |||
7682 | ||||
7683 | bool AMDGPUOperand::isDPP8() const { | |||
7684 | return isImmTy(ImmTyDPP8); | |||
7685 | } | |||
7686 | ||||
7687 | bool AMDGPUOperand::isDPPCtrl() const { | |||
7688 | using namespace AMDGPU::DPP; | |||
7689 | ||||
7690 | bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); | |||
7691 | if (result) { | |||
7692 | int64_t Imm = getImm(); | |||
7693 | return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || | |||
7694 | (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || | |||
7695 | (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || | |||
7696 | (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || | |||
7697 | (Imm == DppCtrl::WAVE_SHL1) || | |||
7698 | (Imm == DppCtrl::WAVE_ROL1) || | |||
7699 | (Imm == DppCtrl::WAVE_SHR1) || | |||
7700 | (Imm == DppCtrl::WAVE_ROR1) || | |||
7701 | (Imm == DppCtrl::ROW_MIRROR) || | |||
7702 | (Imm == DppCtrl::ROW_HALF_MIRROR) || | |||
7703 | (Imm == DppCtrl::BCAST15) || | |||
7704 | (Imm == DppCtrl::BCAST31) || | |||
7705 | (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || | |||
7706 | (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); | |||
7707 | } | |||
7708 | return false; | |||
7709 | } | |||
7710 | ||||
7711 | //===----------------------------------------------------------------------===// | |||
7712 | // mAI | |||
7713 | //===----------------------------------------------------------------------===// | |||
7714 | ||||
7715 | bool AMDGPUOperand::isBLGP() const { | |||
7716 | return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); | |||
7717 | } | |||
7718 | ||||
7719 | bool AMDGPUOperand::isCBSZ() const { | |||
7720 | return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); | |||
7721 | } | |||
7722 | ||||
7723 | bool AMDGPUOperand::isABID() const { | |||
7724 | return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); | |||
7725 | } | |||
7726 | ||||
7727 | bool AMDGPUOperand::isS16Imm() const { | |||
7728 | return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); | |||
7729 | } | |||
7730 | ||||
7731 | bool AMDGPUOperand::isU16Imm() const { | |||
7732 | return isImm() && isUInt<16>(getImm()); | |||
7733 | } | |||
7734 | ||||
7735 | //===----------------------------------------------------------------------===// | |||
7736 | // dim | |||
7737 | //===----------------------------------------------------------------------===// | |||
7738 | ||||
7739 | bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { | |||
7740 | // We want to allow "dim:1D" etc., | |||
7741 | // but the initial 1 is tokenized as an integer. | |||
7742 | std::string Token; | |||
7743 | if (isToken(AsmToken::Integer)) { | |||
7744 | SMLoc Loc = getToken().getEndLoc(); | |||
7745 | Token = std::string(getTokenStr()); | |||
7746 | lex(); | |||
7747 | if (getLoc() != Loc) | |||
7748 | return false; | |||
7749 | } | |||
7750 | ||||
7751 | StringRef Suffix; | |||
7752 | if (!parseId(Suffix)) | |||
7753 | return false; | |||
7754 | Token += Suffix; | |||
7755 | ||||
7756 | StringRef DimId = Token; | |||
7757 | if (DimId.startswith("SQ_RSRC_IMG_")) | |||
7758 | DimId = DimId.drop_front(12); | |||
7759 | ||||
7760 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); | |||
7761 | if (!DimInfo) | |||
7762 | return false; | |||
7763 | ||||
7764 | Encoding = DimInfo->Encoding; | |||
7765 | return true; | |||
7766 | } | |||
7767 | ||||
7768 | OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { | |||
7769 | if (!isGFX10Plus()) | |||
7770 | return MatchOperand_NoMatch; | |||
7771 | ||||
7772 | SMLoc S = getLoc(); | |||
7773 | ||||
7774 | if (!trySkipId("dim", AsmToken::Colon)) | |||
7775 | return MatchOperand_NoMatch; | |||
7776 | ||||
7777 | unsigned Encoding; | |||
7778 | SMLoc Loc = getLoc(); | |||
7779 | if (!parseDimId(Encoding)) { | |||
7780 | Error(Loc, "invalid dim value"); | |||
7781 | return MatchOperand_ParseFail; | |||
7782 | } | |||
7783 | ||||
7784 | Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, | |||
7785 | AMDGPUOperand::ImmTyDim)); | |||
7786 | return MatchOperand_Success; | |||
7787 | } | |||
7788 | ||||
7789 | //===----------------------------------------------------------------------===// | |||
7790 | // dpp | |||
7791 | //===----------------------------------------------------------------------===// | |||
7792 | ||||
7793 | OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { | |||
7794 | SMLoc S = getLoc(); | |||
7795 | ||||
7796 | if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) | |||
7797 | return MatchOperand_NoMatch; | |||
7798 | ||||
7799 | // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] | |||
7800 | ||||
7801 | int64_t Sels[8]; | |||
7802 | ||||
7803 | if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) | |||
7804 | return MatchOperand_ParseFail; | |||
7805 | ||||
7806 | for (size_t i = 0; i < 8; ++i) { | |||
7807 | if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) | |||
7808 | return MatchOperand_ParseFail; | |||
7809 | ||||
7810 | SMLoc Loc = getLoc(); | |||
7811 | if (getParser().parseAbsoluteExpression(Sels[i])) | |||
7812 | return MatchOperand_ParseFail; | |||
7813 | if (0 > Sels[i] || 7 < Sels[i]) { | |||
7814 | Error(Loc, "expected a 3-bit value"); | |||
7815 | return MatchOperand_ParseFail; | |||
7816 | } | |||
7817 | } | |||
7818 | ||||
7819 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) | |||
7820 | return MatchOperand_ParseFail; | |||
7821 | ||||
7822 | unsigned DPP8 = 0; | |||
7823 | for (size_t i = 0; i < 8; ++i) | |||
7824 | DPP8 |= (Sels[i] << (i * 3)); | |||
7825 | ||||
7826 | Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); | |||
7827 | return MatchOperand_Success; | |||
7828 | } | |||
7829 | ||||
7830 | bool | |||
7831 | AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, | |||
7832 | const OperandVector &Operands) { | |||
7833 | if (Ctrl == "row_newbcast") | |||
7834 | return isGFX90A(); | |||
7835 | ||||
7836 | if (Ctrl == "row_share" || | |||
7837 | Ctrl == "row_xmask") | |||
7838 | return isGFX10Plus(); | |||
7839 | ||||
7840 | if (Ctrl == "wave_shl" || | |||
7841 | Ctrl == "wave_shr" || | |||
7842 | Ctrl == "wave_rol" || | |||
7843 | Ctrl == "wave_ror" || | |||
7844 | Ctrl == "row_bcast") | |||
7845 | return isVI() || isGFX9(); | |||
7846 | ||||
7847 | return Ctrl == "row_mirror" || | |||
7848 | Ctrl == "row_half_mirror" || | |||
7849 | Ctrl == "quad_perm" || | |||
7850 | Ctrl == "row_shl" || | |||
7851 | Ctrl == "row_shr" || | |||
7852 | Ctrl == "row_ror"; | |||
7853 | } | |||
7854 | ||||
7855 | int64_t | |||
7856 | AMDGPUAsmParser::parseDPPCtrlPerm() { | |||
7857 | // quad_perm:[%d,%d,%d,%d] | |||
7858 | ||||
7859 | if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) | |||
7860 | return -1; | |||
7861 | ||||
7862 | int64_t Val = 0; | |||
7863 | for (int i = 0; i < 4; ++i) { | |||
7864 | if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) | |||
7865 | return -1; | |||
7866 | ||||
7867 | int64_t Temp; | |||
7868 | SMLoc Loc = getLoc(); | |||
7869 | if (getParser().parseAbsoluteExpression(Temp)) | |||
7870 | return -1; | |||
7871 | if (Temp < 0 || Temp > 3) { | |||
7872 | Error(Loc, "expected a 2-bit value"); | |||
7873 | return -1; | |||
7874 | } | |||
7875 | ||||
7876 | Val += (Temp << i * 2); | |||
7877 | } | |||
7878 | ||||
7879 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) | |||
7880 | return -1; | |||
7881 | ||||
7882 | return Val; | |||
7883 | } | |||
7884 | ||||
7885 | int64_t | |||
7886 | AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { | |||
7887 | using namespace AMDGPU::DPP; | |||
7888 | ||||
7889 | // sel:%d | |||
7890 | ||||
7891 | int64_t Val; | |||
7892 | SMLoc Loc = getLoc(); | |||
7893 | ||||
7894 | if (getParser().parseAbsoluteExpression(Val)) | |||
7895 | return -1; | |||
7896 | ||||
7897 | struct DppCtrlCheck { | |||
7898 | int64_t Ctrl; | |||
7899 | int Lo; | |||
7900 | int Hi; | |||
7901 | }; | |||
7902 | ||||
7903 | DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) | |||
7904 | .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) | |||
7905 | .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) | |||
7906 | .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) | |||
7907 | .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) | |||
7908 | .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) | |||
7909 | .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) | |||
7910 | .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) | |||
7911 | .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) | |||
7912 | .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) | |||
7913 | .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) | |||
7914 | .Default({-1, 0, 0}); | |||
7915 | ||||
7916 | bool Valid; | |||
7917 | if (Check.Ctrl == -1) { | |||
7918 | Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); | |||
7919 | Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; | |||
7920 | } else { | |||
7921 | Valid = Check.Lo <= Val && Val <= Check.Hi; | |||
7922 | Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); | |||
7923 | } | |||
7924 | ||||
7925 | if (!Valid) { | |||
7926 | Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); | |||
7927 | return -1; | |||
7928 | } | |||
7929 | ||||
7930 | return Val; | |||
7931 | } | |||
7932 | ||||
7933 | OperandMatchResultTy | |||
7934 | AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { | |||
7935 | using namespace AMDGPU::DPP; | |||
7936 | ||||
7937 | if (!isToken(AsmToken::Identifier) || | |||
7938 | !isSupportedDPPCtrl(getTokenStr(), Operands)) | |||
7939 | return MatchOperand_NoMatch; | |||
7940 | ||||
7941 | SMLoc S = getLoc(); | |||
7942 | int64_t Val = -1; | |||
7943 | StringRef Ctrl; | |||
7944 | ||||
7945 | parseId(Ctrl); | |||
7946 | ||||
7947 | if (Ctrl == "row_mirror") { | |||
7948 | Val = DppCtrl::ROW_MIRROR; | |||
7949 | } else if (Ctrl == "row_half_mirror") { | |||
7950 | Val = DppCtrl::ROW_HALF_MIRROR; | |||
7951 | } else { | |||
7952 | if (skipToken(AsmToken::Colon, "expected a colon")) { | |||
7953 | if (Ctrl == "quad_perm") { | |||
7954 | Val = parseDPPCtrlPerm(); | |||
7955 | } else { | |||
7956 | Val = parseDPPCtrlSel(Ctrl); | |||
7957 | } | |||
7958 | } | |||
7959 | } | |||
7960 | ||||
7961 | if (Val == -1) | |||
7962 | return MatchOperand_ParseFail; | |||
7963 | ||||
7964 | Operands.push_back( | |||
7965 | AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); | |||
7966 | return MatchOperand_Success; | |||
7967 | } | |||
7968 | ||||
7969 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { | |||
7970 | return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); | |||
7971 | } | |||
7972 | ||||
7973 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { | |||
7974 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); | |||
7975 | } | |||
7976 | ||||
7977 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { | |||
7978 | return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); | |||
7979 | } | |||
7980 | ||||
7981 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { | |||
7982 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); | |||
7983 | } | |||
7984 | ||||
7985 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { | |||
7986 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); | |||
7987 | } | |||
7988 | ||||
7989 | void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { | |||
7990 | OptionalImmIndexMap OptionalIdx; | |||
7991 | ||||
7992 | unsigned Opc = Inst.getOpcode(); | |||
7993 | bool HasModifiers = | |||
7994 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; | |||
7995 | unsigned I = 1; | |||
7996 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
7997 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { | |||
7998 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | |||
7999 | } | |||
8000 | ||||
8001 | int Fi = 0; | |||
8002 | for (unsigned E = Operands.size(); I != E; ++I) { | |||
8003 | auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), | |||
8004 | MCOI::TIED_TO); | |||
8005 | if (TiedTo != -1) { | |||
8006 | assert((unsigned)TiedTo < Inst.getNumOperands())(static_cast<void> (0)); | |||
8007 | // handle tied old or src2 for MAC instructions | |||
8008 | Inst.addOperand(Inst.getOperand(TiedTo)); | |||
8009 | } | |||
8010 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
8011 | // Add the register arguments | |||
8012 | if (Op.isReg() && validateVccOperand(Op.getReg())) { | |||
8013 | // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. | |||
8014 | // Skip it. | |||
8015 | continue; | |||
8016 | } | |||
8017 | ||||
8018 | if (IsDPP8) { | |||
8019 | if (Op.isDPP8()) { | |||
8020 | Op.addImmOperands(Inst, 1); | |||
8021 | } else if (HasModifiers && | |||
8022 | isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | |||
8023 | Op.addRegWithFPInputModsOperands(Inst, 2); | |||
8024 | } else if (Op.isFI()) { | |||
8025 | Fi = Op.getImm(); | |||
8026 | } else if (Op.isReg()) { | |||
8027 | Op.addRegOperands(Inst, 1); | |||
8028 | } else { | |||
8029 | llvm_unreachable("Invalid operand type")__builtin_unreachable(); | |||
8030 | } | |||
8031 | } else { | |||
8032 | if (HasModifiers && | |||
8033 | isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | |||
8034 | Op.addRegWithFPInputModsOperands(Inst, 2); | |||
8035 | } else if (Op.isReg()) { | |||
8036 | Op.addRegOperands(Inst, 1); | |||
8037 | } else if (Op.isDPPCtrl()) { | |||
8038 | Op.addImmOperands(Inst, 1); | |||
8039 | } else if (Op.isImm()) { | |||
8040 | // Handle optional arguments | |||
8041 | OptionalIdx[Op.getImmTy()] = I; | |||
8042 | } else { | |||
8043 | llvm_unreachable("Invalid operand type")__builtin_unreachable(); | |||
8044 | } | |||
8045 | } | |||
8046 | } | |||
8047 | ||||
8048 | if (IsDPP8) { | |||
8049 | using namespace llvm::AMDGPU::DPP; | |||
8050 | Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); | |||
8051 | } else { | |||
8052 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); | |||
8053 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); | |||
8054 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); | |||
8055 | if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { | |||
8056 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); | |||
8057 | } | |||
8058 | } | |||
8059 | } | |||
8060 | ||||
8061 | //===----------------------------------------------------------------------===// | |||
8062 | // sdwa | |||
8063 | //===----------------------------------------------------------------------===// | |||
8064 | ||||
8065 | OperandMatchResultTy | |||
8066 | AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, | |||
8067 | AMDGPUOperand::ImmTy Type) { | |||
8068 | using namespace llvm::AMDGPU::SDWA; | |||
8069 | ||||
8070 | SMLoc S = getLoc(); | |||
8071 | StringRef Value; | |||
8072 | OperandMatchResultTy res; | |||
8073 | ||||
8074 | SMLoc StringLoc; | |||
8075 | res = parseStringWithPrefix(Prefix, Value, StringLoc); | |||
8076 | if (res != MatchOperand_Success) { | |||
8077 | return res; | |||
8078 | } | |||
8079 | ||||
8080 | int64_t Int; | |||
8081 | Int = StringSwitch<int64_t>(Value) | |||
8082 | .Case("BYTE_0", SdwaSel::BYTE_0) | |||
8083 | .Case("BYTE_1", SdwaSel::BYTE_1) | |||
8084 | .Case("BYTE_2", SdwaSel::BYTE_2) | |||
8085 | .Case("BYTE_3", SdwaSel::BYTE_3) | |||
8086 | .Case("WORD_0", SdwaSel::WORD_0) | |||
8087 | .Case("WORD_1", SdwaSel::WORD_1) | |||
8088 | .Case("DWORD", SdwaSel::DWORD) | |||
8089 | .Default(0xffffffff); | |||
8090 | ||||
8091 | if (Int == 0xffffffff) { | |||
8092 | Error(StringLoc, "invalid " + Twine(Prefix) + " value"); | |||
8093 | return MatchOperand_ParseFail; | |||
8094 | } | |||
8095 | ||||
8096 | Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); | |||
8097 | return MatchOperand_Success; | |||
8098 | } | |||
8099 | ||||
8100 | OperandMatchResultTy | |||
8101 | AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { | |||
8102 | using namespace llvm::AMDGPU::SDWA; | |||
8103 | ||||
8104 | SMLoc S = getLoc(); | |||
8105 | StringRef Value; | |||
8106 | OperandMatchResultTy res; | |||
8107 | ||||
8108 | SMLoc StringLoc; | |||
8109 | res = parseStringWithPrefix("dst_unused", Value, StringLoc); | |||
8110 | if (res != MatchOperand_Success) { | |||
8111 | return res; | |||
8112 | } | |||
8113 | ||||
8114 | int64_t Int; | |||
8115 | Int = StringSwitch<int64_t>(Value) | |||
8116 | .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) | |||
8117 | .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) | |||
8118 | .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) | |||
8119 | .Default(0xffffffff); | |||
8120 | ||||
8121 | if (Int == 0xffffffff) { | |||
8122 | Error(StringLoc, "invalid dst_unused value"); | |||
8123 | return MatchOperand_ParseFail; | |||
8124 | } | |||
8125 | ||||
8126 | Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); | |||
8127 | return MatchOperand_Success; | |||
8128 | } | |||
8129 | ||||
8130 | void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { | |||
8131 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); | |||
8132 | } | |||
8133 | ||||
8134 | void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { | |||
8135 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); | |||
8136 | } | |||
8137 | ||||
8138 | void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { | |||
8139 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); | |||
8140 | } | |||
8141 | ||||
8142 | void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { | |||
8143 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); | |||
8144 | } | |||
8145 | ||||
8146 | void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { | |||
8147 | cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); | |||
8148 | } | |||
8149 | ||||
8150 | void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, | |||
8151 | uint64_t BasicInstType, | |||
8152 | bool SkipDstVcc, | |||
8153 | bool SkipSrcVcc) { | |||
8154 | using namespace llvm::AMDGPU::SDWA; | |||
8155 | ||||
8156 | OptionalImmIndexMap OptionalIdx; | |||
8157 | bool SkipVcc = SkipDstVcc || SkipSrcVcc; | |||
8158 | bool SkippedVcc = false; | |||
8159 | ||||
8160 | unsigned I = 1; | |||
8161 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | |||
8162 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { | |||
8163 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | |||
8164 | } | |||
8165 | ||||
8166 | for (unsigned E = Operands.size(); I != E; ++I) { | |||
8167 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | |||
8168 | if (SkipVcc && !SkippedVcc && Op.isReg() && | |||
8169 | (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { | |||
8170 | // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. | |||
8171 | // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) | |||
8172 | // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. | |||
8173 | // Skip VCC only if we didn't skip it on previous iteration. | |||
8174 | // Note that src0 and src1 occupy 2 slots each because of modifiers. | |||
8175 | if (BasicInstType == SIInstrFlags::VOP2 && | |||
8176 | ((SkipDstVcc && Inst.getNumOperands() == 1) || | |||
8177 | (SkipSrcVcc && Inst.getNumOperands() == 5))) { | |||
8178 | SkippedVcc = true; | |||
8179 | continue; | |||
8180 | } else if (BasicInstType == SIInstrFlags::VOPC && | |||
8181 | Inst.getNumOperands() == 0) { | |||
8182 | SkippedVcc = true; | |||
8183 | continue; | |||
8184 | } | |||
8185 | } | |||
8186 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | |||
8187 | Op.addRegOrImmWithInputModsOperands(Inst, 2); | |||
8188 | } else if (Op.isImm()) { | |||
8189 | // Handle optional arguments | |||
8190 | OptionalIdx[Op.getImmTy()] = I; | |||
8191 | } else { | |||
8192 | llvm_unreachable("Invalid operand type")__builtin_unreachable(); | |||
8193 | } | |||
8194 | SkippedVcc = false; | |||
8195 | } | |||
8196 | ||||
8197 | if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && | |||
8198 | Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && | |||
8199 | Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { | |||
8200 | // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments | |||
8201 | switch (BasicInstType) { | |||
8202 | case SIInstrFlags::VOP1: | |||
8203 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); | |||
8204 | if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { | |||
8205 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); | |||
8206 | } | |||
8207 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); | |||
8208 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); | |||
8209 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); | |||
8210 | break; | |||
8211 | ||||
8212 | case SIInstrFlags::VOP2: | |||
8213 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); | |||
8214 | if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { | |||
8215 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); | |||
8216 | } | |||
8217 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); | |||
8218 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); | |||
8219 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); | |||
8220 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); | |||
8221 | break; | |||
8222 | ||||
8223 | case SIInstrFlags::VOPC: | |||
8224 | if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) | |||
8225 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); | |||
8226 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); | |||
8227 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); | |||
8228 | break; | |||
8229 | ||||
8230 | default: | |||
8231 | llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed")__builtin_unreachable(); | |||
8232 | } | |||
8233 | } | |||
8234 | ||||
8235 | // special case v_mac_{f16, f32}: | |||
8236 | // it has src2 register operand that is tied to dst operand | |||
8237 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || | |||
8238 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { | |||
8239 | auto it = Inst.begin(); | |||
8240 | std::advance( | |||
8241 | it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); | |||
8242 | Inst.insert(it, Inst.getOperand(0)); // src2 = dst | |||
8243 | } | |||
8244 | } | |||
8245 | ||||
8246 | //===----------------------------------------------------------------------===// | |||
8247 | // mAI | |||
8248 | //===----------------------------------------------------------------------===// | |||
8249 | ||||
8250 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { | |||
8251 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); | |||
8252 | } | |||
8253 | ||||
8254 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { | |||
8255 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); | |||
8256 | } | |||
8257 | ||||
8258 | AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { | |||
8259 | return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); | |||
8260 | } | |||
8261 | ||||
8262 | /// Force static initialization. | |||
8263 | extern "C" LLVM_EXTERNAL_VISIBILITY__attribute__ ((visibility("default"))) void LLVMInitializeAMDGPUAsmParser() { | |||
8264 | RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); | |||
8265 | RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); | |||
8266 | } | |||
8267 | ||||
8268 | #define GET_REGISTER_MATCHER | |||
8269 | #define GET_MATCHER_IMPLEMENTATION | |||
8270 | #define GET_MNEMONIC_SPELL_CHECKER | |||
8271 | #define GET_MNEMONIC_CHECKER | |||
8272 | #include "AMDGPUGenAsmMatcher.inc" | |||
8273 | ||||
8274 | // This fuction should be defined after auto-generated include so that we have | |||
8275 | // MatchClassKind enum defined | |||
8276 | unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, | |||
8277 | unsigned Kind) { | |||
8278 | // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). | |||
8279 | // But MatchInstructionImpl() expects to meet token and fails to validate | |||
8280 | // operand. This method checks if we are given immediate operand but expect to | |||
8281 | // get corresponding token. | |||
8282 | AMDGPUOperand &Operand = (AMDGPUOperand&)Op; | |||
8283 | switch (Kind) { | |||
8284 | case MCK_addr64: | |||
8285 | return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; | |||
8286 | case MCK_gds: | |||
8287 | return Operand.isGDS() ? Match_Success : Match_InvalidOperand; | |||
8288 | case MCK_lds: | |||
8289 | return Operand.isLDS() ? Match_Success : Match_InvalidOperand; | |||
8290 | case MCK_idxen: | |||
8291 | return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; | |||
8292 | case MCK_offen: | |||
8293 | return Operand.isOffen() ? Match_Success : Match_InvalidOperand; | |||
8294 | case MCK_SSrcB32: | |||
8295 | // When operands have expression values, they will return true for isToken, | |||
8296 | // because it is not possible to distinguish between a token and an | |||
8297 | // expression at parse time. MatchInstructionImpl() will always try to | |||
8298 | // match an operand as a token, when isToken returns true, and when the | |||
8299 | // name of the expression is not a valid token, the match will fail, | |||
8300 | // so we need to handle it here. | |||
8301 | return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; | |||
8302 | case MCK_SSrcF32: | |||
8303 | return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; | |||
8304 | case MCK_SoppBrTarget: | |||
8305 | return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; | |||
8306 | case MCK_VReg32OrOff: | |||
8307 | return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; | |||
8308 | case MCK_InterpSlot: | |||
8309 | return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; | |||
8310 | case MCK_Attr: | |||
8311 | return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; | |||
8312 | case MCK_AttrChan: | |||
8313 | return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; | |||
8314 | case MCK_ImmSMEMOffset: | |||
8315 | return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; | |||
8316 | case MCK_SReg_64: | |||
8317 | case MCK_SReg_64_XEXEC: | |||
8318 | // Null is defined as a 32-bit register but | |||
8319 | // it should also be enabled with 64-bit operands. | |||
8320 | // The following code enables it for SReg_64 operands | |||
8321 | // used as source and destination. Remaining source | |||
8322 | // operands are handled in isInlinableImm. | |||
8323 | return Operand.isNull() ? Match_Success : Match_InvalidOperand; | |||
8324 | default: | |||
8325 | return Match_InvalidOperand; | |||
8326 | } | |||
8327 | } | |||
8328 | ||||
8329 | //===----------------------------------------------------------------------===// | |||
8330 | // endpgm | |||
8331 | //===----------------------------------------------------------------------===// | |||
8332 | ||||
8333 | OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { | |||
8334 | SMLoc S = getLoc(); | |||
8335 | int64_t Imm = 0; | |||
8336 | ||||
8337 | if (!parseExpr(Imm)) { | |||
8338 | // The operand is optional, if not present default to 0 | |||
8339 | Imm = 0; | |||
8340 | } | |||
8341 | ||||
8342 | if (!isUInt<16>(Imm)) { | |||
8343 | Error(S, "expected a 16-bit value"); | |||
8344 | return MatchOperand_ParseFail; | |||
8345 | } | |||
8346 | ||||
8347 | Operands.push_back( | |||
8348 | AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); | |||
8349 | return MatchOperand_Success; | |||
8350 | } | |||
8351 | ||||
8352 | bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } |
1 | // RB tree implementation -*- C++ -*- |
2 | |
3 | // Copyright (C) 2001-2020 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | /* |
26 | * |
27 | * Copyright (c) 1996,1997 |
28 | * Silicon Graphics Computer Systems, Inc. |
29 | * |
30 | * Permission to use, copy, modify, distribute and sell this software |
31 | * and its documentation for any purpose is hereby granted without fee, |
32 | * provided that the above copyright notice appear in all copies and |
33 | * that both that copyright notice and this permission notice appear |
34 | * in supporting documentation. Silicon Graphics makes no |
35 | * representations about the suitability of this software for any |
36 | * purpose. It is provided "as is" without express or implied warranty. |
37 | * |
38 | * |
39 | * Copyright (c) 1994 |
40 | * Hewlett-Packard Company |
41 | * |
42 | * Permission to use, copy, modify, distribute and sell this software |
43 | * and its documentation for any purpose is hereby granted without fee, |
44 | * provided that the above copyright notice appear in all copies and |
45 | * that both that copyright notice and this permission notice appear |
46 | * in supporting documentation. Hewlett-Packard Company makes no |
47 | * representations about the suitability of this software for any |
48 | * purpose. It is provided "as is" without express or implied warranty. |
49 | * |
50 | * |
51 | */ |
52 | |
53 | /** @file bits/stl_tree.h |
54 | * This is an internal header file, included by other library headers. |
55 | * Do not attempt to use it directly. @headername{map,set} |
56 | */ |
57 | |
58 | #ifndef _STL_TREE_H1 |
59 | #define _STL_TREE_H1 1 |
60 | |
61 | #pragma GCC system_header |
62 | |
63 | #include <bits/stl_algobase.h> |
64 | #include <bits/allocator.h> |
65 | #include <bits/stl_function.h> |
66 | #include <bits/cpp_type_traits.h> |
67 | #include <ext/alloc_traits.h> |
68 | #if __cplusplus201402L >= 201103L |
69 | # include <ext/aligned_buffer.h> |
70 | #endif |
71 | #if __cplusplus201402L > 201402L |
72 | # include <bits/node_handle.h> |
73 | #endif |
74 | |
75 | namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default"))) |
76 | { |
77 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
78 | |
79 | #if __cplusplus201402L > 201103L |
80 | # define __cpp_lib_generic_associative_lookup201304 201304 |
81 | #endif |
82 | |
83 | // Red-black tree class, designed for use in implementing STL |
84 | // associative containers (set, multiset, map, and multimap). The |
85 | // insertion and deletion algorithms are based on those in Cormen, |
86 | // Leiserson, and Rivest, Introduction to Algorithms (MIT Press, |
87 | // 1990), except that |
88 | // |
89 | // (1) the header cell is maintained with links not only to the root |
90 | // but also to the leftmost node of the tree, to enable constant |
91 | // time begin(), and to the rightmost node of the tree, to enable |
92 | // linear time performance when used with the generic set algorithms |
93 | // (set_union, etc.) |
94 | // |
95 | // (2) when a node being deleted has two children its successor node |
96 | // is relinked into its place, rather than copied, so that the only |
97 | // iterators invalidated are those referring to the deleted node. |
98 | |
// Node colour. The enumerators take the bool values false (red) and
// true (black).
enum _Rb_tree_color
{
  _S_red = false,
  _S_black = true
};
100 | |
101 | struct _Rb_tree_node_base |
102 | { |
103 | typedef _Rb_tree_node_base* _Base_ptr; |
104 | typedef const _Rb_tree_node_base* _Const_Base_ptr; |
105 | |
106 | _Rb_tree_color _M_color; |
107 | _Base_ptr _M_parent; |
108 | _Base_ptr _M_left; |
109 | _Base_ptr _M_right; |
110 | |
111 | static _Base_ptr |
112 | _S_minimum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
113 | { |
114 | while (__x->_M_left != 0) __x = __x->_M_left; |
115 | return __x; |
116 | } |
117 | |
118 | static _Const_Base_ptr |
119 | _S_minimum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
120 | { |
121 | while (__x->_M_left != 0) __x = __x->_M_left; |
122 | return __x; |
123 | } |
124 | |
125 | static _Base_ptr |
126 | _S_maximum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
127 | { |
128 | while (__x->_M_right != 0) __x = __x->_M_right; |
129 | return __x; |
130 | } |
131 | |
132 | static _Const_Base_ptr |
133 | _S_maximum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
134 | { |
135 | while (__x->_M_right != 0) __x = __x->_M_right; |
136 | return __x; |
137 | } |
138 | }; |
139 | |
140 | // Helper type offering value initialization guarantee on the compare functor. |
141 | template<typename _Key_compare> |
142 | struct _Rb_tree_key_compare |
143 | { |
144 | _Key_compare _M_key_compare; |
145 | |
146 | _Rb_tree_key_compare() |
147 | _GLIBCXX_NOEXCEPT_IF(noexcept(is_nothrow_default_constructible<_Key_compare> ::value) |
148 | is_nothrow_default_constructible<_Key_compare>::value)noexcept(is_nothrow_default_constructible<_Key_compare> ::value) |
149 | : _M_key_compare() |
150 | { } |
151 | |
152 | _Rb_tree_key_compare(const _Key_compare& __comp) |
153 | : _M_key_compare(__comp) |
154 | { } |
155 | |
156 | #if __cplusplus201402L >= 201103L |
157 | // Copy constructor added for consistency with C++98 mode. |
158 | _Rb_tree_key_compare(const _Rb_tree_key_compare&) = default; |
159 | |
160 | _Rb_tree_key_compare(_Rb_tree_key_compare&& __x) |
161 | noexcept(is_nothrow_copy_constructible<_Key_compare>::value) |
162 | : _M_key_compare(__x._M_key_compare) |
163 | { } |
164 | #endif |
165 | }; |
166 | |
167 | // Helper type to manage default initialization of node count and header. |
168 | struct _Rb_tree_header |
169 | { |
170 | _Rb_tree_node_base _M_header; |
171 | size_t _M_node_count; // Keeps track of size of tree. |
172 | |
173 | _Rb_tree_header() _GLIBCXX_NOEXCEPTnoexcept |
174 | { |
175 | _M_header._M_color = _S_red; |
176 | _M_reset(); |
177 | } |
178 | |
179 | #if __cplusplus201402L >= 201103L |
180 | _Rb_tree_header(_Rb_tree_header&& __x) noexcept |
181 | { |
182 | if (__x._M_header._M_parent != nullptr) |
183 | _M_move_data(__x); |
184 | else |
185 | { |
186 | _M_header._M_color = _S_red; |
187 | _M_reset(); |
188 | } |
189 | } |
190 | #endif |
191 | |
192 | void |
193 | _M_move_data(_Rb_tree_header& __from) |
194 | { |
195 | _M_header._M_color = __from._M_header._M_color; |
196 | _M_header._M_parent = __from._M_header._M_parent; |
197 | _M_header._M_left = __from._M_header._M_left; |
198 | _M_header._M_right = __from._M_header._M_right; |
199 | _M_header._M_parent->_M_parent = &_M_header; |
200 | _M_node_count = __from._M_node_count; |
201 | |
202 | __from._M_reset(); |
203 | } |
204 | |
205 | void |
206 | _M_reset() |
207 | { |
208 | _M_header._M_parent = 0; |
209 | _M_header._M_left = &_M_header; |
210 | _M_header._M_right = &_M_header; |
211 | _M_node_count = 0; |
212 | } |
213 | }; |
214 | |
215 | template<typename _Val> |
216 | struct _Rb_tree_node : public _Rb_tree_node_base |
217 | { |
218 | typedef _Rb_tree_node<_Val>* _Link_type; |
219 | |
220 | #if __cplusplus201402L < 201103L |
221 | _Val _M_value_field; |
222 | |
223 | _Val* |
224 | _M_valptr() |
225 | { return std::__addressof(_M_value_field); } |
226 | |
227 | const _Val* |
228 | _M_valptr() const |
229 | { return std::__addressof(_M_value_field); } |
230 | #else |
231 | __gnu_cxx::__aligned_membuf<_Val> _M_storage; |
232 | |
233 | _Val* |
234 | _M_valptr() |
235 | { return _M_storage._M_ptr(); } |
236 | |
237 | const _Val* |
238 | _M_valptr() const |
239 | { return _M_storage._M_ptr(); } |
240 | #endif |
241 | }; |
242 | |
243 | _GLIBCXX_PURE__attribute__ ((__pure__)) _Rb_tree_node_base* |
244 | _Rb_tree_increment(_Rb_tree_node_base* __x) throw (); |
245 | |
246 | _GLIBCXX_PURE__attribute__ ((__pure__)) const _Rb_tree_node_base* |
247 | _Rb_tree_increment(const _Rb_tree_node_base* __x) throw (); |
248 | |
249 | _GLIBCXX_PURE__attribute__ ((__pure__)) _Rb_tree_node_base* |
250 | _Rb_tree_decrement(_Rb_tree_node_base* __x) throw (); |
251 | |
252 | _GLIBCXX_PURE__attribute__ ((__pure__)) const _Rb_tree_node_base* |
253 | _Rb_tree_decrement(const _Rb_tree_node_base* __x) throw (); |
254 | |
255 | template<typename _Tp> |
256 | struct _Rb_tree_iterator |
257 | { |
258 | typedef _Tp value_type; |
259 | typedef _Tp& reference; |
260 | typedef _Tp* pointer; |
261 | |
262 | typedef bidirectional_iterator_tag iterator_category; |
263 | typedef ptrdiff_t difference_type; |
264 | |
265 | typedef _Rb_tree_iterator<_Tp> _Self; |
266 | typedef _Rb_tree_node_base::_Base_ptr _Base_ptr; |
267 | typedef _Rb_tree_node<_Tp>* _Link_type; |
268 | |
269 | _Rb_tree_iterator() _GLIBCXX_NOEXCEPTnoexcept |
270 | : _M_node() { } |
271 | |
272 | explicit |
273 | _Rb_tree_iterator(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
274 | : _M_node(__x) { } |
275 | |
276 | reference |
277 | operator*() const _GLIBCXX_NOEXCEPTnoexcept |
278 | { return *static_cast<_Link_type>(_M_node)->_M_valptr(); } |
279 | |
280 | pointer |
281 | operator->() const _GLIBCXX_NOEXCEPTnoexcept |
282 | { return static_cast<_Link_type> (_M_node)->_M_valptr(); } |
283 | |
284 | _Self& |
285 | operator++() _GLIBCXX_NOEXCEPTnoexcept |
286 | { |
287 | _M_node = _Rb_tree_increment(_M_node); |
288 | return *this; |
289 | } |
290 | |
291 | _Self |
292 | operator++(int) _GLIBCXX_NOEXCEPTnoexcept |
293 | { |
294 | _Self __tmp = *this; |
295 | _M_node = _Rb_tree_increment(_M_node); |
296 | return __tmp; |
297 | } |
298 | |
299 | _Self& |
300 | operator--() _GLIBCXX_NOEXCEPTnoexcept |
301 | { |
302 | _M_node = _Rb_tree_decrement(_M_node); |
303 | return *this; |
304 | } |
305 | |
306 | _Self |
307 | operator--(int) _GLIBCXX_NOEXCEPTnoexcept |
308 | { |
309 | _Self __tmp = *this; |
310 | _M_node = _Rb_tree_decrement(_M_node); |
311 | return __tmp; |
312 | } |
313 | |
314 | friend bool |
315 | operator==(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept |
316 | { return __x._M_node == __y._M_node; } |
317 | |
318 | #if ! __cpp_lib_three_way_comparison |
319 | friend bool |
320 | operator!=(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept |
321 | { return __x._M_node != __y._M_node; } |
322 | #endif |
323 | |
324 | _Base_ptr _M_node; |
325 | }; |
326 | |
327 | template<typename _Tp> |
328 | struct _Rb_tree_const_iterator |
329 | { |
330 | typedef _Tp value_type; |
331 | typedef const _Tp& reference; |
332 | typedef const _Tp* pointer; |
333 | |
334 | typedef _Rb_tree_iterator<_Tp> iterator; |
335 | |
336 | typedef bidirectional_iterator_tag iterator_category; |
337 | typedef ptrdiff_t difference_type; |
338 | |
339 | typedef _Rb_tree_const_iterator<_Tp> _Self; |
340 | typedef _Rb_tree_node_base::_Const_Base_ptr _Base_ptr; |
341 | typedef const _Rb_tree_node<_Tp>* _Link_type; |
342 | |
343 | _Rb_tree_const_iterator() _GLIBCXX_NOEXCEPTnoexcept |
344 | : _M_node() { } |
345 | |
346 | explicit |
347 | _Rb_tree_const_iterator(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
348 | : _M_node(__x) { } |
349 | |
350 | _Rb_tree_const_iterator(const iterator& __it) _GLIBCXX_NOEXCEPTnoexcept |
351 | : _M_node(__it._M_node) { } |
352 | |
353 | iterator |
354 | _M_const_cast() const _GLIBCXX_NOEXCEPTnoexcept |
355 | { return iterator(const_cast<typename iterator::_Base_ptr>(_M_node)); } |
356 | |
357 | reference |
358 | operator*() const _GLIBCXX_NOEXCEPTnoexcept |
359 | { return *static_cast<_Link_type>(_M_node)->_M_valptr(); } |
360 | |
361 | pointer |
362 | operator->() const _GLIBCXX_NOEXCEPTnoexcept |
363 | { return static_cast<_Link_type>(_M_node)->_M_valptr(); } |
364 | |
365 | _Self& |
366 | operator++() _GLIBCXX_NOEXCEPTnoexcept |
367 | { |
368 | _M_node = _Rb_tree_increment(_M_node); |
369 | return *this; |
370 | } |
371 | |
372 | _Self |
373 | operator++(int) _GLIBCXX_NOEXCEPTnoexcept |
374 | { |
375 | _Self __tmp = *this; |
376 | _M_node = _Rb_tree_increment(_M_node); |
377 | return __tmp; |
378 | } |
379 | |
380 | _Self& |
381 | operator--() _GLIBCXX_NOEXCEPTnoexcept |
382 | { |
383 | _M_node = _Rb_tree_decrement(_M_node); |
384 | return *this; |
385 | } |
386 | |
387 | _Self |
388 | operator--(int) _GLIBCXX_NOEXCEPTnoexcept |
389 | { |
390 | _Self __tmp = *this; |
391 | _M_node = _Rb_tree_decrement(_M_node); |
392 | return __tmp; |
393 | } |
394 | |
395 | friend bool |
396 | operator==(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept |
397 | { return __x._M_node == __y._M_node; } |
398 | |
399 | #if ! __cpp_lib_three_way_comparison |
400 | friend bool |
401 | operator!=(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept |
402 | { return __x._M_node != __y._M_node; } |
403 | #endif |
404 | |
405 | _Base_ptr _M_node; |
406 | }; |
407 | |
408 | void |
409 | _Rb_tree_insert_and_rebalance(const bool __insert_left, |
410 | _Rb_tree_node_base* __x, |
411 | _Rb_tree_node_base* __p, |
412 | _Rb_tree_node_base& __header) throw (); |
413 | |
414 | _Rb_tree_node_base* |
415 | _Rb_tree_rebalance_for_erase(_Rb_tree_node_base* const __z, |
416 | _Rb_tree_node_base& __header) throw (); |
417 | |
418 | #if __cplusplus201402L >= 201402L |
419 | template<typename _Cmp, typename _SfinaeType, typename = __void_t<>> |
420 | struct __has_is_transparent |
421 | { }; |
422 | |
423 | template<typename _Cmp, typename _SfinaeType> |
424 | struct __has_is_transparent<_Cmp, _SfinaeType, |
425 | __void_t<typename _Cmp::is_transparent>> |
426 | { typedef void type; }; |
427 | |
428 | template<typename _Cmp, typename _SfinaeType> |
429 | using __has_is_transparent_t |
430 | = typename __has_is_transparent<_Cmp, _SfinaeType>::type; |
431 | #endif |
432 | |
433 | #if __cplusplus201402L > 201402L |
434 | template<typename _Tree1, typename _Cmp2> |
435 | struct _Rb_tree_merge_helper { }; |
436 | #endif |
437 | |
438 | template<typename _Key, typename _Val, typename _KeyOfValue, |
439 | typename _Compare, typename _Alloc = allocator<_Val> > |
440 | class _Rb_tree |
441 | { |
442 | typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template |
443 | rebind<_Rb_tree_node<_Val> >::other _Node_allocator; |
444 | |
445 | typedef __gnu_cxx::__alloc_traits<_Node_allocator> _Alloc_traits; |
446 | |
447 | protected: |
448 | typedef _Rb_tree_node_base* _Base_ptr; |
449 | typedef const _Rb_tree_node_base* _Const_Base_ptr; |
450 | typedef _Rb_tree_node<_Val>* _Link_type; |
451 | typedef const _Rb_tree_node<_Val>* _Const_Link_type; |
452 | |
453 | private: |
454 | // Functor recycling a pool of nodes and using allocation once the pool |
455 | // is empty. |
456 | struct _Reuse_or_alloc_node |
457 | { |
458 | _Reuse_or_alloc_node(_Rb_tree& __t) |
459 | : _M_root(__t._M_root()), _M_nodes(__t._M_rightmost()), _M_t(__t) |
460 | { |
461 | if (_M_root) |
462 | { |
463 | _M_root->_M_parent = 0; |
464 | |
465 | if (_M_nodes->_M_left) |
466 | _M_nodes = _M_nodes->_M_left; |
467 | } |
468 | else |
469 | _M_nodes = 0; |
470 | } |
471 | |
472 | #if __cplusplus201402L >= 201103L |
473 | _Reuse_or_alloc_node(const _Reuse_or_alloc_node&) = delete; |
474 | #endif |
475 | |
476 | ~_Reuse_or_alloc_node() |
477 | { _M_t._M_erase(static_cast<_Link_type>(_M_root)); } |
478 | |
479 | template<typename _Arg> |
480 | _Link_type |
481 | #if __cplusplus201402L < 201103L |
482 | operator()(const _Arg& __arg) |
483 | #else |
484 | operator()(_Arg&& __arg) |
485 | #endif |
486 | { |
487 | _Link_type __node = static_cast<_Link_type>(_M_extract()); |
488 | if (__node) |
489 | { |
490 | _M_t._M_destroy_node(__node); |
491 | _M_t._M_construct_node(__node, _GLIBCXX_FORWARD(_Arg, __arg)std::forward<_Arg>(__arg)); |
492 | return __node; |
493 | } |
494 | |
495 | return _M_t._M_create_node(_GLIBCXX_FORWARD(_Arg, __arg)std::forward<_Arg>(__arg)); |
496 | } |
497 | |
498 | private: |
499 | _Base_ptr |
500 | _M_extract() |
501 | { |
502 | if (!_M_nodes) |
503 | return _M_nodes; |
504 | |
505 | _Base_ptr __node = _M_nodes; |
506 | _M_nodes = _M_nodes->_M_parent; |
507 | if (_M_nodes) |
508 | { |
509 | if (_M_nodes->_M_right == __node) |
510 | { |
511 | _M_nodes->_M_right = 0; |
512 | |
513 | if (_M_nodes->_M_left) |
514 | { |
515 | _M_nodes = _M_nodes->_M_left; |
516 | |
517 | while (_M_nodes->_M_right) |
518 | _M_nodes = _M_nodes->_M_right; |
519 | |
520 | if (_M_nodes->_M_left) |
521 | _M_nodes = _M_nodes->_M_left; |
522 | } |
523 | } |
524 | else // __node is on the left. |
525 | _M_nodes->_M_left = 0; |
526 | } |
527 | else |
528 | _M_root = 0; |
529 | |
530 | return __node; |
531 | } |
532 | |
533 | _Base_ptr _M_root; |
534 | _Base_ptr _M_nodes; |
535 | _Rb_tree& _M_t; |
536 | }; |
537 | |
538 | // Functor similar to the previous one but without any pool of nodes to |
539 | // recycle. |
540 | struct _Alloc_node |
541 | { |
542 | _Alloc_node(_Rb_tree& __t) |
543 | : _M_t(__t) { } |
544 | |
545 | template<typename _Arg> |
546 | _Link_type |
547 | #if __cplusplus201402L < 201103L |
548 | operator()(const _Arg& __arg) const |
549 | #else |
550 | operator()(_Arg&& __arg) const |
551 | #endif |
552 | { return _M_t._M_create_node(_GLIBCXX_FORWARD(_Arg, __arg)std::forward<_Arg>(__arg)); } |
553 | |
554 | private: |
555 | _Rb_tree& _M_t; |
556 | }; |
557 | |
558 | public: |
559 | typedef _Key key_type; |
560 | typedef _Val value_type; |
561 | typedef value_type* pointer; |
562 | typedef const value_type* const_pointer; |
563 | typedef value_type& reference; |
564 | typedef const value_type& const_reference; |
565 | typedef size_t size_type; |
566 | typedef ptrdiff_t difference_type; |
567 | typedef _Alloc allocator_type; |
568 | |
569 | _Node_allocator& |
570 | _M_get_Node_allocator() _GLIBCXX_NOEXCEPTnoexcept |
571 | { return this->_M_impl; } |
572 | |
573 | const _Node_allocator& |
574 | _M_get_Node_allocator() const _GLIBCXX_NOEXCEPTnoexcept |
575 | { return this->_M_impl; } |
576 | |
577 | allocator_type |
578 | get_allocator() const _GLIBCXX_NOEXCEPTnoexcept |
579 | { return allocator_type(_M_get_Node_allocator()); } |
580 | |
581 | protected: |
582 | _Link_type |
583 | _M_get_node() |
584 | { return _Alloc_traits::allocate(_M_get_Node_allocator(), 1); } |
585 | |
586 | void |
587 | _M_put_node(_Link_type __p) _GLIBCXX_NOEXCEPTnoexcept |
588 | { _Alloc_traits::deallocate(_M_get_Node_allocator(), __p, 1); } |
589 | |
590 | #if __cplusplus201402L < 201103L |
591 | void |
592 | _M_construct_node(_Link_type __node, const value_type& __x) |
593 | { |
594 | __tryif (true) |
595 | { get_allocator().construct(__node->_M_valptr(), __x); } |
596 | __catch(...)if (false) |
597 | { |
598 | _M_put_node(__node); |
599 | __throw_exception_again; |
600 | } |
601 | } |
602 | |
603 | _Link_type |
604 | _M_create_node(const value_type& __x) |
605 | { |
606 | _Link_type __tmp = _M_get_node(); |
607 | _M_construct_node(__tmp, __x); |
608 | return __tmp; |
609 | } |
610 | #else |
611 | template<typename... _Args> |
612 | void |
613 | _M_construct_node(_Link_type __node, _Args&&... __args) |
614 | { |
615 | __tryif (true) |
616 | { |
617 | ::new(__node) _Rb_tree_node<_Val>; |
618 | _Alloc_traits::construct(_M_get_Node_allocator(), |
619 | __node->_M_valptr(), |
620 | std::forward<_Args>(__args)...); |
621 | } |
622 | __catch(...)if (false) |
623 | { |
624 | __node->~_Rb_tree_node<_Val>(); |
625 | _M_put_node(__node); |
626 | __throw_exception_again; |
627 | } |
628 | } |
629 | |
630 | template<typename... _Args> |
631 | _Link_type |
632 | _M_create_node(_Args&&... __args) |
633 | { |
634 | _Link_type __tmp = _M_get_node(); |
635 | _M_construct_node(__tmp, std::forward<_Args>(__args)...); |
636 | return __tmp; |
637 | } |
638 | #endif |
639 | |
640 | void |
641 | _M_destroy_node(_Link_type __p) _GLIBCXX_NOEXCEPTnoexcept |
642 | { |
643 | #if __cplusplus201402L < 201103L |
644 | get_allocator().destroy(__p->_M_valptr()); |
645 | #else |
646 | _Alloc_traits::destroy(_M_get_Node_allocator(), __p->_M_valptr()); |
647 | __p->~_Rb_tree_node<_Val>(); |
648 | #endif |
649 | } |
650 | |
651 | void |
652 | _M_drop_node(_Link_type __p) _GLIBCXX_NOEXCEPTnoexcept |
653 | { |
654 | _M_destroy_node(__p); |
655 | _M_put_node(__p); |
656 | } |
657 | |
658 | template<typename _NodeGen> |
659 | _Link_type |
660 | _M_clone_node(_Const_Link_type __x, _NodeGen& __node_gen) |
661 | { |
662 | _Link_type __tmp = __node_gen(*__x->_M_valptr()); |
663 | __tmp->_M_color = __x->_M_color; |
664 | __tmp->_M_left = 0; |
665 | __tmp->_M_right = 0; |
666 | return __tmp; |
667 | } |
668 | |
669 | protected: |
670 | #if _GLIBCXX_INLINE_VERSION0 |
671 | template<typename _Key_compare> |
672 | #else |
673 | // Unused _Is_pod_comparator is kept as it is part of mangled name. |
674 | template<typename _Key_compare, |
675 | bool /* _Is_pod_comparator */ = __is_pod(_Key_compare)> |
676 | #endif |
677 | struct _Rb_tree_impl |
678 | : public _Node_allocator |
679 | , public _Rb_tree_key_compare<_Key_compare> |
680 | , public _Rb_tree_header |
681 | { |
682 | typedef _Rb_tree_key_compare<_Key_compare> _Base_key_compare; |
683 | |
684 | _Rb_tree_impl() |
685 | _GLIBCXX_NOEXCEPT_IF(noexcept(is_nothrow_default_constructible<_Node_allocator> ::value && is_nothrow_default_constructible<_Base_key_compare >::value) |
686 | is_nothrow_default_constructible<_Node_allocator>::valuenoexcept(is_nothrow_default_constructible<_Node_allocator> ::value && is_nothrow_default_constructible<_Base_key_compare >::value) |
687 | && is_nothrow_default_constructible<_Base_key_compare>::value )noexcept(is_nothrow_default_constructible<_Node_allocator> ::value && is_nothrow_default_constructible<_Base_key_compare >::value) |
688 | : _Node_allocator() |
689 | { } |
690 | |
691 | _Rb_tree_impl(const _Rb_tree_impl& __x) |
692 | : _Node_allocator(_Alloc_traits::_S_select_on_copy(__x)) |
693 | , _Base_key_compare(__x._M_key_compare) |
694 | { } |
695 | |
696 | #if __cplusplus201402L < 201103L |
697 | _Rb_tree_impl(const _Key_compare& __comp, const _Node_allocator& __a) |
698 | : _Node_allocator(__a), _Base_key_compare(__comp) |
699 | { } |
700 | #else |
701 | _Rb_tree_impl(_Rb_tree_impl&&) = default; |
702 | |
703 | explicit |
704 | _Rb_tree_impl(_Node_allocator&& __a) |
705 | : _Node_allocator(std::move(__a)) |
706 | { } |
707 | |
708 | _Rb_tree_impl(_Rb_tree_impl&& __x, _Node_allocator&& __a) |
709 | : _Node_allocator(std::move(__a)), |
710 | _Base_key_compare(std::move(__x)), |
711 | _Rb_tree_header(std::move(__x)) |
712 | { } |
713 | |
714 | _Rb_tree_impl(const _Key_compare& __comp, _Node_allocator&& __a) |
715 | : _Node_allocator(std::move(__a)), _Base_key_compare(__comp) |
716 | { } |
717 | #endif |
718 | }; |
719 | |
720 | _Rb_tree_impl<_Compare> _M_impl; |
721 | |
722 | protected: |
723 | _Base_ptr& |
724 | _M_root() _GLIBCXX_NOEXCEPTnoexcept |
725 | { return this->_M_impl._M_header._M_parent; } |
726 | |
727 | _Const_Base_ptr |
728 | _M_root() const _GLIBCXX_NOEXCEPTnoexcept |
729 | { return this->_M_impl._M_header._M_parent; } |
730 | |
731 | _Base_ptr& |
732 | _M_leftmost() _GLIBCXX_NOEXCEPTnoexcept |
733 | { return this->_M_impl._M_header._M_left; } |
734 | |
735 | _Const_Base_ptr |
736 | _M_leftmost() const _GLIBCXX_NOEXCEPTnoexcept |
737 | { return this->_M_impl._M_header._M_left; } |
738 | |
739 | _Base_ptr& |
740 | _M_rightmost() _GLIBCXX_NOEXCEPTnoexcept |
741 | { return this->_M_impl._M_header._M_right; } |
742 | |
743 | _Const_Base_ptr |
744 | _M_rightmost() const _GLIBCXX_NOEXCEPTnoexcept |
745 | { return this->_M_impl._M_header._M_right; } |
746 | |
747 | _Link_type |
748 | _M_begin() _GLIBCXX_NOEXCEPTnoexcept |
749 | { return static_cast<_Link_type>(this->_M_impl._M_header._M_parent); } |
750 | |
751 | _Const_Link_type |
752 | _M_begin() const _GLIBCXX_NOEXCEPTnoexcept |
753 | { |
754 | return static_cast<_Const_Link_type> |
755 | (this->_M_impl._M_header._M_parent); |
756 | } |
757 | |
758 | _Base_ptr |
759 | _M_end() _GLIBCXX_NOEXCEPTnoexcept |
760 | { return &this->_M_impl._M_header; } |
761 | |
762 | _Const_Base_ptr |
763 | _M_end() const _GLIBCXX_NOEXCEPTnoexcept |
764 | { return &this->_M_impl._M_header; } |
765 | |
766 | static const _Key& |
767 | _S_key(_Const_Link_type __x) |
768 | { |
769 | #if __cplusplus201402L >= 201103L |
770 | // If we're asking for the key we're presumably using the comparison |
771 | // object, and so this is a good place to sanity check it. |
772 | static_assert(__is_invocable<_Compare&, const _Key&, const _Key&>{}, |
773 | "comparison object must be invocable " |
774 | "with two arguments of key type"); |
775 | # if __cplusplus201402L >= 201703L |
776 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
777 | // 2542. Missing const requirements for associative containers |
778 | if constexpr (__is_invocable<_Compare&, const _Key&, const _Key&>{}) |
779 | static_assert( |
780 | is_invocable_v<const _Compare&, const _Key&, const _Key&>, |
781 | "comparison object must be invocable as const"); |
782 | # endif // C++17 |
783 | #endif // C++11 |
784 | |
785 | return _KeyOfValue()(*__x->_M_valptr()); |
786 | } |
787 | |
788 | static _Link_type |
789 | _S_left(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
790 | { return static_cast<_Link_type>(__x->_M_left); } |
791 | |
792 | static _Const_Link_type |
793 | _S_left(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
794 | { return static_cast<_Const_Link_type>(__x->_M_left); } |
795 | |
796 | static _Link_type |
797 | _S_right(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
798 | { return static_cast<_Link_type>(__x->_M_right); } |
799 | |
800 | static _Const_Link_type |
801 | _S_right(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
802 | { return static_cast<_Const_Link_type>(__x->_M_right); } |
803 | |
804 | static const _Key& |
805 | _S_key(_Const_Base_ptr __x) |
806 | { return _S_key(static_cast<_Const_Link_type>(__x)); } |
807 | |
808 | static _Base_ptr |
809 | _S_minimum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
810 | { return _Rb_tree_node_base::_S_minimum(__x); } |
811 | |
812 | static _Const_Base_ptr |
813 | _S_minimum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
814 | { return _Rb_tree_node_base::_S_minimum(__x); } |
815 | |
816 | static _Base_ptr |
817 | _S_maximum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
818 | { return _Rb_tree_node_base::_S_maximum(__x); } |
819 | |
820 | static _Const_Base_ptr |
821 | _S_maximum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept |
822 | { return _Rb_tree_node_base::_S_maximum(__x); } |
823 | |
824 | public: |
825 | typedef _Rb_tree_iterator<value_type> iterator; |
826 | typedef _Rb_tree_const_iterator<value_type> const_iterator; |
827 | |
828 | typedef std::reverse_iterator<iterator> reverse_iterator; |
829 | typedef std::reverse_iterator<const_iterator> const_reverse_iterator; |
830 | |
831 | #if __cplusplus201402L > 201402L |
832 | using node_type = _Node_handle<_Key, _Val, _Node_allocator>; |
833 | using insert_return_type = _Node_insert_return< |
834 | conditional_t<is_same_v<_Key, _Val>, const_iterator, iterator>, |
835 | node_type>; |
836 | #endif |
837 | |
838 | pair<_Base_ptr, _Base_ptr> |
839 | _M_get_insert_unique_pos(const key_type& __k); |
840 | |
841 | pair<_Base_ptr, _Base_ptr> |
842 | _M_get_insert_equal_pos(const key_type& __k); |
843 | |
844 | pair<_Base_ptr, _Base_ptr> |
845 | _M_get_insert_hint_unique_pos(const_iterator __pos, |
846 | const key_type& __k); |
847 | |
848 | pair<_Base_ptr, _Base_ptr> |
849 | _M_get_insert_hint_equal_pos(const_iterator __pos, |
850 | const key_type& __k); |
851 | |
852 | private: |
853 | #if __cplusplus201402L >= 201103L |
854 | template<typename _Arg, typename _NodeGen> |
855 | iterator |
856 | _M_insert_(_Base_ptr __x, _Base_ptr __y, _Arg&& __v, _NodeGen&); |
857 | |
858 | iterator |
859 | _M_insert_node(_Base_ptr __x, _Base_ptr __y, _Link_type __z); |
860 | |
861 | template<typename _Arg> |
862 | iterator |
863 | _M_insert_lower(_Base_ptr __y, _Arg&& __v); |
864 | |
865 | template<typename _Arg> |
866 | iterator |
867 | _M_insert_equal_lower(_Arg&& __x); |
868 | |
869 | iterator |
870 | _M_insert_lower_node(_Base_ptr __p, _Link_type __z); |
871 | |
872 | iterator |
873 | _M_insert_equal_lower_node(_Link_type __z); |
874 | #else |
875 | template<typename _NodeGen> |
876 | iterator |
877 | _M_insert_(_Base_ptr __x, _Base_ptr __y, |
878 | const value_type& __v, _NodeGen&); |
879 | |
880 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
881 | // 233. Insertion hints in associative containers. |
882 | iterator |
883 | _M_insert_lower(_Base_ptr __y, const value_type& __v); |
884 | |
885 | iterator |
886 | _M_insert_equal_lower(const value_type& __x); |
887 | #endif |
888 | |
889 | template<typename _NodeGen> |
890 | _Link_type |
891 | _M_copy(_Const_Link_type __x, _Base_ptr __p, _NodeGen&); |
892 | |
893 | template<typename _NodeGen> |
894 | _Link_type |
895 | _M_copy(const _Rb_tree& __x, _NodeGen& __gen) |
896 | { |
897 | _Link_type __root = _M_copy(__x._M_begin(), _M_end(), __gen); |
898 | _M_leftmost() = _S_minimum(__root); |
899 | _M_rightmost() = _S_maximum(__root); |
900 | _M_impl._M_node_count = __x._M_impl._M_node_count; |
901 | return __root; |
902 | } |
903 | |
904 | _Link_type |
905 | _M_copy(const _Rb_tree& __x) |
906 | { |
907 | _Alloc_node __an(*this); |
908 | return _M_copy(__x, __an); |
909 | } |
910 | |
911 | void |
912 | _M_erase(_Link_type __x); |
913 | |
914 | iterator |
915 | _M_lower_bound(_Link_type __x, _Base_ptr __y, |
916 | const _Key& __k); |
917 | |
918 | const_iterator |
919 | _M_lower_bound(_Const_Link_type __x, _Const_Base_ptr __y, |
920 | const _Key& __k) const; |
921 | |
922 | iterator |
923 | _M_upper_bound(_Link_type __x, _Base_ptr __y, |
924 | const _Key& __k); |
925 | |
926 | const_iterator |
927 | _M_upper_bound(_Const_Link_type __x, _Const_Base_ptr __y, |
928 | const _Key& __k) const; |
929 | |
930 | public: |
931 | // allocation/deallocation |
932 | #if __cplusplus201402L < 201103L |
933 | _Rb_tree() { } |
934 | #else |
935 | _Rb_tree() = default; |
936 | #endif |
937 | |
938 | _Rb_tree(const _Compare& __comp, |
939 | const allocator_type& __a = allocator_type()) |
940 | : _M_impl(__comp, _Node_allocator(__a)) { } |
941 | |
942 | _Rb_tree(const _Rb_tree& __x) |
943 | : _M_impl(__x._M_impl) |
944 | { |
945 | if (__x._M_root() != 0) |
946 | _M_root() = _M_copy(__x); |
947 | } |
948 | |
949 | #if __cplusplus201402L >= 201103L |
950 | _Rb_tree(const allocator_type& __a) |
951 | : _M_impl(_Node_allocator(__a)) |
952 | { } |
953 | |
954 | _Rb_tree(const _Rb_tree& __x, const allocator_type& __a) |
955 | : _M_impl(__x._M_impl._M_key_compare, _Node_allocator(__a)) |
956 | { |
957 | if (__x._M_root() != nullptr) |
958 | _M_root() = _M_copy(__x); |
959 | } |
960 | |
961 | _Rb_tree(_Rb_tree&&) = default; |
962 | |
963 | _Rb_tree(_Rb_tree&& __x, const allocator_type& __a) |
964 | : _Rb_tree(std::move(__x), _Node_allocator(__a)) |
965 | { } |
966 | |
967 | private: |
968 | _Rb_tree(_Rb_tree&& __x, _Node_allocator&& __a, true_type) |
969 | noexcept(is_nothrow_default_constructible<_Compare>::value) |
970 | : _M_impl(std::move(__x._M_impl), std::move(__a)) |
971 | { } |
972 | |
973 | _Rb_tree(_Rb_tree&& __x, _Node_allocator&& __a, false_type) |
974 | : _M_impl(__x._M_impl._M_key_compare, std::move(__a)) |
975 | { |
976 | if (__x._M_root() != nullptr) |
977 | _M_move_data(__x, false_type{}); |
978 | } |
979 | |
980 | public: |
981 | _Rb_tree(_Rb_tree&& __x, _Node_allocator&& __a) |
982 | noexcept( noexcept( |
983 | _Rb_tree(std::declval<_Rb_tree&&>(), std::declval<_Node_allocator&&>(), |
984 | std::declval<typename _Alloc_traits::is_always_equal>())) ) |
985 | : _Rb_tree(std::move(__x), std::move(__a), |
986 | typename _Alloc_traits::is_always_equal{}) |
987 | { } |
988 | #endif |
989 | |
990 | ~_Rb_tree() _GLIBCXX_NOEXCEPTnoexcept |
991 | { _M_erase(_M_begin()); } |
992 | |
993 | _Rb_tree& |
994 | operator=(const _Rb_tree& __x); |
995 | |
996 | // Accessors. |
997 | _Compare |
998 | key_comp() const |
999 | { return _M_impl._M_key_compare; } |
1000 | |
1001 | iterator |
1002 | begin() _GLIBCXX_NOEXCEPTnoexcept |
1003 | { return iterator(this->_M_impl._M_header._M_left); } |
1004 | |
1005 | const_iterator |
1006 | begin() const _GLIBCXX_NOEXCEPTnoexcept |
1007 | { return const_iterator(this->_M_impl._M_header._M_left); } |
1008 | |
1009 | iterator |
1010 | end() _GLIBCXX_NOEXCEPTnoexcept |
1011 | { return iterator(&this->_M_impl._M_header); } |
1012 | |
1013 | const_iterator |
1014 | end() const _GLIBCXX_NOEXCEPTnoexcept |
1015 | { return const_iterator(&this->_M_impl._M_header); } |
1016 | |
1017 | reverse_iterator |
1018 | rbegin() _GLIBCXX_NOEXCEPTnoexcept |
1019 | { return reverse_iterator(end()); } |
1020 | |
1021 | const_reverse_iterator |
1022 | rbegin() const _GLIBCXX_NOEXCEPTnoexcept |
1023 | { return const_reverse_iterator(end()); } |
1024 | |
1025 | reverse_iterator |
1026 | rend() _GLIBCXX_NOEXCEPTnoexcept |
1027 | { return reverse_iterator(begin()); } |
1028 | |
1029 | const_reverse_iterator |
1030 | rend() const _GLIBCXX_NOEXCEPTnoexcept |
1031 | { return const_reverse_iterator(begin()); } |
1032 | |
1033 | _GLIBCXX_NODISCARD bool |
1034 | empty() const _GLIBCXX_NOEXCEPTnoexcept |
1035 | { return _M_impl._M_node_count == 0; } |
1036 | |
1037 | size_type |
1038 | size() const _GLIBCXX_NOEXCEPTnoexcept |
1039 | { return _M_impl._M_node_count; } |
1040 | |
1041 | size_type |
1042 | max_size() const _GLIBCXX_NOEXCEPTnoexcept |
1043 | { return _Alloc_traits::max_size(_M_get_Node_allocator()); } |
1044 | |
1045 | void |
1046 | swap(_Rb_tree& __t) |
1047 | _GLIBCXX_NOEXCEPT_IF(__is_nothrow_swappable<_Compare>::value)noexcept(__is_nothrow_swappable<_Compare>::value); |
1048 | |
1049 | // Insert/erase. |
1050 | #if __cplusplus201402L >= 201103L |
1051 | template<typename _Arg> |
1052 | pair<iterator, bool> |
1053 | _M_insert_unique(_Arg&& __x); |
1054 | |
1055 | template<typename _Arg> |
1056 | iterator |
1057 | _M_insert_equal(_Arg&& __x); |
1058 | |
1059 | template<typename _Arg, typename _NodeGen> |
1060 | iterator |
1061 | _M_insert_unique_(const_iterator __pos, _Arg&& __x, _NodeGen&); |
1062 | |
1063 | template<typename _Arg> |
1064 | iterator |
1065 | _M_insert_unique_(const_iterator __pos, _Arg&& __x) |
1066 | { |
1067 | _Alloc_node __an(*this); |
1068 | return _M_insert_unique_(__pos, std::forward<_Arg>(__x), __an); |
1069 | } |
1070 | |
1071 | template<typename _Arg, typename _NodeGen> |
1072 | iterator |
1073 | _M_insert_equal_(const_iterator __pos, _Arg&& __x, _NodeGen&); |
1074 | |
1075 | template<typename _Arg> |
1076 | iterator |
1077 | _M_insert_equal_(const_iterator __pos, _Arg&& __x) |
1078 | { |
1079 | _Alloc_node __an(*this); |
1080 | return _M_insert_equal_(__pos, std::forward<_Arg>(__x), __an); |
1081 | } |
1082 | |
1083 | template<typename... _Args> |
1084 | pair<iterator, bool> |
1085 | _M_emplace_unique(_Args&&... __args); |
1086 | |
1087 | template<typename... _Args> |
1088 | iterator |
1089 | _M_emplace_equal(_Args&&... __args); |
1090 | |
1091 | template<typename... _Args> |
1092 | iterator |
1093 | _M_emplace_hint_unique(const_iterator __pos, _Args&&... __args); |
1094 | |
1095 | template<typename... _Args> |
1096 | iterator |
1097 | _M_emplace_hint_equal(const_iterator __pos, _Args&&... __args); |
1098 | |
1099 | template<typename _Iter> |
1100 | using __same_value_type |
1101 | = is_same<value_type, typename iterator_traits<_Iter>::value_type>; |
1102 | |
1103 | template<typename _InputIterator> |
1104 | __enable_if_t<__same_value_type<_InputIterator>::value> |
1105 | _M_insert_range_unique(_InputIterator __first, _InputIterator __last) |
1106 | { |
1107 | _Alloc_node __an(*this); |
1108 | for (; __first != __last; ++__first) |
1109 | _M_insert_unique_(end(), *__first, __an); |
1110 | } |
1111 | |
1112 | template<typename _InputIterator> |
1113 | __enable_if_t<!__same_value_type<_InputIterator>::value> |
1114 | _M_insert_range_unique(_InputIterator __first, _InputIterator __last) |
1115 | { |
1116 | for (; __first != __last; ++__first) |
1117 | _M_emplace_unique(*__first); |
1118 | } |
1119 | |
1120 | template<typename _InputIterator> |
1121 | __enable_if_t<__same_value_type<_InputIterator>::value> |
1122 | _M_insert_range_equal(_InputIterator __first, _InputIterator __last) |
1123 | { |
1124 | _Alloc_node __an(*this); |
1125 | for (; __first != __last; ++__first) |
1126 | _M_insert_equal_(end(), *__first, __an); |
1127 | } |
1128 | |
1129 | template<typename _InputIterator> |
1130 | __enable_if_t<!__same_value_type<_InputIterator>::value> |
1131 | _M_insert_range_equal(_InputIterator __first, _InputIterator __last) |
1132 | { |
1133 | _Alloc_node __an(*this); |
1134 | for (; __first != __last; ++__first) |
1135 | _M_emplace_equal(*__first); |
1136 | } |
1137 | #else |
1138 | pair<iterator, bool> |
1139 | _M_insert_unique(const value_type& __x); |
1140 | |
1141 | iterator |
1142 | _M_insert_equal(const value_type& __x); |
1143 | |
1144 | template<typename _NodeGen> |
1145 | iterator |
1146 | _M_insert_unique_(const_iterator __pos, const value_type& __x, |
1147 | _NodeGen&); |
1148 | |
1149 | iterator |
1150 | _M_insert_unique_(const_iterator __pos, const value_type& __x) |
1151 | { |
1152 | _Alloc_node __an(*this); |
1153 | return _M_insert_unique_(__pos, __x, __an); |
1154 | } |
1155 | |
1156 | template<typename _NodeGen> |
1157 | iterator |
1158 | _M_insert_equal_(const_iterator __pos, const value_type& __x, |
1159 | _NodeGen&); |
1160 | iterator |
1161 | _M_insert_equal_(const_iterator __pos, const value_type& __x) |
1162 | { |
1163 | _Alloc_node __an(*this); |
1164 | return _M_insert_equal_(__pos, __x, __an); |
1165 | } |
1166 | |
1167 | template<typename _InputIterator> |
1168 | void |
1169 | _M_insert_range_unique(_InputIterator __first, _InputIterator __last) |
1170 | { |
1171 | _Alloc_node __an(*this); |
1172 | for (; __first != __last; ++__first) |
1173 | _M_insert_unique_(end(), *__first, __an); |
1174 | } |
1175 | |
1176 | template<typename _InputIterator> |
1177 | void |
1178 | _M_insert_range_equal(_InputIterator __first, _InputIterator __last) |
1179 | { |
1180 | _Alloc_node __an(*this); |
1181 | for (; __first != __last; ++__first) |
1182 | _M_insert_equal_(end(), *__first, __an); |
1183 | } |
1184 | #endif |
1185 | |
1186 | private: |
1187 | void |
1188 | _M_erase_aux(const_iterator __position); |
1189 | |
1190 | void |
1191 | _M_erase_aux(const_iterator __first, const_iterator __last); |
1192 | |
1193 | public: |
1194 | #if __cplusplus201402L >= 201103L |
1195 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
1196 | // DR 130. Associative erase should return an iterator. |
//   | erase(const_iterator): removes the element at __position and returns an |
//   | iterator to the element that followed it. Asserts __position != end(). |
//   | NOTE(review): "_GLIBCXX_ABI_TAG_CXX11__attribute ((...))" looks like the macro |
//   | name fused with its expansion by the listing -- confirm against the header. |
1197 | _GLIBCXX_ABI_TAG_CXX11__attribute ((__abi_tag__ ("cxx11"))) |
1198 | iterator |
1199 | erase(const_iterator __position) |
1200 | { |
1201 | __glibcxx_assert(__position != end()); |
//   | The successor is computed on a copy before _M_erase_aux runs (presumably |
//   | because the node behind __position cannot be advanced from afterwards). |
1202 | const_iterator __result = __position; |
1203 | ++__result; |
1204 | _M_erase_aux(__position); |
1205 | return __result._M_const_cast(); |
1206 | } |
1207 | |
//   | erase(iterator): same steps as the const_iterator overload, but the result |
//   | is already a mutable iterator so no _M_const_cast() is needed. |
1208 | // LWG 2059. |
1209 | _GLIBCXX_ABI_TAG_CXX11__attribute ((__abi_tag__ ("cxx11"))) |
1210 | iterator |
1211 | erase(iterator __position) |
1212 | { |
1213 | __glibcxx_assert(__position != end()); |
1214 | iterator __result = __position; |
1215 | ++__result; |
1216 | _M_erase_aux(__position); |
1217 | return __result; |
1218 | } |
1219 | #else |
//   | Pre-C++11 branch: erases the element at __position and returns nothing. |
//   | Asserts __position != end() before delegating to _M_erase_aux. |
1220 | void |
1221 | erase(iterator __position) |
1222 | { |
1223 | __glibcxx_assert(__position != end()); |
1224 | _M_erase_aux(__position); |
1225 | } |
1226 | |
//   | const_iterator counterpart with the same assertion and delegation. |
1227 | void |
1228 | erase(const_iterator __position) |
1229 | { |
1230 | __glibcxx_assert(__position != end()); |
1231 | _M_erase_aux(__position); |
1232 | } |
1233 | #endif |
1234 | |
//   | Erase by key. Declared here, defined out of line. Returns a size_type |
//   | (presumably the number of elements removed -- confirm at the definition). |
1235 | size_type |
1236 | erase(const key_type& __x); |
1237 | |
1238 | #if __cplusplus201402L >= 201103L |
1239 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
1240 | // DR 130. Associative erase should return an iterator. |
//   | Range erase (C++11 branch): hands [__first, __last) to _M_erase_aux and |
//   | returns __last converted to a mutable iterator via _M_const_cast(). |
1241 | _GLIBCXX_ABI_TAG_CXX11__attribute ((__abi_tag__ ("cxx11"))) |
1242 | iterator |
1243 | erase(const_iterator __first, const_iterator __last) |
1244 | { |
1245 | _M_erase_aux(__first, __last); |
1246 | return __last._M_const_cast(); |
1247 | } |
1248 | #else |
//   | Range erase (pre-C++11 branch): both overloads forward [__first, __last) to |
//   | _M_erase_aux and return nothing. |
1249 | void |
1250 | erase(iterator __first, iterator __last) |
1251 | { _M_erase_aux(__first, __last); } |
1252 | |
1253 | void |
1254 | erase(const_iterator __first, const_iterator __last) |
1255 | { _M_erase_aux(__first, __last); } |
1256 | #endif |
1257 | |
//   | Empties the tree: passes _M_begin() to _M_erase, then calls _M_impl._M_reset(). |
//   | NOTE(review): _M_reset() is not shown here; presumably it restores the |
//   | empty-tree header state -- confirm at its definition. |
//   | NOTE(review): "_GLIBCXX_NOEXCEPTnoexcept" looks like the macro name fused with |
//   | its expansion by the listing. |
1258 | void |
1259 | clear() _GLIBCXX_NOEXCEPTnoexcept |
1260 | { |
1261 | _M_erase(_M_begin()); |
1262 | _M_impl._M_reset(); |
1263 | } |
1264 | |
1265 | // Set operations. |
//   | Key lookup, mutable flavour. Declared here, defined out of line. |
1266 | iterator |
1267 | find(const key_type& __k); |
1268 | |
//   | Key lookup, const flavour. Declared here, defined out of line. |
1269 | const_iterator |
1270 | find(const key_type& __k) const; |
1271 | |
//   | Returns a size_type for __k (presumably the number of matching elements). |
//   | Declared here, defined out of line. |
1272 | size_type |
1273 | count(const key_type& __k) const; |
1274 | |
//   | Public lower_bound: starts the _M_lower_bound search with _M_begin() as the |
//   | current node and _M_end() as the initial result. |
1275 | iterator |
1276 | lower_bound(const key_type& __k) |
1277 | { return _M_lower_bound(_M_begin(), _M_end(), __k); } |
1278 | |
//   | Const counterpart; same forwarding, returns a const_iterator. |
1279 | const_iterator |
1280 | lower_bound(const key_type& __k) const |
1281 | { return _M_lower_bound(_M_begin(), _M_end(), __k); } |
1282 | |
//   | Public upper_bound: starts the _M_upper_bound search with _M_begin() as the |
//   | current node and _M_end() as the initial result. |
1283 | iterator |
1284 | upper_bound(const key_type& __k) |
1285 | { return _M_upper_bound(_M_begin(), _M_end(), __k); } |
1286 | |
//   | Const counterpart; same forwarding, returns a const_iterator. |
1287 | const_iterator |
1288 | upper_bound(const key_type& __k) const |
1289 | { return _M_upper_bound(_M_begin(), _M_end(), __k); } |
1290 | |
//   | equal_range for __k, returning a pair of mutable iterators. Declared here, |
//   | defined out of line. |
1291 | pair<iterator, iterator> |
1292 | equal_range(const key_type& __k); |
1293 | |
//   | Const counterpart returning a pair of const_iterators. |
1294 | pair<const_iterator, const_iterator> |
1295 | equal_range(const key_type& __k) const; |
1296 | |
1297 | #if __cplusplus201402L >= 201402L |
//   | Heterogeneous find, mutable flavour. _Req constrains the template through |
//   | __has_is_transparent_t<_Compare, _Kt>. The search is done by the const |
//   | overload (called through a pointer-to-const this) and the result is turned |
//   | back into a mutable iterator with _M_const_cast(). |
1298 | template<typename _Kt, |
1299 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1300 | iterator |
1301 | _M_find_tr(const _Kt& __k) |
1302 | { |
1303 | const _Rb_tree* __const_this = this; |
1304 | return __const_this->_M_find_tr(__k)._M_const_cast(); |
1305 | } |
1306 | |
//   | Heterogeneous find, const flavour: takes the lower bound for __k and returns |
//   | end() instead when __k compares less than the key found there (i.e. the |
//   | lower bound is a strictly greater element, not a match). |
1307 | template<typename _Kt, |
1308 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1309 | const_iterator |
1310 | _M_find_tr(const _Kt& __k) const |
1311 | { |
1312 | auto __j = _M_lower_bound_tr(__k); |
1313 | if (__j != end() && _M_impl._M_key_compare(__k, _S_key(__j._M_node))) |
1314 | __j = end(); |
1315 | return __j; |
1316 | } |
1317 | |
//   | Heterogeneous count: the std::distance between the two iterators returned by |
//   | _M_equal_range_tr(__k). |
1318 | template<typename _Kt, |
1319 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1320 | size_type |
1321 | _M_count_tr(const _Kt& __k) const |
1322 | { |
1323 | auto __p = _M_equal_range_tr(__k); |
1324 | return std::distance(__p.first, __p.second); |
1325 | } |
1326 | |
//   | Heterogeneous lower bound, mutable flavour: delegates to the const overload |
//   | through a pointer-to-const this and casts the result back with |
//   | _M_const_cast(). |
1327 | template<typename _Kt, |
1328 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1329 | iterator |
1330 | _M_lower_bound_tr(const _Kt& __k) |
1331 | { |
1332 | const _Rb_tree* __const_this = this; |
1333 | return __const_this->_M_lower_bound_tr(__k)._M_const_cast(); |
1334 | } |
1335 | |
//   | Heterogeneous lower bound, const flavour. Descends from _M_begin() with __y |
//   | initialised to _M_end(). Whenever the current key is NOT less than __k the |
//   | node is recorded in __y and the walk goes left; otherwise it goes right. |
//   | Returns an iterator to the last recorded node, or to _M_end() if none was. |
1336 | template<typename _Kt, |
1337 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1338 | const_iterator |
1339 | _M_lower_bound_tr(const _Kt& __k) const |
1340 | { |
1341 | auto __x = _M_begin(); |
1342 | auto __y = _M_end(); |
1343 | while (__x != 0) |
1344 | if (!_M_impl._M_key_compare(_S_key(__x), __k)) |
1345 | { |
1346 | __y = __x; |
1347 | __x = _S_left(__x); |
1348 | } |
1349 | else |
1350 | __x = _S_right(__x); |
1351 | return const_iterator(__y); |
1352 | } |
1353 | |
//   | Heterogeneous upper bound, mutable flavour: delegates to the const overload |
//   | through a pointer-to-const this and casts the result back with |
//   | _M_const_cast(). |
1354 | template<typename _Kt, |
1355 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1356 | iterator |
1357 | _M_upper_bound_tr(const _Kt& __k) |
1358 | { |
1359 | const _Rb_tree* __const_this = this; |
1360 | return __const_this->_M_upper_bound_tr(__k)._M_const_cast(); |
1361 | } |
1362 | |
//   | Heterogeneous upper bound, const flavour. Descends from _M_begin() with __y |
//   | initialised to _M_end(). Whenever __k is less than the current key the node |
//   | is recorded in __y and the walk goes left; otherwise it goes right. |
//   | Returns an iterator to the last recorded node, or to _M_end() if none was. |
1363 | template<typename _Kt, |
1364 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1365 | const_iterator |
1366 | _M_upper_bound_tr(const _Kt& __k) const |
1367 | { |
1368 | auto __x = _M_begin(); |
1369 | auto __y = _M_end(); |
1370 | while (__x != 0) |
1371 | if (_M_impl._M_key_compare(__k, _S_key(__x))) |
1372 | { |
1373 | __y = __x; |
1374 | __x = _S_left(__x); |
1375 | } |
1376 | else |
1377 | __x = _S_right(__x); |
1378 | return const_iterator(__y); |
1379 | } |
1380 | |
//   | Heterogeneous equal_range, mutable flavour: calls the const overload through |
//   | a pointer-to-const this and converts both returned iterators with |
//   | _M_const_cast(). |
1381 | template<typename _Kt, |
1382 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1383 | pair<iterator, iterator> |
1384 | _M_equal_range_tr(const _Kt& __k) |
1385 | { |
1386 | const _Rb_tree* __const_this = this; |
1387 | auto __ret = __const_this->_M_equal_range_tr(__k); |
1388 | return { __ret.first._M_const_cast(), __ret.second._M_const_cast() }; |
1389 | } |
1390 | |
//   | Heterogeneous equal_range, const flavour. The lower end comes from |
//   | _M_lower_bound_tr; the upper end is found by stepping forward one element at |
//   | a time while __k does not compare less than the element's key. |
1391 | template<typename _Kt, |
1392 | typename _Req = __has_is_transparent_t<_Compare, _Kt>> |
1393 | pair<const_iterator, const_iterator> |
1394 | _M_equal_range_tr(const _Kt& __k) const |
1395 | { |
1396 | auto __low = _M_lower_bound_tr(__k); |
1397 | auto __high = __low; |
1398 | auto& __cmp = _M_impl._M_key_compare; |
1399 | while (__high != end() && !__cmp(__k, _S_key(__high._M_node))) |
1400 | ++__high; |
1401 | return { __low, __high }; |
1402 | } |
1403 | #endif |
1404 | |
1405 | // Debugging. |
//   | Declared here, defined out of line. Returns bool; presumably true when the |
//   | tree's invariants hold -- confirm at the definition. |
1406 | bool |
1407 | __rb_verify() const; |
1408 | |
1409 | #if __cplusplus201402L >= 201103L |
//   | Move assignment. Declared here, defined out of line. It is noexcept only |
//   | when _Alloc_traits::_S_nothrow_move() holds and _Compare is nothrow |
//   | move-assignable. |
1410 | _Rb_tree& |
1411 | operator=(_Rb_tree&&) |
1412 | noexcept(_Alloc_traits::_S_nothrow_move() |
1413 | && is_nothrow_move_assignable<_Compare>::value); |
1414 | |
//   | Assign from an iterator range, unique-key flavour. Declared here, defined |
//   | out of line. |
1415 | template<typename _Iterator> |
1416 | void |
1417 | _M_assign_unique(_Iterator, _Iterator); |
1418 | |
//   | Assign from an iterator range, equivalent-keys flavour. Declared here, |
//   | defined out of line. |
1419 | template<typename _Iterator> |
1420 | void |
1421 | _M_assign_equal(_Iterator, _Iterator); |
1422 | |
1423 | private: |
//   | The true_type / false_type second parameter of each helper below is a tag |
//   | selecting the overload; per the original comments it distinguishes "equal |
//   | allocator" from "possibly non-equal allocator". |
1424 | // Move elements from container with equal allocator. |
//   | Implemented inline by forwarding to _M_impl._M_move_data(__x._M_impl). |
1425 | void |
1426 | _M_move_data(_Rb_tree& __x, true_type) |
1427 | { _M_impl._M_move_data(__x._M_impl); } |
1428 | |
1429 | // Move elements from container with possibly non-equal allocator, |
1430 | // which might result in a copy not a move. |
1431 | void |
1432 | _M_move_data(_Rb_tree&, false_type); |
1433 | |
1434 | // Move assignment from container with equal allocator. |
1435 | void |
1436 | _M_move_assign(_Rb_tree&, true_type); |
1437 | |
1438 | // Move assignment from container with possibly non-equal allocator, |
1439 | // which might result in a copy not a move. |
1440 | void |
1441 | _M_move_assign(_Rb_tree&, false_type); |
1442 | #endif |
1443 | |
1444 | #if __cplusplus201402L > 201402L |
1445 | public: |
1446 | /// Re-insert an extracted node. |
//   | Unique-key flavour; reports the outcome in an insert_return_type. |
//   |  - Empty handle: only __ret.position is set (to end()). |
//   |  - _M_get_insert_unique_pos yields a non-null .second: the node is linked in |
//   |    with _M_insert_node, the handle's pointer is nulled so it no longer refers |
//   |    to the node, and inserted is true. |
//   |  - Otherwise: the handle is moved into __ret.node, position refers to |
//   |    __res.first, and inserted is false. |
1447 | insert_return_type |
1448 | _M_reinsert_node_unique(node_type&& __nh) |
1449 | { |
1450 | insert_return_type __ret; |
1451 | if (__nh.empty()) |
1452 | __ret.position = end(); |
1453 | else |
1454 | { |
//   | The handle's allocator must compare equal to this tree's node allocator. |
1455 | __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc); |
1456 | |
1457 | auto __res = _M_get_insert_unique_pos(__nh._M_key()); |
1458 | if (__res.second) |
1459 | { |
1460 | __ret.position |
1461 | = _M_insert_node(__res.first, __res.second, __nh._M_ptr); |
1462 | __nh._M_ptr = nullptr; |
1463 | __ret.inserted = true; |
1464 | } |
1465 | else |
1466 | { |
1467 | __ret.node = std::move(__nh); |
1468 | __ret.position = iterator(__res.first); |
1469 | __ret.inserted = false; |
1470 | } |
1471 | } |
1472 | return __ret; |
1473 | } |
1474 | |
1475 | /// Re-insert an extracted node. |
//   | Equivalent-keys flavour. An empty handle yields end(). Otherwise the node is |
//   | always linked in: via _M_insert_node when _M_get_insert_equal_pos returns a |
//   | non-null .second, else via _M_insert_equal_lower_node. The handle's pointer |
//   | is nulled on both paths. |
1476 | iterator |
1477 | _M_reinsert_node_equal(node_type&& __nh) |
1478 | { |
1479 | iterator __ret; |
1480 | if (__nh.empty()) |
1481 | __ret = end(); |
1482 | else |
1483 | { |
//   | The handle's allocator must compare equal to this tree's node allocator. |
1484 | __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc); |
1485 | auto __res = _M_get_insert_equal_pos(__nh._M_key()); |
1486 | if (__res.second) |
1487 | __ret = _M_insert_node(__res.first, __res.second, __nh._M_ptr); |
1488 | else |
1489 | __ret = _M_insert_equal_lower_node(__nh._M_ptr); |
1490 | __nh._M_ptr = nullptr; |
1491 | } |
1492 | return __ret; |
1493 | } |
1494 | |
1495 | /// Re-insert an extracted node. |
//   | Hinted, unique-key flavour. An empty handle yields end(). When |
//   | _M_get_insert_hint_unique_pos returns a non-null .second the node is linked |
//   | in and the handle's pointer is nulled. Otherwise the handle is left |
//   | untouched and the returned iterator refers to __res.first. |
1496 | iterator |
1497 | _M_reinsert_node_hint_unique(const_iterator __hint, node_type&& __nh) |
1498 | { |
1499 | iterator __ret; |
1500 | if (__nh.empty()) |
1501 | __ret = end(); |
1502 | else |
1503 | { |
//   | The handle's allocator must compare equal to this tree's node allocator. |
1504 | __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc); |
1505 | auto __res = _M_get_insert_hint_unique_pos(__hint, __nh._M_key()); |
1506 | if (__res.second) |
1507 | { |
1508 | __ret = _M_insert_node(__res.first, __res.second, __nh._M_ptr); |
1509 | __nh._M_ptr = nullptr; |
1510 | } |
1511 | else |
1512 | __ret = iterator(__res.first); |
1513 | } |
1514 | return __ret; |
1515 | } |
1516 | |
1517 | /// Re-insert an extracted node. |
//   | Hinted, equivalent-keys flavour. An empty handle yields end(). Otherwise the |
//   | node is always linked in: via _M_insert_node when |
//   | _M_get_insert_hint_equal_pos returns a non-null .second, else via |
//   | _M_insert_equal_lower_node. The handle's pointer is nulled on both paths. |
1518 | iterator |
1519 | _M_reinsert_node_hint_equal(const_iterator __hint, node_type&& __nh) |
1520 | { |
1521 | iterator __ret; |
1522 | if (__nh.empty()) |
1523 | __ret = end(); |
1524 | else |
1525 | { |
//   | The handle's allocator must compare equal to this tree's node allocator. |
1526 | __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc); |
1527 | auto __res = _M_get_insert_hint_equal_pos(__hint, __nh._M_key()); |
1528 | if (__res.second) |
1529 | __ret = _M_insert_node(__res.first, __res.second, __nh._M_ptr); |
1530 | else |
1531 | __ret = _M_insert_equal_lower_node(__nh._M_ptr); |
1532 | __nh._M_ptr = nullptr; |
1533 | } |
1534 | return __ret; |
1535 | } |
1536 | |
1537 | /// Extract a node. |
//   | Position flavour: passes the node behind __pos and the tree header to |
//   | _Rb_tree_rebalance_for_erase, decrements the node count, and returns a node |
//   | handle built from the returned pointer and this tree's node allocator. |
//   | No check that __pos != end() is performed here. |
1538 | node_type |
1539 | extract(const_iterator __pos) |
1540 | { |
1541 | auto __ptr = _Rb_tree_rebalance_for_erase( |
1542 | __pos._M_const_cast()._M_node, _M_impl._M_header); |
1543 | --_M_impl._M_node_count; |
1544 | return { static_cast<_Link_type>(__ptr), _M_get_Node_allocator() }; |
1545 | } |
1546 | |
1547 | /// Extract a node. |
//   | Key flavour: looks __k up with find(); if found, extracts that element via |
//   | the position overload. If not found the default-constructed handle is |
//   | returned unchanged. |
1548 | node_type |
1549 | extract(const key_type& __k) |
1550 | { |
1551 | node_type __nh; |
1552 | auto __pos = find(__k); |
1553 | if (__pos != end()) |
1554 | __nh = extract(const_iterator(__pos)); |
1555 | return __nh; |
1556 | } |
1557 | |
//   | A tree type identical to this one except for its comparison type; used as |
//   | the source-parameter type of the _M_merge_* members. |
1558 | template<typename _Compare2> |
1559 | using _Compatible_tree |
1560 | = _Rb_tree<_Key, _Val, _KeyOfValue, _Compare2, _Alloc>; |
1561 | |
//   | Befriends every specialisation of _Rb_tree_merge_helper. |
1562 | template<typename, typename> |
1563 | friend class _Rb_tree_merge_helper; |
1564 | |
1565 | /// Merge from a compatible container into one with unique keys. |
//   | Walks __src once. For each element, _M_get_insert_unique_pos is asked where |
//   | its key would go in this tree. When it returns a non-null .second the node |
//   | is unlinked from __src (rebalance + count decrement on __src's impl, reached |
//   | through _Rb_tree_merge_helper) and linked into this tree. Otherwise the |
//   | element is left in __src. |
1566 | template<typename _Compare2> |
1567 | void |
1568 | _M_merge_unique(_Compatible_tree<_Compare2>& __src) noexcept |
1569 | { |
1570 | using _Merge_helper = _Rb_tree_merge_helper<_Rb_tree, _Compare2>; |
1571 | for (auto __i = __src.begin(), __end = __src.end(); __i != __end;) |
1572 | { |
//   | The loop iterator is advanced before the node behind __pos may be unlinked. |
1573 | auto __pos = __i++; |
1574 | auto __res = _M_get_insert_unique_pos(_KeyOfValue()(*__pos)); |
1575 | if (__res.second) |
1576 | { |
1577 | auto& __src_impl = _Merge_helper::_S_get_impl(__src); |
1578 | auto __ptr = _Rb_tree_rebalance_for_erase( |
1579 | __pos._M_node, __src_impl._M_header); |
1580 | --__src_impl._M_node_count; |
1581 | _M_insert_node(__res.first, __res.second, |
1582 | static_cast<_Link_type>(__ptr)); |
1583 | } |
1584 | } |
1585 | } |
1586 | |
1587 | /// Merge from a compatible container into one with equivalent keys. |
//   | Walks __src once. For each element, _M_get_insert_equal_pos supplies the |
//   | insertion point in this tree. When its .second is non-null the node is |
//   | unlinked from __src (rebalance + count decrement on __src's impl, reached |
//   | through _Rb_tree_merge_helper) and linked into this tree. |
1588 | template<typename _Compare2> |
1589 | void |
1590 | _M_merge_equal(_Compatible_tree<_Compare2>& __src) noexcept |
1591 | { |
1592 | using _Merge_helper = _Rb_tree_merge_helper<_Rb_tree, _Compare2>; |
1593 | for (auto __i = __src.begin(), __end = __src.end(); __i != __end;) |
1594 | { |
//   | The loop iterator is advanced before the node behind __pos may be unlinked. |
1595 | auto __pos = __i++; |
1596 | auto __res = _M_get_insert_equal_pos(_KeyOfValue()(*__pos)); |
1597 | if (__res.second) |
1598 | { |
1599 | auto& __src_impl = _Merge_helper::_S_get_impl(__src); |
1600 | auto __ptr = _Rb_tree_rebalance_for_erase( |
1601 | __pos._M_node, __src_impl._M_header); |
1602 | --__src_impl._M_node_count; |
1603 | _M_insert_node(__res.first, __res.second, |
1604 | static_cast<_Link_type>(__ptr)); |
1605 | } |
1606 | } |
1607 | } |
1608 | #endif // C++17 |
1609 | |
//   | Two trees are equal when their sizes match and std::equal finds the element |
//   | sequences pairwise equal. The size test runs first, so std::equal is only |
//   | reached for equally long sequences. |
1610 | friend bool |
1611 | operator==(const _Rb_tree& __x, const _Rb_tree& __y) |
1612 | { |
1613 | return __x.size() == __y.size() |
1614 | && std::equal(__x.begin(), __x.end(), __y.begin()); |
1615 | } |
1616 | |
1617 | #if __cpp_lib_three_way_comparison |
//   | Three-way comparison. When __detail::__synth3way_t<_Val> is a valid type the |
//   | two element sequences are compared with |
//   | std::lexicographical_compare_three_way using __detail::__synth3way. |
//   | There is no return statement on the other branch of the if constexpr. |
1618 | friend auto |
1619 | operator<=>(const _Rb_tree& __x, const _Rb_tree& __y) |
1620 | { |
1621 | if constexpr (requires { typename __detail::__synth3way_t<_Val>; }) |
1622 | return std::lexicographical_compare_three_way(__x.begin(), __x.end(), |
1623 | __y.begin(), __y.end(), |
1624 | __detail::__synth3way); |
1625 | } |
1626 | #else |
//   | Less-than: std::lexicographical_compare over the two element sequences. |
1627 | friend bool |
1628 | operator<(const _Rb_tree& __x, const _Rb_tree& __y) |
1629 | { |
1630 | return std::lexicographical_compare(__x.begin(), __x.end(), |
1631 | __y.begin(), __y.end()); |
1632 | } |
1633 | |
//   | Remaining relational operators, each marked deprecated and each expressed |
//   | through operator== or operator<. |
//   | NOTE(review): "_GLIBCXX_DEPRECATED__attribute__ ((...))" looks like the macro |
//   | name fused with its expansion by the listing. |
1634 | friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__)) |
1635 | operator!=(const _Rb_tree& __x, const _Rb_tree& __y) |
1636 | { return !(__x == __y); } |
1637 | |
1638 | friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__)) |
1639 | operator>(const _Rb_tree& __x, const _Rb_tree& __y) |
1640 | { return __y < __x; } |
1641 | |
1642 | friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__)) |
1643 | operator<=(const _Rb_tree& __x, const _Rb_tree& __y) |
1644 | { return !(__y < __x); } |
1645 | |
1646 | friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__)) |
1647 | operator>=(const _Rb_tree& __x, const _Rb_tree& __y) |
1648 | { return !(__x < __y); } |
1649 | #endif |
1650 | }; |
1651 | |
//   | Non-member swap for _Rb_tree: forwards to the member function __x.swap(__y). |
1652 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1653 | typename _Compare, typename _Alloc> |
1654 | inline void |
1655 | swap(_Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& __x, |
1656 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& __y) |
1657 | { __x.swap(__y); } |
1658 | |
1659 | #if __cplusplus201402L >= 201103L |
//   | _M_move_data, false_type overload. If the two node allocators compare equal |
//   | at run time it falls back to the true_type overload. Otherwise it rebuilds |
//   | __x's structure in this tree with _M_copy, creating each node through an |
//   | _Alloc_node from std::move_if_noexcept of the source value. |
1660 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1661 | typename _Compare, typename _Alloc> |
1662 | void |
1663 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1664 | _M_move_data(_Rb_tree& __x, false_type) |
1665 | { |
1666 | if (_M_get_Node_allocator() == __x._M_get_Node_allocator()) |
1667 | _M_move_data(__x, true_type()); |
1668 | else |
1669 | { |
1670 | _Alloc_node __an(*this); |
//   | Node generator handed to _M_copy: it receives each source value as const, |
//   | strips the const, and passes move_if_noexcept of it to the allocating functor. |
1671 | auto __lbd = |
1672 | [&__an](const value_type& __cval) |
1673 | { |
1674 | auto& __val = const_cast<value_type&>(__cval); |
1675 | return __an(std::move_if_noexcept(__val)); |
1676 | }; |
1677 | _M_root() = _M_copy(__x, __lbd); |
1678 | } |
1679 | } |
1680 | |
//   | _M_move_assign, true_type overload: clears this tree, takes over __x's data |
//   | via _M_move_data(true_type) when __x has a root, then calls |
//   | std::__alloc_on_move on the two node allocators. |
1681 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1682 | typename _Compare, typename _Alloc> |
1683 | inline void |
1684 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1685 | _M_move_assign(_Rb_tree& __x, true_type) |
1686 | { |
1687 | clear(); |
1688 | if (__x._M_root() != nullptr) |
1689 | _M_move_data(__x, true_type()); |
1690 | std::__alloc_on_move(_M_get_Node_allocator(), |
1691 | __x._M_get_Node_allocator()); |
1692 | } |
1693 | |
//   | _M_move_assign, false_type overload. If the node allocators compare equal at |
//   | run time it takes the true_type path. Otherwise it rebuilds __x's structure |
//   | here with _M_copy, producing each node through a _Reuse_or_alloc_node from |
//   | std::move of the source value, and finally clears __x. |
1694 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1695 | typename _Compare, typename _Alloc> |
1696 | void |
1697 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1698 | _M_move_assign(_Rb_tree& __x, false_type) |
1699 | { |
1700 | if (_M_get_Node_allocator() == __x._M_get_Node_allocator()) |
1701 | return _M_move_assign(__x, true_type{}); |
1702 | |
1703 | // Try to move each node reusing existing nodes and copying __x nodes |
1704 | // structure. |
//   | __roan is constructed from *this before _M_reset() runs (presumably so it |
//   | can capture the existing nodes for reuse -- confirm in _Reuse_or_alloc_node). |
1705 | _Reuse_or_alloc_node __roan(*this); |
1706 | _M_impl._M_reset(); |
1707 | if (__x._M_root() != nullptr) |
1708 | { |
1709 | auto __lbd = |
1710 | [&__roan](const value_type& __cval) |
1711 | { |
1712 | auto& __val = const_cast<value_type&>(__cval); |
1713 | return __roan(std::move(__val)); |
1714 | }; |
1715 | _M_root() = _M_copy(__x, __lbd); |
1716 | __x.clear(); |
1717 | } |
1718 | } |
1719 | |
//   | Move assignment operator: move-assigns the key comparator first, then |
//   | dispatches to _M_move_assign with a bool-constant tag computed from |
//   | _Alloc_traits::_S_nothrow_move(). Returns *this. |
1720 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1721 | typename _Compare, typename _Alloc> |
1722 | inline _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& |
1723 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1724 | operator=(_Rb_tree&& __x) |
1725 | noexcept(_Alloc_traits::_S_nothrow_move() |
1726 | && is_nothrow_move_assignable<_Compare>::value) |
1727 | { |
1728 | _M_impl._M_key_compare = std::move(__x._M_impl._M_key_compare); |
1729 | _M_move_assign(__x, __bool_constant<_Alloc_traits::_S_nothrow_move()>()); |
1730 | return *this; |
1731 | } |
1732 | |
//   | Replaces the tree's contents with [__first, __last), unique-key flavour. |
//   | A _Reuse_or_alloc_node is built from *this before the impl is reset; each |
//   | element is then inserted via _M_insert_unique_ with end() as the hint and |
//   | that functor as the node generator. |
1733 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1734 | typename _Compare, typename _Alloc> |
1735 | template<typename _Iterator> |
1736 | void |
1737 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1738 | _M_assign_unique(_Iterator __first, _Iterator __last) |
1739 | { |
1740 | _Reuse_or_alloc_node __roan(*this); |
1741 | _M_impl._M_reset(); |
1742 | for (; __first != __last; ++__first) |
1743 | _M_insert_unique_(end(), *__first, __roan); |
1744 | } |
1745 | |
//   | Replaces the tree's contents with [__first, __last), equivalent-keys |
//   | flavour. A _Reuse_or_alloc_node is built from *this before the impl is |
//   | reset; each element is then inserted via _M_insert_equal_ with end() as the |
//   | hint and that functor as the node generator. |
1746 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1747 | typename _Compare, typename _Alloc> |
1748 | template<typename _Iterator> |
1749 | void |
1750 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1751 | _M_assign_equal(_Iterator __first, _Iterator __last) |
1752 | { |
1753 | _Reuse_or_alloc_node __roan(*this); |
1754 | _M_impl._M_reset(); |
1755 | for (; __first != __last; ++__first) |
1756 | _M_insert_equal_(end(), *__first, __roan); |
1757 | } |
1758 | #endif |
1759 | |
//   | Copy assignment operator. Self-assignment is a no-op. Otherwise: |
//   |  1. (C++11 branch) if the allocator propagates on copy assignment and the two |
//   |     allocators are not always-equal and actually differ, the tree is cleared |
//   |     and the allocator is copied over; |
//   |  2. a _Reuse_or_alloc_node is built from *this, the impl is reset, and the |
//   |     comparator is copied from __x; |
//   |  3. if __x is non-empty its structure is copied with _M_copy. |
//   | Returns *this. |
1760 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1761 | typename _Compare, typename _Alloc> |
1762 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& |
1763 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1764 | operator=(const _Rb_tree& __x) |
1765 | { |
1766 | if (this != &__x) |
1767 | { |
1768 | // Note that _Key may be a constant type. |
1769 | #if __cplusplus201402L >= 201103L |
1770 | if (_Alloc_traits::_S_propagate_on_copy_assign()) |
1771 | { |
1772 | auto& __this_alloc = this->_M_get_Node_allocator(); |
1773 | auto& __that_alloc = __x._M_get_Node_allocator(); |
1774 | if (!_Alloc_traits::_S_always_equal() |
1775 | && __this_alloc != __that_alloc) |
1776 | { |
1777 | // Replacement allocator cannot free existing storage, we need |
1778 | // to erase nodes first. |
1779 | clear(); |
1780 | std::__alloc_on_copy(__this_alloc, __that_alloc); |
1781 | } |
1782 | } |
1783 | #endif |
1784 | |
1785 | _Reuse_or_alloc_node __roan(*this); |
1786 | _M_impl._M_reset(); |
1787 | _M_impl._M_key_compare = __x._M_impl._M_key_compare; |
1788 | if (__x._M_root() != 0) |
1789 | _M_root() = _M_copy(__x, __roan); |
1790 | } |
1791 | |
1792 | return *this; |
1793 | } |
1794 | |
//   | Creates a node for __v with __node_gen and links it under parent __p. |
//   | The node goes on the left when __x is non-null, when __p is the header |
//   | (_M_end()), or when the key of __v compares less than the key of __p; |
//   | otherwise on the right. Linking is done by _Rb_tree_insert_and_rebalance, |
//   | the node count is incremented, and an iterator to the new node is returned. |
//   | NOTE(review): "_GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v)" looks like |
//   | the macro call fused with its expansion by the listing. |
1795 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1796 | typename _Compare, typename _Alloc> |
1797 | #if __cplusplus201402L >= 201103L |
1798 | template<typename _Arg, typename _NodeGen> |
1799 | #else |
1800 | template<typename _NodeGen> |
1801 | #endif |
1802 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
1803 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1804 | _M_insert_(_Base_ptr __x, _Base_ptr __p, |
1805 | #if __cplusplus201402L >= 201103L |
1806 | _Arg&& __v, |
1807 | #else |
1808 | const _Val& __v, |
1809 | #endif |
1810 | _NodeGen& __node_gen) |
1811 | { |
//   | The side is decided before the node is created, while __v is still intact. |
1812 | bool __insert_left = (__x != 0 || __p == _M_end() |
1813 | || _M_impl._M_key_compare(_KeyOfValue()(__v), |
1814 | _S_key(__p))); |
1815 | |
1816 | _Link_type __z = __node_gen(_GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v)); |
1817 | |
1818 | _Rb_tree_insert_and_rebalance(__insert_left, __z, __p, |
1819 | this->_M_impl._M_header); |
1820 | ++_M_impl._M_node_count; |
1821 | return iterator(__z); |
1822 | } |
1823 | |
//   | Creates a node for __v with _M_create_node and links it under parent __p. |
//   | The node goes on the left when __p is the header (_M_end()) or when the key |
//   | of __p does NOT compare less than the key of __v; otherwise on the right. |
//   | The node count is incremented and an iterator to the new node is returned. |
1824 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1825 | typename _Compare, typename _Alloc> |
1826 | #if __cplusplus201402L >= 201103L |
1827 | template<typename _Arg> |
1828 | #endif |
1829 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
1830 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1831 | #if __cplusplus201402L >= 201103L |
1832 | _M_insert_lower(_Base_ptr __p, _Arg&& __v) |
1833 | #else |
1834 | _M_insert_lower(_Base_ptr __p, const _Val& __v) |
1835 | #endif |
1836 | { |
1837 | bool __insert_left = (__p == _M_end() |
1838 | || !_M_impl._M_key_compare(_S_key(__p), |
1839 | _KeyOfValue()(__v))); |
1840 | |
1841 | _Link_type __z = _M_create_node(_GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v)); |
1842 | |
1843 | _Rb_tree_insert_and_rebalance(__insert_left, __z, __p, |
1844 | this->_M_impl._M_header); |
1845 | ++_M_impl._M_node_count; |
1846 | return iterator(__z); |
1847 | } |
1848 | |
//   | Finds a parent for __v by descending from _M_begin(): at each node the walk |
//   | goes left when the node's key does NOT compare less than the key of __v, |
//   | and right otherwise. The last node visited (or _M_end() for an empty tree) |
//   | is passed to _M_insert_lower together with __v. |
1849 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1850 | typename _Compare, typename _Alloc> |
1851 | #if __cplusplus201402L >= 201103L |
1852 | template<typename _Arg> |
1853 | #endif |
1854 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
1855 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1856 | #if __cplusplus201402L >= 201103L |
1857 | _M_insert_equal_lower(_Arg&& __v) |
1858 | #else |
1859 | _M_insert_equal_lower(const _Val& __v) |
1860 | #endif |
1861 | { |
1862 | _Link_type __x = _M_begin(); |
1863 | _Base_ptr __y = _M_end(); |
1864 | while (__x != 0) |
1865 | { |
1866 | __y = __x; |
1867 | __x = !_M_impl._M_key_compare(_S_key(__x), _KeyOfValue()(__v)) ? |
1868 | _S_left(__x) : _S_right(__x); |
1869 | } |
1870 | return _M_insert_lower(__y, _GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v)); |
1871 | } |
1872 | |
//   | Structural copy of the subtree rooted at __x, attaching the copy's root to |
//   | parent __p. Nodes are produced by _M_clone_node with __node_gen. Right |
//   | subtrees are copied by recursion; the left spine is copied by the while |
//   | loop. Returns the root of the copy. |
//   | NOTE(review): "__tryif (true)" and "__catch(...)if (false)" look like the |
//   | __try/__catch macros fused with their expansions by the listing; the handler |
//   | body erases the partial copy rooted at __top and rethrows. |
1873 | template<typename _Key, typename _Val, typename _KoV, |
1874 | typename _Compare, typename _Alloc> |
1875 | template<typename _NodeGen> |
1876 | typename _Rb_tree<_Key, _Val, _KoV, _Compare, _Alloc>::_Link_type |
1877 | _Rb_tree<_Key, _Val, _KoV, _Compare, _Alloc>:: |
1878 | _M_copy(_Const_Link_type __x, _Base_ptr __p, _NodeGen& __node_gen) |
1879 | { |
1880 | // Structural copy. __x and __p must be non-null. |
1881 | _Link_type __top = _M_clone_node(__x, __node_gen); |
1882 | __top->_M_parent = __p; |
1883 | |
1884 | __tryif (true) |
1885 | { |
1886 | if (__x->_M_right) |
1887 | __top->_M_right = _M_copy(_S_right(__x), __top, __node_gen); |
//   | From here __p tracks the most recently cloned node on the left spine. |
1888 | __p = __top; |
1889 | __x = _S_left(__x); |
1890 | |
1891 | while (__x != 0) |
1892 | { |
1893 | _Link_type __y = _M_clone_node(__x, __node_gen); |
1894 | __p->_M_left = __y; |
1895 | __y->_M_parent = __p; |
1896 | if (__x->_M_right) |
1897 | __y->_M_right = _M_copy(_S_right(__x), __y, __node_gen); |
1898 | __p = __y; |
1899 | __x = _S_left(__x); |
1900 | } |
1901 | } |
1902 | __catch(...)if (false) |
1903 | { |
1904 | _M_erase(__top); |
1905 | __throw_exception_again; |
1906 | } |
1907 | return __top; |
1908 | } |
1909 | |
//   | Drops every node of the subtree rooted at __x without rebalancing. The right |
//   | subtree is handled by recursion; the left child is followed by the loop |
//   | after the current node has been passed to _M_drop_node. A null __x is a |
//   | no-op. |
1910 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1911 | typename _Compare, typename _Alloc> |
1912 | void |
1913 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1914 | _M_erase(_Link_type __x) |
1915 | { |
1916 | // Erase without rebalancing. |
1917 | while (__x != 0) |
1918 | { |
1919 | _M_erase(_S_right(__x)); |
//   | The left child is read before the node is dropped. |
1920 | _Link_type __y = _S_left(__x); |
1921 | _M_drop_node(__x); |
1922 | __x = __y; |
1923 | } |
1924 | } |
1925 | |
//   | Lower-bound search starting at node __x with __y as the initial result. |
//   | Whenever the current key is NOT less than __k the node is recorded in __y |
//   | and the walk goes left; otherwise it goes right. Returns an iterator to the |
//   | final __y (the caller-supplied __y if no node qualified). |
1926 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1927 | typename _Compare, typename _Alloc> |
1928 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
1929 | _Compare, _Alloc>::iterator |
1930 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1931 | _M_lower_bound(_Link_type __x, _Base_ptr __y, |
1932 | const _Key& __k) |
1933 | { |
1934 | while (__x != 0) |
1935 | if (!_M_impl._M_key_compare(_S_key(__x), __k)) |
1936 | __y = __x, __x = _S_left(__x); |
1937 | else |
1938 | __x = _S_right(__x); |
1939 | return iterator(__y); |
1940 | } |
1941 | |
//   | Const counterpart of the lower-bound search: same walk over const node |
//   | pointers, returning a const_iterator to the final __y. |
1942 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1943 | typename _Compare, typename _Alloc> |
1944 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
1945 | _Compare, _Alloc>::const_iterator |
1946 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1947 | _M_lower_bound(_Const_Link_type __x, _Const_Base_ptr __y, |
1948 | const _Key& __k) const |
1949 | { |
1950 | while (__x != 0) |
1951 | if (!_M_impl._M_key_compare(_S_key(__x), __k)) |
1952 | __y = __x, __x = _S_left(__x); |
1953 | else |
1954 | __x = _S_right(__x); |
1955 | return const_iterator(__y); |
1956 | } |
1957 | |
//   | Upper-bound search starting at node __x with __y as the initial result. |
//   | Whenever __k is less than the current key the node is recorded in __y and |
//   | the walk goes left; otherwise it goes right. Returns an iterator to the |
//   | final __y (the caller-supplied __y if no node qualified). |
1958 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1959 | typename _Compare, typename _Alloc> |
1960 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
1961 | _Compare, _Alloc>::iterator |
1962 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1963 | _M_upper_bound(_Link_type __x, _Base_ptr __y, |
1964 | const _Key& __k) |
1965 | { |
1966 | while (__x != 0) |
1967 | if (_M_impl._M_key_compare(__k, _S_key(__x))) |
1968 | __y = __x, __x = _S_left(__x); |
1969 | else |
1970 | __x = _S_right(__x); |
1971 | return iterator(__y); |
1972 | } |
1973 | |
//   | Const counterpart of the upper-bound search: same walk over const node |
//   | pointers, returning a const_iterator to the final __y. |
1974 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1975 | typename _Compare, typename _Alloc> |
1976 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
1977 | _Compare, _Alloc>::const_iterator |
1978 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1979 | _M_upper_bound(_Const_Link_type __x, _Const_Base_ptr __y, |
1980 | const _Key& __k) const |
1981 | { |
1982 | while (__x != 0) |
1983 | if (_M_impl._M_key_compare(__k, _S_key(__x))) |
1984 | __y = __x, __x = _S_left(__x); |
1985 | else |
1986 | __x = _S_right(__x); |
1987 | return const_iterator(__y); |
1988 | } |
1989 | |
//   | equal_range for __k, mutable flavour. Descends from _M_begin(): right when |
//   | the node's key is less than __k, left (recording the node in __y) when __k |
//   | is less than the node's key. On the first node that is neither, the search |
//   | splits: the lower bound is looked for in that node's left subtree with the |
//   | node itself as fallback, and the upper bound in its right subtree with the |
//   | previously recorded __y as fallback. If no such node exists, both iterators |
//   | of the returned pair refer to the last recorded __y. |
1990 | template<typename _Key, typename _Val, typename _KeyOfValue, |
1991 | typename _Compare, typename _Alloc> |
1992 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
1993 | _Compare, _Alloc>::iterator, |
1994 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
1995 | _Compare, _Alloc>::iterator> |
1996 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
1997 | equal_range(const _Key& __k) |
1998 | { |
1999 | _Link_type __x = _M_begin(); |
2000 | _Base_ptr __y = _M_end(); |
2001 | while (__x != 0) |
2002 | { |
2003 | if (_M_impl._M_key_compare(_S_key(__x), __k)) |
2004 | __x = _S_right(__x); |
2005 | else if (_M_impl._M_key_compare(__k, _S_key(__x))) |
2006 | __y = __x, __x = _S_left(__x); |
2007 | else |
2008 | { |
//   | __xu/__yu snapshot the state for the upper-bound half before __x/__y are |
//   | redirected to the left subtree for the lower-bound half. |
2009 | _Link_type __xu(__x); |
2010 | _Base_ptr __yu(__y); |
2011 | __y = __x, __x = _S_left(__x); |
2012 | __xu = _S_right(__xu); |
2013 | return pair<iterator, |
2014 | iterator>(_M_lower_bound(__x, __y, __k), |
2015 | _M_upper_bound(__xu, __yu, __k)); |
2016 | } |
2017 | } |
2018 | return pair<iterator, iterator>(iterator(__y), |
2019 | iterator(__y)); |
2020 | } |
2021 | |
//   | equal_range for __k, const flavour. Descends from _M_begin(): right when the |
//   | node's key is less than __k, left (recording the node in __y) when __k is |
//   | less than the node's key. On the first node that is neither, the search |
//   | splits: the lower bound is looked for in that node's left subtree with the |
//   | node itself as fallback, and the upper bound in its right subtree with the |
//   | previously recorded __y as fallback. If no such node exists, both iterators |
//   | of the returned pair refer to the last recorded __y. |
2022 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2023 | typename _Compare, typename _Alloc> |
2024 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2025 | _Compare, _Alloc>::const_iterator, |
2026 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2027 | _Compare, _Alloc>::const_iterator> |
2028 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2029 | equal_range(const _Key& __k) const |
2030 | { |
2031 | _Const_Link_type __x = _M_begin(); |
2032 | _Const_Base_ptr __y = _M_end(); |
2033 | while (__x != 0) |
2034 | { |
2035 | if (_M_impl._M_key_compare(_S_key(__x), __k)) |
2036 | __x = _S_right(__x); |
2037 | else if (_M_impl._M_key_compare(__k, _S_key(__x))) |
2038 | __y = __x, __x = _S_left(__x); |
2039 | else |
2040 | { |
//   | __xu/__yu snapshot the state for the upper-bound half before __x/__y are |
//   | redirected to the left subtree for the lower-bound half. |
2041 | _Const_Link_type __xu(__x); |
2042 | _Const_Base_ptr __yu(__y); |
2043 | __y = __x, __x = _S_left(__x); |
2044 | __xu = _S_right(__xu); |
2045 | return pair<const_iterator, |
2046 | const_iterator>(_M_lower_bound(__x, __y, __k), |
2047 | _M_upper_bound(__xu, __yu, __k)); |
2048 | } |
2049 | } |
2050 | return pair<const_iterator, const_iterator>(const_iterator(__y), |
2051 | const_iterator(__y)); |
2052 | } |
2053 | |
//   | Member swap. Tree contents are exchanged according to which side is empty: |
//   |  - this empty, __t non-empty: this impl takes __t's data via _M_move_data; |
//   |  - this non-empty, __t empty: __t's impl takes this tree's data; |
//   |  - both non-empty: root, leftmost, rightmost and node count are swapped and |
//   |    each root's parent pointer is re-pointed at its new owner's _M_end(); |
//   |  - both empty: nothing to exchange. |
//   | In every case the comparators are swapped and _Alloc_traits::_S_on_swap is |
//   | called on the two node allocators. |
//   | NOTE(review): "_GLIBCXX_NOEXCEPT_IF(...)noexcept(...)" looks like the macro |
//   | call fused with its expansion by the listing. |
2054 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2055 | typename _Compare, typename _Alloc> |
2056 | void |
2057 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2058 | swap(_Rb_tree& __t) |
2059 | _GLIBCXX_NOEXCEPT_IF(__is_nothrow_swappable<_Compare>::value)noexcept(__is_nothrow_swappable<_Compare>::value) |
2060 | { |
2061 | if (_M_root() == 0) |
2062 | { |
2063 | if (__t._M_root() != 0) |
2064 | _M_impl._M_move_data(__t._M_impl); |
2065 | } |
2066 | else if (__t._M_root() == 0) |
2067 | __t._M_impl._M_move_data(_M_impl); |
2068 | else |
2069 | { |
2070 | std::swap(_M_root(),__t._M_root()); |
2071 | std::swap(_M_leftmost(),__t._M_leftmost()); |
2072 | std::swap(_M_rightmost(),__t._M_rightmost()); |
2073 | |
2074 | _M_root()->_M_parent = _M_end(); |
2075 | __t._M_root()->_M_parent = __t._M_end(); |
2076 | std::swap(this->_M_impl._M_node_count, __t._M_impl._M_node_count); |
2077 | } |
2078 | // No need to swap header's color as it does not change. |
2079 | std::swap(this->_M_impl._M_key_compare, __t._M_impl._M_key_compare); |
2080 | |
2081 | _Alloc_traits::_S_on_swap(_M_get_Node_allocator(), |
2082 | __t._M_get_Node_allocator()); |
2083 | } |
2084 | |
//   | Finds where key __k would be inserted in a unique-key tree. Returns a pair |
//   | of base pointers: |
//   |  - (__x, __y), with __x null after the descent and __y the prospective |
//   |    parent, when no element equivalent to __k was found; |
//   |  - (node, 0) when the element at `node` is not less than __k, i.e. an |
//   |    equivalent key is already present. Callers test .second for null. |
2085 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2086 | typename _Compare, typename _Alloc> |
2087 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2088 | _Compare, _Alloc>::_Base_ptr, |
2089 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2090 | _Compare, _Alloc>::_Base_ptr> |
2091 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2092 | _M_get_insert_unique_pos(const key_type& __k) |
2093 | { |
2094 | typedef pair<_Base_ptr, _Base_ptr> _Res; |
2095 | _Link_type __x = _M_begin(); |
2096 | _Base_ptr __y = _M_end(); |
//   | __comp starts true so that an empty tree takes the "__j == begin()" exit. |
2097 | bool __comp = true; |
2098 | while (__x != 0) |
2099 | { |
2100 | __y = __x; |
2101 | __comp = _M_impl._M_key_compare(__k, _S_key(__x)); |
2102 | __x = __comp ? _S_left(__x) : _S_right(__x); |
2103 | } |
2104 | iterator __j = iterator(__y); |
//   | If the last step went left, the candidate duplicate is the predecessor of |
//   | __y; when __y is the first element there is no predecessor to check. |
2105 | if (__comp) |
2106 | { |
2107 | if (__j == begin()) |
2108 | return _Res(__x, __y); |
2109 | else |
2110 | --__j; |
2111 | } |
2112 | if (_M_impl._M_key_compare(_S_key(__j._M_node), __k)) |
2113 | return _Res(__x, __y); |
2114 | return _Res(__j._M_node, 0); |
2115 | } |
2116 | |
//   | Finds where key __k would be inserted in a tree allowing equivalent keys. |
//   | Descends from _M_begin(), going left when __k compares less than the node's |
//   | key and right otherwise. Returns (__x, __y): __x is null once the loop ends |
//   | and __y is the last node visited, or _M_end() for an empty tree. |
2117 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2118 | typename _Compare, typename _Alloc> |
2119 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2120 | _Compare, _Alloc>::_Base_ptr, |
2121 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2122 | _Compare, _Alloc>::_Base_ptr> |
2123 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2124 | _M_get_insert_equal_pos(const key_type& __k) |
2125 | { |
2126 | typedef pair<_Base_ptr, _Base_ptr> _Res; |
2127 | _Link_type __x = _M_begin(); |
2128 | _Base_ptr __y = _M_end(); |
2129 | while (__x != 0) |
2130 | { |
2131 | __y = __x; |
2132 | __x = _M_impl._M_key_compare(__k, _S_key(__x)) ? |
2133 | _S_left(__x) : _S_right(__x); |
2134 | } |
2135 | return _Res(__x, __y); |
2136 | } |
2137 | |
//   | Inserts __v only if no equivalent key is present. The position comes from |
//   | _M_get_insert_unique_pos on the key of __v. A non-null .second means |
//   | "insert here": the value is inserted through _M_insert_ with an _Alloc_node |
//   | and the result is (iterator to new node, true). Otherwise the result is |
//   | (iterator to __res.first, false) and nothing is allocated. |
2138 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2139 | typename _Compare, typename _Alloc> |
2140 | #if __cplusplus201402L >= 201103L |
2141 | template<typename _Arg> |
2142 | #endif |
2143 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2144 | _Compare, _Alloc>::iterator, bool> |
2145 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2146 | #if __cplusplus201402L >= 201103L |
2147 | _M_insert_unique(_Arg&& __v) |
2148 | #else |
2149 | _M_insert_unique(const _Val& __v) |
2150 | #endif |
2151 | { |
2152 | typedef pair<iterator, bool> _Res; |
2153 | pair<_Base_ptr, _Base_ptr> __res |
2154 | = _M_get_insert_unique_pos(_KeyOfValue()(__v)); |
2155 | |
2156 | if (__res.second) |
2157 | { |
2158 | _Alloc_node __an(*this); |
2159 | return _Res(_M_insert_(__res.first, __res.second, |
2160 | _GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v), __an), |
2161 | true); |
2162 | } |
2163 | |
2164 | return _Res(iterator(__res.first), false); |
2165 | } |
2166 | |
2167 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2168 | typename _Compare, typename _Alloc> |
2169 | #if __cplusplus201402L >= 201103L |
2170 | template<typename _Arg> |
2171 | #endif |
2172 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2173 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2174 | #if __cplusplus201402L >= 201103L |
2175 | _M_insert_equal(_Arg&& __v) |
2176 | #else |
2177 | _M_insert_equal(const _Val& __v) |
2178 | #endif |
2179 | { |
2180 | pair<_Base_ptr, _Base_ptr> __res |
2181 | = _M_get_insert_equal_pos(_KeyOfValue()(__v)); |
2182 | _Alloc_node __an(*this); |
2183 | return _M_insert_(__res.first, __res.second, |
2184 | _GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v), __an); |
2185 | } |
2186 | |
// Compute the insertion position for key __k in a unique-key tree, using
// __position as a hint.
//
// The result is a pair of node pointers (first, second):
//  - second != 0: a slot was found; callers in this file pass
//    (first, second) on to the node-linking routine.
//  - second == 0: an equivalent key already lives at `first`.
// Whenever the hint cannot be confirmed with the neighbouring element,
// the function falls back to the unhinted _M_get_insert_unique_pos.
2187 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2188 | typename _Compare, typename _Alloc> |
2189 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2190 | _Compare, _Alloc>::_Base_ptr, |
2191 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2192 | _Compare, _Alloc>::_Base_ptr> |
2193 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2194 | _M_get_insert_hint_unique_pos(const_iterator __position, |
2195 | const key_type& __k) |
2196 | { |
2197 | iterator __pos = __position._M_const_cast(); |
2198 | typedef pair<_Base_ptr, _Base_ptr> _Res; |
2199 | |
2200 | // end() |
2201 | if (__pos._M_node == _M_end()) |
2202 | { |
// Hint is end(): usable only if the tree is non-empty and the current
// rightmost key orders strictly before __k.
2203 | if (size() > 0 |
2204 | && _M_impl._M_key_compare(_S_key(_M_rightmost()), __k)) |
2205 | return _Res(0, _M_rightmost()); |
2206 | else |
2207 | return _M_get_insert_unique_pos(__k); |
2208 | } |
2209 | else if (_M_impl._M_key_compare(__k, _S_key(__pos._M_node))) |
2210 | { |
2211 | // First, try before... |
2212 | iterator __before = __pos; |
2213 | if (__pos._M_node == _M_leftmost()) // begin() |
2214 | return _Res(_M_leftmost(), _M_leftmost()); |
// __k orders before the hint; accept only if the hint's predecessor
// orders strictly before __k.
2215 | else if (_M_impl._M_key_compare(_S_key((--__before)._M_node), __k)) |
2216 | { |
// Hang the new node off the predecessor when its right link is free,
// otherwise off the hint node itself.
2217 | if (_S_right(__before._M_node) == 0) |
2218 | return _Res(0, __before._M_node); |
2219 | else |
2220 | return _Res(__pos._M_node, __pos._M_node); |
2221 | } |
2222 | else |
2223 | return _M_get_insert_unique_pos(__k); |
2224 | } |
2225 | else if (_M_impl._M_key_compare(_S_key(__pos._M_node), __k)) |
2226 | { |
2227 | // ... then try after. |
2228 | iterator __after = __pos; |
2229 | if (__pos._M_node == _M_rightmost()) |
2230 | return _Res(0, _M_rightmost()); |
// The hint orders before __k; accept only if __k orders strictly before
// the hint's successor.
2231 | else if (_M_impl._M_key_compare(__k, _S_key((++__after)._M_node))) |
2232 | { |
// Use the hint node when its right link is free, otherwise the successor.
2233 | if (_S_right(__pos._M_node) == 0) |
2234 | return _Res(0, __pos._M_node); |
2235 | else |
2236 | return _Res(__after._M_node, __after._M_node); |
2237 | } |
2238 | else |
2239 | return _M_get_insert_unique_pos(__k); |
2240 | } |
2241 | else |
2242 | // Equivalent keys. |
2243 | return _Res(__pos._M_node, 0); |
2244 | } |
2245 | |
2246 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2247 | typename _Compare, typename _Alloc> |
2248 | #if __cplusplus201402L >= 201103L |
2249 | template<typename _Arg, typename _NodeGen> |
2250 | #else |
2251 | template<typename _NodeGen> |
2252 | #endif |
2253 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2254 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2255 | _M_insert_unique_(const_iterator __position, |
2256 | #if __cplusplus201402L >= 201103L |
2257 | _Arg&& __v, |
2258 | #else |
2259 | const _Val& __v, |
2260 | #endif |
2261 | _NodeGen& __node_gen) |
2262 | { |
2263 | pair<_Base_ptr, _Base_ptr> __res |
2264 | = _M_get_insert_hint_unique_pos(__position, _KeyOfValue()(__v)); |
2265 | |
2266 | if (__res.second) |
2267 | return _M_insert_(__res.first, __res.second, |
2268 | _GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v), |
2269 | __node_gen); |
2270 | return iterator(__res.first); |
2271 | } |
2272 | |
// Compute the insertion position for key __k in a tree that allows
// equivalent keys, using __position as a hint.
//
// The result is a pair of node pointers (first, second).  A non-null
// `second` is a usable slot.  The final branch can return (0, 0), which
// signals that the hint gave no usable slot; _M_insert_equal_ in this
// file reacts to a null `second` by calling _M_insert_equal_lower.
// Unlike the unique variant, the comparisons here are negated, so
// equivalent neighbours are accepted.
2273 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2274 | typename _Compare, typename _Alloc> |
2275 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2276 | _Compare, _Alloc>::_Base_ptr, |
2277 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2278 | _Compare, _Alloc>::_Base_ptr> |
2279 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2280 | _M_get_insert_hint_equal_pos(const_iterator __position, const key_type& __k) |
2281 | { |
2282 | iterator __pos = __position._M_const_cast(); |
2283 | typedef pair<_Base_ptr, _Base_ptr> _Res; |
2284 | |
2285 | // end() |
2286 | if (__pos._M_node == _M_end()) |
2287 | { |
// Hint is end(): usable if the tree is non-empty and __k does not order
// before the current rightmost key.
2288 | if (size() > 0 |
2289 | && !_M_impl._M_key_compare(__k, _S_key(_M_rightmost()))) |
2290 | return _Res(0, _M_rightmost()); |
2291 | else |
2292 | return _M_get_insert_equal_pos(__k); |
2293 | } |
2294 | else if (!_M_impl._M_key_compare(_S_key(__pos._M_node), __k)) |
2295 | { |
2296 | // First, try before... |
2297 | iterator __before = __pos; |
2298 | if (__pos._M_node == _M_leftmost()) // begin() |
2299 | return _Res(_M_leftmost(), _M_leftmost()); |
// The hint does not order before __k; accept if __k does not order
// before the hint's predecessor either.
2300 | else if (!_M_impl._M_key_compare(__k, _S_key((--__before)._M_node))) |
2301 | { |
2302 | if (_S_right(__before._M_node) == 0) |
2303 | return _Res(0, __before._M_node); |
2304 | else |
2305 | return _Res(__pos._M_node, __pos._M_node); |
2306 | } |
2307 | else |
2308 | return _M_get_insert_equal_pos(__k); |
2309 | } |
2310 | else |
2311 | { |
2312 | // ... then try after. |
2313 | iterator __after = __pos; |
2314 | if (__pos._M_node == _M_rightmost()) |
2315 | return _Res(0, _M_rightmost()); |
// The hint orders before __k; accept if the successor does not order
// before __k.
2316 | else if (!_M_impl._M_key_compare(_S_key((++__after)._M_node), __k)) |
2317 | { |
2318 | if (_S_right(__pos._M_node) == 0) |
2319 | return _Res(0, __pos._M_node); |
2320 | else |
2321 | return _Res(__after._M_node, __after._M_node); |
2322 | } |
2323 | else |
// No slot found via the hint; a null parent tells the caller to fall back.
2324 | return _Res(0, 0); |
2325 | } |
2326 | } |
2327 | |
2328 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2329 | typename _Compare, typename _Alloc> |
2330 | #if __cplusplus201402L >= 201103L |
2331 | template<typename _Arg, typename _NodeGen> |
2332 | #else |
2333 | template<typename _NodeGen> |
2334 | #endif |
2335 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2336 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2337 | _M_insert_equal_(const_iterator __position, |
2338 | #if __cplusplus201402L >= 201103L |
2339 | _Arg&& __v, |
2340 | #else |
2341 | const _Val& __v, |
2342 | #endif |
2343 | _NodeGen& __node_gen) |
2344 | { |
2345 | pair<_Base_ptr, _Base_ptr> __res |
2346 | = _M_get_insert_hint_equal_pos(__position, _KeyOfValue()(__v)); |
2347 | |
2348 | if (__res.second) |
2349 | return _M_insert_(__res.first, __res.second, |
2350 | _GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v), |
2351 | __node_gen); |
2352 | |
2353 | return _M_insert_equal_lower(_GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v)); |
2354 | } |
2355 | |
2356 | #if __cplusplus201402L >= 201103L |
2357 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2358 | typename _Compare, typename _Alloc> |
2359 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2360 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2361 | _M_insert_node(_Base_ptr __x, _Base_ptr __p, _Link_type __z) |
2362 | { |
2363 | bool __insert_left = (__x != 0 || __p == _M_end() |
2364 | || _M_impl._M_key_compare(_S_key(__z), |
2365 | _S_key(__p))); |
2366 | |
2367 | _Rb_tree_insert_and_rebalance(__insert_left, __z, __p, |
2368 | this->_M_impl._M_header); |
2369 | ++_M_impl._M_node_count; |
2370 | return iterator(__z); |
2371 | } |
2372 | |
2373 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2374 | typename _Compare, typename _Alloc> |
2375 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2376 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2377 | _M_insert_lower_node(_Base_ptr __p, _Link_type __z) |
2378 | { |
2379 | bool __insert_left = (__p == _M_end() |
2380 | || !_M_impl._M_key_compare(_S_key(__p), |
2381 | _S_key(__z))); |
2382 | |
2383 | _Rb_tree_insert_and_rebalance(__insert_left, __z, __p, |
2384 | this->_M_impl._M_header); |
2385 | ++_M_impl._M_node_count; |
2386 | return iterator(__z); |
2387 | } |
2388 | |
2389 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2390 | typename _Compare, typename _Alloc> |
2391 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2392 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2393 | _M_insert_equal_lower_node(_Link_type __z) |
2394 | { |
2395 | _Link_type __x = _M_begin(); |
2396 | _Base_ptr __y = _M_end(); |
2397 | while (__x != 0) |
2398 | { |
2399 | __y = __x; |
2400 | __x = !_M_impl._M_key_compare(_S_key(__x), _S_key(__z)) ? |
2401 | _S_left(__x) : _S_right(__x); |
2402 | } |
2403 | return _M_insert_lower_node(__y, __z); |
2404 | } |
2405 | |
2406 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2407 | typename _Compare, typename _Alloc> |
2408 | template<typename... _Args> |
2409 | pair<typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2410 | _Compare, _Alloc>::iterator, bool> |
2411 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2412 | _M_emplace_unique(_Args&&... __args) |
2413 | { |
2414 | _Link_type __z = _M_create_node(std::forward<_Args>(__args)...); |
2415 | |
2416 | __tryif (true) |
2417 | { |
2418 | typedef pair<iterator, bool> _Res; |
2419 | auto __res = _M_get_insert_unique_pos(_S_key(__z)); |
2420 | if (__res.second) |
2421 | return _Res(_M_insert_node(__res.first, __res.second, __z), true); |
2422 | |
2423 | _M_drop_node(__z); |
2424 | return _Res(iterator(__res.first), false); |
2425 | } |
2426 | __catch(...)if (false) |
2427 | { |
2428 | _M_drop_node(__z); |
2429 | __throw_exception_again; |
2430 | } |
2431 | } |
2432 | |
2433 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2434 | typename _Compare, typename _Alloc> |
2435 | template<typename... _Args> |
2436 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2437 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2438 | _M_emplace_equal(_Args&&... __args) |
2439 | { |
2440 | _Link_type __z = _M_create_node(std::forward<_Args>(__args)...); |
2441 | |
2442 | __tryif (true) |
2443 | { |
2444 | auto __res = _M_get_insert_equal_pos(_S_key(__z)); |
2445 | return _M_insert_node(__res.first, __res.second, __z); |
2446 | } |
2447 | __catch(...)if (false) |
2448 | { |
2449 | _M_drop_node(__z); |
2450 | __throw_exception_again; |
2451 | } |
2452 | } |
2453 | |
2454 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2455 | typename _Compare, typename _Alloc> |
2456 | template<typename... _Args> |
2457 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2458 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2459 | _M_emplace_hint_unique(const_iterator __pos, _Args&&... __args) |
2460 | { |
2461 | _Link_type __z = _M_create_node(std::forward<_Args>(__args)...); |
2462 | |
2463 | __tryif (true) |
2464 | { |
2465 | auto __res = _M_get_insert_hint_unique_pos(__pos, _S_key(__z)); |
2466 | |
2467 | if (__res.second) |
2468 | return _M_insert_node(__res.first, __res.second, __z); |
2469 | |
2470 | _M_drop_node(__z); |
2471 | return iterator(__res.first); |
2472 | } |
2473 | __catch(...)if (false) |
2474 | { |
2475 | _M_drop_node(__z); |
2476 | __throw_exception_again; |
2477 | } |
2478 | } |
2479 | |
2480 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2481 | typename _Compare, typename _Alloc> |
2482 | template<typename... _Args> |
2483 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator |
2484 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2485 | _M_emplace_hint_equal(const_iterator __pos, _Args&&... __args) |
2486 | { |
2487 | _Link_type __z = _M_create_node(std::forward<_Args>(__args)...); |
2488 | |
2489 | __tryif (true) |
2490 | { |
2491 | auto __res = _M_get_insert_hint_equal_pos(__pos, _S_key(__z)); |
2492 | |
2493 | if (__res.second) |
2494 | return _M_insert_node(__res.first, __res.second, __z); |
2495 | |
2496 | return _M_insert_equal_lower_node(__z); |
2497 | } |
2498 | __catch(...)if (false) |
2499 | { |
2500 | _M_drop_node(__z); |
2501 | __throw_exception_again; |
2502 | } |
2503 | } |
2504 | #endif |
2505 | |
2506 | |
2507 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2508 | typename _Compare, typename _Alloc> |
2509 | void |
2510 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2511 | _M_erase_aux(const_iterator __position) |
2512 | { |
2513 | _Link_type __y = |
2514 | static_cast<_Link_type>(_Rb_tree_rebalance_for_erase |
2515 | (const_cast<_Base_ptr>(__position._M_node), |
2516 | this->_M_impl._M_header)); |
2517 | _M_drop_node(__y); |
2518 | --_M_impl._M_node_count; |
2519 | } |
2520 | |
2521 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2522 | typename _Compare, typename _Alloc> |
2523 | void |
2524 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2525 | _M_erase_aux(const_iterator __first, const_iterator __last) |
2526 | { |
2527 | if (__first == begin() && __last == end()) |
2528 | clear(); |
2529 | else |
2530 | while (__first != __last) |
2531 | _M_erase_aux(__first++); |
2532 | } |
2533 | |
2534 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2535 | typename _Compare, typename _Alloc> |
2536 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type |
2537 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2538 | erase(const _Key& __x) |
2539 | { |
2540 | pair<iterator, iterator> __p = equal_range(__x); |
2541 | const size_type __old_size = size(); |
2542 | _M_erase_aux(__p.first, __p.second); |
2543 | return __old_size - size(); |
2544 | } |
2545 | |
2546 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2547 | typename _Compare, typename _Alloc> |
2548 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2549 | _Compare, _Alloc>::iterator |
2550 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2551 | find(const _Key& __k) |
2552 | { |
2553 | iterator __j = _M_lower_bound(_M_begin(), _M_end(), __k); |
2554 | return (__j == end() |
2555 | || _M_impl._M_key_compare(__k, |
2556 | _S_key(__j._M_node))) ? end() : __j; |
2557 | } |
2558 | |
2559 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2560 | typename _Compare, typename _Alloc> |
2561 | typename _Rb_tree<_Key, _Val, _KeyOfValue, |
2562 | _Compare, _Alloc>::const_iterator |
2563 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2564 | find(const _Key& __k) const |
2565 | { |
2566 | const_iterator __j = _M_lower_bound(_M_begin(), _M_end(), __k); |
2567 | return (__j == end() |
2568 | || _M_impl._M_key_compare(__k, |
2569 | _S_key(__j._M_node))) ? end() : __j; |
2570 | } |
2571 | |
2572 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2573 | typename _Compare, typename _Alloc> |
2574 | typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type |
2575 | _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>:: |
2576 | count(const _Key& __k) const |
2577 | { |
2578 | pair<const_iterator, const_iterator> __p = equal_range(__k); |
2579 | const size_type __n = std::distance(__p.first, __p.second); |
2580 | return __n; |
2581 | } |
2582 | |
2583 | _GLIBCXX_PURE__attribute__ ((__pure__)) unsigned int |
2584 | _Rb_tree_black_count(const _Rb_tree_node_base* __node, |
2585 | const _Rb_tree_node_base* __root) throw (); |
2586 | |
2587 | template<typename _Key, typename _Val, typename _KeyOfValue, |
2588 | typename _Compare, typename _Alloc> |
2589 | bool |
2590 | _Rb_tree<_Key,_Val,_KeyOfValue,_Compare,_Alloc>::__rb_verify() const |
2591 | { |
2592 | if (_M_impl._M_node_count == 0 || begin() == end()) |
2593 | return _M_impl._M_node_count == 0 && begin() == end() |
2594 | && this->_M_impl._M_header._M_left == _M_end() |
2595 | && this->_M_impl._M_header._M_right == _M_end(); |
2596 | |
2597 | unsigned int __len = _Rb_tree_black_count(_M_leftmost(), _M_root()); |
2598 | for (const_iterator __it = begin(); __it != end(); ++__it) |
2599 | { |
2600 | _Const_Link_type __x = static_cast<_Const_Link_type>(__it._M_node); |
2601 | _Const_Link_type __L = _S_left(__x); |
2602 | _Const_Link_type __R = _S_right(__x); |
2603 | |
2604 | if (__x->_M_color == _S_red) |
2605 | if ((__L && __L->_M_color == _S_red) |
2606 | || (__R && __R->_M_color == _S_red)) |
2607 | return false; |
2608 | |
2609 | if (__L && _M_impl._M_key_compare(_S_key(__x), _S_key(__L))) |
2610 | return false; |
2611 | if (__R && _M_impl._M_key_compare(_S_key(__R), _S_key(__x))) |
2612 | return false; |
2613 | |
2614 | if (!__L && !__R && _Rb_tree_black_count(__x, _M_root()) != __len) |
2615 | return false; |
2616 | } |
2617 | |
2618 | if (_M_leftmost() != _Rb_tree_node_base::_S_minimum(_M_root())) |
2619 | return false; |
2620 | if (_M_rightmost() != _Rb_tree_node_base::_S_maximum(_M_root())) |
2621 | return false; |
2622 | return true; |
2623 | } |
2624 | |
2625 | #if __cplusplus201402L > 201402L |
2626 | // Allow access to internals of compatible _Rb_tree specializations. |
2627 | template<typename _Key, typename _Val, typename _Sel, typename _Cmp1, |
2628 | typename _Alloc, typename _Cmp2> |
2629 | struct _Rb_tree_merge_helper<_Rb_tree<_Key, _Val, _Sel, _Cmp1, _Alloc>, |
2630 | _Cmp2> |
2631 | { |
2632 | private: |
2633 | friend class _Rb_tree<_Key, _Val, _Sel, _Cmp1, _Alloc>; |
2634 | |
2635 | static auto& |
2636 | _S_get_impl(_Rb_tree<_Key, _Val, _Sel, _Cmp2, _Alloc>& __tree) |
2637 | { return __tree._M_impl; } |
2638 | }; |
2639 | #endif // C++17 |
2640 | |
2641 | _GLIBCXX_END_NAMESPACE_VERSION |
2642 | } // namespace |
2643 | |
2644 | #endif |