LLVM  4.0.0
X86InstrFMA3Info.cpp
Go to the documentation of this file.
1 //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the implementation of the classes providing information
11 // about existing X86 FMA3 opcodes, classifying and grouping them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86InstrFMA3Info.h"
16 #include "X86InstrInfo.h"
18 #include "llvm/Support/Threading.h"
19 using namespace llvm;
20 
21 /// Once-flag passed to llvm::call_once() in initGroupsOnce() below, so that the
22 /// initialization of the map 'OpcodeToGroup' runs once and is thread safe.
23 LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag);
24 
27  return &*X86InstrFMA3InfoObj;
28 }
29 
30 void X86InstrFMA3Info::initRMGroup(const uint16_t *RegOpcodes,
31  const uint16_t *MemOpcodes, unsigned Attr) {
32  // Create a new instance of this class that would hold a group of FMA opcodes.
33  X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, MemOpcodes, Attr);
34 
35  // Add the references from indvidual opcodes to the group holding them.
36  assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
37  !OpcodeToGroup[RegOpcodes[2]] && !OpcodeToGroup[MemOpcodes[0]] &&
38  !OpcodeToGroup[MemOpcodes[1]] && !OpcodeToGroup[MemOpcodes[2]]) &&
39  "Duplication or rewrite of elements in OpcodeToGroup.");
40  OpcodeToGroup[RegOpcodes[0]] = G;
41  OpcodeToGroup[RegOpcodes[1]] = G;
42  OpcodeToGroup[RegOpcodes[2]] = G;
43  OpcodeToGroup[MemOpcodes[0]] = G;
44  OpcodeToGroup[MemOpcodes[1]] = G;
45  OpcodeToGroup[MemOpcodes[2]] = G;
46 }
47 
48 void X86InstrFMA3Info::initRGroup(const uint16_t *RegOpcodes, unsigned Attr) {
49  // Create a new instance of this class that would hold a group of FMA opcodes.
50  X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, nullptr, Attr);
51 
52  // Add the references from indvidual opcodes to the group holding them.
53  assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
54  !OpcodeToGroup[RegOpcodes[2]]) &&
55  "Duplication or rewrite of elements in OpcodeToGroup.");
56  OpcodeToGroup[RegOpcodes[0]] = G;
57  OpcodeToGroup[RegOpcodes[1]] = G;
58  OpcodeToGroup[RegOpcodes[2]] = G;
59 }
60 
61 void X86InstrFMA3Info::initMGroup(const uint16_t *MemOpcodes, unsigned Attr) {
62  // Create a new instance of this class that would hold a group of FMA opcodes.
63  X86InstrFMA3Group *G = new X86InstrFMA3Group(nullptr, MemOpcodes, Attr);
64 
65  // Add the references from indvidual opcodes to the group holding them.
66  assert((!OpcodeToGroup[MemOpcodes[0]] && !OpcodeToGroup[MemOpcodes[1]] &&
67  !OpcodeToGroup[MemOpcodes[2]]) &&
68  "Duplication or rewrite of elements in OpcodeToGroup.");
69  OpcodeToGroup[MemOpcodes[0]] = G;
70  OpcodeToGroup[MemOpcodes[1]] = G;
71  OpcodeToGroup[MemOpcodes[2]] = G;
72 }
73 
/// Declares static opcode tables for the three register forms and the three
/// memory forms of one FMA3 group, then registers them with initRMGroup()
/// without an explicit attribute argument. Both table names are derived from
/// the first register opcode, so that opcode must be unique among the groups
/// expanded in the same scope.
#define FMA3RM(Form132R, Form213R, Form231R, Form132M, Form213M, Form231M) \
  static const uint16_t Reg##Form132R[3] = {X86::Form132R, X86::Form213R, \
                                            X86::Form231R}; \
  static const uint16_t Mem##Form132R[3] = {X86::Form132M, X86::Form213M, \
                                            X86::Form231M}; \
  initRMGroup(Reg##Form132R, Mem##Form132R);
78 
/// Same as a register+memory group registration, but additionally passes the
/// attribute expression \p GroupAttrs (parenthesized) to initRMGroup(). Both
/// static table names are derived from the first register opcode.
#define FMA3RMA(Form132R, Form213R, Form231R, Form132M, Form213M, Form231M, \
                GroupAttrs) \
  static const uint16_t Reg##Form132R[3] = {X86::Form132R, X86::Form213R, \
                                            X86::Form231R}; \
  static const uint16_t Mem##Form132R[3] = {X86::Form132M, X86::Form213M, \
                                            X86::Form231M}; \
  initRMGroup(Reg##Form132R, Mem##Form132R, (GroupAttrs));
83 
/// Declares a static table with the three register-form opcodes of one FMA3
/// group and registers it with initRGroup() without an explicit attribute
/// argument. The table name is derived from the first opcode.
#define FMA3R(Form132R, Form213R, Form231R) \
  static const uint16_t Reg##Form132R[3] = {X86::Form132R, X86::Form213R, \
                                            X86::Form231R}; \
  initRGroup(Reg##Form132R);
87 
/// Declares a static table with the three register-form opcodes of one FMA3
/// group and registers it with initRGroup(), passing the attribute expression
/// \p GroupAttrs (parenthesized). The table name is derived from the first
/// opcode.
#define FMA3RA(Form132R, Form213R, Form231R, GroupAttrs) \
  static const uint16_t Reg##Form132R[3] = {X86::Form132R, X86::Form213R, \
                                            X86::Form231R}; \
  initRGroup(Reg##Form132R, (GroupAttrs));
91 
/// Declares a static table with the three memory-form opcodes of one FMA3
/// group and registers it with initMGroup() without an explicit attribute
/// argument. The table name is derived from the first opcode.
#define FMA3M(Form132M, Form213M, Form231M) \
  static const uint16_t Mem##Form132M[3] = {X86::Form132M, X86::Form213M, \
                                            X86::Form231M}; \
  initMGroup(Mem##Form132M);
95 
/// Declares a static table with the three memory-form opcodes of one FMA3
/// group and registers it with initMGroup(), passing the attribute expression
/// \p GroupAttrs (parenthesized). The table name is derived from the first
/// opcode.
#define FMA3MA(Form132M, Form213M, Form231M, GroupAttrs) \
  static const uint16_t Mem##Form132M[3] = {X86::Form132M, X86::Form213M, \
                                            X86::Form231M}; \
  initMGroup(Mem##Form132M, (GroupAttrs));
99 
/// Registers four register+memory groups for the opcode prefix \p Name: the
/// families with suffixes PS, PD, PSY and PDY, each in its 132/213/231 forms
/// with 'r' and 'm' variants.
#define FMA3_AVX2_VECTOR_GROUP(Name) \
  FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \
         Name##132PSm, Name##213PSm, Name##231PSm); \
  FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \
         Name##132PDm, Name##213PDm, Name##231PDm); \
  FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \
         Name##132PSYm, Name##213PSYm, Name##231PSYm); \
  FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \
         Name##132PDYm, Name##213PDYm, Name##231PDYm);
109 
/// Registers four register+memory groups for the opcode prefix \p Name: the
/// SS and SD families without explicit attributes, and their '_Int' variants
/// flagged with X86InstrFMA3Group::X86FMA3Intrinsic.
#define FMA3_AVX2_SCALAR_GROUP(Name) \
  FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \
         Name##132SSm, Name##213SSm, Name##231SSm); \
  FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \
         Name##132SDm, Name##213SDm, Name##231SDm); \
  FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \
          Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \
          Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic);
121 
/// Registers both the vector and the scalar AVX2 groups for the opcode prefix
/// \p Name, vector groups first.
#define FMA3_AVX2_FULL_GROUP(Name) \
  FMA3_AVX2_VECTOR_GROUP(Name); \
  FMA3_AVX2_SCALAR_GROUP(Name);
125 
/// Registers every AVX512 vector group for the opcode prefix \p Name. The
/// PS/PD families are covered for the Z128, Z256 and Z suffixes in their
/// register+memory (r/m), register-only (rb, Z suffix only) and memory-only
/// (mb) variants; each variant is registered once without explicit attributes,
/// once with X86FMA3KMergeMasked (k suffix) and once with X86FMA3KZeroMasked
/// (kz suffix).
#define FMA3_AVX512_VECTOR_GROUP(Name) \
  /* Register+memory groups without explicit attributes. */ \
  FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \
         Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m); \
  FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \
         Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m); \
  FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \
         Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m); \
  FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \
         Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m); \
  FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \
         Name##132PSZm, Name##213PSZm, Name##231PSZm); \
  FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \
         Name##132PDZm, Name##213PDZm, Name##231PDZm); \
  /* Register+memory groups flagged X86FMA3KMergeMasked. */ \
  FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \
          Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \
          Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \
          Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \
          Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \
          Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \
          Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \
          X86InstrFMA3Group::X86FMA3KMergeMasked); \
  /* Register+memory groups flagged X86FMA3KZeroMasked. */ \
  FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \
          Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \
          Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \
          Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \
          Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \
          Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \
          Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \
          X86InstrFMA3Group::X86FMA3KZeroMasked); \
  /* Register-only 'rb' groups: plain, merge-masked, zero-masked. */ \
  FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb); \
  FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb); \
  FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  /* Memory-only 'mb' groups without explicit attributes. */ \
  FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb); \
  FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb); \
  FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb); \
  FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb); \
  FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb); \
  FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb); \
  /* Memory-only 'mbk' groups flagged X86FMA3KMergeMasked. */ \
  FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \
         X86InstrFMA3Group::X86FMA3KMergeMasked); \
  /* Memory-only 'mbkz' groups flagged X86FMA3KZeroMasked. */ \
  FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \
         X86InstrFMA3Group::X86FMA3KZeroMasked);
215 
/// Registers every AVX512 scalar group for the opcode prefix \p Name. The SSZ
/// and SDZ families are registered without explicit attributes, followed by
/// their '_Int', '_Intk' and '_Intkz' variants (register+memory, then the
/// register-only 'rb' ones). All '_Int*' groups carry X86FMA3Intrinsic; the
/// 'k' variants add X86FMA3KMergeMasked and the 'kz' variants add
/// X86FMA3KZeroMasked.
#define FMA3_AVX512_SCALAR_GROUP(Name) \
  /* Register+memory groups without explicit attributes. */ \
  FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \
         Name##132SSZm, Name##213SSZm, Name##231SSZm); \
  FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \
         Name##132SDZm, Name##213SDZm, Name##231SDZm); \
  /* Register+memory '_Int' groups. */ \
  FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \
          Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \
          Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \
          X86InstrFMA3Group::X86FMA3Intrinsic); \
  /* Register+memory '_Intk' groups. */ \
  FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \
          Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
              X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \
          Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
              X86InstrFMA3Group::X86FMA3KMergeMasked); \
  /* Register+memory '_Intkz' groups. */ \
  FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \
          Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
              X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \
          Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \
          X86InstrFMA3Group::X86FMA3Intrinsic | \
              X86InstrFMA3Group::X86FMA3KZeroMasked); \
  /* Register-only 'rb_Int' groups. */ \
  FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \
         X86InstrFMA3Group::X86FMA3Intrinsic); \
  FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \
         X86InstrFMA3Group::X86FMA3Intrinsic); \
  /* Register-only 'rb_Intk' groups. */ \
  FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
             X86InstrFMA3Group::X86FMA3KMergeMasked); \
  FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
             X86InstrFMA3Group::X86FMA3KMergeMasked); \
  /* Register-only 'rb_Intkz' groups. */ \
  FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
             X86InstrFMA3Group::X86FMA3KZeroMasked); \
  FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \
         X86InstrFMA3Group::X86FMA3Intrinsic | \
             X86InstrFMA3Group::X86FMA3KZeroMasked);
259 
/// Registers both the vector and the scalar AVX512 groups for the opcode
/// prefix \p Name, vector groups first.
#define FMA3_AVX512_FULL_GROUP(Name) \
  FMA3_AVX512_VECTOR_GROUP(Name); \
  FMA3_AVX512_SCALAR_GROUP(Name);
263 
264 void X86InstrFMA3Info::initGroupsOnceImpl() {
265  FMA3_AVX2_FULL_GROUP(VFMADD);
266  FMA3_AVX2_FULL_GROUP(VFMSUB);
267  FMA3_AVX2_FULL_GROUP(VFNMADD);
268  FMA3_AVX2_FULL_GROUP(VFNMSUB);
269 
270  FMA3_AVX2_VECTOR_GROUP(VFMADDSUB);
271  FMA3_AVX2_VECTOR_GROUP(VFMSUBADD);
272 
273  FMA3_AVX512_FULL_GROUP(VFMADD);
274  FMA3_AVX512_FULL_GROUP(VFMSUB);
275  FMA3_AVX512_FULL_GROUP(VFNMADD);
276  FMA3_AVX512_FULL_GROUP(VFNMSUB);
277 
278  FMA3_AVX512_VECTOR_GROUP(VFMADDSUB);
279  FMA3_AVX512_VECTOR_GROUP(VFMSUBADD);
280 }
281 
282 void X86InstrFMA3Info::initGroupsOnce() {
283  llvm::call_once(InitGroupsOnceFlag,
284  []() { getX86InstrFMA3Info()->initGroupsOnceImpl(); });
285 }
This class provides information about all existing FMA3 opcodes.
#define FMA3_AVX512_VECTOR_GROUP(Name)
void call_once(once_flag &flag, Function &&F, Args &&...ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:91
#define FMA3_AVX512_FULL_GROUP(Name)
This class is used to group {132, 213, 231} forms of FMA opcodes together.
LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag)
This flag is used in the method llvm::call_once() used below to make the initialization of the map 'O...
#define FMA3_AVX2_FULL_GROUP(Name)
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
static ManagedStatic< X86InstrFMA3Info > X86InstrFMA3InfoObj
static X86InstrFMA3Info * getX86InstrFMA3Info()
Returns the reference to an object of this class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define FMA3_AVX2_VECTOR_GROUP(Name)
ManagedStatic - This transparently changes the behavior of global statics to be lazily constructed on...
Definition: ManagedStatic.h:63