Line data Source code
1 : //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements the auto-upgrade helper functions.
11 : // This is where deprecated IR intrinsics and other IR features are updated to
12 : // current specifications.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "llvm/IR/AutoUpgrade.h"
17 : #include "llvm/ADT/StringSwitch.h"
18 : #include "llvm/IR/Constants.h"
19 : #include "llvm/IR/DIBuilder.h"
20 : #include "llvm/IR/DebugInfo.h"
21 : #include "llvm/IR/DiagnosticInfo.h"
22 : #include "llvm/IR/Function.h"
23 : #include "llvm/IR/IRBuilder.h"
24 : #include "llvm/IR/Instruction.h"
25 : #include "llvm/IR/IntrinsicInst.h"
26 : #include "llvm/IR/LLVMContext.h"
27 : #include "llvm/IR/Module.h"
28 : #include "llvm/IR/Verifier.h"
29 : #include "llvm/Support/ErrorHandling.h"
30 : #include "llvm/Support/Regex.h"
31 : #include <cstring>
32 : using namespace llvm;
33 :
34 600 : static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
35 :
36 : // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
37 : // changed their type from v4f32 to v2i64.
38 211 : static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
39 : Function *&NewFn) {
40 : // Check whether this is an old version of the function, which received
41 : // v4f32 arguments.
42 211 : Type *Arg0Type = F->getFunctionType()->getParamType(0);
43 211 : if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 : return false;
45 :
46 : // Yes, it's old, replace it with new version.
47 8 : rename(F);
48 8 : NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 8 : return true;
50 : }
51 :
52 : // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 : // arguments have changed their type from i32 to i8.
54 245 : static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
55 : Function *&NewFn) {
56 : // Check that the last argument is an i32.
57 245 : Type *LastArgType = F->getFunctionType()->getParamType(
58 245 : F->getFunctionType()->getNumParams() - 1);
59 245 : if (!LastArgType->isIntegerTy(32))
60 : return false;
61 :
62 : // Move this function aside and map down.
63 41 : rename(F);
64 41 : NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65 41 : return true;
66 : }
67 :
68 64 : static bool UpgradeADCSBBIntrinsic(Function *F, Intrinsic::ID IID,
69 : Function *&NewFn) {
70 : // If this intrinsic has 3 operands, it's the new version.
71 64 : if (F->getFunctionType()->getNumParams() == 3)
72 : return false;
73 :
74 24 : rename(F);
75 24 : NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
76 24 : return true;
77 : }
78 :
79 16379 : static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
80 : // All of the intrinsics matches below should be marked with which llvm
81 : // version started autoupgrading them. At some point in the future we would
82 : // like to use this information to remove upgrade code for some older
83 : // intrinsics. It is currently undecided how we will determine that future
84 : // point.
85 : if (Name.startswith("sse2.paddus.") || // Added in 8.0
86 : Name.startswith("sse2.psubus.") || // Added in 8.0
87 : Name.startswith("avx2.paddus.") || // Added in 8.0
88 : Name.startswith("avx2.psubus.") || // Added in 8.0
89 : Name.startswith("avx512.mask.paddus.") || // Added in 8.0
90 : Name.startswith("avx512.mask.psubus.") || // Added in 8.0
91 : Name=="ssse3.pabs.b.128" || // Added in 6.0
92 : Name=="ssse3.pabs.w.128" || // Added in 6.0
93 : Name=="ssse3.pabs.d.128" || // Added in 6.0
94 : Name.startswith("fma4.vfmadd.s") || // Added in 7.0
95 : Name.startswith("fma.vfmadd.") || // Added in 7.0
96 : Name.startswith("fma.vfmsub.") || // Added in 7.0
97 : Name.startswith("fma.vfmaddsub.") || // Added in 7.0
98 : Name.startswith("fma.vfmsubadd.") || // Added in 7.0
99 : Name.startswith("fma.vfnmadd.") || // Added in 7.0
100 : Name.startswith("fma.vfnmsub.") || // Added in 7.0
101 : Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
102 : Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
103 : Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
104 : Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
105 : Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
106 : Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
107 : Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
108 : Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
109 : Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
110 : Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
111 : Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
112 : Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
113 : Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
114 : Name.startswith("avx512.kunpck") || //added in 6.0
115 : Name.startswith("avx2.pabs.") || // Added in 6.0
116 : Name.startswith("avx512.mask.pabs.") || // Added in 6.0
117 : Name.startswith("avx512.broadcastm") || // Added in 6.0
118 : Name == "sse.sqrt.ss" || // Added in 7.0
119 : Name == "sse2.sqrt.sd" || // Added in 7.0
120 : Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
121 : Name.startswith("avx.sqrt.p") || // Added in 7.0
122 : Name.startswith("sse2.sqrt.p") || // Added in 7.0
123 : Name.startswith("sse.sqrt.p") || // Added in 7.0
124 : Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
125 : Name.startswith("sse2.pcmpeq.") || // Added in 3.1
126 : Name.startswith("sse2.pcmpgt.") || // Added in 3.1
127 : Name.startswith("avx2.pcmpeq.") || // Added in 3.1
128 : Name.startswith("avx2.pcmpgt.") || // Added in 3.1
129 : Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
130 : Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
131 : Name.startswith("avx.vperm2f128.") || // Added in 6.0
132 : Name == "avx2.vperm2i128" || // Added in 6.0
133 : Name == "sse.add.ss" || // Added in 4.0
134 : Name == "sse2.add.sd" || // Added in 4.0
135 : Name == "sse.sub.ss" || // Added in 4.0
136 : Name == "sse2.sub.sd" || // Added in 4.0
137 : Name == "sse.mul.ss" || // Added in 4.0
138 : Name == "sse2.mul.sd" || // Added in 4.0
139 : Name == "sse.div.ss" || // Added in 4.0
140 : Name == "sse2.div.sd" || // Added in 4.0
141 : Name == "sse41.pmaxsb" || // Added in 3.9
142 : Name == "sse2.pmaxs.w" || // Added in 3.9
143 : Name == "sse41.pmaxsd" || // Added in 3.9
144 : Name == "sse2.pmaxu.b" || // Added in 3.9
145 : Name == "sse41.pmaxuw" || // Added in 3.9
146 : Name == "sse41.pmaxud" || // Added in 3.9
147 : Name == "sse41.pminsb" || // Added in 3.9
148 : Name == "sse2.pmins.w" || // Added in 3.9
149 : Name == "sse41.pminsd" || // Added in 3.9
150 : Name == "sse2.pminu.b" || // Added in 3.9
151 : Name == "sse41.pminuw" || // Added in 3.9
152 : Name == "sse41.pminud" || // Added in 3.9
153 : Name == "avx512.kand.w" || // Added in 7.0
154 : Name == "avx512.kandn.w" || // Added in 7.0
155 : Name == "avx512.knot.w" || // Added in 7.0
156 : Name == "avx512.kor.w" || // Added in 7.0
157 : Name == "avx512.kxor.w" || // Added in 7.0
158 : Name == "avx512.kxnor.w" || // Added in 7.0
159 : Name == "avx512.kortestc.w" || // Added in 7.0
160 : Name == "avx512.kortestz.w" || // Added in 7.0
161 : Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
162 : Name.startswith("avx2.pmax") || // Added in 3.9
163 : Name.startswith("avx2.pmin") || // Added in 3.9
164 : Name.startswith("avx512.mask.pmax") || // Added in 4.0
165 : Name.startswith("avx512.mask.pmin") || // Added in 4.0
166 : Name.startswith("avx2.vbroadcast") || // Added in 3.8
167 : Name.startswith("avx2.pbroadcast") || // Added in 3.8
168 : Name.startswith("avx.vpermil.") || // Added in 3.1
169 : Name.startswith("sse2.pshuf") || // Added in 3.9
170 : Name.startswith("avx512.pbroadcast") || // Added in 3.9
171 : Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
172 : Name.startswith("avx512.mask.movddup") || // Added in 3.9
173 : Name.startswith("avx512.mask.movshdup") || // Added in 3.9
174 : Name.startswith("avx512.mask.movsldup") || // Added in 3.9
175 : Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
176 : Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
177 : Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
178 : Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
179 : Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
180 : Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
181 : Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
182 : Name.startswith("avx512.mask.punpckl") || // Added in 3.9
183 : Name.startswith("avx512.mask.punpckh") || // Added in 3.9
184 : Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
185 : Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
186 : Name.startswith("avx512.mask.pand.") || // Added in 3.9
187 : Name.startswith("avx512.mask.pandn.") || // Added in 3.9
188 : Name.startswith("avx512.mask.por.") || // Added in 3.9
189 : Name.startswith("avx512.mask.pxor.") || // Added in 3.9
190 : Name.startswith("avx512.mask.and.") || // Added in 3.9
191 : Name.startswith("avx512.mask.andn.") || // Added in 3.9
192 : Name.startswith("avx512.mask.or.") || // Added in 3.9
193 : Name.startswith("avx512.mask.xor.") || // Added in 3.9
194 : Name.startswith("avx512.mask.padd.") || // Added in 4.0
195 : Name.startswith("avx512.mask.psub.") || // Added in 4.0
196 : Name.startswith("avx512.mask.pmull.") || // Added in 4.0
197 : Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
198 : Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
199 : Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
200 : Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
201 : Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
202 : Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
203 : Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
204 : Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
205 : Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
206 : Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
207 : Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
208 : Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
209 : Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
210 : Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
211 : Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
212 : Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
213 : Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
214 : Name == "avx512.cvtusi2sd" || // Added in 7.0
215 : Name.startswith("avx512.mask.permvar.") || // Added in 7.0
216 : Name.startswith("avx512.mask.permvar.") || // Added in 7.0
217 : Name == "sse2.pmulu.dq" || // Added in 7.0
218 : Name == "sse41.pmuldq" || // Added in 7.0
219 : Name == "avx2.pmulu.dq" || // Added in 7.0
220 : Name == "avx2.pmul.dq" || // Added in 7.0
221 : Name == "avx512.pmulu.dq.512" || // Added in 7.0
222 : Name == "avx512.pmul.dq.512" || // Added in 7.0
223 : Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
224 : Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
225 : Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
226 : Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
227 : Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
228 : Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
229 : Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
230 : Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
231 : Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
232 : Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
233 : Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
234 : Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
235 : Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
236 : Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
237 : Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
238 : Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
239 : Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
240 : Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
241 : Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
242 : Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
243 : Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
244 : Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
245 : Name.startswith("avx512.mask.psll.d") || // Added in 4.0
246 : Name.startswith("avx512.mask.psll.q") || // Added in 4.0
247 : Name.startswith("avx512.mask.psll.w") || // Added in 4.0
248 : Name.startswith("avx512.mask.psra.d") || // Added in 4.0
249 : Name.startswith("avx512.mask.psra.q") || // Added in 4.0
250 : Name.startswith("avx512.mask.psra.w") || // Added in 4.0
251 : Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
252 : Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
253 : Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
254 : Name.startswith("avx512.mask.pslli") || // Added in 4.0
255 : Name.startswith("avx512.mask.psrai") || // Added in 4.0
256 : Name.startswith("avx512.mask.psrli") || // Added in 4.0
257 : Name.startswith("avx512.mask.psllv") || // Added in 4.0
258 : Name.startswith("avx512.mask.psrav") || // Added in 4.0
259 : Name.startswith("avx512.mask.psrlv") || // Added in 4.0
260 : Name.startswith("sse41.pmovsx") || // Added in 3.8
261 : Name.startswith("sse41.pmovzx") || // Added in 3.9
262 : Name.startswith("avx2.pmovsx") || // Added in 3.9
263 : Name.startswith("avx2.pmovzx") || // Added in 3.9
264 : Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
265 : Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
266 : Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
267 : Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
268 : Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
269 : Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
270 : Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
271 : Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
272 : Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
273 : Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
274 : Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
275 : Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
276 : Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
277 : Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
278 : Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
279 : Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
280 : Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
281 : Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
282 : Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
283 : Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
284 : Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
285 : Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
286 : Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
287 : Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
288 : Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
289 : Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
290 : Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
291 : Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
292 : Name.startswith("avx512.mask.prorv.") || // Added in 7.0
293 : Name.startswith("avx512.mask.pror.") || // Added in 7.0
294 : Name.startswith("avx512.mask.prolv.") || // Added in 7.0
295 : Name.startswith("avx512.mask.prol.") || // Added in 7.0
296 : Name.startswith("avx512.mask.padds.") || // Added in 8.0
297 : Name.startswith("avx512.mask.psubs.") || // Added in 8.0
298 : Name == "sse.cvtsi2ss" || // Added in 7.0
299 : Name == "sse.cvtsi642ss" || // Added in 7.0
300 : Name == "sse2.cvtsi2sd" || // Added in 7.0
301 : Name == "sse2.cvtsi642sd" || // Added in 7.0
302 : Name == "sse2.cvtss2sd" || // Added in 7.0
303 : Name == "sse2.cvtdq2pd" || // Added in 3.9
304 : Name == "sse2.cvtdq2ps" || // Added in 7.0
305 : Name == "sse2.cvtps2pd" || // Added in 3.9
306 : Name == "avx.cvtdq2.pd.256" || // Added in 3.9
307 : Name == "avx.cvtdq2.ps.256" || // Added in 7.0
308 : Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
309 : Name.startswith("avx.vinsertf128.") || // Added in 3.7
310 : Name == "avx2.vinserti128" || // Added in 3.7
311 : Name.startswith("avx512.mask.insert") || // Added in 4.0
312 : Name.startswith("avx.vextractf128.") || // Added in 3.7
313 : Name == "avx2.vextracti128" || // Added in 3.7
314 : Name.startswith("avx512.mask.vextract") || // Added in 4.0
315 : Name.startswith("sse4a.movnt.") || // Added in 3.9
316 : Name.startswith("avx.movnt.") || // Added in 3.2
317 : Name.startswith("avx512.storent.") || // Added in 3.9
318 : Name == "sse41.movntdqa" || // Added in 5.0
319 : Name == "avx2.movntdqa" || // Added in 5.0
320 : Name == "avx512.movntdqa" || // Added in 5.0
321 : Name == "sse2.storel.dq" || // Added in 3.9
322 : Name.startswith("sse.storeu.") || // Added in 3.9
323 : Name.startswith("sse2.storeu.") || // Added in 3.9
324 : Name.startswith("avx.storeu.") || // Added in 3.9
325 : Name.startswith("avx512.mask.storeu.") || // Added in 3.9
326 : Name.startswith("avx512.mask.store.p") || // Added in 3.9
327 : Name.startswith("avx512.mask.store.b.") || // Added in 3.9
328 : Name.startswith("avx512.mask.store.w.") || // Added in 3.9
329 : Name.startswith("avx512.mask.store.d.") || // Added in 3.9
330 : Name.startswith("avx512.mask.store.q.") || // Added in 3.9
331 : Name == "avx512.mask.store.ss" || // Added in 7.0
332 : Name.startswith("avx512.mask.loadu.") || // Added in 3.9
333 : Name.startswith("avx512.mask.load.") || // Added in 3.9
334 : Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
335 : Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
336 : Name == "sse42.crc32.64.8" || // Added in 3.4
337 : Name.startswith("avx.vbroadcast.s") || // Added in 3.5
338 : Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
339 : Name.startswith("avx512.mask.palignr.") || // Added in 3.9
340 : Name.startswith("avx512.mask.valign.") || // Added in 4.0
341 : Name.startswith("sse2.psll.dq") || // Added in 3.7
342 : Name.startswith("sse2.psrl.dq") || // Added in 3.7
343 : Name.startswith("avx2.psll.dq") || // Added in 3.7
344 : Name.startswith("avx2.psrl.dq") || // Added in 3.7
345 : Name.startswith("avx512.psll.dq") || // Added in 3.9
346 : Name.startswith("avx512.psrl.dq") || // Added in 3.9
347 : Name == "sse41.pblendw" || // Added in 3.7
348 : Name.startswith("sse41.blendp") || // Added in 3.7
349 : Name.startswith("avx.blend.p") || // Added in 3.7
350 : Name == "avx2.pblendw" || // Added in 3.7
351 : Name.startswith("avx2.pblendd.") || // Added in 3.7
352 : Name.startswith("avx.vbroadcastf128") || // Added in 4.0
353 : Name == "avx2.vbroadcasti128" || // Added in 3.7
354 : Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
355 : Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
356 : Name == "xop.vpcmov" || // Added in 3.8
357 : Name == "xop.vpcmov.256" || // Added in 5.0
358 : Name.startswith("avx512.mask.move.s") || // Added in 4.0
359 : Name.startswith("avx512.cvtmask2") || // Added in 5.0
360 192 : (Name.startswith("xop.vpcom") && // Added in 3.2
361 192 : F->arg_size() == 2) ||
362 : Name.startswith("avx512.ptestm") || //Added in 6.0
363 : Name.startswith("avx512.ptestnm") || //Added in 6.0
364 : Name.startswith("sse2.pavg") || // Added in 6.0
365 : Name.startswith("avx2.pavg") || // Added in 6.0
366 : Name.startswith("avx512.mask.pavg")) // Added in 6.0
367 4474 : return true;
368 :
369 : return false;
370 : }
371 :
372 16455 : static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
373 : Function *&NewFn) {
374 : // Only handle intrinsics that start with "x86.".
375 : if (!Name.startswith("x86."))
376 : return false;
377 : // Remove "x86." prefix.
378 16379 : Name = Name.substr(4);
379 :
380 16379 : if (ShouldUpgradeX86Intrinsic(F, Name)) {
381 4474 : NewFn = nullptr;
382 4474 : return true;
383 : }
384 :
385 : if (Name == "addcarryx.u32")
386 12 : return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u32, NewFn);
387 : if (Name == "addcarryx.u64")
388 12 : return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u64, NewFn);
389 : if (Name == "addcarry.u32")
390 10 : return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u32, NewFn);
391 : if (Name == "addcarry.u64")
392 10 : return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u64, NewFn);
393 : if (Name == "subborrow.u32")
394 10 : return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u32, NewFn);
395 : if (Name == "subborrow.u64")
396 10 : return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn);
397 :
398 : if (Name == "rdtscp") {
399 : // If this intrinsic has 0 operands, it's the new version.
400 4 : if (F->getFunctionType()->getNumParams() == 0)
401 : return false;
402 :
403 2 : rename(F);
404 2 : NewFn = Intrinsic::getDeclaration(F->getParent(),
405 : Intrinsic::x86_rdtscp);
406 2 : return true;
407 : }
408 :
409 : // SSE4.1 ptest functions may have an old signature.
410 : if (Name.startswith("sse41.ptest")) { // Added in 3.2
411 : if (Name.substr(11) == "c")
412 84 : return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
413 : if (Name.substr(11) == "z")
414 65 : return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
415 : if (Name.substr(11) == "nzc")
416 62 : return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
417 : }
418 : // Several blend and other instructions with masks used the wrong number of
419 : // bits.
420 : if (Name == "sse41.insertps") // Added in 3.6
421 68 : return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
422 68 : NewFn);
423 : if (Name == "sse41.dppd") // Added in 3.6
424 44 : return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
425 44 : NewFn);
426 : if (Name == "sse41.dpps") // Added in 3.6
427 44 : return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
428 44 : NewFn);
429 : if (Name == "sse41.mpsadbw") // Added in 3.6
430 44 : return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
431 44 : NewFn);
432 : if (Name == "avx.dp.ps.256") // Added in 3.6
433 24 : return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
434 24 : NewFn);
435 : if (Name == "avx2.mpsadbw") // Added in 3.6
436 21 : return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
437 21 : NewFn);
438 :
439 : // frcz.ss/sd may need to have an argument dropped. Added in 3.2
440 5 : if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
441 0 : rename(F);
442 0 : NewFn = Intrinsic::getDeclaration(F->getParent(),
443 : Intrinsic::x86_xop_vfrcz_ss);
444 0 : return true;
445 : }
446 5 : if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
447 0 : rename(F);
448 0 : NewFn = Intrinsic::getDeclaration(F->getParent(),
449 : Intrinsic::x86_xop_vfrcz_sd);
450 0 : return true;
451 : }
452 : // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
453 : if (Name.startswith("xop.vpermil2")) { // Added in 3.9
454 48 : auto Idx = F->getFunctionType()->getParamType(2);
455 : if (Idx->isFPOrFPVectorTy()) {
456 4 : rename(F);
457 4 : unsigned IdxSize = Idx->getPrimitiveSizeInBits();
458 4 : unsigned EltSize = Idx->getScalarSizeInBits();
459 : Intrinsic::ID Permil2ID;
460 4 : if (EltSize == 64 && IdxSize == 128)
461 : Permil2ID = Intrinsic::x86_xop_vpermil2pd;
462 3 : else if (EltSize == 32 && IdxSize == 128)
463 : Permil2ID = Intrinsic::x86_xop_vpermil2ps;
464 2 : else if (EltSize == 64 && IdxSize == 256)
465 : Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
466 : else
467 : Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
468 4 : NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
469 4 : return true;
470 : }
471 : }
472 :
473 : return false;
474 : }
475 :
476 382586 : static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
477 : assert(F && "Illegal to upgrade a non-existent Function.");
478 :
479 : // Quickly eliminate it, if it's not a candidate.
480 382586 : StringRef Name = F->getName();
481 382586 : if (Name.size() <= 8 || !Name.startswith("llvm."))
482 : return false;
483 49669 : Name = Name.substr(5); // Strip off "llvm."
484 :
485 49669 : switch (Name[0]) {
486 : default: break;
487 : case 'a': {
488 : if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
489 3 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
490 3 : F->arg_begin()->getType());
491 23 : return true;
492 : }
493 : if (Name.startswith("arm.neon.vclz")) {
494 : Type* args[2] = {
495 2 : F->arg_begin()->getType(),
496 2 : Type::getInt1Ty(F->getContext())
497 4 : };
498 : // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
499 : // the end of the name. Change name from llvm.arm.neon.vclz.* to
500 : // llvm.ctlz.*
501 2 : FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
502 4 : NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
503 4 : "llvm.ctlz." + Name.substr(14), F->getParent());
504 : return true;
505 : }
506 : if (Name.startswith("arm.neon.vcnt")) {
507 2 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
508 2 : F->arg_begin()->getType());
509 2 : return true;
510 : }
511 7964 : Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
512 7964 : if (vldRegex.match(Name)) {
513 : auto fArgs = F->getFunctionType()->params();
514 : SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
515 : // Can't use Intrinsic::getDeclaration here as the return types might
516 : // then only be structurally equal.
517 7 : FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
518 7 : NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
519 7 : "llvm." + Name + ".p0i8", F->getParent());
520 : return true;
521 : }
522 7957 : Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
523 7957 : if (vstRegex.match(Name)) {
524 : static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
525 : Intrinsic::arm_neon_vst2,
526 : Intrinsic::arm_neon_vst3,
527 : Intrinsic::arm_neon_vst4};
528 :
529 : static const Intrinsic::ID StoreLaneInts[] = {
530 : Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
531 : Intrinsic::arm_neon_vst4lane
532 : };
533 :
534 : auto fArgs = F->getFunctionType()->params();
535 7 : Type *Tys[] = {fArgs[0], fArgs[1]};
536 7 : if (Name.find("lane") == StringRef::npos)
537 4 : NewFn = Intrinsic::getDeclaration(F->getParent(),
538 4 : StoreInts[fArgs.size() - 3], Tys);
539 : else
540 3 : NewFn = Intrinsic::getDeclaration(F->getParent(),
541 3 : StoreLaneInts[fArgs.size() - 5], Tys);
542 : return true;
543 : }
544 : if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
545 2 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
546 2 : return true;
547 : }
548 7948 : break;
549 : }
550 :
551 : case 'c': {
552 764 : if (Name.startswith("ctlz.") && F->arg_size() == 1) {
553 39 : rename(F);
554 39 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
555 39 : F->arg_begin()->getType());
556 39 : return true;
557 : }
558 578 : if (Name.startswith("cttz.") && F->arg_size() == 1) {
559 25 : rename(F);
560 25 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
561 25 : F->arg_begin()->getType());
562 25 : return true;
563 : }
564 : break;
565 : }
566 : case 'd': {
567 553 : if (Name == "dbg.value" && F->arg_size() == 4) {
568 131 : rename(F);
569 131 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
570 131 : return true;
571 : }
572 : break;
573 : }
574 : case 'i':
575 : case 'l': {
576 : bool IsLifetimeStart = Name.startswith("lifetime.start");
577 : if (IsLifetimeStart || Name.startswith("invariant.start")) {
578 439 : Intrinsic::ID ID = IsLifetimeStart ?
579 : Intrinsic::lifetime_start : Intrinsic::invariant_start;
580 : auto Args = F->getFunctionType()->params();
581 439 : Type* ObjectPtr[1] = {Args[1]};
582 878 : if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
583 77 : rename(F);
584 77 : NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
585 77 : return true;
586 : }
587 : }
588 :
589 : bool IsLifetimeEnd = Name.startswith("lifetime.end");
590 : if (IsLifetimeEnd || Name.startswith("invariant.end")) {
591 405 : Intrinsic::ID ID = IsLifetimeEnd ?
592 : Intrinsic::lifetime_end : Intrinsic::invariant_end;
593 :
594 : auto Args = F->getFunctionType()->params();
595 453 : Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
596 810 : if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
597 58 : rename(F);
598 58 : NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
599 58 : return true;
600 : }
601 : }
602 : if (Name.startswith("invariant.group.barrier")) {
603 : // Rename invariant.group.barrier to launder.invariant.group
604 : auto Args = F->getFunctionType()->params();
605 3 : Type* ObjectPtr[1] = {Args[0]};
606 3 : rename(F);
607 3 : NewFn = Intrinsic::getDeclaration(F->getParent(),
608 : Intrinsic::launder_invariant_group, ObjectPtr);
609 : return true;
610 :
611 : }
612 :
613 : break;
614 : }
615 : case 'm': {
616 : if (Name.startswith("masked.load.")) {
617 284 : Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
618 466 : if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
619 10 : rename(F);
620 10 : NewFn = Intrinsic::getDeclaration(F->getParent(),
621 : Intrinsic::masked_load,
622 : Tys);
623 10 : return true;
624 : }
625 : }
626 : if (Name.startswith("masked.store.")) {
627 : auto Args = F->getFunctionType()->params();
628 223 : Type *Tys[] = { Args[0], Args[1] };
629 446 : if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
630 14 : rename(F);
631 14 : NewFn = Intrinsic::getDeclaration(F->getParent(),
632 : Intrinsic::masked_store,
633 : Tys);
634 14 : return true;
635 : }
636 : }
637 : // Renaming gather/scatter intrinsics with no address space overloading
638 : // to the new overload which includes an address space
639 : if (Name.startswith("masked.gather.")) {
640 311 : Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
641 524 : if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
642 44 : rename(F);
643 44 : NewFn = Intrinsic::getDeclaration(F->getParent(),
644 : Intrinsic::masked_gather, Tys);
645 44 : return true;
646 : }
647 : }
648 : if (Name.startswith("masked.scatter.")) {
649 : auto Args = F->getFunctionType()->params();
650 150 : Type *Tys[] = {Args[0], Args[1]};
651 300 : if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
652 4 : rename(F);
653 4 : NewFn = Intrinsic::getDeclaration(F->getParent(),
654 : Intrinsic::masked_scatter, Tys);
655 4 : return true;
656 : }
657 : }
658 : // Updating the memory intrinsics (memcpy/memmove/memset) that have an
659 : // alignment parameter to embedding the alignment as an attribute of
660 : // the pointer args.
661 813 : if (Name.startswith("memcpy.") && F->arg_size() == 5) {
662 29 : rename(F);
663 : // Get the types of dest, src, and len
664 29 : ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
665 29 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
666 : ParamTypes);
667 : return true;
668 : }
669 139 : if (Name.startswith("memmove.") && F->arg_size() == 5) {
670 5 : rename(F);
671 : // Get the types of dest, src, and len
672 5 : ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
673 5 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
674 : ParamTypes);
675 : return true;
676 : }
677 429 : if (Name.startswith("memset.") && F->arg_size() == 5) {
678 46 : rename(F);
679 : // Get the types of dest, and len
680 : const auto *FT = F->getFunctionType();
681 : Type *ParamTypes[2] = {
682 46 : FT->getParamType(0), // Dest
683 : FT->getParamType(2) // len
684 92 : };
685 46 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
686 : ParamTypes);
687 : return true;
688 : }
689 : break;
690 : }
691 : case 'n': {
692 : if (Name.startswith("nvvm.")) {
693 547 : Name = Name.substr(5);
694 :
695 : // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
696 539 : Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
697 : .Cases("brev32", "brev64", Intrinsic::bitreverse)
698 : .Case("clz.i", Intrinsic::ctlz)
699 : .Case("popc.i", Intrinsic::ctpop)
700 : .Default(Intrinsic::not_intrinsic);
701 8 : if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
702 16 : NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
703 : {F->getReturnType()});
704 8 : return true;
705 : }
706 :
707 : // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
708 : // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
709 : //
710 : // TODO: We could add lohi.i2d.
711 513 : bool Expand = StringSwitch<bool>(Name)
712 : .Cases("abs.i", "abs.ll", true)
713 : .Cases("clz.ll", "popc.ll", "h2f", true)
714 : .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
715 : .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
716 : .Default(false);
717 26 : if (Expand) {
718 26 : NewFn = nullptr;
719 26 : return true;
720 : }
721 : }
722 : break;
723 : }
724 : case 'o':
725 : // We only need to change the name to match the mangling including the
726 : // address space.
727 : if (Name.startswith("objectsize.")) {
728 123 : Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
729 123 : if (F->arg_size() == 2 ||
730 212 : F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
731 36 : rename(F);
732 36 : NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
733 : Tys);
734 36 : return true;
735 : }
736 : }
737 : break;
738 :
739 : case 's':
740 : if (Name == "stackprotectorcheck") {
741 6 : NewFn = nullptr;
742 6 : return true;
743 : }
744 : break;
745 :
746 16455 : case 'x':
747 16455 : if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
748 : return true;
749 : }
750 : // Remangle our intrinsic since we upgrade the mangling
751 44532 : auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
752 44532 : if (Result != None) {
753 131 : NewFn = Result.getValue();
754 131 : return true;
755 : }
756 :
757 : // This may not belong here. This function is effectively being overloaded
758 : // to both detect an intrinsic which needs upgrading, and to provide the
759 : // upgraded form of the intrinsic. We should perhaps have two separate
760 : // functions for this.
761 : return false;
762 : }
763 :
764 382586 : bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
765 382586 : NewFn = nullptr;
766 382586 : bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
767 : assert(F != NewFn && "Intrinsic function upgraded to the same function");
768 :
769 : // Upgrade intrinsic attributes. This does not change the function.
770 382586 : if (NewFn)
771 : F = NewFn;
772 382586 : if (Intrinsic::ID id = F->getIntrinsicID())
773 44853 : F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
774 382586 : return Upgraded;
775 : }
776 :
777 41275 : bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
778 : // Nothing to do yet.
779 41275 : return false;
780 : }
781 :
782 : // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
783 : // to byte shuffles.
784 30 : static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
785 : Value *Op, unsigned Shift) {
786 30 : Type *ResultTy = Op->getType();
787 30 : unsigned NumElts = ResultTy->getVectorNumElements() * 8;
788 :
789 : // Bitcast from a 64-bit element type to a byte element type.
790 30 : Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
791 30 : Op = Builder.CreateBitCast(Op, VecTy, "cast");
792 :
793 : // We'll be shuffling in zeroes.
794 30 : Value *Res = Constant::getNullValue(VecTy);
795 :
796 : // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
797 : // we'll just return the zero vector.
798 30 : if (Shift < 16) {
799 : uint32_t Idxs[64];
800 : // 256/512-bit version is split into 2/4 16-byte lanes.
801 86 : for (unsigned l = 0; l != NumElts; l += 16)
802 952 : for (unsigned i = 0; i != 16; ++i) {
803 896 : unsigned Idx = NumElts + i - Shift;
804 896 : if (Idx < NumElts)
805 244 : Idx -= NumElts - 16; // end of lane, switch operand.
806 896 : Idxs[l + i] = Idx + l;
807 : }
808 :
809 30 : Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
810 : }
811 :
812 : // Bitcast back to a 64-bit element type.
813 30 : return Builder.CreateBitCast(Res, ResultTy, "cast");
814 : }
815 :
816 : // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
817 : // to byte shuffles.
818 124 : static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
819 : unsigned Shift) {
820 124 : Type *ResultTy = Op->getType();
821 124 : unsigned NumElts = ResultTy->getVectorNumElements() * 8;
822 :
823 : // Bitcast from a 64-bit element type to a byte element type.
824 124 : Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
825 124 : Op = Builder.CreateBitCast(Op, VecTy, "cast");
826 :
827 : // We'll be shuffling in zeroes.
828 124 : Value *Res = Constant::getNullValue(VecTy);
829 :
830 : // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
831 : // we'll just return the zero vector.
832 124 : if (Shift < 16) {
833 : uint32_t Idxs[64];
834 : // 256/512-bit version is split into 2/4 16-byte lanes.
835 274 : for (unsigned l = 0; l != NumElts; l += 16)
836 2550 : for (unsigned i = 0; i != 16; ++i) {
837 2400 : unsigned Idx = i + Shift;
838 2400 : if (Idx >= 16)
839 660 : Idx += NumElts - 16; // end of lane, switch operand.
840 2400 : Idxs[l + i] = Idx + l;
841 : }
842 :
843 124 : Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
844 : }
845 :
846 : // Bitcast back to a 64-bit element type.
847 124 : return Builder.CreateBitCast(Res, ResultTy, "cast");
848 : }
849 :
850 3985 : static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
851 : unsigned NumElts) {
852 7970 : llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
853 : cast<IntegerType>(Mask->getType())->getBitWidth());
854 3985 : Mask = Builder.CreateBitCast(Mask, MaskTy);
855 :
856 : // If we have less than 8 elements, then the starting mask was an i8 and
857 : // we need to extract down to the right number of elements.
858 3985 : if (NumElts < 8) {
859 : uint32_t Indices[4];
860 5216 : for (unsigned i = 0; i != NumElts; ++i)
861 4040 : Indices[i] = i;
862 1176 : Mask = Builder.CreateShuffleVector(Mask, Mask,
863 : makeArrayRef(Indices, NumElts),
864 : "extract");
865 : }
866 :
867 3985 : return Mask;
868 : }
869 :
870 4899 : static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
871 : Value *Op0, Value *Op1) {
872 : // If the mask is all ones just emit the first operation.
873 : if (const auto *C = dyn_cast<Constant>(Mask))
874 1861 : if (C->isAllOnesValue())
875 : return Op0;
876 :
877 6084 : Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
878 3042 : return Builder.CreateSelect(Mask, Op0, Op1);
879 : }
880 :
881 175 : static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
882 : Value *Op0, Value *Op1) {
883 : // If the mask is all ones just emit the first operation.
884 : if (const auto *C = dyn_cast<Constant>(Mask))
885 52 : if (C->isAllOnesValue())
886 : return Op0;
887 :
888 : llvm::VectorType *MaskTy =
889 262 : llvm::VectorType::get(Builder.getInt1Ty(),
890 : Mask->getType()->getIntegerBitWidth());
891 131 : Mask = Builder.CreateBitCast(Mask, MaskTy);
892 131 : Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
893 131 : return Builder.CreateSelect(Mask, Op0, Op1);
894 : }
895 :
896 : // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
897 : // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
898 : // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
899 42 : static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
900 : Value *Op1, Value *Shift,
901 : Value *Passthru, Value *Mask,
902 : bool IsVALIGN) {
903 42 : unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
904 :
905 42 : unsigned NumElts = Op0->getType()->getVectorNumElements();
906 : assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
907 : assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
908 : assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
909 :
910 : // Mask the immediate for VALIGN.
911 42 : if (IsVALIGN)
912 24 : ShiftVal &= (NumElts - 1);
913 :
914 : // If palignr is shifting the pair of vectors more than the size of two
915 : // lanes, emit zero.
916 42 : if (ShiftVal >= 32)
917 0 : return llvm::Constant::getNullValue(Op0->getType());
918 :
919 : // If palignr is shifting the pair of input vectors more than one lane,
920 : // but less than two lanes, convert to shifting in zeroes.
921 42 : if (ShiftVal > 16) {
922 0 : ShiftVal -= 16;
923 : Op1 = Op0;
924 0 : Op0 = llvm::Constant::getNullValue(Op0->getType());
925 : }
926 :
927 : uint32_t Indices[64];
928 : // 256-bit palignr operates on 128-bit lanes so we need to handle that
929 108 : for (unsigned l = 0; l < NumElts; l += 16) {
930 1122 : for (unsigned i = 0; i != 16; ++i) {
931 1056 : unsigned Idx = ShiftVal + i;
932 1056 : if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
933 84 : Idx += NumElts - 16; // End of lane, switch operand.
934 1056 : Indices[l + i] = Idx + l;
935 : }
936 : }
937 :
938 42 : Value *Align = Builder.CreateShuffleVector(Op1, Op0,
939 : makeArrayRef(Indices, NumElts),
940 : "palignr");
941 :
942 42 : return EmitX86Select(Builder, Mask, Align, Passthru);
943 : }
944 :
945 605 : static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
946 : bool IsAddition) {
947 605 : Value *Op0 = CI.getOperand(0);
948 : Value *Op1 = CI.getOperand(1);
949 :
950 : // Collect vector elements and type data.
951 605 : Type *ResultType = CI.getType();
952 :
953 : Value *Res;
954 605 : if (IsAddition) {
955 : // ADDUS: a > (a+b) ? ~0 : (a+b)
956 : // If Op0 > Add, overflow occured.
957 226 : Value *Add = Builder.CreateAdd(Op0, Op1);
958 226 : Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Add);
959 226 : Value *Max = llvm::Constant::getAllOnesValue(ResultType);
960 226 : Res = Builder.CreateSelect(ICmp, Max, Add);
961 : } else {
962 : // SUBUS: max(a, b) - b
963 379 : Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);
964 379 : Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);
965 379 : Res = Builder.CreateSub(Select, Op1);
966 : }
967 :
968 605 : if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
969 : Value *VecSrc = CI.getOperand(2);
970 : Value *Mask = CI.getOperand(3);
971 148 : Res = EmitX86Select(Builder, Mask, Res, VecSrc);
972 : }
973 605 : return Res;
974 : }
975 :
976 120 : static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
977 : Value *Ptr, Value *Data, Value *Mask,
978 : bool Aligned) {
979 : // Cast the pointer to the right type.
980 120 : Ptr = Builder.CreateBitCast(Ptr,
981 120 : llvm::PointerType::getUnqual(Data->getType()));
982 : unsigned Align =
983 120 : Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
984 :
985 : // If the mask is all ones just emit a regular store.
986 : if (const auto *C = dyn_cast<Constant>(Mask))
987 60 : if (C->isAllOnesValue())
988 60 : return Builder.CreateAlignedStore(Data, Ptr, Align);
989 :
990 : // Convert the mask from an integer type to a vector of i1.
991 60 : unsigned NumElts = Data->getType()->getVectorNumElements();
992 60 : Mask = getX86MaskVec(Builder, Mask, NumElts);
993 60 : return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
994 : }
995 :
996 180 : static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
997 : Value *Ptr, Value *Passthru, Value *Mask,
998 : bool Aligned) {
999 : // Cast the pointer to the right type.
1000 180 : Ptr = Builder.CreateBitCast(Ptr,
1001 180 : llvm::PointerType::getUnqual(Passthru->getType()));
1002 : unsigned Align =
1003 180 : Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1004 :
1005 : // If the mask is all ones just emit a regular store.
1006 : if (const auto *C = dyn_cast<Constant>(Mask))
1007 60 : if (C->isAllOnesValue())
1008 60 : return Builder.CreateAlignedLoad(Ptr, Align);
1009 :
1010 : // Convert the mask from an integer type to a vector of i1.
1011 120 : unsigned NumElts = Passthru->getType()->getVectorNumElements();
1012 120 : Mask = getX86MaskVec(Builder, Mask, NumElts);
1013 240 : return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1014 : }
1015 :
1016 246 : static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1017 246 : Value *Op0 = CI.getArgOperand(0);
1018 246 : llvm::Type *Ty = Op0->getType();
1019 246 : Value *Zero = llvm::Constant::getNullValue(Ty);
1020 246 : Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1021 246 : Value *Neg = Builder.CreateNeg(Op0);
1022 246 : Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1023 :
1024 246 : if (CI.getNumArgOperands() == 3)
1025 48 : Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1026 :
1027 246 : return Res;
1028 : }
1029 :
1030 1074 : static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1031 : ICmpInst::Predicate Pred) {
1032 1074 : Value *Op0 = CI.getArgOperand(0);
1033 : Value *Op1 = CI.getArgOperand(1);
1034 1074 : Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1035 1074 : Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1036 :
1037 1074 : if (CI.getNumArgOperands() == 4)
1038 192 : Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1039 :
1040 1074 : return Res;
1041 : }
1042 :
1043 322 : static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1044 322 : Type *Ty = CI.getType();
1045 :
1046 : // Arguments have a vXi32 type so cast to vXi64.
1047 644 : Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1048 322 : Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1049 :
1050 322 : if (IsSigned) {
1051 : // Shift left then arithmetic shift right.
1052 146 : Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1053 146 : LHS = Builder.CreateShl(LHS, ShiftAmt);
1054 146 : LHS = Builder.CreateAShr(LHS, ShiftAmt);
1055 146 : RHS = Builder.CreateShl(RHS, ShiftAmt);
1056 146 : RHS = Builder.CreateAShr(RHS, ShiftAmt);
1057 : } else {
1058 : // Clear the upper bits.
1059 176 : Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1060 176 : LHS = Builder.CreateAnd(LHS, Mask);
1061 176 : RHS = Builder.CreateAnd(RHS, Mask);
1062 : }
1063 :
1064 322 : Value *Res = Builder.CreateMul(LHS, RHS);
1065 :
1066 322 : if (CI.getNumArgOperands() == 4)
1067 108 : Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1068 :
1069 322 : return Res;
1070 : }
1071 :
1072 : // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1073 1026 : static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1074 : Value *Mask) {
1075 1026 : unsigned NumElts = Vec->getType()->getVectorNumElements();
1076 1026 : if (Mask) {
1077 : const auto *C = dyn_cast<Constant>(Mask);
1078 505 : if (!C || !C->isAllOnesValue())
1079 495 : Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1080 : }
1081 :
1082 1026 : if (NumElts < 8) {
1083 : uint32_t Indices[8];
1084 1149 : for (unsigned i = 0; i != NumElts; ++i)
1085 884 : Indices[i] = i;
1086 1501 : for (unsigned i = NumElts; i != 8; ++i)
1087 1236 : Indices[i] = NumElts + i % NumElts;
1088 265 : Vec = Builder.CreateShuffleVector(Vec,
1089 265 : Constant::getNullValue(Vec->getType()),
1090 : Indices);
1091 : }
1092 1291 : return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1093 : }
1094 :
1095 864 : static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1096 : unsigned CC, bool Signed) {
1097 864 : Value *Op0 = CI.getArgOperand(0);
1098 864 : unsigned NumElts = Op0->getType()->getVectorNumElements();
1099 :
1100 : Value *Cmp;
1101 864 : if (CC == 3) {
1102 96 : Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1103 768 : } else if (CC == 7) {
1104 96 : Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1105 : } else {
1106 : ICmpInst::Predicate Pred;
1107 672 : switch (CC) {
1108 0 : default: llvm_unreachable("Unknown condition code");
1109 : case 0: Pred = ICmpInst::ICMP_EQ; break;
1110 96 : case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1111 96 : case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1112 96 : case 4: Pred = ICmpInst::ICMP_NE; break;
1113 96 : case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1114 144 : case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1115 : }
1116 672 : Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1117 : }
1118 :
1119 864 : Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1120 :
1121 864 : return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1122 : }
1123 :
1124 : // Replace a masked intrinsic with an older unmasked intrinsic.
1125 500 : static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1126 : Intrinsic::ID IID) {
1127 500 : Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1128 500 : Value *Rep = Builder.CreateCall(Intrin,
1129 500 : { CI.getArgOperand(0), CI.getArgOperand(1) });
1130 500 : return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1131 : }
1132 :
1133 8 : static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1134 8 : Value* A = CI.getArgOperand(0);
1135 : Value* B = CI.getArgOperand(1);
1136 : Value* Src = CI.getArgOperand(2);
1137 : Value* Mask = CI.getArgOperand(3);
1138 :
1139 16 : Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1140 8 : Value* Cmp = Builder.CreateIsNotNull(AndNode);
1141 8 : Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1142 8 : Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1143 8 : Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1144 8 : return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1145 : }
1146 :
1147 :
1148 24 : static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1149 24 : Value* Op = CI.getArgOperand(0);
1150 24 : Type* ReturnOp = CI.getType();
1151 : unsigned NumElts = CI.getType()->getVectorNumElements();
1152 24 : Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1153 24 : return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1154 : }
1155 :
1156 : // Replace intrinsic with unmasked version and a select.
1157 924 : static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1158 : CallInst &CI, Value *&Rep) {
1159 924 : Name = Name.substr(12); // Remove avx512.mask.
1160 :
1161 924 : unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1162 924 : unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1163 : Intrinsic::ID IID;
1164 : if (Name.startswith("max.p")) {
1165 12 : if (VecWidth == 128 && EltWidth == 32)
1166 : IID = Intrinsic::x86_sse_max_ps;
1167 6 : else if (VecWidth == 128 && EltWidth == 64)
1168 : IID = Intrinsic::x86_sse2_max_pd;
1169 6 : else if (VecWidth == 256 && EltWidth == 32)
1170 : IID = Intrinsic::x86_avx_max_ps_256;
1171 0 : else if (VecWidth == 256 && EltWidth == 64)
1172 : IID = Intrinsic::x86_avx_max_pd_256;
1173 : else
1174 0 : llvm_unreachable("Unexpected intrinsic");
1175 : } else if (Name.startswith("min.p")) {
1176 12 : if (VecWidth == 128 && EltWidth == 32)
1177 : IID = Intrinsic::x86_sse_min_ps;
1178 6 : else if (VecWidth == 128 && EltWidth == 64)
1179 : IID = Intrinsic::x86_sse2_min_pd;
1180 6 : else if (VecWidth == 256 && EltWidth == 32)
1181 : IID = Intrinsic::x86_avx_min_ps_256;
1182 0 : else if (VecWidth == 256 && EltWidth == 64)
1183 : IID = Intrinsic::x86_avx_min_pd_256;
1184 : else
1185 0 : llvm_unreachable("Unexpected intrinsic");
1186 : } else if (Name.startswith("pshuf.b.")) {
1187 28 : if (VecWidth == 128)
1188 : IID = Intrinsic::x86_ssse3_pshuf_b_128;
1189 24 : else if (VecWidth == 256)
1190 : IID = Intrinsic::x86_avx2_pshuf_b;
1191 20 : else if (VecWidth == 512)
1192 : IID = Intrinsic::x86_avx512_pshuf_b_512;
1193 : else
1194 0 : llvm_unreachable("Unexpected intrinsic");
1195 : } else if (Name.startswith("pmul.hr.sw.")) {
1196 12 : if (VecWidth == 128)
1197 : IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1198 8 : else if (VecWidth == 256)
1199 : IID = Intrinsic::x86_avx2_pmul_hr_sw;
1200 4 : else if (VecWidth == 512)
1201 : IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1202 : else
1203 0 : llvm_unreachable("Unexpected intrinsic");
1204 : } else if (Name.startswith("pmulh.w.")) {
1205 12 : if (VecWidth == 128)
1206 : IID = Intrinsic::x86_sse2_pmulh_w;
1207 8 : else if (VecWidth == 256)
1208 : IID = Intrinsic::x86_avx2_pmulh_w;
1209 4 : else if (VecWidth == 512)
1210 : IID = Intrinsic::x86_avx512_pmulh_w_512;
1211 : else
1212 0 : llvm_unreachable("Unexpected intrinsic");
1213 : } else if (Name.startswith("pmulhu.w.")) {
1214 12 : if (VecWidth == 128)
1215 : IID = Intrinsic::x86_sse2_pmulhu_w;
1216 8 : else if (VecWidth == 256)
1217 : IID = Intrinsic::x86_avx2_pmulhu_w;
1218 4 : else if (VecWidth == 512)
1219 : IID = Intrinsic::x86_avx512_pmulhu_w_512;
1220 : else
1221 0 : llvm_unreachable("Unexpected intrinsic");
1222 : } else if (Name.startswith("pmaddw.d.")) {
1223 12 : if (VecWidth == 128)
1224 : IID = Intrinsic::x86_sse2_pmadd_wd;
1225 8 : else if (VecWidth == 256)
1226 : IID = Intrinsic::x86_avx2_pmadd_wd;
1227 4 : else if (VecWidth == 512)
1228 : IID = Intrinsic::x86_avx512_pmaddw_d_512;
1229 : else
1230 0 : llvm_unreachable("Unexpected intrinsic");
1231 : } else if (Name.startswith("pmaddubs.w.")) {
1232 12 : if (VecWidth == 128)
1233 : IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1234 8 : else if (VecWidth == 256)
1235 : IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1236 4 : else if (VecWidth == 512)
1237 : IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1238 : else
1239 0 : llvm_unreachable("Unexpected intrinsic");
1240 : } else if (Name.startswith("packsswb.")) {
1241 36 : if (VecWidth == 128)
1242 : IID = Intrinsic::x86_sse2_packsswb_128;
1243 24 : else if (VecWidth == 256)
1244 : IID = Intrinsic::x86_avx2_packsswb;
1245 12 : else if (VecWidth == 512)
1246 : IID = Intrinsic::x86_avx512_packsswb_512;
1247 : else
1248 0 : llvm_unreachable("Unexpected intrinsic");
1249 : } else if (Name.startswith("packssdw.")) {
1250 54 : if (VecWidth == 128)
1251 : IID = Intrinsic::x86_sse2_packssdw_128;
1252 36 : else if (VecWidth == 256)
1253 : IID = Intrinsic::x86_avx2_packssdw;
1254 18 : else if (VecWidth == 512)
1255 : IID = Intrinsic::x86_avx512_packssdw_512;
1256 : else
1257 0 : llvm_unreachable("Unexpected intrinsic");
1258 : } else if (Name.startswith("packuswb.")) {
1259 36 : if (VecWidth == 128)
1260 : IID = Intrinsic::x86_sse2_packuswb_128;
1261 24 : else if (VecWidth == 256)
1262 : IID = Intrinsic::x86_avx2_packuswb;
1263 12 : else if (VecWidth == 512)
1264 : IID = Intrinsic::x86_avx512_packuswb_512;
1265 : else
1266 0 : llvm_unreachable("Unexpected intrinsic");
1267 : } else if (Name.startswith("packusdw.")) {
1268 54 : if (VecWidth == 128)
1269 : IID = Intrinsic::x86_sse41_packusdw;
1270 36 : else if (VecWidth == 256)
1271 : IID = Intrinsic::x86_avx2_packusdw;
1272 18 : else if (VecWidth == 512)
1273 : IID = Intrinsic::x86_avx512_packusdw_512;
1274 : else
1275 0 : llvm_unreachable("Unexpected intrinsic");
1276 : } else if (Name.startswith("vpermilvar.")) {
1277 46 : if (VecWidth == 128 && EltWidth == 32)
1278 : IID = Intrinsic::x86_avx_vpermilvar_ps;
1279 40 : else if (VecWidth == 128 && EltWidth == 64)
1280 : IID = Intrinsic::x86_avx_vpermilvar_pd;
1281 34 : else if (VecWidth == 256 && EltWidth == 32)
1282 : IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1283 28 : else if (VecWidth == 256 && EltWidth == 64)
1284 : IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1285 22 : else if (VecWidth == 512 && EltWidth == 32)
1286 : IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1287 6 : else if (VecWidth == 512 && EltWidth == 64)
1288 : IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1289 : else
1290 0 : llvm_unreachable("Unexpected intrinsic");
1291 : } else if (Name == "cvtpd2dq.256") {
1292 : IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1293 : } else if (Name == "cvtpd2ps.256") {
1294 : IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1295 : } else if (Name == "cvttpd2dq.256") {
1296 : IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1297 : } else if (Name == "cvttps2dq.128") {
1298 : IID = Intrinsic::x86_sse2_cvttps2dq;
1299 : } else if (Name == "cvttps2dq.256") {
1300 : IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1301 : } else if (Name.startswith("permvar.")) {
1302 : bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1303 84 : if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1304 : IID = Intrinsic::x86_avx2_permps;
1305 78 : else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1306 : IID = Intrinsic::x86_avx2_permd;
1307 72 : else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1308 : IID = Intrinsic::x86_avx512_permvar_df_256;
1309 66 : else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1310 : IID = Intrinsic::x86_avx512_permvar_di_256;
1311 60 : else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1312 : IID = Intrinsic::x86_avx512_permvar_sf_512;
1313 54 : else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1314 : IID = Intrinsic::x86_avx512_permvar_si_512;
1315 48 : else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1316 : IID = Intrinsic::x86_avx512_permvar_df_512;
1317 42 : else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1318 : IID = Intrinsic::x86_avx512_permvar_di_512;
1319 36 : else if (VecWidth == 128 && EltWidth == 16)
1320 : IID = Intrinsic::x86_avx512_permvar_hi_128;
1321 30 : else if (VecWidth == 256 && EltWidth == 16)
1322 : IID = Intrinsic::x86_avx512_permvar_hi_256;
1323 24 : else if (VecWidth == 512 && EltWidth == 16)
1324 : IID = Intrinsic::x86_avx512_permvar_hi_512;
1325 18 : else if (VecWidth == 128 && EltWidth == 8)
1326 : IID = Intrinsic::x86_avx512_permvar_qi_128;
1327 12 : else if (VecWidth == 256 && EltWidth == 8)
1328 : IID = Intrinsic::x86_avx512_permvar_qi_256;
1329 6 : else if (VecWidth == 512 && EltWidth == 8)
1330 : IID = Intrinsic::x86_avx512_permvar_qi_512;
1331 : else
1332 0 : llvm_unreachable("Unexpected intrinsic");
1333 : } else if (Name.startswith("dbpsadbw.")) {
1334 18 : if (VecWidth == 128)
1335 : IID = Intrinsic::x86_avx512_dbpsadbw_128;
1336 12 : else if (VecWidth == 256)
1337 : IID = Intrinsic::x86_avx512_dbpsadbw_256;
1338 6 : else if (VecWidth == 512)
1339 : IID = Intrinsic::x86_avx512_dbpsadbw_512;
1340 : else
1341 0 : llvm_unreachable("Unexpected intrinsic");
1342 : } else if (Name.startswith("vpshld.")) {
1343 38 : if (VecWidth == 128 && Name[7] == 'q')
1344 : IID = Intrinsic::x86_avx512_vpshld_q_128;
1345 34 : else if (VecWidth == 128 && Name[7] == 'd')
1346 : IID = Intrinsic::x86_avx512_vpshld_d_128;
1347 28 : else if (VecWidth == 128 && Name[7] == 'w')
1348 : IID = Intrinsic::x86_avx512_vpshld_w_128;
1349 24 : else if (VecWidth == 256 && Name[7] == 'q')
1350 : IID = Intrinsic::x86_avx512_vpshld_q_256;
1351 20 : else if (VecWidth == 256 && Name[7] == 'd')
1352 : IID = Intrinsic::x86_avx512_vpshld_d_256;
1353 16 : else if (VecWidth == 256 && Name[7] == 'w')
1354 : IID = Intrinsic::x86_avx512_vpshld_w_256;
1355 12 : else if (VecWidth == 512 && Name[7] == 'q')
1356 : IID = Intrinsic::x86_avx512_vpshld_q_512;
1357 8 : else if (VecWidth == 512 && Name[7] == 'd')
1358 : IID = Intrinsic::x86_avx512_vpshld_d_512;
1359 4 : else if (VecWidth == 512 && Name[7] == 'w')
1360 : IID = Intrinsic::x86_avx512_vpshld_w_512;
1361 : else
1362 0 : llvm_unreachable("Unexpected intrinsic");
1363 : } else if (Name.startswith("vpshrd.")) {
1364 38 : if (VecWidth == 128 && Name[7] == 'q')
1365 : IID = Intrinsic::x86_avx512_vpshrd_q_128;
1366 34 : else if (VecWidth == 128 && Name[7] == 'd')
1367 : IID = Intrinsic::x86_avx512_vpshrd_d_128;
1368 28 : else if (VecWidth == 128 && Name[7] == 'w')
1369 : IID = Intrinsic::x86_avx512_vpshrd_w_128;
1370 24 : else if (VecWidth == 256 && Name[7] == 'q')
1371 : IID = Intrinsic::x86_avx512_vpshrd_q_256;
1372 20 : else if (VecWidth == 256 && Name[7] == 'd')
1373 : IID = Intrinsic::x86_avx512_vpshrd_d_256;
1374 16 : else if (VecWidth == 256 && Name[7] == 'w')
1375 : IID = Intrinsic::x86_avx512_vpshrd_w_256;
1376 12 : else if (VecWidth == 512 && Name[7] == 'q')
1377 : IID = Intrinsic::x86_avx512_vpshrd_q_512;
1378 8 : else if (VecWidth == 512 && Name[7] == 'd')
1379 : IID = Intrinsic::x86_avx512_vpshrd_d_512;
1380 4 : else if (VecWidth == 512 && Name[7] == 'w')
1381 : IID = Intrinsic::x86_avx512_vpshrd_w_512;
1382 : else
1383 0 : llvm_unreachable("Unexpected intrinsic");
1384 : } else if (Name.startswith("prorv.")) {
1385 64 : if (VecWidth == 128 && EltWidth == 32)
1386 : IID = Intrinsic::x86_avx512_prorv_d_128;
1387 58 : else if (VecWidth == 256 && EltWidth == 32)
1388 : IID = Intrinsic::x86_avx512_prorv_d_256;
1389 52 : else if (VecWidth == 512 && EltWidth == 32)
1390 : IID = Intrinsic::x86_avx512_prorv_d_512;
1391 32 : else if (VecWidth == 128 && EltWidth == 64)
1392 : IID = Intrinsic::x86_avx512_prorv_q_128;
1393 26 : else if (VecWidth == 256 && EltWidth == 64)
1394 : IID = Intrinsic::x86_avx512_prorv_q_256;
1395 20 : else if (VecWidth == 512 && EltWidth == 64)
1396 : IID = Intrinsic::x86_avx512_prorv_q_512;
1397 : else
1398 0 : llvm_unreachable("Unexpected intrinsic");
1399 : } else if (Name.startswith("prolv.")) {
1400 64 : if (VecWidth == 128 && EltWidth == 32)
1401 : IID = Intrinsic::x86_avx512_prolv_d_128;
1402 58 : else if (VecWidth == 256 && EltWidth == 32)
1403 : IID = Intrinsic::x86_avx512_prolv_d_256;
1404 52 : else if (VecWidth == 512 && EltWidth == 32)
1405 : IID = Intrinsic::x86_avx512_prolv_d_512;
1406 32 : else if (VecWidth == 128 && EltWidth == 64)
1407 : IID = Intrinsic::x86_avx512_prolv_q_128;
1408 26 : else if (VecWidth == 256 && EltWidth == 64)
1409 : IID = Intrinsic::x86_avx512_prolv_q_256;
1410 20 : else if (VecWidth == 512 && EltWidth == 64)
1411 : IID = Intrinsic::x86_avx512_prolv_q_512;
1412 : else
1413 0 : llvm_unreachable("Unexpected intrinsic");
1414 : } else if (Name.startswith("pror.")) {
1415 36 : if (VecWidth == 128 && EltWidth == 32)
1416 : IID = Intrinsic::x86_avx512_pror_d_128;
1417 30 : else if (VecWidth == 256 && EltWidth == 32)
1418 : IID = Intrinsic::x86_avx512_pror_d_256;
1419 24 : else if (VecWidth == 512 && EltWidth == 32)
1420 : IID = Intrinsic::x86_avx512_pror_d_512;
1421 18 : else if (VecWidth == 128 && EltWidth == 64)
1422 : IID = Intrinsic::x86_avx512_pror_q_128;
1423 12 : else if (VecWidth == 256 && EltWidth == 64)
1424 : IID = Intrinsic::x86_avx512_pror_q_256;
1425 6 : else if (VecWidth == 512 && EltWidth == 64)
1426 : IID = Intrinsic::x86_avx512_pror_q_512;
1427 : else
1428 0 : llvm_unreachable("Unexpected intrinsic");
1429 : } else if (Name.startswith("prol.")) {
1430 36 : if (VecWidth == 128 && EltWidth == 32)
1431 : IID = Intrinsic::x86_avx512_prol_d_128;
1432 30 : else if (VecWidth == 256 && EltWidth == 32)
1433 : IID = Intrinsic::x86_avx512_prol_d_256;
1434 24 : else if (VecWidth == 512 && EltWidth == 32)
1435 : IID = Intrinsic::x86_avx512_prol_d_512;
1436 18 : else if (VecWidth == 128 && EltWidth == 64)
1437 : IID = Intrinsic::x86_avx512_prol_q_128;
1438 12 : else if (VecWidth == 256 && EltWidth == 64)
1439 : IID = Intrinsic::x86_avx512_prol_q_256;
1440 6 : else if (VecWidth == 512 && EltWidth == 64)
1441 : IID = Intrinsic::x86_avx512_prol_q_512;
1442 : else
1443 0 : llvm_unreachable("Unexpected intrinsic");
1444 : } else if (Name.startswith("padds.")) {
1445 94 : if (VecWidth == 128 && EltWidth == 8)
1446 : IID = Intrinsic::x86_sse2_padds_b;
1447 82 : else if (VecWidth == 256 && EltWidth == 8)
1448 : IID = Intrinsic::x86_avx2_padds_b;
1449 70 : else if (VecWidth == 512 && EltWidth == 8)
1450 : IID = Intrinsic::x86_avx512_padds_b_512;
1451 41 : else if (VecWidth == 128 && EltWidth == 16)
1452 : IID = Intrinsic::x86_sse2_padds_w;
1453 29 : else if (VecWidth == 256 && EltWidth == 16)
1454 : IID = Intrinsic::x86_avx2_padds_w;
1455 17 : else if (VecWidth == 512 && EltWidth == 16)
1456 : IID = Intrinsic::x86_avx512_padds_w_512;
1457 : else
1458 0 : llvm_unreachable("Unexpected intrinsic");
1459 : } else if (Name.startswith("psubs.")) {
1460 94 : if (VecWidth == 128 && EltWidth == 8)
1461 : IID = Intrinsic::x86_sse2_psubs_b;
1462 82 : else if (VecWidth == 256 && EltWidth == 8)
1463 : IID = Intrinsic::x86_avx2_psubs_b;
1464 70 : else if (VecWidth == 512 && EltWidth == 8)
1465 : IID = Intrinsic::x86_avx512_psubs_b_512;
1466 41 : else if (VecWidth == 128 && EltWidth == 16)
1467 : IID = Intrinsic::x86_sse2_psubs_w;
1468 29 : else if (VecWidth == 256 && EltWidth == 16)
1469 : IID = Intrinsic::x86_avx2_psubs_w;
1470 17 : else if (VecWidth == 512 && EltWidth == 16)
1471 : IID = Intrinsic::x86_avx512_psubs_w_512;
1472 : else
1473 0 : llvm_unreachable("Unexpected intrinsic");
1474 : } else
1475 : return false;
1476 :
1477 1848 : SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1478 924 : CI.arg_operands().end());
1479 : Args.pop_back();
1480 : Args.pop_back();
1481 1848 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1482 : Args);
1483 : unsigned NumArgs = CI.getNumArgOperands();
1484 924 : Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1485 : CI.getArgOperand(NumArgs - 2));
1486 : return true;
1487 : }
1488 :
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// The string is rewritten only when all of the following hold:
///   - it begins with "mov\tfp",
///   - it contains "objc_retainAutoreleaseReturnValue", and
///   - it contains "# marker".
/// In that case the '#' of the first "# marker" occurrence is replaced in
/// place with ';'. Any other string is left untouched.
///
/// \param AsmStr the inline asm string to update in place. It is dereferenced
///               unconditionally, so it must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind with a start position of 0 can only match at the very beginning of
  // the string, so this is a pure prefix test; unlike find(...) == 0 it does
  // not scan the remainder of the string when the prefix is absent.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  // Only the single '#' character that starts the marker comment is replaced.
  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
1500 :
1501 : /// Upgrade a call to an old intrinsic. All argument and return casting must be
1502 : /// provided to seamlessly integrate with existing context.
1503 11994 : void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1504 11994 : Function *F = CI->getCalledFunction();
1505 11994 : LLVMContext &C = CI->getContext();
1506 : IRBuilder<> Builder(C);
1507 11994 : Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1508 :
1509 : assert(F && "Intrinsic call is not direct?");
1510 :
1511 11994 : if (!NewFn) {
1512 : // Get the Function's name.
1513 10563 : StringRef Name = F->getName();
1514 :
1515 : assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1516 10563 : Name = Name.substr(5);
1517 :
1518 : bool IsX86 = Name.startswith("x86.");
1519 : if (IsX86)
1520 10531 : Name = Name.substr(4);
1521 : bool IsNVVM = Name.startswith("nvvm.");
1522 : if (IsNVVM)
1523 26 : Name = Name.substr(5);
1524 :
1525 10563 : if (IsX86 && Name.startswith("sse4a.movnt.")) {
1526 14 : Module *M = F->getParent();
1527 : SmallVector<Metadata *, 1> Elts;
1528 14 : Elts.push_back(
1529 28 : ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1530 : MDNode *Node = MDNode::get(C, Elts);
1531 :
1532 14 : Value *Arg0 = CI->getArgOperand(0);
1533 : Value *Arg1 = CI->getArgOperand(1);
1534 :
1535 : // Nontemporal (unaligned) store of the 0'th element of the float/double
1536 : // vector.
1537 14 : Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1538 : PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1539 14 : Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1540 : Value *Extract =
1541 14 : Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1542 :
1543 : StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1544 28 : SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1545 :
1546 : // Remove intrinsic.
1547 14 : CI->eraseFromParent();
1548 : return;
1549 : }
1550 :
1551 10549 : if (IsX86 && (Name.startswith("avx.movnt.") ||
1552 : Name.startswith("avx512.storent."))) {
1553 18 : Module *M = F->getParent();
1554 : SmallVector<Metadata *, 1> Elts;
1555 18 : Elts.push_back(
1556 36 : ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1557 : MDNode *Node = MDNode::get(C, Elts);
1558 :
1559 18 : Value *Arg0 = CI->getArgOperand(0);
1560 : Value *Arg1 = CI->getArgOperand(1);
1561 :
1562 : // Convert the type of the pointer to a pointer to the stored type.
1563 18 : Value *BC = Builder.CreateBitCast(Arg0,
1564 18 : PointerType::getUnqual(Arg1->getType()),
1565 : "cast");
1566 18 : VectorType *VTy = cast<VectorType>(Arg1->getType());
1567 18 : StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1568 : VTy->getBitWidth() / 8);
1569 36 : SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1570 :
1571 : // Remove intrinsic.
1572 18 : CI->eraseFromParent();
1573 : return;
1574 : }
1575 :
1576 10531 : if (IsX86 && Name == "sse2.storel.dq") {
1577 6 : Value *Arg0 = CI->getArgOperand(0);
1578 : Value *Arg1 = CI->getArgOperand(1);
1579 :
1580 6 : Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1581 6 : Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1582 6 : Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1583 6 : Value *BC = Builder.CreateBitCast(Arg0,
1584 6 : PointerType::getUnqual(Elt->getType()),
1585 : "cast");
1586 : Builder.CreateAlignedStore(Elt, BC, 1);
1587 :
1588 : // Remove intrinsic.
1589 6 : CI->eraseFromParent();
1590 6 : return;
1591 : }
1592 :
1593 10525 : if (IsX86 && (Name.startswith("sse.storeu.") ||
1594 : Name.startswith("sse2.storeu.") ||
1595 : Name.startswith("avx.storeu."))) {
1596 85 : Value *Arg0 = CI->getArgOperand(0);
1597 : Value *Arg1 = CI->getArgOperand(1);
1598 :
1599 85 : Arg0 = Builder.CreateBitCast(Arg0,
1600 85 : PointerType::getUnqual(Arg1->getType()),
1601 : "cast");
1602 : Builder.CreateAlignedStore(Arg1, Arg0, 1);
1603 :
1604 : // Remove intrinsic.
1605 85 : CI->eraseFromParent();
1606 85 : return;
1607 : }
1608 :
1609 10440 : if (IsX86 && Name == "avx512.mask.store.ss") {
1610 0 : Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1611 0 : UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1612 : Mask, false);
1613 :
1614 : // Remove intrinsic.
1615 0 : CI->eraseFromParent();
1616 0 : return;
1617 : }
1618 :
1619 10440 : if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1620 : // "avx512.mask.storeu." or "avx512.mask.store."
1621 120 : bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1622 240 : UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1623 : CI->getArgOperand(2), Aligned);
1624 :
1625 : // Remove intrinsic.
1626 120 : CI->eraseFromParent();
1627 120 : return;
1628 : }
1629 :
1630 : Value *Rep;
1631 : // Upgrade packed integer vector compare intrinsics to compare instructions.
1632 10320 : if (IsX86 && (Name.startswith("sse2.pcmp") ||
1633 : Name.startswith("avx2.pcmp"))) {
1634 : // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1635 1 : bool CmpEq = Name[9] == 'e';
1636 2 : Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1637 : CI->getArgOperand(0), CI->getArgOperand(1));
1638 1 : Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1639 10319 : } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1640 12 : Type *ExtTy = Type::getInt32Ty(C);
1641 12 : if (CI->getOperand(0)->getType()->isIntegerTy(8))
1642 6 : ExtTy = Type::getInt64Ty(C);
1643 12 : unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1644 12 : ExtTy->getPrimitiveSizeInBits();
1645 12 : Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1646 12 : Rep = Builder.CreateVectorSplat(NumElts, Rep);
1647 10307 : } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1648 : Name == "sse2.sqrt.sd")) {
1649 139 : Value *Vec = CI->getArgOperand(0);
1650 139 : Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1651 139 : Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1652 139 : Intrinsic::sqrt, Elt0->getType());
1653 139 : Elt0 = Builder.CreateCall(Intr, Elt0);
1654 139 : Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1655 10168 : } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1656 : Name.startswith("sse2.sqrt.p") ||
1657 : Name.startswith("sse.sqrt.p"))) {
1658 136 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1659 : Intrinsic::sqrt,
1660 272 : CI->getType()),
1661 136 : {CI->getArgOperand(0)});
1662 10032 : } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1663 52 : if (CI->getNumArgOperands() == 4 &&
1664 24 : (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1665 : cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1666 24 : Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1667 : : Intrinsic::x86_avx512_sqrt_pd_512;
1668 :
1669 12 : Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1670 24 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1671 : IID), Args);
1672 : } else {
1673 16 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1674 : Intrinsic::sqrt,
1675 32 : CI->getType()),
1676 : {CI->getArgOperand(0)});
1677 : }
1678 28 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1679 : CI->getArgOperand(1));
1680 10004 : } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1681 : Name.startswith("avx512.ptestnm"))) {
1682 100 : Value *Op0 = CI->getArgOperand(0);
1683 : Value *Op1 = CI->getArgOperand(1);
1684 : Value *Mask = CI->getArgOperand(2);
1685 100 : Rep = Builder.CreateAnd(Op0, Op1);
1686 100 : llvm::Type *Ty = Op0->getType();
1687 100 : Value *Zero = llvm::Constant::getNullValue(Ty);
1688 : ICmpInst::Predicate Pred =
1689 : Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1690 100 : Rep = Builder.CreateICmp(Pred, Rep, Zero);
1691 100 : Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1692 9904 : } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1693 : unsigned NumElts =
1694 72 : CI->getArgOperand(1)->getType()->getVectorNumElements();
1695 72 : Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1696 72 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1697 : CI->getArgOperand(1));
1698 9832 : } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1699 6 : unsigned NumElts = CI->getType()->getScalarSizeInBits();
1700 6 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1701 6 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1702 : uint32_t Indices[64];
1703 230 : for (unsigned i = 0; i != NumElts; ++i)
1704 224 : Indices[i] = i;
1705 :
1706 : // First extract half of each vector. This gives better codegen than
1707 : // doing it in a single shuffle.
1708 6 : LHS = Builder.CreateShuffleVector(LHS, LHS,
1709 6 : makeArrayRef(Indices, NumElts / 2));
1710 6 : RHS = Builder.CreateShuffleVector(RHS, RHS,
1711 : makeArrayRef(Indices, NumElts / 2));
1712 : // Concat the vectors.
1713 : // NOTE: Operands have to be swapped to match intrinsic definition.
1714 6 : Rep = Builder.CreateShuffleVector(RHS, LHS,
1715 : makeArrayRef(Indices, NumElts));
1716 6 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1717 9826 : } else if (IsX86 && Name == "avx512.kand.w") {
1718 4 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1719 4 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1720 4 : Rep = Builder.CreateAnd(LHS, RHS);
1721 4 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1722 9822 : } else if (IsX86 && Name == "avx512.kandn.w") {
1723 4 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1724 4 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1725 4 : LHS = Builder.CreateNot(LHS);
1726 4 : Rep = Builder.CreateAnd(LHS, RHS);
1727 4 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1728 9818 : } else if (IsX86 && Name == "avx512.kor.w") {
1729 4 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1730 4 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1731 4 : Rep = Builder.CreateOr(LHS, RHS);
1732 4 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1733 9814 : } else if (IsX86 && Name == "avx512.kxor.w") {
1734 4 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1735 4 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1736 4 : Rep = Builder.CreateXor(LHS, RHS);
1737 4 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1738 9810 : } else if (IsX86 && Name == "avx512.kxnor.w") {
1739 4 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1740 4 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1741 4 : LHS = Builder.CreateNot(LHS);
1742 4 : Rep = Builder.CreateXor(LHS, RHS);
1743 4 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1744 9806 : } else if (IsX86 && Name == "avx512.knot.w") {
1745 2 : Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1746 2 : Rep = Builder.CreateNot(Rep);
1747 2 : Rep = Builder.CreateBitCast(Rep, CI->getType());
1748 9804 : } else if (IsX86 &&
1749 : (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1750 4 : Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1751 4 : Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1752 4 : Rep = Builder.CreateOr(LHS, RHS);
1753 4 : Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1754 : Value *C;
1755 8 : if (Name[14] == 'c')
1756 0 : C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1757 : else
1758 4 : C = ConstantInt::getNullValue(Builder.getInt16Ty());
1759 4 : Rep = Builder.CreateICmpEQ(Rep, C);
1760 4 : Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1761 9800 : } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1762 : Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1763 : Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1764 : Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1765 79 : Type *I32Ty = Type::getInt32Ty(C);
1766 79 : Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1767 79 : ConstantInt::get(I32Ty, 0));
1768 79 : Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1769 79 : ConstantInt::get(I32Ty, 0));
1770 : Value *EltOp;
1771 79 : if (Name.contains(".add."))
1772 17 : EltOp = Builder.CreateFAdd(Elt0, Elt1);
1773 62 : else if (Name.contains(".sub."))
1774 23 : EltOp = Builder.CreateFSub(Elt0, Elt1);
1775 39 : else if (Name.contains(".mul."))
1776 23 : EltOp = Builder.CreateFMul(Elt0, Elt1);
1777 : else
1778 16 : EltOp = Builder.CreateFDiv(Elt0, Elt1);
1779 79 : Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1780 79 : ConstantInt::get(I32Ty, 0));
1781 9721 : } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1782 : // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1783 : bool CmpEq = Name[16] == 'e';
1784 144 : Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1785 9625 : } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1786 24 : Type *OpTy = CI->getArgOperand(0)->getType();
1787 24 : unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1788 24 : unsigned EltWidth = OpTy->getScalarSizeInBits();
1789 : Intrinsic::ID IID;
1790 24 : if (VecWidth == 128 && EltWidth == 32)
1791 : IID = Intrinsic::x86_avx512_fpclass_ps_128;
1792 20 : else if (VecWidth == 256 && EltWidth == 32)
1793 : IID = Intrinsic::x86_avx512_fpclass_ps_256;
1794 16 : else if (VecWidth == 512 && EltWidth == 32)
1795 : IID = Intrinsic::x86_avx512_fpclass_ps_512;
1796 12 : else if (VecWidth == 128 && EltWidth == 64)
1797 : IID = Intrinsic::x86_avx512_fpclass_pd_128;
1798 8 : else if (VecWidth == 256 && EltWidth == 64)
1799 : IID = Intrinsic::x86_avx512_fpclass_pd_256;
1800 4 : else if (VecWidth == 512 && EltWidth == 64)
1801 : IID = Intrinsic::x86_avx512_fpclass_pd_512;
1802 : else
1803 0 : llvm_unreachable("Unexpected intrinsic");
1804 :
1805 48 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1806 : { CI->getOperand(0), CI->getArgOperand(1) });
1807 24 : Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1808 9601 : } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1809 12 : Type *OpTy = CI->getArgOperand(0)->getType();
1810 12 : unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1811 12 : unsigned EltWidth = OpTy->getScalarSizeInBits();
1812 : Intrinsic::ID IID;
1813 12 : if (VecWidth == 128 && EltWidth == 32)
1814 : IID = Intrinsic::x86_avx512_cmp_ps_128;
1815 10 : else if (VecWidth == 256 && EltWidth == 32)
1816 : IID = Intrinsic::x86_avx512_cmp_ps_256;
1817 8 : else if (VecWidth == 512 && EltWidth == 32)
1818 : IID = Intrinsic::x86_avx512_cmp_ps_512;
1819 6 : else if (VecWidth == 128 && EltWidth == 64)
1820 : IID = Intrinsic::x86_avx512_cmp_pd_128;
1821 4 : else if (VecWidth == 256 && EltWidth == 64)
1822 : IID = Intrinsic::x86_avx512_cmp_pd_256;
1823 2 : else if (VecWidth == 512 && EltWidth == 64)
1824 : IID = Intrinsic::x86_avx512_cmp_pd_512;
1825 : else
1826 0 : llvm_unreachable("Unexpected intrinsic");
1827 :
1828 : SmallVector<Value *, 4> Args;
1829 12 : Args.push_back(CI->getArgOperand(0));
1830 12 : Args.push_back(CI->getArgOperand(1));
1831 12 : Args.push_back(CI->getArgOperand(2));
1832 12 : if (CI->getNumArgOperands() == 5)
1833 4 : Args.push_back(CI->getArgOperand(4));
1834 :
1835 24 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1836 : Args);
1837 12 : Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1838 9973 : } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1839 : Name[16] != 'p') {
1840 : // Integer compare intrinsics.
1841 384 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1842 384 : Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1843 9205 : } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1844 384 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1845 384 : Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1846 8821 : } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1847 : Name.startswith("avx512.cvtw2mask.") ||
1848 : Name.startswith("avx512.cvtd2mask.") ||
1849 : Name.startswith("avx512.cvtq2mask."))) {
1850 26 : Value *Op = CI->getArgOperand(0);
1851 26 : Value *Zero = llvm::Constant::getNullValue(Op->getType());
1852 26 : Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1853 26 : Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1854 8795 : } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1855 : Name == "ssse3.pabs.w.128" ||
1856 : Name == "ssse3.pabs.d.128" ||
1857 : Name.startswith("avx2.pabs") ||
1858 : Name.startswith("avx512.mask.pabs"))) {
1859 246 : Rep = upgradeAbs(Builder, *CI);
1860 8549 : } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1861 : Name == "sse2.pmaxs.w" ||
1862 : Name == "sse41.pmaxsd" ||
1863 : Name.startswith("avx2.pmaxs") ||
1864 : Name.startswith("avx512.mask.pmaxs"))) {
1865 261 : Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1866 8288 : } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1867 : Name == "sse41.pmaxuw" ||
1868 : Name == "sse41.pmaxud" ||
1869 : Name.startswith("avx2.pmaxu") ||
1870 : Name.startswith("avx512.mask.pmaxu"))) {
1871 298 : Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1872 7990 : } else if (IsX86 && (Name == "sse41.pminsb" ||
1873 : Name == "sse2.pmins.w" ||
1874 : Name == "sse41.pminsd" ||
1875 : Name.startswith("avx2.pmins") ||
1876 : Name.startswith("avx512.mask.pmins"))) {
1877 257 : Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1878 7733 : } else if (IsX86 && (Name == "sse2.pminu.b" ||
1879 : Name == "sse41.pminuw" ||
1880 : Name == "sse41.pminud" ||
1881 : Name.startswith("avx2.pminu") ||
1882 : Name.startswith("avx512.mask.pminu"))) {
1883 258 : Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1884 7475 : } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1885 : Name == "avx2.pmulu.dq" ||
1886 : Name == "avx512.pmulu.dq.512" ||
1887 : Name.startswith("avx512.mask.pmulu.dq."))) {
1888 176 : Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1889 7299 : } else if (IsX86 && (Name == "sse41.pmuldq" ||
1890 : Name == "avx2.pmul.dq" ||
1891 : Name == "avx512.pmul.dq.512" ||
1892 : Name.startswith("avx512.mask.pmul.dq."))) {
1893 146 : Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1894 7153 : } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1895 : Name == "sse2.cvtsi2sd" ||
1896 : Name == "sse.cvtsi642ss" ||
1897 : Name == "sse2.cvtsi642sd")) {
1898 114 : Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1899 : CI->getType()->getVectorElementType());
1900 38 : Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1901 7115 : } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1902 6 : Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1903 : CI->getType()->getVectorElementType());
1904 2 : Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1905 7113 : } else if (IsX86 && Name == "sse2.cvtss2sd") {
1906 42 : Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1907 21 : Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1908 21 : Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1909 7092 : } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1910 : Name == "sse2.cvtdq2ps" ||
1911 : Name == "avx.cvtdq2.pd.256" ||
1912 : Name == "avx.cvtdq2.ps.256" ||
1913 : Name.startswith("avx512.mask.cvtdq2pd.") ||
1914 : Name.startswith("avx512.mask.cvtudq2pd.") ||
1915 : Name == "avx512.mask.cvtdq2ps.128" ||
1916 : Name == "avx512.mask.cvtdq2ps.256" ||
1917 : Name == "avx512.mask.cvtudq2ps.128" ||
1918 : Name == "avx512.mask.cvtudq2ps.256" ||
1919 : Name == "avx512.mask.cvtqq2pd.128" ||
1920 : Name == "avx512.mask.cvtqq2pd.256" ||
1921 : Name == "avx512.mask.cvtuqq2pd.128" ||
1922 : Name == "avx512.mask.cvtuqq2pd.256" ||
1923 : Name == "sse2.cvtps2pd" ||
1924 : Name == "avx.cvt.ps2.pd.256" ||
1925 : Name == "avx512.mask.cvtps2pd.128" ||
1926 : Name == "avx512.mask.cvtps2pd.256")) {
1927 115 : Type *DstTy = CI->getType();
1928 115 : Rep = CI->getArgOperand(0);
1929 :
1930 : unsigned NumDstElts = DstTy->getVectorNumElements();
1931 230 : if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1932 : assert(NumDstElts == 2 && "Unexpected vector size");
1933 32 : uint32_t ShuffleMask[2] = { 0, 1 };
1934 32 : Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1935 : }
1936 :
1937 115 : bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1938 115 : bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1939 115 : if (IsPS2PD)
1940 22 : Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1941 93 : else if (IsUnsigned)
1942 28 : Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1943 : else
1944 65 : Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1945 :
1946 115 : if (CI->getNumArgOperands() == 3)
1947 64 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1948 : CI->getArgOperand(1));
1949 6977 : } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1950 216 : Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1951 : CI->getArgOperand(1), CI->getArgOperand(2),
1952 : /*Aligned*/false);
1953 6869 : } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1954 144 : Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1955 : CI->getArgOperand(1),CI->getArgOperand(2),
1956 : /*Aligned*/true);
1957 6797 : } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
1958 110 : Type *ResultTy = CI->getType();
1959 110 : Type *PtrTy = ResultTy->getVectorElementType();
1960 :
1961 : // Cast the pointer to element type.
1962 110 : Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1963 : llvm::PointerType::getUnqual(PtrTy));
1964 :
1965 110 : Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1966 : ResultTy->getVectorNumElements());
1967 :
1968 110 : Function *ELd = Intrinsic::getDeclaration(F->getParent(),
1969 : Intrinsic::masked_expandload,
1970 : ResultTy);
1971 110 : Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
1972 6687 : } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
1973 72 : Type *ResultTy = CI->getArgOperand(1)->getType();
1974 72 : Type *PtrTy = ResultTy->getVectorElementType();
1975 :
1976 : // Cast the pointer to element type.
1977 72 : Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1978 : llvm::PointerType::getUnqual(PtrTy));
1979 :
1980 72 : Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1981 : ResultTy->getVectorNumElements());
1982 :
1983 72 : Function *CSt = Intrinsic::getDeclaration(F->getParent(),
1984 : Intrinsic::masked_compressstore,
1985 : ResultTy);
1986 72 : Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
1987 6615 : } else if (IsX86 && Name.startswith("xop.vpcom")) {
1988 : Intrinsic::ID intID;
1989 : if (Name.endswith("ub"))
1990 : intID = Intrinsic::x86_xop_vpcomub;
1991 : else if (Name.endswith("uw"))
1992 : intID = Intrinsic::x86_xop_vpcomuw;
1993 : else if (Name.endswith("ud"))
1994 : intID = Intrinsic::x86_xop_vpcomud;
1995 : else if (Name.endswith("uq"))
1996 : intID = Intrinsic::x86_xop_vpcomuq;
1997 : else if (Name.endswith("b"))
1998 : intID = Intrinsic::x86_xop_vpcomb;
1999 : else if (Name.endswith("w"))
2000 : intID = Intrinsic::x86_xop_vpcomw;
2001 : else if (Name.endswith("d"))
2002 : intID = Intrinsic::x86_xop_vpcomd;
2003 : else if (Name.endswith("q"))
2004 : intID = Intrinsic::x86_xop_vpcomq;
2005 : else
2006 0 : llvm_unreachable("Unknown suffix");
2007 :
2008 81 : Name = Name.substr(9); // strip off "xop.vpcom"
2009 : unsigned Imm;
2010 : if (Name.startswith("lt"))
2011 : Imm = 0;
2012 : else if (Name.startswith("le"))
2013 : Imm = 1;
2014 : else if (Name.startswith("gt"))
2015 : Imm = 2;
2016 : else if (Name.startswith("ge"))
2017 : Imm = 3;
2018 : else if (Name.startswith("eq"))
2019 : Imm = 4;
2020 : else if (Name.startswith("ne"))
2021 : Imm = 5;
2022 : else if (Name.startswith("false"))
2023 : Imm = 6;
2024 : else if (Name.startswith("true"))
2025 : Imm = 7;
2026 : else
2027 0 : llvm_unreachable("Unknown condition");
2028 :
2029 81 : Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
2030 81 : Rep =
2031 162 : Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
2032 81 : Builder.getInt8(Imm)});
2033 6534 : } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2034 12 : Value *Sel = CI->getArgOperand(2);
2035 12 : Value *NotSel = Builder.CreateNot(Sel);
2036 12 : Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2037 12 : Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2038 12 : Rep = Builder.CreateOr(Sel0, Sel1);
2039 6522 : } else if (IsX86 && Name == "sse42.crc32.64.8") {
2040 42 : Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2041 : Intrinsic::x86_sse42_crc32_32_8);
2042 84 : Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2043 42 : Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2044 42 : Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2045 6480 : } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2046 : Name.startswith("avx512.vbroadcast.s"))) {
2047 : // Replace broadcasts with a series of insertelements.
2048 7 : Type *VecTy = CI->getType();
2049 7 : Type *EltTy = VecTy->getVectorElementType();
2050 : unsigned EltNum = VecTy->getVectorNumElements();
2051 14 : Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2052 7 : EltTy->getPointerTo());
2053 7 : Value *Load = Builder.CreateLoad(EltTy, Cast);
2054 7 : Type *I32Ty = Type::getInt32Ty(C);
2055 7 : Rep = UndefValue::get(VecTy);
2056 71 : for (unsigned I = 0; I < EltNum; ++I)
2057 64 : Rep = Builder.CreateInsertElement(Rep, Load,
2058 64 : ConstantInt::get(I32Ty, I));
2059 6473 : } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2060 : Name.startswith("sse41.pmovzx") ||
2061 : Name.startswith("avx2.pmovsx") ||
2062 : Name.startswith("avx2.pmovzx") ||
2063 : Name.startswith("avx512.mask.pmovsx") ||
2064 : Name.startswith("avx512.mask.pmovzx"))) {
2065 394 : VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2066 394 : VectorType *DstTy = cast<VectorType>(CI->getType());
2067 394 : unsigned NumDstElts = DstTy->getNumElements();
2068 :
2069 : // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2070 394 : SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2071 3070 : for (unsigned i = 0; i != NumDstElts; ++i)
2072 5352 : ShuffleMask[i] = i;
2073 :
2074 394 : Value *SV = Builder.CreateShuffleVector(
2075 394 : CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2076 :
2077 394 : bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2078 394 : Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2079 394 : : Builder.CreateZExt(SV, DstTy);
2080 : // If there are 3 arguments, it's a masked intrinsic so we need a select.
2081 394 : if (CI->getNumArgOperands() == 3)
2082 216 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2083 : CI->getArgOperand(1));
2084 6079 : } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2085 : Name == "avx2.vbroadcasti128")) {
2086 : // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2087 9 : Type *EltTy = CI->getType()->getVectorElementType();
2088 9 : unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2089 9 : Type *VT = VectorType::get(EltTy, NumSrcElts);
2090 9 : Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2091 : PointerType::getUnqual(VT));
2092 9 : Value *Load = Builder.CreateAlignedLoad(Op, 1);
2093 9 : if (NumSrcElts == 2)
2094 5 : Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2095 : { 0, 1, 0, 1 });
2096 : else
2097 4 : Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2098 : { 0, 1, 2, 3, 0, 1, 2, 3 });
2099 6070 : } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2100 : Name.startswith("avx512.mask.shuf.f"))) {
2101 38 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2102 38 : Type *VT = CI->getType();
2103 38 : unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2104 38 : unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2105 38 : unsigned ControlBitsMask = NumLanes - 1;
2106 38 : unsigned NumControlBits = NumLanes / 2;
2107 38 : SmallVector<uint32_t, 8> ShuffleMask(0);
2108 :
2109 150 : for (unsigned l = 0; l != NumLanes; ++l) {
2110 112 : unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2111 : // We actually need the other source.
2112 112 : if (l >= NumLanes / 2)
2113 56 : LaneMask += NumLanes;
2114 440 : for (unsigned i = 0; i != NumElementsInLane; ++i)
2115 328 : ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2116 : }
2117 38 : Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2118 : CI->getArgOperand(1), ShuffleMask);
2119 38 : Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2120 : CI->getArgOperand(3));
2121 6032 : }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2122 : Name.startswith("avx512.mask.broadcasti"))) {
2123 : unsigned NumSrcElts =
2124 126 : CI->getArgOperand(0)->getType()->getVectorNumElements();
2125 126 : unsigned NumDstElts = CI->getType()->getVectorNumElements();
2126 :
2127 126 : SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2128 1398 : for (unsigned i = 0; i != NumDstElts; ++i)
2129 2544 : ShuffleMask[i] = i % NumSrcElts;
2130 :
2131 126 : Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2132 : CI->getArgOperand(0),
2133 : ShuffleMask);
2134 126 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2135 : CI->getArgOperand(1));
2136 5906 : } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2137 : Name.startswith("avx2.vbroadcast") ||
2138 : Name.startswith("avx512.pbroadcast") ||
2139 : Name.startswith("avx512.mask.broadcast.s"))) {
2140 : // Replace vp?broadcasts with a vector shuffle.
2141 146 : Value *Op = CI->getArgOperand(0);
2142 146 : unsigned NumElts = CI->getType()->getVectorNumElements();
2143 146 : Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2144 146 : Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2145 146 : Constant::getNullValue(MaskTy));
2146 :
2147 146 : if (CI->getNumArgOperands() == 3)
2148 102 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2149 : CI->getArgOperand(1));
2150 5760 : } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2151 : Name.startswith("sse2.psubus.") ||
2152 : Name.startswith("avx2.paddus.") ||
2153 : Name.startswith("avx2.psubus.") ||
2154 : Name.startswith("avx512.mask.paddus.") ||
2155 : Name.startswith("avx512.mask.psubus."))) {
2156 605 : bool IsAdd = Name.contains(".paddus");
2157 605 : Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd);
2158 5155 : } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2159 36 : Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2160 : CI->getArgOperand(1),
2161 : CI->getArgOperand(2),
2162 : CI->getArgOperand(3),
2163 : CI->getArgOperand(4),
2164 : false);
2165 5137 : } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2166 48 : Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2167 : CI->getArgOperand(1),
2168 : CI->getArgOperand(2),
2169 : CI->getArgOperand(3),
2170 : CI->getArgOperand(4),
2171 : true);
2172 5113 : } else if (IsX86 && (Name == "sse2.psll.dq" ||
2173 : Name == "avx2.psll.dq")) {
2174 : // 128/256-bit shift left specified in bits.
2175 14 : unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2176 14 : Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2177 : Shift / 8); // Shift is in bits.
2178 5099 : } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2179 : Name == "avx2.psrl.dq")) {
2180 : // 128/256-bit shift right specified in bits.
2181 108 : unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2182 108 : Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2183 : Shift / 8); // Shift is in bits.
2184 4991 : } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2185 : Name == "avx2.psll.dq.bs" ||
2186 : Name == "avx512.psll.dq.512")) {
2187 : // 128/256/512-bit shift left specified in bytes.
2188 32 : unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2189 16 : Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2190 4975 : } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2191 : Name == "avx2.psrl.dq.bs" ||
2192 : Name == "avx512.psrl.dq.512")) {
2193 : // 128/256/512-bit shift right specified in bytes.
2194 32 : unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2195 16 : Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2196 4959 : } else if (IsX86 && (Name == "sse41.pblendw" ||
2197 : Name.startswith("sse41.blendp") ||
2198 : Name.startswith("avx.blend.p") ||
2199 : Name == "avx2.pblendw" ||
2200 : Name.startswith("avx2.pblendd."))) {
2201 98 : Value *Op0 = CI->getArgOperand(0);
2202 : Value *Op1 = CI->getArgOperand(1);
2203 98 : unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2204 98 : VectorType *VecTy = cast<VectorType>(CI->getType());
2205 98 : unsigned NumElts = VecTy->getNumElements();
2206 :
2207 98 : SmallVector<uint32_t, 16> Idxs(NumElts);
2208 748 : for (unsigned i = 0; i != NumElts; ++i)
2209 650 : Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2210 :
2211 98 : Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2212 4861 : } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2213 : Name == "avx2.vinserti128" ||
2214 : Name.startswith("avx512.mask.insert"))) {
2215 102 : Value *Op0 = CI->getArgOperand(0);
2216 : Value *Op1 = CI->getArgOperand(1);
2217 102 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2218 102 : unsigned DstNumElts = CI->getType()->getVectorNumElements();
2219 102 : unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2220 102 : unsigned Scale = DstNumElts / SrcNumElts;
2221 :
2222 : // Mask off the high bits of the immediate value; hardware ignores those.
2223 102 : Imm = Imm % Scale;
2224 :
2225 : // Extend the second operand into a vector the size of the destination.
2226 102 : Value *UndefV = UndefValue::get(Op1->getType());
2227 102 : SmallVector<uint32_t, 8> Idxs(DstNumElts);
2228 492 : for (unsigned i = 0; i != SrcNumElts; ++i)
2229 780 : Idxs[i] = i;
2230 636 : for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2231 1068 : Idxs[i] = SrcNumElts;
2232 102 : Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2233 :
2234 : // Insert the second operand into the first operand.
2235 :
2236 : // Note that there is no guarantee that instruction lowering will actually
2237 : // produce a vinsertf128 instruction for the created shuffles. In
2238 : // particular, the 0 immediate case involves no lane changes, so it can
2239 : // be handled as a blend.
2240 :
2241 : // Example of shuffle mask for 32-bit elements:
2242 : // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2243 : // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2244 :
2245 : // First fill with identity mask.
2246 1026 : for (unsigned i = 0; i != DstNumElts; ++i)
2247 1848 : Idxs[i] = i;
2248 : // Then replace the elements where we need to insert.
2249 492 : for (unsigned i = 0; i != SrcNumElts; ++i)
2250 780 : Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2251 102 : Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2252 :
2253 : // If the intrinsic has a mask operand, handle that.
2254 102 : if (CI->getNumArgOperands() == 5)
2255 72 : Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2256 : CI->getArgOperand(3));
2257 4759 : } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2258 : Name == "avx2.vextracti128" ||
2259 : Name.startswith("avx512.mask.vextract"))) {
2260 67 : Value *Op0 = CI->getArgOperand(0);
2261 67 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2262 67 : unsigned DstNumElts = CI->getType()->getVectorNumElements();
2263 67 : unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2264 67 : unsigned Scale = SrcNumElts / DstNumElts;
2265 :
2266 : // Mask off the high bits of the immediate value; hardware ignores those.
2267 67 : Imm = Imm % Scale;
2268 :
2269 : // Get indexes for the subvector of the input vector.
2270 67 : SmallVector<uint32_t, 8> Idxs(DstNumElts);
2271 307 : for (unsigned i = 0; i != DstNumElts; ++i) {
2272 480 : Idxs[i] = i + (Imm * DstNumElts);
2273 : }
2274 67 : Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2275 :
2276 : // If the intrinsic has a mask operand, handle that.
2277 67 : if (CI->getNumArgOperands() == 4)
2278 32 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2279 : CI->getArgOperand(2));
2280 4692 : } else if (!IsX86 && Name == "stackprotectorcheck") {
2281 6 : Rep = nullptr;
2282 4686 : } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2283 : Name.startswith("avx512.mask.perm.di."))) {
2284 24 : Value *Op0 = CI->getArgOperand(0);
2285 24 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2286 24 : VectorType *VecTy = cast<VectorType>(CI->getType());
2287 24 : unsigned NumElts = VecTy->getNumElements();
2288 :
2289 24 : SmallVector<uint32_t, 8> Idxs(NumElts);
2290 168 : for (unsigned i = 0; i != NumElts; ++i)
2291 288 : Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2292 :
2293 24 : Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2294 :
2295 24 : if (CI->getNumArgOperands() == 4)
2296 24 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2297 : CI->getArgOperand(2));
2298 4662 : } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2299 : Name == "avx2.vperm2i128")) {
2300 : // The immediate permute control byte looks like this:
2301 : // [1:0] - select 128 bits from sources for low half of destination
2302 : // [2] - ignore
2303 : // [3] - zero low half of destination
2304 : // [5:4] - select 128 bits from sources for high half of destination
2305 : // [6] - ignore
2306 : // [7] - zero high half of destination
2307 :
2308 16 : uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2309 :
2310 16 : unsigned NumElts = CI->getType()->getVectorNumElements();
2311 16 : unsigned HalfSize = NumElts / 2;
2312 16 : SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2313 :
2314 : // Determine which operand(s) are actually in use for this instruction.
2315 16 : Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2316 16 : Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2317 :
2318 : // If needed, replace operands based on zero mask.
2319 16 : V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2320 16 : V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2321 :
2322 : // Permute low half of result.
2323 16 : unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2324 64 : for (unsigned i = 0; i < HalfSize; ++i)
2325 96 : ShuffleMask[i] = StartIndex + i;
2326 :
2327 : // Permute high half of result.
2328 16 : StartIndex = (Imm & 0x10) ? HalfSize : 0;
2329 64 : for (unsigned i = 0; i < HalfSize; ++i)
2330 96 : ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2331 :
2332 16 : Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2333 :
2334 4646 : } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2335 : Name == "sse2.pshuf.d" ||
2336 : Name.startswith("avx512.mask.vpermil.p") ||
2337 : Name.startswith("avx512.mask.pshuf.d."))) {
2338 148 : Value *Op0 = CI->getArgOperand(0);
2339 148 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2340 148 : VectorType *VecTy = cast<VectorType>(CI->getType());
2341 148 : unsigned NumElts = VecTy->getNumElements();
2342 : // Calculate the size of each index in the immediate.
2343 148 : unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2344 148 : unsigned IdxMask = ((1 << IdxSize) - 1);
2345 :
2346 148 : SmallVector<uint32_t, 8> Idxs(NumElts);
2347 : // Lookup the bits for this element, wrapping around the immediate every
2348 : // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2349 : // to offset by the first index of each group.
2350 952 : for (unsigned i = 0; i != NumElts; ++i)
2351 1608 : Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2352 :
2353 148 : Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2354 :
2355 148 : if (CI->getNumArgOperands() == 4)
2356 54 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2357 : CI->getArgOperand(2));
2358 4498 : } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2359 : Name.startswith("avx512.mask.pshufl.w."))) {
2360 78 : Value *Op0 = CI->getArgOperand(0);
2361 78 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2362 78 : unsigned NumElts = CI->getType()->getVectorNumElements();
2363 :
2364 78 : SmallVector<uint32_t, 16> Idxs(NumElts);
2365 180 : for (unsigned l = 0; l != NumElts; l += 8) {
2366 510 : for (unsigned i = 0; i != 4; ++i)
2367 816 : Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2368 510 : for (unsigned i = 4; i != 8; ++i)
2369 816 : Idxs[i + l] = i + l;
2370 : }
2371 :
2372 78 : Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2373 :
2374 78 : if (CI->getNumArgOperands() == 4)
2375 18 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2376 : CI->getArgOperand(2));
2377 4420 : } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2378 : Name.startswith("avx512.mask.pshufh.w."))) {
2379 60 : Value *Op0 = CI->getArgOperand(0);
2380 60 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2381 60 : unsigned NumElts = CI->getType()->getVectorNumElements();
2382 :
2383 60 : SmallVector<uint32_t, 16> Idxs(NumElts);
2384 144 : for (unsigned l = 0; l != NumElts; l += 8) {
2385 420 : for (unsigned i = 0; i != 4; ++i)
2386 672 : Idxs[i + l] = i + l;
2387 420 : for (unsigned i = 0; i != 4; ++i)
2388 672 : Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2389 : }
2390 :
2391 60 : Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2392 :
2393 60 : if (CI->getNumArgOperands() == 4)
2394 18 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2395 : CI->getArgOperand(2));
2396 4360 : } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2397 28 : Value *Op0 = CI->getArgOperand(0);
2398 : Value *Op1 = CI->getArgOperand(1);
2399 28 : unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2400 28 : unsigned NumElts = CI->getType()->getVectorNumElements();
2401 :
2402 28 : unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2403 28 : unsigned HalfLaneElts = NumLaneElts / 2;
2404 :
2405 28 : SmallVector<uint32_t, 16> Idxs(NumElts);
2406 216 : for (unsigned i = 0; i != NumElts; ++i) {
2407 : // Base index is the starting element of the lane.
2408 188 : Idxs[i] = i - (i % NumLaneElts);
2409 : // If we are half way through the lane switch to the other source.
2410 188 : if ((i % NumLaneElts) >= HalfLaneElts)
2411 94 : Idxs[i] += NumElts;
2412 : // Now select the specific element by adding HalfLaneElts bits from
2413 : // the immediate, wrapping around the immediate every 8 bits.
2414 188 : Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2415 : }
2416 :
2417 28 : Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2418 :
2419 28 : Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2420 : CI->getArgOperand(3));
2421 4332 : } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2422 : Name.startswith("avx512.mask.movshdup") ||
2423 : Name.startswith("avx512.mask.movsldup"))) {
2424 54 : Value *Op0 = CI->getArgOperand(0);
2425 54 : unsigned NumElts = CI->getType()->getVectorNumElements();
2426 54 : unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2427 :
2428 : unsigned Offset = 0;
2429 : if (Name.startswith("avx512.mask.movshdup."))
2430 : Offset = 1;
2431 :
2432 54 : SmallVector<uint32_t, 16> Idxs(NumElts);
2433 180 : for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2434 336 : for (unsigned i = 0; i != NumLaneElts; i += 2) {
2435 210 : Idxs[i + l + 0] = i + l + Offset;
2436 420 : Idxs[i + l + 1] = i + l + Offset;
2437 : }
2438 :
2439 54 : Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2440 :
2441 54 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2442 : CI->getArgOperand(1));
2443 4278 : } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2444 : Name.startswith("avx512.mask.unpckl."))) {
2445 74 : Value *Op0 = CI->getArgOperand(0);
2446 : Value *Op1 = CI->getArgOperand(1);
2447 74 : int NumElts = CI->getType()->getVectorNumElements();
2448 74 : int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2449 :
2450 74 : SmallVector<uint32_t, 64> Idxs(NumElts);
2451 250 : for (int l = 0; l != NumElts; l += NumLaneElts)
2452 1200 : for (int i = 0; i != NumLaneElts; ++i)
2453 2048 : Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2454 :
2455 74 : Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2456 :
2457 74 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2458 : CI->getArgOperand(2));
2459 4204 : } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2460 : Name.startswith("avx512.mask.unpckh."))) {
2461 72 : Value *Op0 = CI->getArgOperand(0);
2462 : Value *Op1 = CI->getArgOperand(1);
2463 72 : int NumElts = CI->getType()->getVectorNumElements();
2464 72 : int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2465 :
2466 72 : SmallVector<uint32_t, 64> Idxs(NumElts);
2467 240 : for (int l = 0; l != NumElts; l += NumLaneElts)
2468 1176 : for (int i = 0; i != NumLaneElts; ++i)
2469 2016 : Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2470 :
2471 72 : Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2472 :
2473 72 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2474 : CI->getArgOperand(2));
2475 4132 : } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
2476 88 : Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
2477 44 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2478 : CI->getArgOperand(2));
2479 4088 : } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
2480 144 : Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
2481 : CI->getArgOperand(1));
2482 72 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2483 : CI->getArgOperand(2));
2484 4016 : } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
2485 88 : Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
2486 44 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2487 : CI->getArgOperand(2));
2488 3972 : } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
2489 88 : Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
2490 44 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2491 : CI->getArgOperand(2));
2492 3928 : } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
2493 54 : VectorType *FTy = cast<VectorType>(CI->getType());
2494 54 : VectorType *ITy = VectorType::getInteger(FTy);
2495 108 : Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2496 : Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2497 54 : Rep = Builder.CreateBitCast(Rep, FTy);
2498 54 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2499 : CI->getArgOperand(2));
2500 3874 : } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
2501 54 : VectorType *FTy = cast<VectorType>(CI->getType());
2502 54 : VectorType *ITy = VectorType::getInteger(FTy);
2503 108 : Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2504 54 : Rep = Builder.CreateAnd(Rep,
2505 : Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2506 54 : Rep = Builder.CreateBitCast(Rep, FTy);
2507 54 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2508 : CI->getArgOperand(2));
2509 3820 : } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
2510 54 : VectorType *FTy = cast<VectorType>(CI->getType());
2511 54 : VectorType *ITy = VectorType::getInteger(FTy);
2512 108 : Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2513 : Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2514 54 : Rep = Builder.CreateBitCast(Rep, FTy);
2515 54 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2516 : CI->getArgOperand(2));
2517 3766 : } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
2518 54 : VectorType *FTy = cast<VectorType>(CI->getType());
2519 54 : VectorType *ITy = VectorType::getInteger(FTy);
2520 108 : Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2521 : Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2522 54 : Rep = Builder.CreateBitCast(Rep, FTy);
2523 54 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2524 : CI->getArgOperand(2));
2525 3712 : } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2526 216 : Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2527 108 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2528 : CI->getArgOperand(2));
2529 3604 : } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2530 216 : Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2531 108 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2532 : CI->getArgOperand(2));
2533 3496 : } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2534 216 : Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2535 108 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2536 : CI->getArgOperand(2));
2537 3388 : } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2538 : if (Name.endswith(".512")) {
2539 : Intrinsic::ID IID;
2540 30 : if (Name[17] == 's')
2541 : IID = Intrinsic::x86_avx512_add_ps_512;
2542 : else
2543 : IID = Intrinsic::x86_avx512_add_pd_512;
2544 :
2545 60 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2546 30 : { CI->getArgOperand(0), CI->getArgOperand(1),
2547 : CI->getArgOperand(4) });
2548 : } else {
2549 24 : Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2550 : }
2551 42 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2552 : CI->getArgOperand(2));
2553 3346 : } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2554 : if (Name.endswith(".512")) {
2555 : Intrinsic::ID IID;
2556 30 : if (Name[17] == 's')
2557 : IID = Intrinsic::x86_avx512_div_ps_512;
2558 : else
2559 : IID = Intrinsic::x86_avx512_div_pd_512;
2560 :
2561 60 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2562 30 : { CI->getArgOperand(0), CI->getArgOperand(1),
2563 : CI->getArgOperand(4) });
2564 : } else {
2565 24 : Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2566 : }
2567 42 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2568 : CI->getArgOperand(2));
2569 3304 : } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2570 : if (Name.endswith(".512")) {
2571 : Intrinsic::ID IID;
2572 32 : if (Name[17] == 's')
2573 : IID = Intrinsic::x86_avx512_mul_ps_512;
2574 : else
2575 : IID = Intrinsic::x86_avx512_mul_pd_512;
2576 :
2577 64 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2578 32 : { CI->getArgOperand(0), CI->getArgOperand(1),
2579 : CI->getArgOperand(4) });
2580 : } else {
2581 24 : Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2582 : }
2583 44 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2584 : CI->getArgOperand(2));
2585 3260 : } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2586 : if (Name.endswith(".512")) {
2587 : Intrinsic::ID IID;
2588 28 : if (Name[17] == 's')
2589 : IID = Intrinsic::x86_avx512_sub_ps_512;
2590 : else
2591 : IID = Intrinsic::x86_avx512_sub_pd_512;
2592 :
2593 56 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2594 28 : { CI->getArgOperand(0), CI->getArgOperand(1),
2595 : CI->getArgOperand(4) });
2596 : } else {
2597 24 : Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2598 : }
2599 40 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2600 : CI->getArgOperand(2));
2601 3220 : } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
2602 : Name.drop_front(18) == ".512") {
2603 : Intrinsic::ID IID;
2604 12 : if (Name[17] == 's')
2605 : IID = Intrinsic::x86_avx512_max_ps_512;
2606 : else
2607 : IID = Intrinsic::x86_avx512_max_pd_512;
2608 :
2609 24 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2610 12 : { CI->getArgOperand(0), CI->getArgOperand(1),
2611 : CI->getArgOperand(4) });
2612 12 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2613 : CI->getArgOperand(2));
2614 3208 : } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
2615 : Name.drop_front(18) == ".512") {
2616 : Intrinsic::ID IID;
2617 12 : if (Name[17] == 's')
2618 : IID = Intrinsic::x86_avx512_min_ps_512;
2619 : else
2620 : IID = Intrinsic::x86_avx512_min_pd_512;
2621 :
2622 24 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2623 12 : { CI->getArgOperand(0), CI->getArgOperand(1),
2624 : CI->getArgOperand(4) });
2625 12 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2626 : CI->getArgOperand(2));
2627 3196 : } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2628 26 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2629 : Intrinsic::ctlz,
2630 52 : CI->getType()),
2631 26 : { CI->getArgOperand(0), Builder.getInt1(false) });
2632 26 : Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2633 : CI->getArgOperand(1));
2634 3170 : } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2635 156 : bool IsImmediate = Name[16] == 'i' ||
2636 132 : (Name.size() > 18 && Name[18] == 'i');
2637 : bool IsVariable = Name[16] == 'v';
2638 156 : char Size = Name[16] == '.' ? Name[17] :
2639 66 : Name[17] == '.' ? Name[18] :
2640 42 : Name[18] == '.' ? Name[19] :
2641 : Name[20];
2642 :
2643 : Intrinsic::ID IID;
2644 156 : if (IsVariable && Name[17] != '.') {
2645 42 : if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2646 : IID = Intrinsic::x86_avx2_psllv_q;
2647 36 : else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2648 : IID = Intrinsic::x86_avx2_psllv_q_256;
2649 30 : else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2650 : IID = Intrinsic::x86_avx2_psllv_d;
2651 24 : else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2652 : IID = Intrinsic::x86_avx2_psllv_d_256;
2653 18 : else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2654 : IID = Intrinsic::x86_avx512_psllv_w_128;
2655 12 : else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2656 : IID = Intrinsic::x86_avx512_psllv_w_256;
2657 6 : else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2658 : IID = Intrinsic::x86_avx512_psllv_w_512;
2659 : else
2660 0 : llvm_unreachable("Unexpected size");
2661 : } else if (Name.endswith(".128")) {
2662 24 : if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2663 12 : IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2664 : : Intrinsic::x86_sse2_psll_d;
2665 12 : else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2666 0 : IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2667 : : Intrinsic::x86_sse2_psll_q;
2668 12 : else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2669 12 : IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2670 : : Intrinsic::x86_sse2_psll_w;
2671 : else
2672 0 : llvm_unreachable("Unexpected size");
2673 : } else if (Name.endswith(".256")) {
2674 30 : if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2675 12 : IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2676 : : Intrinsic::x86_avx2_psll_d;
2677 18 : else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2678 6 : IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2679 : : Intrinsic::x86_avx2_psll_q;
2680 12 : else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2681 12 : IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2682 : : Intrinsic::x86_avx2_psll_w;
2683 : else
2684 0 : llvm_unreachable("Unexpected size");
2685 : } else {
2686 60 : if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2687 24 : IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2688 12 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2689 : Intrinsic::x86_avx512_psll_d_512;
2690 36 : else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2691 24 : IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2692 12 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2693 : Intrinsic::x86_avx512_psll_q_512;
2694 12 : else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2695 12 : IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2696 : : Intrinsic::x86_avx512_psll_w_512;
2697 : else
2698 0 : llvm_unreachable("Unexpected size");
2699 : }
2700 :
2701 156 : Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2702 3014 : } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2703 176 : bool IsImmediate = Name[16] == 'i' ||
2704 152 : (Name.size() > 18 && Name[18] == 'i');
2705 : bool IsVariable = Name[16] == 'v';
2706 176 : char Size = Name[16] == '.' ? Name[17] :
2707 68 : Name[17] == '.' ? Name[18] :
2708 42 : Name[18] == '.' ? Name[19] :
2709 : Name[20];
2710 :
2711 : Intrinsic::ID IID;
2712 176 : if (IsVariable && Name[17] != '.') {
2713 42 : if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2714 : IID = Intrinsic::x86_avx2_psrlv_q;
2715 36 : else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2716 : IID = Intrinsic::x86_avx2_psrlv_q_256;
2717 30 : else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2718 : IID = Intrinsic::x86_avx2_psrlv_d;
2719 24 : else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2720 : IID = Intrinsic::x86_avx2_psrlv_d_256;
2721 18 : else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2722 : IID = Intrinsic::x86_avx512_psrlv_w_128;
2723 12 : else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2724 : IID = Intrinsic::x86_avx512_psrlv_w_256;
2725 6 : else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2726 : IID = Intrinsic::x86_avx512_psrlv_w_512;
2727 : else
2728 0 : llvm_unreachable("Unexpected size");
2729 : } else if (Name.endswith(".128")) {
2730 36 : if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2731 12 : IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2732 : : Intrinsic::x86_sse2_psrl_d;
2733 24 : else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2734 12 : IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2735 : : Intrinsic::x86_sse2_psrl_q;
2736 12 : else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2737 12 : IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2738 : : Intrinsic::x86_sse2_psrl_w;
2739 : else
2740 0 : llvm_unreachable("Unexpected size");
2741 : } else if (Name.endswith(".256")) {
2742 36 : if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2743 12 : IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2744 : : Intrinsic::x86_avx2_psrl_d;
2745 24 : else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2746 12 : IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2747 : : Intrinsic::x86_avx2_psrl_q;
2748 12 : else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2749 12 : IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2750 : : Intrinsic::x86_avx2_psrl_w;
2751 : else
2752 0 : llvm_unreachable("Unexpected size");
2753 : } else {
2754 62 : if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2755 24 : IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2756 12 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2757 : Intrinsic::x86_avx512_psrl_d_512;
2758 38 : else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2759 26 : IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2760 14 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
2761 : Intrinsic::x86_avx512_psrl_q_512;
2762 12 : else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2763 12 : IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2764 : : Intrinsic::x86_avx512_psrl_w_512;
2765 : else
2766 0 : llvm_unreachable("Unexpected size");
2767 : }
2768 :
2769 176 : Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2770 2838 : } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2771 168 : bool IsImmediate = Name[16] == 'i' ||
2772 144 : (Name.size() > 18 && Name[18] == 'i');
2773 : bool IsVariable = Name[16] == 'v';
2774 168 : char Size = Name[16] == '.' ? Name[17] :
2775 72 : Name[17] == '.' ? Name[18] :
2776 34 : Name[18] == '.' ? Name[19] :
2777 : Name[20];
2778 :
2779 : Intrinsic::ID IID;
2780 168 : if (IsVariable && Name[17] != '.') {
2781 34 : if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2782 : IID = Intrinsic::x86_avx2_psrav_d;
2783 28 : else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2784 : IID = Intrinsic::x86_avx2_psrav_d_256;
2785 20 : else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2786 : IID = Intrinsic::x86_avx512_psrav_w_128;
2787 14 : else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2788 : IID = Intrinsic::x86_avx512_psrav_w_256;
2789 8 : else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2790 : IID = Intrinsic::x86_avx512_psrav_w_512;
2791 : else
2792 0 : llvm_unreachable("Unexpected size");
2793 : } else if (Name.endswith(".128")) {
2794 38 : if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2795 6 : IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2796 : : Intrinsic::x86_sse2_psra_d;
2797 32 : else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2798 20 : IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2799 14 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
2800 : Intrinsic::x86_avx512_psra_q_128;
2801 12 : else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2802 12 : IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2803 : : Intrinsic::x86_sse2_psra_w;
2804 : else
2805 0 : llvm_unreachable("Unexpected size");
2806 : } else if (Name.endswith(".256")) {
2807 36 : if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2808 6 : IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2809 : : Intrinsic::x86_avx2_psra_d;
2810 30 : else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2811 18 : IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2812 12 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
2813 : Intrinsic::x86_avx512_psra_q_256;
2814 12 : else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2815 12 : IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2816 : : Intrinsic::x86_avx2_psra_w;
2817 : else
2818 0 : llvm_unreachable("Unexpected size");
2819 : } else {
2820 60 : if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2821 24 : IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2822 12 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
2823 : Intrinsic::x86_avx512_psra_d_512;
2824 36 : else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2825 24 : IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2826 12 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
2827 : Intrinsic::x86_avx512_psra_q_512;
2828 12 : else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2829 12 : IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2830 : : Intrinsic::x86_avx512_psra_w_512;
2831 : else
2832 0 : llvm_unreachable("Unexpected size");
2833 : }
2834 :
2835 168 : Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2836 2670 : } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2837 8 : Rep = upgradeMaskedMove(Builder, *CI);
2838 2662 : } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2839 24 : Rep = UpgradeMaskToInt(Builder, *CI);
2840 2638 : } else if (IsX86 && Name.endswith(".movntdqa")) {
2841 44 : Module *M = F->getParent();
2842 : MDNode *Node = MDNode::get(
2843 88 : C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2844 :
2845 44 : Value *Ptr = CI->getArgOperand(0);
2846 44 : VectorType *VTy = cast<VectorType>(CI->getType());
2847 :
2848 : // Convert the type of the pointer to a pointer to the stored type.
2849 : Value *BC =
2850 44 : Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2851 44 : LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2852 88 : LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2853 44 : Rep = LI;
2854 2594 : } else if (IsX86 &&
2855 : (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2856 : Name.startswith("avx512.mask.pavg"))) {
2857 : // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2858 : // llvm.x86.avx512.mask.pavg.b/w
2859 46 : Value *A = CI->getArgOperand(0);
2860 : Value *B = CI->getArgOperand(1);
2861 46 : VectorType *ZextType = VectorType::getExtendedElementVectorType(
2862 : cast<VectorType>(A->getType()));
2863 46 : Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2864 46 : Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2865 46 : Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2866 46 : Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2867 46 : Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2868 46 : Rep = Builder.CreateTrunc(ShiftR, A->getType());
2869 46 : if (CI->getNumArgOperands() > 2) {
2870 24 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2871 : CI->getArgOperand(2));
2872 : }
2873 2548 : } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2874 : Name.startswith("fma.vfmsub.") ||
2875 : Name.startswith("fma.vfnmadd.") ||
2876 : Name.startswith("fma.vfnmsub."))) {
2877 470 : bool NegMul = Name[6] == 'n';
2878 470 : bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2879 470 : bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
2880 :
2881 470 : Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2882 470 : CI->getArgOperand(2) };
2883 :
2884 470 : if (IsScalar) {
2885 154 : Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2886 308 : Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2887 308 : Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2888 : }
2889 :
2890 470 : if (NegMul && !IsScalar)
2891 144 : Ops[0] = Builder.CreateFNeg(Ops[0]);
2892 470 : if (NegMul && IsScalar)
2893 78 : Ops[1] = Builder.CreateFNeg(Ops[1]);
2894 470 : if (NegAcc)
2895 228 : Ops[2] = Builder.CreateFNeg(Ops[2]);
2896 :
2897 470 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2898 : Intrinsic::fma,
2899 470 : Ops[0]->getType()),
2900 : Ops);
2901 :
2902 470 : if (IsScalar)
2903 154 : Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
2904 : (uint64_t)0);
2905 2078 : } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
2906 46 : Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2907 46 : CI->getArgOperand(2) };
2908 :
2909 46 : Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2910 92 : Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2911 92 : Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2912 :
2913 46 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2914 : Intrinsic::fma,
2915 92 : Ops[0]->getType()),
2916 : Ops);
2917 :
2918 92 : Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
2919 : Rep, (uint64_t)0);
2920 2032 : } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
2921 : Name.startswith("avx512.maskz.vfmadd.s") ||
2922 : Name.startswith("avx512.mask3.vfmadd.s") ||
2923 : Name.startswith("avx512.mask3.vfmsub.s") ||
2924 : Name.startswith("avx512.mask3.vfnmsub.s"))) {
2925 175 : bool IsMask3 = Name[11] == '3';
2926 175 : bool IsMaskZ = Name[11] == 'z';
2927 : // Drop the "avx512.mask." to make it easier.
2928 350 : Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2929 : bool NegMul = Name[2] == 'n';
2930 175 : bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2931 :
2932 175 : Value *A = CI->getArgOperand(0);
2933 : Value *B = CI->getArgOperand(1);
2934 : Value *C = CI->getArgOperand(2);
2935 :
2936 175 : if (NegMul && (IsMask3 || IsMaskZ))
2937 16 : A = Builder.CreateFNeg(A);
2938 175 : if (NegMul && !(IsMask3 || IsMaskZ))
2939 0 : B = Builder.CreateFNeg(B);
2940 175 : if (NegAcc)
2941 44 : C = Builder.CreateFNeg(C);
2942 :
2943 175 : A = Builder.CreateExtractElement(A, (uint64_t)0);
2944 175 : B = Builder.CreateExtractElement(B, (uint64_t)0);
2945 175 : C = Builder.CreateExtractElement(C, (uint64_t)0);
2946 :
2947 350 : if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2948 : cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
2949 44 : Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
2950 :
2951 : Intrinsic::ID IID;
2952 88 : if (Name.back() == 'd')
2953 : IID = Intrinsic::x86_avx512_vfmadd_f64;
2954 : else
2955 : IID = Intrinsic::x86_avx512_vfmadd_f32;
2956 44 : Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
2957 44 : Rep = Builder.CreateCall(FMA, Ops);
2958 : } else {
2959 131 : Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
2960 : Intrinsic::fma,
2961 262 : A->getType());
2962 131 : Rep = Builder.CreateCall(FMA, { A, B, C });
2963 : }
2964 :
2965 175 : Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
2966 140 : IsMask3 ? C : A;
2967 :
2968 : // For Mask3 with NegAcc, we need to create a new extractelement that
2969 : // avoids the negation above.
2970 175 : if (NegAcc && IsMask3)
2971 88 : PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
2972 : (uint64_t)0);
2973 :
2974 175 : Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
2975 : Rep, PassThru);
2976 350 : Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
2977 : Rep, (uint64_t)0);
2978 1857 : } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
2979 : Name.startswith("avx512.mask.vfnmadd.p") ||
2980 : Name.startswith("avx512.mask.vfnmsub.p") ||
2981 : Name.startswith("avx512.mask3.vfmadd.p") ||
2982 : Name.startswith("avx512.mask3.vfmsub.p") ||
2983 : Name.startswith("avx512.mask3.vfnmsub.p") ||
2984 : Name.startswith("avx512.maskz.vfmadd.p"))) {
2985 164 : bool IsMask3 = Name[11] == '3';
2986 164 : bool IsMaskZ = Name[11] == 'z';
2987 : // Drop the "avx512.mask." to make it easier.
2988 328 : Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2989 : bool NegMul = Name[2] == 'n';
2990 164 : bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2991 :
2992 164 : Value *A = CI->getArgOperand(0);
2993 : Value *B = CI->getArgOperand(1);
2994 : Value *C = CI->getArgOperand(2);
2995 :
2996 164 : if (NegMul && (IsMask3 || IsMaskZ))
2997 10 : A = Builder.CreateFNeg(A);
2998 164 : if (NegMul && !(IsMask3 || IsMaskZ))
2999 62 : B = Builder.CreateFNeg(B);
3000 164 : if (NegAcc)
3001 58 : C = Builder.CreateFNeg(C);
3002 :
3003 224 : if (CI->getNumArgOperands() == 5 &&
3004 60 : (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3005 : cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3006 : Intrinsic::ID IID;
3007 : // Check the character before ".512" in string.
3008 56 : if (Name[Name.size()-5] == 's')
3009 : IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3010 : else
3011 : IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3012 :
3013 84 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3014 : { A, B, C, CI->getArgOperand(4) });
3015 : } else {
3016 136 : Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3017 : Intrinsic::fma,
3018 272 : A->getType());
3019 136 : Rep = Builder.CreateCall(FMA, { A, B, C });
3020 : }
3021 :
3022 164 : Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3023 154 : IsMask3 ? CI->getArgOperand(2) :
3024 124 : CI->getArgOperand(0);
3025 :
3026 164 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3027 1693 : } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3028 : Name.startswith("fma.vfmsubadd.p"))) {
3029 48 : bool IsSubAdd = Name[7] == 's';
3030 48 : int NumElts = CI->getType()->getVectorNumElements();
3031 :
3032 48 : Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3033 48 : CI->getArgOperand(2) };
3034 :
3035 48 : Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3036 96 : Ops[0]->getType());
3037 48 : Value *Odd = Builder.CreateCall(FMA, Ops);
3038 48 : Ops[2] = Builder.CreateFNeg(Ops[2]);
3039 48 : Value *Even = Builder.CreateCall(FMA, Ops);
3040 :
3041 48 : if (IsSubAdd)
3042 : std::swap(Even, Odd);
3043 :
3044 48 : SmallVector<uint32_t, 32> Idxs(NumElts);
3045 264 : for (int i = 0; i != NumElts; ++i)
3046 432 : Idxs[i] = i + (i % 2) * NumElts;
3047 :
3048 48 : Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3049 1645 : } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3050 : Name.startswith("avx512.mask3.vfmaddsub.p") ||
3051 : Name.startswith("avx512.maskz.vfmaddsub.p") ||
3052 : Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3053 52 : bool IsMask3 = Name[11] == '3';
3054 52 : bool IsMaskZ = Name[11] == 'z';
3055 : // Drop the "avx512.mask." to make it easier.
3056 104 : Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3057 : bool IsSubAdd = Name[3] == 's';
3058 64 : if (CI->getNumArgOperands() == 5 &&
3059 12 : (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3060 : cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3061 : Intrinsic::ID IID;
3062 : // Check the character before ".512" in string.
3063 0 : if (Name[Name.size()-5] == 's')
3064 : IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3065 : else
3066 : IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3067 :
3068 : Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3069 : CI->getArgOperand(2), CI->getArgOperand(4) };
3070 0 : if (IsSubAdd)
3071 0 : Ops[2] = Builder.CreateFNeg(Ops[2]);
3072 :
3073 0 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3074 : {CI->getArgOperand(0), CI->getArgOperand(1),
3075 : CI->getArgOperand(2), CI->getArgOperand(4)});
3076 : } else {
3077 104 : int NumElts = CI->getType()->getVectorNumElements();
3078 :
3079 : Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3080 52 : CI->getArgOperand(2) };
3081 :
3082 52 : Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3083 104 : Ops[0]->getType());
3084 52 : Value *Odd = Builder.CreateCall(FMA, Ops);
3085 52 : Ops[2] = Builder.CreateFNeg(Ops[2]);
3086 52 : Value *Even = Builder.CreateCall(FMA, Ops);
3087 :
3088 52 : if (IsSubAdd)
3089 : std::swap(Even, Odd);
3090 :
3091 52 : SmallVector<uint32_t, 32> Idxs(NumElts);
3092 376 : for (int i = 0; i != NumElts; ++i)
3093 648 : Idxs[i] = i + (i % 2) * NumElts;
3094 :
3095 52 : Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3096 : }
3097 :
3098 52 : Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3099 42 : IsMask3 ? CI->getArgOperand(2) :
3100 22 : CI->getArgOperand(0);
3101 :
3102 52 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3103 1593 : } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3104 : Name.startswith("avx512.maskz.pternlog."))) {
3105 48 : bool ZeroMask = Name[11] == 'z';
3106 48 : unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3107 48 : unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3108 : Intrinsic::ID IID;
3109 48 : if (VecWidth == 128 && EltWidth == 32)
3110 : IID = Intrinsic::x86_avx512_pternlog_d_128;
3111 40 : else if (VecWidth == 256 && EltWidth == 32)
3112 : IID = Intrinsic::x86_avx512_pternlog_d_256;
3113 32 : else if (VecWidth == 512 && EltWidth == 32)
3114 : IID = Intrinsic::x86_avx512_pternlog_d_512;
3115 24 : else if (VecWidth == 128 && EltWidth == 64)
3116 : IID = Intrinsic::x86_avx512_pternlog_q_128;
3117 16 : else if (VecWidth == 256 && EltWidth == 64)
3118 : IID = Intrinsic::x86_avx512_pternlog_q_256;
3119 8 : else if (VecWidth == 512 && EltWidth == 64)
3120 : IID = Intrinsic::x86_avx512_pternlog_q_512;
3121 : else
3122 0 : llvm_unreachable("Unexpected intrinsic");
3123 :
3124 48 : Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3125 48 : CI->getArgOperand(2), CI->getArgOperand(3) };
3126 48 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3127 : Args);
3128 48 : Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3129 : : CI->getArgOperand(0);
3130 48 : Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3131 1545 : } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3132 : Name.startswith("avx512.maskz.vpmadd52"))) {
3133 120 : bool ZeroMask = Name[11] == 'z';
3134 120 : bool High = Name[20] == 'h' || Name[21] == 'h';
3135 120 : unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3136 : Intrinsic::ID IID;
3137 120 : if (VecWidth == 128 && !High)
3138 : IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3139 104 : else if (VecWidth == 256 && !High)
3140 : IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3141 88 : else if (VecWidth == 512 && !High)
3142 : IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3143 72 : else if (VecWidth == 128 && High)
3144 : IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3145 56 : else if (VecWidth == 256 && High)
3146 : IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3147 40 : else if (VecWidth == 512 && High)
3148 : IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3149 : else
3150 0 : llvm_unreachable("Unexpected intrinsic");
3151 :
3152 120 : Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3153 120 : CI->getArgOperand(2) };
3154 120 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3155 : Args);
3156 120 : Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3157 : : CI->getArgOperand(0);
3158 120 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3159 1425 : } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3160 : Name.startswith("avx512.mask.vpermt2var.") ||
3161 : Name.startswith("avx512.maskz.vpermt2var."))) {
3162 403 : bool ZeroMask = Name[11] == 'z';
3163 : bool IndexForm = Name[17] == 'i';
3164 403 : unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3165 403 : unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3166 : bool IsFloat = CI->getType()->isFPOrFPVectorTy();
3167 : Intrinsic::ID IID;
3168 403 : if (VecWidth == 128 && EltWidth == 32 && IsFloat)
3169 : IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
3170 394 : else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
3171 : IID = Intrinsic::x86_avx512_vpermi2var_d_128;
3172 379 : else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
3173 : IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
3174 372 : else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
3175 : IID = Intrinsic::x86_avx512_vpermi2var_q_128;
3176 360 : else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
3177 : IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
3178 351 : else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
3179 : IID = Intrinsic::x86_avx512_vpermi2var_d_256;
3180 337 : else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
3181 : IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
3182 330 : else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
3183 : IID = Intrinsic::x86_avx512_vpermi2var_q_256;
3184 318 : else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
3185 : IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
3186 258 : else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
3187 : IID = Intrinsic::x86_avx512_vpermi2var_d_512;
3188 217 : else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
3189 : IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
3190 179 : else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
3191 : IID = Intrinsic::x86_avx512_vpermi2var_q_512;
3192 151 : else if (VecWidth == 128 && EltWidth == 16)
3193 : IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
3194 139 : else if (VecWidth == 256 && EltWidth == 16)
3195 : IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
3196 107 : else if (VecWidth == 512 && EltWidth == 16)
3197 : IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
3198 75 : else if (VecWidth == 128 && EltWidth == 8)
3199 : IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
3200 40 : else if (VecWidth == 256 && EltWidth == 8)
3201 : IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
3202 18 : else if (VecWidth == 512 && EltWidth == 8)
3203 : IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
3204 : else
3205 0 : llvm_unreachable("Unexpected intrinsic");
3206 :
3207 403 : Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3208 403 : CI->getArgOperand(2) };
3209 :
3210 : // If this isn't index form we need to swap operands 0 and 1.
3211 403 : if (!IndexForm)
3212 : std::swap(Args[0], Args[1]);
3213 :
3214 403 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3215 : Args);
3216 403 : Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3217 224 : : Builder.CreateBitCast(CI->getArgOperand(1),
3218 : CI->getType());
3219 403 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3220 1022 : } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3221 : Name.startswith("avx512.maskz.vpdpbusd.") ||
3222 : Name.startswith("avx512.mask.vpdpbusds.") ||
3223 : Name.startswith("avx512.maskz.vpdpbusds."))) {
3224 36 : bool ZeroMask = Name[11] == 'z';
3225 36 : bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3226 36 : unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3227 : Intrinsic::ID IID;
3228 36 : if (VecWidth == 128 && !IsSaturating)
3229 : IID = Intrinsic::x86_avx512_vpdpbusd_128;
3230 30 : else if (VecWidth == 256 && !IsSaturating)
3231 : IID = Intrinsic::x86_avx512_vpdpbusd_256;
3232 24 : else if (VecWidth == 512 && !IsSaturating)
3233 : IID = Intrinsic::x86_avx512_vpdpbusd_512;
3234 18 : else if (VecWidth == 128 && IsSaturating)
3235 : IID = Intrinsic::x86_avx512_vpdpbusds_128;
3236 12 : else if (VecWidth == 256 && IsSaturating)
3237 : IID = Intrinsic::x86_avx512_vpdpbusds_256;
3238 6 : else if (VecWidth == 512 && IsSaturating)
3239 : IID = Intrinsic::x86_avx512_vpdpbusds_512;
3240 : else
3241 0 : llvm_unreachable("Unexpected intrinsic");
3242 :
3243 36 : Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3244 36 : CI->getArgOperand(2) };
3245 36 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3246 : Args);
3247 36 : Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3248 : : CI->getArgOperand(0);
3249 36 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3250 986 : } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3251 : Name.startswith("avx512.maskz.vpdpwssd.") ||
3252 : Name.startswith("avx512.mask.vpdpwssds.") ||
3253 : Name.startswith("avx512.maskz.vpdpwssds."))) {
3254 36 : bool ZeroMask = Name[11] == 'z';
3255 36 : bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3256 36 : unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3257 : Intrinsic::ID IID;
3258 36 : if (VecWidth == 128 && !IsSaturating)
3259 : IID = Intrinsic::x86_avx512_vpdpwssd_128;
3260 30 : else if (VecWidth == 256 && !IsSaturating)
3261 : IID = Intrinsic::x86_avx512_vpdpwssd_256;
3262 24 : else if (VecWidth == 512 && !IsSaturating)
3263 : IID = Intrinsic::x86_avx512_vpdpwssd_512;
3264 18 : else if (VecWidth == 128 && IsSaturating)
3265 : IID = Intrinsic::x86_avx512_vpdpwssds_128;
3266 12 : else if (VecWidth == 256 && IsSaturating)
3267 : IID = Intrinsic::x86_avx512_vpdpwssds_256;
3268 6 : else if (VecWidth == 512 && IsSaturating)
3269 : IID = Intrinsic::x86_avx512_vpdpwssds_512;
3270 : else
3271 0 : llvm_unreachable("Unexpected intrinsic");
3272 :
3273 36 : Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3274 36 : CI->getArgOperand(2) };
3275 36 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3276 : Args);
3277 36 : Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3278 : : CI->getArgOperand(0);
3279 36 : Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3280 1874 : } else if (IsX86 && Name.startswith("avx512.mask.") &&
3281 924 : upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3282 : // Rep will be updated by the call in the condition.
3283 26 : } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3284 4 : Value *Arg = CI->getArgOperand(0);
3285 4 : Value *Neg = Builder.CreateNeg(Arg, "neg");
3286 4 : Value *Cmp = Builder.CreateICmpSGE(
3287 4 : Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3288 4 : Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3289 22 : } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3290 : Name == "max.ui" || Name == "max.ull")) {
3291 8 : Value *Arg0 = CI->getArgOperand(0);
3292 : Value *Arg1 = CI->getArgOperand(1);
3293 : Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3294 4 : ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3295 8 : : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3296 8 : Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3297 14 : } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3298 : Name == "min.ui" || Name == "min.ull")) {
3299 8 : Value *Arg0 = CI->getArgOperand(0);
3300 : Value *Arg1 = CI->getArgOperand(1);
3301 : Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3302 4 : ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3303 8 : : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3304 8 : Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3305 6 : } else if (IsNVVM && Name == "clz.ll") {
3306 : // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3307 2 : Value *Arg = CI->getArgOperand(0);
3308 4 : Value *Ctlz = Builder.CreateCall(
3309 : Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3310 2 : {Arg->getType()}),
3311 2 : {Arg, Builder.getFalse()}, "ctlz");
3312 4 : Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3313 4 : } else if (IsNVVM && Name == "popc.ll") {
3314 : // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3315 : // i64.
3316 2 : Value *Arg = CI->getArgOperand(0);
3317 4 : Value *Popc = Builder.CreateCall(
3318 : Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3319 2 : {Arg->getType()}),
3320 : Arg, "ctpop");
3321 4 : Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3322 2 : } else if (IsNVVM && Name == "h2f") {
3323 4 : Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3324 : F->getParent(), Intrinsic::convert_from_fp16,
3325 2 : {Builder.getFloatTy()}),
3326 4 : CI->getArgOperand(0), "h2f");
3327 : } else {
3328 0 : llvm_unreachable("Unknown function for CallInst upgrade.");
3329 : }
3330 :
3331 10320 : if (Rep)
3332 10314 : CI->replaceAllUsesWith(Rep);
3333 10320 : CI->eraseFromParent();
3334 10320 : return;
3335 : }
3336 :
3337 : const auto &DefaultCase = [&NewFn, &CI]() -> void {
3338 : // Handle generic mangling change, but nothing else
3339 : assert(
3340 : (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3341 : "Unknown function for CallInst upgrade and isn't just a name change");
3342 : CI->setCalledFunction(NewFn);
3343 : };
3344 : CallInst *NewCall = nullptr;
3345 1431 : switch (NewFn->getIntrinsicID()) {
3346 311 : default: {
3347 : DefaultCase();
3348 : return;
3349 : }
3350 :
3351 14 : case Intrinsic::arm_neon_vld1:
3352 : case Intrinsic::arm_neon_vld2:
3353 : case Intrinsic::arm_neon_vld3:
3354 : case Intrinsic::arm_neon_vld4:
3355 : case Intrinsic::arm_neon_vld2lane:
3356 : case Intrinsic::arm_neon_vld3lane:
3357 : case Intrinsic::arm_neon_vld4lane:
3358 : case Intrinsic::arm_neon_vst1:
3359 : case Intrinsic::arm_neon_vst2:
3360 : case Intrinsic::arm_neon_vst3:
3361 : case Intrinsic::arm_neon_vst4:
3362 : case Intrinsic::arm_neon_vst2lane:
3363 : case Intrinsic::arm_neon_vst3lane:
3364 : case Intrinsic::arm_neon_vst4lane: {
3365 28 : SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3366 14 : CI->arg_operands().end());
3367 14 : NewCall = Builder.CreateCall(NewFn, Args);
3368 : break;
3369 : }
3370 :
3371 : case Intrinsic::bitreverse:
3372 8 : NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3373 8 : break;
3374 :
3375 : case Intrinsic::ctlz:
3376 : case Intrinsic::cttz:
3377 : assert(CI->getNumArgOperands() == 1 &&
3378 : "Mismatch between function args and call args");
3379 : NewCall =
3380 68 : Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3381 68 : break;
3382 :
3383 75 : case Intrinsic::objectsize: {
3384 75 : Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3385 75 : ? Builder.getFalse()
3386 : : CI->getArgOperand(2);
3387 75 : NewCall = Builder.CreateCall(
3388 : NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
3389 75 : break;
3390 : }
3391 :
3392 : case Intrinsic::ctpop:
3393 6 : NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3394 6 : break;
3395 :
3396 : case Intrinsic::convert_from_fp16:
3397 0 : NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3398 0 : break;
3399 :
3400 481 : case Intrinsic::dbg_value:
3401 : // Upgrade from the old version that had an extra offset argument.
3402 : assert(CI->getNumArgOperands() == 4);
3403 : // Drop nonzero offsets instead of attempting to upgrade them.
3404 481 : if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3405 481 : if (Offset->isZeroValue()) {
3406 479 : NewCall = Builder.CreateCall(
3407 : NewFn,
3408 : {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3409 479 : break;
3410 : }
3411 2 : CI->eraseFromParent();
3412 2 : return;
3413 :
3414 : case Intrinsic::x86_xop_vfrcz_ss:
3415 : case Intrinsic::x86_xop_vfrcz_sd:
3416 0 : NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3417 0 : break;
3418 :
3419 8 : case Intrinsic::x86_xop_vpermil2pd:
3420 : case Intrinsic::x86_xop_vpermil2ps:
3421 : case Intrinsic::x86_xop_vpermil2pd_256:
3422 : case Intrinsic::x86_xop_vpermil2ps_256: {
3423 16 : SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3424 8 : CI->arg_operands().end());
3425 8 : VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3426 8 : VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3427 8 : Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3428 8 : NewCall = Builder.CreateCall(NewFn, Args);
3429 : break;
3430 : }
3431 :
3432 12 : case Intrinsic::x86_sse41_ptestc:
3433 : case Intrinsic::x86_sse41_ptestz:
3434 : case Intrinsic::x86_sse41_ptestnzc: {
3435 : // The arguments for these intrinsics used to be v4f32, and changed
3436 : // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3437 : // So, the only thing required is a bitcast for both arguments.
3438 : // First, check the arguments have the old type.
3439 12 : Value *Arg0 = CI->getArgOperand(0);
3440 12 : if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3441 : return;
3442 :
3443 : // Old intrinsic, add bitcasts
3444 : Value *Arg1 = CI->getArgOperand(1);
3445 :
3446 12 : Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3447 :
3448 12 : Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3449 12 : Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3450 :
3451 12 : NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3452 12 : break;
3453 : }
3454 :
3455 2 : case Intrinsic::x86_rdtscp: {
3456 : // This used to take 1 argument. If we have no arguments, it is already
3457 : // upgraded.
3458 2 : if (CI->getNumOperands() == 0)
3459 : return;
3460 :
3461 2 : NewCall = Builder.CreateCall(NewFn);
3462 : // Extract the second result and store it.
3463 4 : Value *Data = Builder.CreateExtractValue(NewCall, 1);
3464 : // Cast the pointer to the right type.
3465 4 : Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3466 2 : llvm::PointerType::getUnqual(Data->getType()));
3467 : Builder.CreateAlignedStore(Data, Ptr, 1);
3468 : // Replace the original call result with the first result of the new call.
3469 4 : Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3470 :
3471 4 : std::string Name = CI->getName();
3472 2 : if (!Name.empty()) {
3473 0 : CI->setName(Name + ".old");
3474 0 : NewCall->setName(Name);
3475 : }
3476 2 : CI->replaceAllUsesWith(TSC);
3477 2 : CI->eraseFromParent();
3478 : return;
3479 : }
3480 :
3481 28 : case Intrinsic::x86_addcarryx_u32:
3482 : case Intrinsic::x86_addcarryx_u64:
3483 : case Intrinsic::x86_addcarry_u32:
3484 : case Intrinsic::x86_addcarry_u64:
3485 : case Intrinsic::x86_subborrow_u32:
3486 : case Intrinsic::x86_subborrow_u64: {
3487 : // This used to take 4 arguments. If we only have 3 arguments it's already
3488 : // upgraded.
3489 28 : if (CI->getNumOperands() == 3)
3490 : return;
3491 :
3492 : // Make a call with 3 operands.
3493 28 : NewCall = Builder.CreateCall(NewFn, { CI->getArgOperand(0),
3494 : CI->getArgOperand(1),
3495 : CI->getArgOperand(2)});
3496 : // Extract the second result and store it.
3497 56 : Value *Data = Builder.CreateExtractValue(NewCall, 1);
3498 : // Cast the pointer to the right type.
3499 28 : Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3500 28 : llvm::PointerType::getUnqual(Data->getType()));
3501 : Builder.CreateAlignedStore(Data, Ptr, 1);
3502 : // Replace the original call result with the first result of the new call.
3503 56 : Value *CF = Builder.CreateExtractValue(NewCall, 0);
3504 :
3505 56 : std::string Name = CI->getName();
3506 28 : if (!Name.empty()) {
3507 12 : CI->setName(Name + ".old");
3508 24 : NewCall->setName(Name);
3509 : }
3510 28 : CI->replaceAllUsesWith(CF);
3511 28 : CI->eraseFromParent();
3512 : return;
3513 : }
3514 :
3515 111 : case Intrinsic::x86_sse41_insertps:
3516 : case Intrinsic::x86_sse41_dppd:
3517 : case Intrinsic::x86_sse41_dpps:
3518 : case Intrinsic::x86_sse41_mpsadbw:
3519 : case Intrinsic::x86_avx_dp_ps_256:
3520 : case Intrinsic::x86_avx2_mpsadbw: {
3521 : // Need to truncate the last argument from i32 to i8 -- this argument models
3522 : // an inherently 8-bit immediate operand to these x86 instructions.
3523 222 : SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3524 111 : CI->arg_operands().end());
3525 :
3526 : // Replace the last argument with a trunc.
3527 111 : Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3528 111 : NewCall = Builder.CreateCall(NewFn, Args);
3529 : break;
3530 : }
3531 :
3532 : case Intrinsic::thread_pointer: {
3533 2 : NewCall = Builder.CreateCall(NewFn, {});
3534 2 : break;
3535 : }
3536 :
3537 90 : case Intrinsic::invariant_start:
3538 : case Intrinsic::invariant_end:
3539 : case Intrinsic::masked_load:
3540 : case Intrinsic::masked_store:
3541 : case Intrinsic::masked_gather:
3542 : case Intrinsic::masked_scatter: {
3543 180 : SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3544 90 : CI->arg_operands().end());
3545 90 : NewCall = Builder.CreateCall(NewFn, Args);
3546 : break;
3547 : }
3548 :
3549 215 : case Intrinsic::memcpy:
3550 : case Intrinsic::memmove:
3551 : case Intrinsic::memset: {
3552 : // We have to make sure that the call signature is what we're expecting.
3553 : // We only want to change the old signatures by removing the alignment arg:
3554 : // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3555 : // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3556 : // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3557 : // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3558 : // Note: i8*'s in the above can be any pointer type
3559 215 : if (CI->getNumArgOperands() != 5) {
3560 : DefaultCase();
3561 20 : return;
3562 : }
3563 : // Remove alignment argument (3), and add alignment attributes to the
3564 : // dest/src pointers.
3565 : Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3566 195 : CI->getArgOperand(2), CI->getArgOperand(4)};
3567 195 : NewCall = Builder.CreateCall(NewFn, Args);
3568 : auto *MemCI = cast<MemIntrinsic>(NewCall);
3569 : // All mem intrinsics support dest alignment.
3570 : const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3571 390 : MemCI->setDestAlignment(Align->getZExtValue());
3572 : // Memcpy/Memmove also support source alignment.
3573 : if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3574 260 : MTI->setSourceAlignment(Align->getZExtValue());
3575 195 : break;
3576 : }
3577 : }
3578 : assert(NewCall && "Should have either set this variable or returned through "
3579 : "the default case");
3580 2136 : std::string Name = CI->getName();
3581 1068 : if (!Name.empty()) {
3582 206 : CI->setName(Name + ".old");
3583 412 : NewCall->setName(Name);
3584 : }
3585 1068 : CI->replaceAllUsesWith(NewCall);
3586 1068 : CI->eraseFromParent();
3587 : }
3588 :
3589 324747 : void llvm::UpgradeCallsToIntrinsic(Function *F) {
3590 : assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3591 :
3592 : // Check if this function should be upgraded and get the replacement function
3593 : // if there is one.
3594 : Function *NewFn;
3595 324747 : if (UpgradeIntrinsicFunction(F, NewFn)) {
3596 : // Replace all users of the old function with the new function or new
3597 : // instructions. This is not a range loop because the call is deleted.
3598 16837 : for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3599 : if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3600 11612 : UpgradeIntrinsicCall(CI, NewFn);
3601 :
3602 : // Remove old function, no longer used, from the module.
3603 5225 : F->eraseFromParent();
3604 : }
3605 324747 : }
3606 :
3607 4964 : MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3608 : // Check if the tag uses struct-path aware TBAA format.
3609 4766 : if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3610 : return &MD;
3611 :
3612 : auto &Context = MD.getContext();
3613 198 : if (MD.getNumOperands() == 3) {
3614 64 : Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3615 : MDNode *ScalarType = MDNode::get(Context, Elts);
3616 : // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3617 : Metadata *Elts2[] = {ScalarType, ScalarType,
3618 32 : ConstantAsMetadata::get(
3619 32 : Constant::getNullValue(Type::getInt64Ty(Context))),
3620 96 : MD.getOperand(2)};
3621 : return MDNode::get(Context, Elts2);
3622 : }
3623 : // Create a MDNode <MD, MD, offset 0>
3624 166 : Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3625 332 : Type::getInt64Ty(Context)))};
3626 : return MDNode::get(Context, Elts);
3627 : }
3628 :
3629 15706 : Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3630 : Instruction *&Temp) {
3631 15706 : if (Opc != Instruction::BitCast)
3632 : return nullptr;
3633 :
3634 12172 : Temp = nullptr;
3635 12172 : Type *SrcTy = V->getType();
3636 23359 : if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3637 : SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3638 0 : LLVMContext &Context = V->getContext();
3639 :
3640 : // We have no information about target data layout, so we assume that
3641 : // the maximum pointer size is 64bit.
3642 0 : Type *MidTy = Type::getInt64Ty(Context);
3643 0 : Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3644 :
3645 0 : return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3646 : }
3647 :
3648 : return nullptr;
3649 : }
3650 :
3651 4370 : Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3652 4370 : if (Opc != Instruction::BitCast)
3653 : return nullptr;
3654 :
3655 3988 : Type *SrcTy = C->getType();
3656 7935 : if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3657 : SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3658 0 : LLVMContext &Context = C->getContext();
3659 :
3660 : // We have no information about target data layout, so we assume that
3661 : // the maximum pointer size is 64bit.
3662 0 : Type *MidTy = Type::getInt64Ty(Context);
3663 :
3664 0 : return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3665 0 : DestTy);
3666 : }
3667 :
3668 : return nullptr;
3669 : }
3670 :
3671 : /// Check the debug info version number, if it is out-dated, drop the debug
3672 : /// info. Return true if module is modified.
3673 38255 : bool llvm::UpgradeDebugInfo(Module &M) {
3674 38255 : unsigned Version = getDebugMetadataVersionFromModule(M);
3675 38255 : if (Version == DEBUG_METADATA_VERSION) {
3676 2063 : bool BrokenDebugInfo = false;
3677 2063 : if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3678 0 : report_fatal_error("Broken module found, compilation aborted!");
3679 2063 : if (!BrokenDebugInfo)
3680 : // Everything is ok.
3681 2013 : return false;
3682 : else {
3683 : // Diagnose malformed debug info.
3684 : DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3685 50 : M.getContext().diagnose(Diag);
3686 : }
3687 : }
3688 36242 : bool Modified = StripDebugInfo(M);
3689 36242 : if (Modified && Version != DEBUG_METADATA_VERSION) {
3690 : // Diagnose a version mismatch.
3691 : DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3692 50 : M.getContext().diagnose(DiagVersion);
3693 : }
3694 : return Modified;
3695 : }
3696 :
3697 2632 : bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3698 : bool Changed = false;
3699 : NamedMDNode *ModRetainReleaseMarker =
3700 2632 : M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3701 2632 : if (ModRetainReleaseMarker) {
3702 1 : MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3703 1 : if (Op) {
3704 : MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3705 : if (ID) {
3706 : SmallVector<StringRef, 4> ValueComp;
3707 1 : ID->getString().split(ValueComp, "#");
3708 1 : if (ValueComp.size() == 2) {
3709 5 : std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3710 1 : Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3711 1 : ModRetainReleaseMarker->setOperand(0,
3712 1 : MDNode::get(M.getContext(), Ops));
3713 : Changed = true;
3714 : }
3715 : }
3716 : }
3717 : }
3718 2632 : return Changed;
3719 : }
3720 :
3721 36995 : bool llvm::UpgradeModuleFlags(Module &M) {
3722 36995 : NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3723 36995 : if (!ModFlags)
3724 : return false;
3725 :
3726 : bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3727 8095 : for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3728 5315 : MDNode *Op = ModFlags->getOperand(I);
3729 5315 : if (Op->getNumOperands() != 3)
3730 : continue;
3731 : MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3732 : if (!ID)
3733 : continue;
3734 5311 : if (ID->getString() == "Objective-C Image Info Version")
3735 : HasObjCFlag = true;
3736 5311 : if (ID->getString() == "Objective-C Class Properties")
3737 : HasClassProperties = true;
3738 : // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3739 : // field was Error and now they are Max.
3740 10226 : if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3741 : if (auto *Behavior =
3742 : mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3743 453 : if (Behavior->getLimitedValue() == Module::Error) {
3744 281 : Type *Int32Ty = Type::getInt32Ty(M.getContext());
3745 : Metadata *Ops[3] = {
3746 281 : ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3747 281 : MDString::get(M.getContext(), ID->getString()),
3748 562 : Op->getOperand(2)};
3749 281 : ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3750 : Changed = true;
3751 : }
3752 : }
3753 : }
3754 : // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3755 : // section name so that llvm-lto will not complain about mismatching
3756 : // module flags that is functionally the same.
3757 5311 : if (ID->getString() == "Objective-C Image Info Section") {
3758 : if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3759 : SmallVector<StringRef, 4> ValueComp;
3760 32 : Value->getString().split(ValueComp, " ");
3761 64 : if (ValueComp.size() != 1) {
3762 : std::string NewValue;
3763 93 : for (auto &S : ValueComp)
3764 148 : NewValue += S.str();
3765 : Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3766 57 : MDString::get(M.getContext(), NewValue)};
3767 19 : ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3768 : Changed = true;
3769 : }
3770 : }
3771 : }
3772 : }
3773 :
3774 : // "Objective-C Class Properties" is recently added for Objective-C. We
3775 : // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
3776 : // flag of value 0, so we can correclty downgrade this flag when trying to
3777 : // link an ObjC bitcode without this module flag with an ObjC bitcode with
3778 : // this module flag.
3779 2780 : if (HasObjCFlag && !HasClassProperties) {
3780 18 : M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3781 : (uint32_t)0);
3782 : Changed = true;
3783 : }
3784 :
3785 : return Changed;
3786 : }
3787 :
3788 34363 : void llvm::UpgradeSectionAttributes(Module &M) {
3789 : auto TrimSpaces = [](StringRef Section) -> std::string {
3790 : SmallVector<StringRef, 5> Components;
3791 : Section.split(Components, ',');
3792 :
3793 : SmallString<32> Buffer;
3794 : raw_svector_ostream OS(Buffer);
3795 :
3796 : for (auto Component : Components)
3797 : OS << ',' << Component.trim();
3798 :
3799 : return OS.str().substr(1);
3800 : };
3801 :
3802 62406 : for (auto &GV : M.globals()) {
3803 28043 : if (!GV.hasSection())
3804 : continue;
3805 :
3806 563 : StringRef Section = GV.getSection();
3807 :
3808 : if (!Section.startswith("__DATA, __objc_catlist"))
3809 : continue;
3810 :
3811 : // __DATA, __objc_catlist, regular, no_dead_strip
3812 : // __DATA,__objc_catlist,regular,no_dead_strip
3813 4 : GV.setSection(TrimSpaces(Section));
3814 : }
3815 34363 : }
3816 :
3817 4 : static bool isOldLoopArgument(Metadata *MD) {
3818 : auto *T = dyn_cast_or_null<MDTuple>(MD);
3819 : if (!T)
3820 : return false;
3821 4 : if (T->getNumOperands() < 1)
3822 : return false;
3823 4 : auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3824 : if (!S)
3825 : return false;
3826 4 : return S->getString().startswith("llvm.vectorizer.");
3827 : }
3828 :
3829 8 : static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3830 : StringRef OldPrefix = "llvm.vectorizer.";
3831 : assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3832 :
3833 : if (OldTag == "llvm.vectorizer.unroll")
3834 2 : return MDString::get(C, "llvm.loop.interleave.count");
3835 :
3836 6 : return MDString::get(
3837 6 : C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3838 12 : .str());
3839 : }
3840 :
3841 10 : static Metadata *upgradeLoopArgument(Metadata *MD) {
3842 : auto *T = dyn_cast_or_null<MDTuple>(MD);
3843 : if (!T)
3844 : return MD;
3845 10 : if (T->getNumOperands() < 1)
3846 : return MD;
3847 10 : auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3848 : if (!OldTag)
3849 : return MD;
3850 16 : if (!OldTag->getString().startswith("llvm.vectorizer."))
3851 0 : return MD;
3852 :
3853 : // This has an old tag. Upgrade it.
3854 : SmallVector<Metadata *, 8> Ops;
3855 8 : Ops.reserve(T->getNumOperands());
3856 16 : Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3857 16 : for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3858 8 : Ops.push_back(T->getOperand(I));
3859 :
3860 8 : return MDTuple::get(T->getContext(), Ops);
3861 : }
3862 :
3863 2 : MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3864 : auto *T = dyn_cast<MDTuple>(&N);
3865 : if (!T)
3866 : return &N;
3867 :
3868 2 : if (none_of(T->operands(), isOldLoopArgument))
3869 : return &N;
3870 :
3871 : SmallVector<Metadata *, 8> Ops;
3872 2 : Ops.reserve(T->getNumOperands());
3873 12 : for (Metadata *MD : T->operands())
3874 10 : Ops.push_back(upgradeLoopArgument(MD));
3875 :
3876 2 : return MDTuple::get(T->getContext(), Ops);
3877 : }
|