LLVM  13.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/ObjCARCUtil.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/InstVisitor.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/IntrinsicsAArch64.h"
29 #include "llvm/IR/IntrinsicsARM.h"
30 #include "llvm/IR/IntrinsicsX86.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Verifier.h"
35 #include "llvm/Support/Regex.h"
36 #include <cstring>
37 using namespace llvm;
38 
39 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
40 
41 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
42 // changed their type from v4f32 to v2i64.
//
// Returns true when F still has the old signature (its first parameter is a
// fixed vector of 4 floats): F is renamed with rename() and NewFn is set to
// the declaration of IID in F's parent module. Returns false and leaves NewFn
// untouched otherwise.
// NOTE(review): the embedded listing numbering jumps from 42 to 44 here, so
// the line that opens this function's signature is missing from this copy.
44  Function *&NewFn) {
45  // Check whether this is an old version of the function, which received
46  // v4f32 arguments. Only the first parameter's type is inspected.
47  Type *Arg0Type = F->getFunctionType()->getParamType(0);
48  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
49  return false;
50 
51  // Yes, it's old: rename it, then request the declaration of IID.
52  rename(F);
53  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
54  return true;
55 }
56 
57 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
58 // arguments have changed their type from i32 to i8.
//
// Returns true when the last parameter of F is an i32: F is renamed with
// rename() and NewFn is set to the declaration of IID in F's parent module.
// Returns false and leaves NewFn untouched otherwise.
// NOTE(review): the embedded listing numbering jumps from 58 to 60 here, so
// the line that opens this function's signature is missing from this copy.
60  Function *&NewFn) {
61  // Check that the last argument is an i32.
// NOTE(review): the index below presumes F has at least one parameter —
// confirm that every caller guarantees this.
62  Type *LastArgType = F->getFunctionType()->getParamType(
63  F->getFunctionType()->getNumParams() - 1);
64  if (!LastArgType->isIntegerTy(32))
65  return false;
66 
67  // Move this function aside and map down.
68  rename(F);
69  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
70  return true;
71 }
72 
73 // Upgrade the declaration of fp compare intrinsics that change return type
74 // from scalar to vXi1 mask.
//
// Returns true when F's return type is not a vector: F is renamed with
// rename() and NewFn is set to the declaration of IID in F's parent module.
// Returns false and leaves NewFn untouched otherwise.
// NOTE(review): the embedded listing numbering jumps from 74 to 76 here, so
// the line that opens this function's signature is missing from this copy.
76  Function *&NewFn) {
77  // A vector return type means this is already the new form; nothing to do.
78  if (F->getReturnType()->isVectorTy())
79  return false;
80 
81  rename(F);
82  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83  return true;
84 }
85 
// Returns true when Name equals, or starts with, one of the x86 intrinsic
// names listed below (spelled here without any "x86." prefix); returns false
// otherwise. Entries compared with == must match exactly; entries using
// startswith() cover a whole family of suffixes.
// NOTE(review): the embedded listing numbering jumps from 85 to 87 here, so
// the line carrying this function's signature is missing from this copy.
87  // All of the intrinsic matches below should be marked with which llvm
88  // version started autoupgrading them. At some point in the future we would
89  // like to use this information to remove upgrade code for some older
90  // intrinsics. It is currently undecided how we will determine that future
91  // point.
92  if (Name == "addcarryx.u32" || // Added in 8.0
93  Name == "addcarryx.u64" || // Added in 8.0
94  Name == "addcarry.u32" || // Added in 8.0
95  Name == "addcarry.u64" || // Added in 8.0
96  Name == "subborrow.u32" || // Added in 8.0
97  Name == "subborrow.u64" || // Added in 8.0
98  Name.startswith("sse2.padds.") || // Added in 8.0
99  Name.startswith("sse2.psubs.") || // Added in 8.0
100  Name.startswith("sse2.paddus.") || // Added in 8.0
101  Name.startswith("sse2.psubus.") || // Added in 8.0
102  Name.startswith("avx2.padds.") || // Added in 8.0
103  Name.startswith("avx2.psubs.") || // Added in 8.0
104  Name.startswith("avx2.paddus.") || // Added in 8.0
105  Name.startswith("avx2.psubus.") || // Added in 8.0
106  Name.startswith("avx512.padds.") || // Added in 8.0
107  Name.startswith("avx512.psubs.") || // Added in 8.0
108  Name.startswith("avx512.mask.padds.") || // Added in 8.0
109  Name.startswith("avx512.mask.psubs.") || // Added in 8.0
110  Name.startswith("avx512.mask.paddus.") || // Added in 8.0
111  Name.startswith("avx512.mask.psubus.") || // Added in 8.0
112  Name=="ssse3.pabs.b.128" || // Added in 6.0
113  Name=="ssse3.pabs.w.128" || // Added in 6.0
114  Name=="ssse3.pabs.d.128" || // Added in 6.0
115  Name.startswith("fma4.vfmadd.s") || // Added in 7.0
116  Name.startswith("fma.vfmadd.") || // Added in 7.0
117  Name.startswith("fma.vfmsub.") || // Added in 7.0
118  Name.startswith("fma.vfmsubadd.") || // Added in 7.0
119  Name.startswith("fma.vfnmadd.") || // Added in 7.0
120  Name.startswith("fma.vfnmsub.") || // Added in 7.0
121  Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
122  Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
123  Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
124  Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
125  Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
126  Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
127  Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
128  Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
129  Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
130  Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
131  Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
132  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
133  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
134  Name.startswith("avx512.kunpck") || // Added in 6.0
135  Name.startswith("avx2.pabs.") || // Added in 6.0
136  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
137  Name.startswith("avx512.broadcastm") || // Added in 6.0
138  Name == "sse.sqrt.ss" || // Added in 7.0
139  Name == "sse2.sqrt.sd" || // Added in 7.0
140  Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
141  Name.startswith("avx.sqrt.p") || // Added in 7.0
142  Name.startswith("sse2.sqrt.p") || // Added in 7.0
143  Name.startswith("sse.sqrt.p") || // Added in 7.0
144  Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
145  Name.startswith("sse2.pcmpeq.") || // Added in 3.1
146  Name.startswith("sse2.pcmpgt.") || // Added in 3.1
147  Name.startswith("avx2.pcmpeq.") || // Added in 3.1
148  Name.startswith("avx2.pcmpgt.") || // Added in 3.1
149  Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
150  Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
151  Name.startswith("avx.vperm2f128.") || // Added in 6.0
152  Name == "avx2.vperm2i128" || // Added in 6.0
153  Name == "sse.add.ss" || // Added in 4.0
154  Name == "sse2.add.sd" || // Added in 4.0
155  Name == "sse.sub.ss" || // Added in 4.0
156  Name == "sse2.sub.sd" || // Added in 4.0
157  Name == "sse.mul.ss" || // Added in 4.0
158  Name == "sse2.mul.sd" || // Added in 4.0
159  Name == "sse.div.ss" || // Added in 4.0
160  Name == "sse2.div.sd" || // Added in 4.0
161  Name == "sse41.pmaxsb" || // Added in 3.9
162  Name == "sse2.pmaxs.w" || // Added in 3.9
163  Name == "sse41.pmaxsd" || // Added in 3.9
164  Name == "sse2.pmaxu.b" || // Added in 3.9
165  Name == "sse41.pmaxuw" || // Added in 3.9
166  Name == "sse41.pmaxud" || // Added in 3.9
167  Name == "sse41.pminsb" || // Added in 3.9
168  Name == "sse2.pmins.w" || // Added in 3.9
169  Name == "sse41.pminsd" || // Added in 3.9
170  Name == "sse2.pminu.b" || // Added in 3.9
171  Name == "sse41.pminuw" || // Added in 3.9
172  Name == "sse41.pminud" || // Added in 3.9
173  Name == "avx512.kand.w" || // Added in 7.0
174  Name == "avx512.kandn.w" || // Added in 7.0
175  Name == "avx512.knot.w" || // Added in 7.0
176  Name == "avx512.kor.w" || // Added in 7.0
177  Name == "avx512.kxor.w" || // Added in 7.0
178  Name == "avx512.kxnor.w" || // Added in 7.0
179  Name == "avx512.kortestc.w" || // Added in 7.0
180  Name == "avx512.kortestz.w" || // Added in 7.0
181  Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
182  Name.startswith("avx2.pmax") || // Added in 3.9
183  Name.startswith("avx2.pmin") || // Added in 3.9
184  Name.startswith("avx512.mask.pmax") || // Added in 4.0
185  Name.startswith("avx512.mask.pmin") || // Added in 4.0
186  Name.startswith("avx2.vbroadcast") || // Added in 3.8
187  Name.startswith("avx2.pbroadcast") || // Added in 3.8
188  Name.startswith("avx.vpermil.") || // Added in 3.1
189  Name.startswith("sse2.pshuf") || // Added in 3.9
190  Name.startswith("avx512.pbroadcast") || // Added in 3.9
191  Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
192  Name.startswith("avx512.mask.movddup") || // Added in 3.9
193  Name.startswith("avx512.mask.movshdup") || // Added in 3.9
194  Name.startswith("avx512.mask.movsldup") || // Added in 3.9
195  Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
196  Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
197  Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
198  Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
199  Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
200  Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
201  Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
202  Name.startswith("avx512.mask.punpckl") || // Added in 3.9
203  Name.startswith("avx512.mask.punpckh") || // Added in 3.9
204  Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
205  Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
206  Name.startswith("avx512.mask.pand.") || // Added in 3.9
207  Name.startswith("avx512.mask.pandn.") || // Added in 3.9
208  Name.startswith("avx512.mask.por.") || // Added in 3.9
209  Name.startswith("avx512.mask.pxor.") || // Added in 3.9
210  Name.startswith("avx512.mask.and.") || // Added in 3.9
211  Name.startswith("avx512.mask.andn.") || // Added in 3.9
212  Name.startswith("avx512.mask.or.") || // Added in 3.9
213  Name.startswith("avx512.mask.xor.") || // Added in 3.9
214  Name.startswith("avx512.mask.padd.") || // Added in 4.0
215  Name.startswith("avx512.mask.psub.") || // Added in 4.0
216  Name.startswith("avx512.mask.pmull.") || // Added in 4.0
217  Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
218  Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
219  Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
220  Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
221  Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
222  Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
223  Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
224  Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
225  Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
226  Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
227  Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
228  Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
229  Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
230  Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
231  Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
232  Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
233  Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
234  Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
235  Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
236  Name == "avx512.cvtusi2sd" || // Added in 7.0
237  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
238  Name == "sse2.pmulu.dq" || // Added in 7.0
239  Name == "sse41.pmuldq" || // Added in 7.0
240  Name == "avx2.pmulu.dq" || // Added in 7.0
241  Name == "avx2.pmul.dq" || // Added in 7.0
242  Name == "avx512.pmulu.dq.512" || // Added in 7.0
243  Name == "avx512.pmul.dq.512" || // Added in 7.0
244  Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
245  Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
246  Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
247  Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
248  Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
249  Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
250  Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
251  Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
252  Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
253  Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
254  Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
255  Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
256  Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
257  Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
258  Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
259  Name.startswith("avx512.cmp.p") || // Added in 12.0
260  Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
261  Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
262  Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
263  Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
264  Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
265  Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
266  Name.startswith("avx512.mask.psll.d") || // Added in 4.0
267  Name.startswith("avx512.mask.psll.q") || // Added in 4.0
268  Name.startswith("avx512.mask.psll.w") || // Added in 4.0
269  Name.startswith("avx512.mask.psra.d") || // Added in 4.0
270  Name.startswith("avx512.mask.psra.q") || // Added in 4.0
271  Name.startswith("avx512.mask.psra.w") || // Added in 4.0
272  Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
273  Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
274  Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
275  Name.startswith("avx512.mask.pslli") || // Added in 4.0
276  Name.startswith("avx512.mask.psrai") || // Added in 4.0
277  Name.startswith("avx512.mask.psrli") || // Added in 4.0
278  Name.startswith("avx512.mask.psllv") || // Added in 4.0
279  Name.startswith("avx512.mask.psrav") || // Added in 4.0
280  Name.startswith("avx512.mask.psrlv") || // Added in 4.0
281  Name.startswith("sse41.pmovsx") || // Added in 3.8
282  Name.startswith("sse41.pmovzx") || // Added in 3.9
283  Name.startswith("avx2.pmovsx") || // Added in 3.9
284  Name.startswith("avx2.pmovzx") || // Added in 3.9
285  Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
286  Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
287  Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
288  Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
289  Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
290  Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
291  Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
292  Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
293  Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
294  Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
295  Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
296  Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
297  Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
298  Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
299  Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
300  Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
301  Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
302  Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
303  Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
304  Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
305  Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
306  Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
307  Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
308  Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
309  Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
310  Name.startswith("avx512.vpshld.") || // Added in 8.0
311  Name.startswith("avx512.vpshrd.") || // Added in 8.0
312  Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
313  Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
314  Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
315  Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
316  Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
317  Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
318  Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
319  Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
320  Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
321  Name.startswith("avx512.mask.conflict.") || // Added in 9.0
322  Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
323  Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
324  Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
325  Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
326  Name == "sse.cvtsi2ss" || // Added in 7.0
327  Name == "sse.cvtsi642ss" || // Added in 7.0
328  Name == "sse2.cvtsi2sd" || // Added in 7.0
329  Name == "sse2.cvtsi642sd" || // Added in 7.0
330  Name == "sse2.cvtss2sd" || // Added in 7.0
331  Name == "sse2.cvtdq2pd" || // Added in 3.9
332  Name == "sse2.cvtdq2ps" || // Added in 7.0
333  Name == "sse2.cvtps2pd" || // Added in 3.9
334  Name == "avx.cvtdq2.pd.256" || // Added in 3.9
335  Name == "avx.cvtdq2.ps.256" || // Added in 7.0
336  Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
337  Name.startswith("vcvtph2ps.") || // Added in 11.0
338  Name.startswith("avx.vinsertf128.") || // Added in 3.7
339  Name == "avx2.vinserti128" || // Added in 3.7
340  Name.startswith("avx512.mask.insert") || // Added in 4.0
341  Name.startswith("avx.vextractf128.") || // Added in 3.7
342  Name == "avx2.vextracti128" || // Added in 3.7
343  Name.startswith("avx512.mask.vextract") || // Added in 4.0
344  Name.startswith("sse4a.movnt.") || // Added in 3.9
345  Name.startswith("avx.movnt.") || // Added in 3.2
346  Name.startswith("avx512.storent.") || // Added in 3.9
347  Name == "sse41.movntdqa" || // Added in 5.0
348  Name == "avx2.movntdqa" || // Added in 5.0
349  Name == "avx512.movntdqa" || // Added in 5.0
350  Name == "sse2.storel.dq" || // Added in 3.9
351  Name.startswith("sse.storeu.") || // Added in 3.9
352  Name.startswith("sse2.storeu.") || // Added in 3.9
353  Name.startswith("avx.storeu.") || // Added in 3.9
354  Name.startswith("avx512.mask.storeu.") || // Added in 3.9
355  Name.startswith("avx512.mask.store.p") || // Added in 3.9
356  Name.startswith("avx512.mask.store.b.") || // Added in 3.9
357  Name.startswith("avx512.mask.store.w.") || // Added in 3.9
358  Name.startswith("avx512.mask.store.d.") || // Added in 3.9
359  Name.startswith("avx512.mask.store.q.") || // Added in 3.9
360  Name == "avx512.mask.store.ss" || // Added in 7.0
361  Name.startswith("avx512.mask.loadu.") || // Added in 3.9
362  Name.startswith("avx512.mask.load.") || // Added in 3.9
363  Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
364  Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
365  Name.startswith("avx512.mask.expand.b") || // Added in 9.0
366  Name.startswith("avx512.mask.expand.w") || // Added in 9.0
367  Name.startswith("avx512.mask.expand.d") || // Added in 9.0
368  Name.startswith("avx512.mask.expand.q") || // Added in 9.0
369  Name.startswith("avx512.mask.expand.p") || // Added in 9.0
370  Name.startswith("avx512.mask.compress.b") || // Added in 9.0
371  Name.startswith("avx512.mask.compress.w") || // Added in 9.0
372  Name.startswith("avx512.mask.compress.d") || // Added in 9.0
373  Name.startswith("avx512.mask.compress.q") || // Added in 9.0
374  Name.startswith("avx512.mask.compress.p") || // Added in 9.0
375  Name == "sse42.crc32.64.8" || // Added in 3.4
376  Name.startswith("avx.vbroadcast.s") || // Added in 3.5
377  Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
378  Name.startswith("avx512.mask.palignr.") || // Added in 3.9
379  Name.startswith("avx512.mask.valign.") || // Added in 4.0
380  Name.startswith("sse2.psll.dq") || // Added in 3.7
381  Name.startswith("sse2.psrl.dq") || // Added in 3.7
382  Name.startswith("avx2.psll.dq") || // Added in 3.7
383  Name.startswith("avx2.psrl.dq") || // Added in 3.7
384  Name.startswith("avx512.psll.dq") || // Added in 3.9
385  Name.startswith("avx512.psrl.dq") || // Added in 3.9
386  Name == "sse41.pblendw" || // Added in 3.7
387  Name.startswith("sse41.blendp") || // Added in 3.7
388  Name.startswith("avx.blend.p") || // Added in 3.7
389  Name == "avx2.pblendw" || // Added in 3.7
390  Name.startswith("avx2.pblendd.") || // Added in 3.7
391  Name.startswith("avx.vbroadcastf128") || // Added in 4.0
392  Name == "avx2.vbroadcasti128" || // Added in 3.7
393  Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
394  Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
395  Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
396  Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
397  Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
398  Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
399  Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
400  Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
401  Name == "xop.vpcmov" || // Added in 3.8
402  Name == "xop.vpcmov.256" || // Added in 5.0
403  Name.startswith("avx512.mask.move.s") || // Added in 4.0
404  Name.startswith("avx512.cvtmask2") || // Added in 5.0
405  Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
406  Name.startswith("xop.vprot") || // Added in 8.0
407  Name.startswith("avx512.prol") || // Added in 8.0
408  Name.startswith("avx512.pror") || // Added in 8.0
409  Name.startswith("avx512.mask.prorv.") || // Added in 8.0
410  Name.startswith("avx512.mask.pror.") || // Added in 8.0
411  Name.startswith("avx512.mask.prolv.") || // Added in 8.0
412  Name.startswith("avx512.mask.prol.") || // Added in 8.0
413  Name.startswith("avx512.ptestm") || // Added in 6.0
414  Name.startswith("avx512.ptestnm") || // Added in 6.0
415  Name.startswith("avx512.mask.pavg")) // Added in 6.0
416  return true;
417 
// Nothing in the list matched.
418  return false;
419 }
420 
// Decides whether the x86 intrinsic declaration F, identified by Name, must be
// upgraded. Returns false when Name does not start with "x86." or when none of
// the cases below applies. When it returns true, NewFn holds either the
// replacement declaration or nullptr (first branch below).
// NOTE(review): the embedded listing numbering skips 421 and 429, so the start
// of this signature and the condition guarding the `NewFn = nullptr` branch
// are missing from this copy — restore both from the upstream file.
422  Function *&NewFn) {
423  // Only handle intrinsics that start with "x86.".
424  if (!Name.startswith("x86."))
425  return false;
426  // Remove "x86." prefix; every comparison below uses the stripped name.
427  Name = Name.substr(4);
428 
// NOTE(review): the `if (...) {` that opens this branch (listing line 429) is
// absent here; only its body and closing brace survived.
430  NewFn = nullptr;
431  return true;
432  }
433 
434  if (Name == "rdtscp") { // Added in 8.0
435  // If this intrinsic has 0 operands, it's the new version.
436  if (F->getFunctionType()->getNumParams() == 0)
437  return false;
438 
439  rename(F);
440  NewFn = Intrinsic::getDeclaration(F->getParent(),
441  Intrinsic::x86_rdtscp);
442  return true;
443  }
444 
445  // SSE4.1 ptest functions may have an old signature. A "sse41.ptest" name
// whose suffix is none of "c", "z" or "nzc" falls through to the checks below.
446  if (Name.startswith("sse41.ptest")) { // Added in 3.2
447  if (Name.substr(11) == "c")
448  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
449  if (Name.substr(11) == "z")
450  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
451  if (Name.substr(11) == "nzc")
452  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
453  }
454  // Several blend and other instructions with masks used the wrong number of
455  // bits.
456  if (Name == "sse41.insertps") // Added in 3.6
457  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
458  NewFn);
459  if (Name == "sse41.dppd") // Added in 3.6
460  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
461  NewFn);
462  if (Name == "sse41.dpps") // Added in 3.6
463  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
464  NewFn);
465  if (Name == "sse41.mpsadbw") // Added in 3.6
466  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
467  NewFn);
468  if (Name == "avx.dp.ps.256") // Added in 3.6
469  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
470  NewFn);
471  if (Name == "avx2.mpsadbw") // Added in 3.6
472  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
473  NewFn);
// Masked fp compares: upgraded only while the declaration still returns a
// non-vector type (see UpgradeX86MaskedFPCompare).
474  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
475  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
476  NewFn);
477  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
478  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
479  NewFn);
480  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
481  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
482  NewFn);
483  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
484  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
485  NewFn);
486  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
487  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
488  NewFn);
489  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
490  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
491  NewFn);
492 
493  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
// Only the two-argument form is treated as old.
494  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
495  rename(F);
496  NewFn = Intrinsic::getDeclaration(F->getParent(),
497  Intrinsic::x86_xop_vfrcz_ss);
498  return true;
499  }
500  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
501  rename(F);
502  NewFn = Intrinsic::getDeclaration(F->getParent(),
503  Intrinsic::x86_xop_vfrcz_sd);
504  return true;
505  }
506  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
507  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
508  auto Idx = F->getFunctionType()->getParamType(2);
509  if (Idx->isFPOrFPVectorTy()) {
510  rename(F);
// Pick the replacement from the index operand's element and total bit widths;
// any combination not matched explicitly selects the ps.256 variant.
511  unsigned IdxSize = Idx->getPrimitiveSizeInBits();
512  unsigned EltSize = Idx->getScalarSizeInBits();
513  Intrinsic::ID Permil2ID;
514  if (EltSize == 64 && IdxSize == 128)
515  Permil2ID = Intrinsic::x86_xop_vpermil2pd;
516  else if (EltSize == 32 && IdxSize == 128)
517  Permil2ID = Intrinsic::x86_xop_vpermil2ps;
518  else if (EltSize == 64 && IdxSize == 256)
519  Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
520  else
521  Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
522  NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
523  return true;
524  }
525  }
526 
// Matched after the "x86." prefix was stripped; F is not renamed on this path.
527  if (Name == "seh.recoverfp") {
528  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
529  return true;
530  }
531 
532  return false;
533 }
534 
536  assert(F && "Illegal to upgrade a non-existent Function.");
537 
538  // Quickly eliminate it, if it's not a candidate.
539  StringRef Name = F->getName();
540  if (Name.size() <= 8 || !Name.startswith("llvm."))
541  return false;
542  Name = Name.substr(5); // Strip off "llvm."
543 
544  switch (Name[0]) {
545  default: break;
546  case 'a': {
547  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
548  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
549  F->arg_begin()->getType());
550  return true;
551  }
552  if (Name.startswith("arm.neon.vclz")) {
553  Type* args[2] = {
554  F->arg_begin()->getType(),
555  Type::getInt1Ty(F->getContext())
556  };
557  // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
558  // the end of the name. Change name from llvm.arm.neon.vclz.* to
559  // llvm.ctlz.*
560  FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
561  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
562  "llvm.ctlz." + Name.substr(14), F->getParent());
563  return true;
564  }
565  if (Name.startswith("arm.neon.vcnt")) {
566  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
567  F->arg_begin()->getType());
568  return true;
569  }
570  static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
571  if (vldRegex.match(Name)) {
572  auto fArgs = F->getFunctionType()->params();
573  SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
574  // Can't use Intrinsic::getDeclaration here as the return types might
575  // then only be structurally equal.
576  FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
577  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
578  "llvm." + Name + ".p0i8", F->getParent());
579  return true;
580  }
581  static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
582  if (vstRegex.match(Name)) {
583  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
584  Intrinsic::arm_neon_vst2,
585  Intrinsic::arm_neon_vst3,
586  Intrinsic::arm_neon_vst4};
587 
588  static const Intrinsic::ID StoreLaneInts[] = {
589  Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
590  Intrinsic::arm_neon_vst4lane
591  };
592 
593  auto fArgs = F->getFunctionType()->params();
594  Type *Tys[] = {fArgs[0], fArgs[1]};
595  if (Name.find("lane") == StringRef::npos)
596  NewFn = Intrinsic::getDeclaration(F->getParent(),
597  StoreInts[fArgs.size() - 3], Tys);
598  else
599  NewFn = Intrinsic::getDeclaration(F->getParent(),
600  StoreLaneInts[fArgs.size() - 5], Tys);
601  return true;
602  }
603  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
604  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
605  return true;
606  }
607  if (Name.startswith("arm.neon.vqadds.")) {
608  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
609  F->arg_begin()->getType());
610  return true;
611  }
612  if (Name.startswith("arm.neon.vqaddu.")) {
613  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
614  F->arg_begin()->getType());
615  return true;
616  }
617  if (Name.startswith("arm.neon.vqsubs.")) {
618  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
619  F->arg_begin()->getType());
620  return true;
621  }
622  if (Name.startswith("arm.neon.vqsubu.")) {
623  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
624  F->arg_begin()->getType());
625  return true;
626  }
627  if (Name.startswith("aarch64.neon.addp")) {
628  if (F->arg_size() != 2)
629  break; // Invalid IR.
630  VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
631  if (Ty && Ty->getElementType()->isFloatingPointTy()) {
632  NewFn = Intrinsic::getDeclaration(F->getParent(),
633  Intrinsic::aarch64_neon_faddp, Ty);
634  return true;
635  }
636  }
637 
638  // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
639  // respectively
640  if ((Name.startswith("arm.neon.bfdot.") ||
641  Name.startswith("aarch64.neon.bfdot.")) &&
642  Name.endswith("i8")) {
643  Intrinsic::ID IID =
645  .Cases("arm.neon.bfdot.v2f32.v8i8",
646  "arm.neon.bfdot.v4f32.v16i8",
647  Intrinsic::arm_neon_bfdot)
648  .Cases("aarch64.neon.bfdot.v2f32.v8i8",
649  "aarch64.neon.bfdot.v4f32.v16i8",
650  Intrinsic::aarch64_neon_bfdot)
652  if (IID == Intrinsic::not_intrinsic)
653  break;
654 
655  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
656  assert((OperandWidth == 64 || OperandWidth == 128) &&
657  "Unexpected operand width");
658  LLVMContext &Ctx = F->getParent()->getContext();
659  std::array<Type *, 2> Tys {{
660  F->getReturnType(),
661  FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
662  }};
663  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
664  return true;
665  }
666 
667  // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
668  // and accept v8bf16 instead of v16i8
669  if ((Name.startswith("arm.neon.bfm") ||
670  Name.startswith("aarch64.neon.bfm")) &&
671  Name.endswith(".v4f32.v16i8")) {
672  Intrinsic::ID IID =
674  .Case("arm.neon.bfmmla.v4f32.v16i8",
675  Intrinsic::arm_neon_bfmmla)
676  .Case("arm.neon.bfmlalb.v4f32.v16i8",
677  Intrinsic::arm_neon_bfmlalb)
678  .Case("arm.neon.bfmlalt.v4f32.v16i8",
679  Intrinsic::arm_neon_bfmlalt)
680  .Case("aarch64.neon.bfmmla.v4f32.v16i8",
681  Intrinsic::aarch64_neon_bfmmla)
682  .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
683  Intrinsic::aarch64_neon_bfmlalb)
684  .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
685  Intrinsic::aarch64_neon_bfmlalt)
687  if (IID == Intrinsic::not_intrinsic)
688  break;
689 
690  std::array<Type *, 0> Tys;
691  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
692  return true;
693  }
694  break;
695  }
696 
697  case 'c': {
698  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
699  rename(F);
700  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
701  F->arg_begin()->getType());
702  return true;
703  }
704  if (Name.startswith("cttz.") && F->arg_size() == 1) {
705  rename(F);
706  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
707  F->arg_begin()->getType());
708  return true;
709  }
710  break;
711  }
712  case 'd': {
713  if (Name == "dbg.value" && F->arg_size() == 4) {
714  rename(F);
715  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
716  return true;
717  }
718  break;
719  }
720  case 'e': {
722  static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
723  if (R.match(Name, &Groups)) {
726  .Case("add", Intrinsic::vector_reduce_add)
727  .Case("mul", Intrinsic::vector_reduce_mul)
728  .Case("and", Intrinsic::vector_reduce_and)
729  .Case("or", Intrinsic::vector_reduce_or)
730  .Case("xor", Intrinsic::vector_reduce_xor)
731  .Case("smax", Intrinsic::vector_reduce_smax)
732  .Case("smin", Intrinsic::vector_reduce_smin)
733  .Case("umax", Intrinsic::vector_reduce_umax)
734  .Case("umin", Intrinsic::vector_reduce_umin)
735  .Case("fmax", Intrinsic::vector_reduce_fmax)
736  .Case("fmin", Intrinsic::vector_reduce_fmin)
738  if (ID != Intrinsic::not_intrinsic) {
739  rename(F);
740  auto Args = F->getFunctionType()->params();
741  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
742  return true;
743  }
744  }
745  static const Regex R2(
746  "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
747  Groups.clear();
748  if (R2.match(Name, &Groups)) {
750  if (Groups[1] == "fadd")
751  ID = Intrinsic::vector_reduce_fadd;
752  if (Groups[1] == "fmul")
753  ID = Intrinsic::vector_reduce_fmul;
754  if (ID != Intrinsic::not_intrinsic) {
755  rename(F);
756  auto Args = F->getFunctionType()->params();
757  Type *Tys[] = {Args[1]};
758  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
759  return true;
760  }
761  }
762  break;
763  }
764  case 'i':
765  case 'l': {
766  bool IsLifetimeStart = Name.startswith("lifetime.start");
767  if (IsLifetimeStart || Name.startswith("invariant.start")) {
768  Intrinsic::ID ID = IsLifetimeStart ?
769  Intrinsic::lifetime_start : Intrinsic::invariant_start;
770  auto Args = F->getFunctionType()->params();
771  Type* ObjectPtr[1] = {Args[1]};
772  if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
773  rename(F);
774  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
775  return true;
776  }
777  }
778 
779  bool IsLifetimeEnd = Name.startswith("lifetime.end");
780  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
781  Intrinsic::ID ID = IsLifetimeEnd ?
782  Intrinsic::lifetime_end : Intrinsic::invariant_end;
783 
784  auto Args = F->getFunctionType()->params();
785  Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
786  if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
787  rename(F);
788  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
789  return true;
790  }
791  }
792  if (Name.startswith("invariant.group.barrier")) {
793  // Rename invariant.group.barrier to launder.invariant.group
794  auto Args = F->getFunctionType()->params();
795  Type* ObjectPtr[1] = {Args[0]};
796  rename(F);
797  NewFn = Intrinsic::getDeclaration(F->getParent(),
798  Intrinsic::launder_invariant_group, ObjectPtr);
799  return true;
800 
801  }
802 
803  break;
804  }
805  case 'm': {
806  if (Name.startswith("masked.load.")) {
807  Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
808  if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
809  rename(F);
810  NewFn = Intrinsic::getDeclaration(F->getParent(),
811  Intrinsic::masked_load,
812  Tys);
813  return true;
814  }
815  }
816  if (Name.startswith("masked.store.")) {
817  auto Args = F->getFunctionType()->params();
818  Type *Tys[] = { Args[0], Args[1] };
819  if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
820  rename(F);
821  NewFn = Intrinsic::getDeclaration(F->getParent(),
822  Intrinsic::masked_store,
823  Tys);
824  return true;
825  }
826  }
827  // Renaming gather/scatter intrinsics with no address space overloading
828  // to the new overload which includes an address space
829  if (Name.startswith("masked.gather.")) {
830  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
831  if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
832  rename(F);
833  NewFn = Intrinsic::getDeclaration(F->getParent(),
834  Intrinsic::masked_gather, Tys);
835  return true;
836  }
837  }
838  if (Name.startswith("masked.scatter.")) {
839  auto Args = F->getFunctionType()->params();
840  Type *Tys[] = {Args[0], Args[1]};
841  if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
842  rename(F);
843  NewFn = Intrinsic::getDeclaration(F->getParent(),
844  Intrinsic::masked_scatter, Tys);
845  return true;
846  }
847  }
848  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
849  // alignment parameter to embedding the alignment as an attribute of
850  // the pointer args.
851  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
852  rename(F);
853  // Get the types of dest, src, and len
854  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
855  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
856  ParamTypes);
857  return true;
858  }
859  if (Name.startswith("memmove.") && F->arg_size() == 5) {
860  rename(F);
861  // Get the types of dest, src, and len
862  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
863  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
864  ParamTypes);
865  return true;
866  }
867  if (Name.startswith("memset.") && F->arg_size() == 5) {
868  rename(F);
869  // Get the types of dest, and len
870  const auto *FT = F->getFunctionType();
871  Type *ParamTypes[2] = {
872  FT->getParamType(0), // Dest
873  FT->getParamType(2) // len
874  };
875  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
876  ParamTypes);
877  return true;
878  }
879  break;
880  }
881  case 'n': {
882  if (Name.startswith("nvvm.")) {
883  Name = Name.substr(5);
884 
885  // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
887  .Cases("brev32", "brev64", Intrinsic::bitreverse)
888  .Case("clz.i", Intrinsic::ctlz)
889  .Case("popc.i", Intrinsic::ctpop)
891  if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
892  NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
893  {F->getReturnType()});
894  return true;
895  }
896 
897  // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
898  // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
899  //
900  // TODO: We could add lohi.i2d.
901  bool Expand = StringSwitch<bool>(Name)
902  .Cases("abs.i", "abs.ll", true)
903  .Cases("clz.ll", "popc.ll", "h2f", true)
904  .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
905  .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
906  .StartsWith("atomic.load.add.f32.p", true)
907  .StartsWith("atomic.load.add.f64.p", true)
908  .Default(false);
909  if (Expand) {
910  NewFn = nullptr;
911  return true;
912  }
913  }
914  break;
915  }
916  case 'o':
917  // We only need to change the name to match the mangling including the
918  // address space.
919  if (Name.startswith("objectsize.")) {
920  Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
921  if (F->arg_size() == 2 || F->arg_size() == 3 ||
922  F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
923  rename(F);
924  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
925  Tys);
926  return true;
927  }
928  }
929  break;
930 
931  case 'p':
932  if (Name == "prefetch") {
933  // Handle address space overloading.
934  Type *Tys[] = {F->arg_begin()->getType()};
935  if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
936  rename(F);
937  NewFn =
938  Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
939  return true;
940  }
941  } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
942  rename(F);
943  NewFn = Intrinsic::getDeclaration(F->getParent(),
944  Intrinsic::ptr_annotation,
945  F->arg_begin()->getType());
946  return true;
947  }
948  break;
949 
950  case 's':
951  if (Name == "stackprotectorcheck") {
952  NewFn = nullptr;
953  return true;
954  }
955  break;
956 
957  case 'v': {
958  if (Name == "var.annotation" && F->arg_size() == 4) {
959  rename(F);
960  NewFn = Intrinsic::getDeclaration(F->getParent(),
961  Intrinsic::var_annotation);
962  return true;
963  }
964  break;
965  }
966 
967  case 'x':
968  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
969  return true;
970  }
971  // Remangle our intrinsic since we upgrade the mangling
973  if (Result != None) {
974  NewFn = Result.getValue();
975  return true;
976  }
977 
978  // This may not belong here. This function is effectively being overloaded
979  // to both detect an intrinsic which needs upgrading, and to provide the
980  // upgraded form of the intrinsic. We should perhaps have two separate
981  // functions for this.
982  return false;
983 }
984 
// NOTE(review): the line that declares this function (listing line 985) is
// missing from this extract, so its name, return type and parameter list are
// not visible here. The body works on a function pointer `F` and an
// out-value `NewFn`.
//
// Resets NewFn, delegates the upgrade decision to UpgradeIntrinsicFunction1,
// and then re-applies the intrinsic attribute list to NewFn when one was
// produced, otherwise to F. Returns the result of UpgradeIntrinsicFunction1.
986  NewFn = nullptr;
987  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
988  assert(F != NewFn && "Intrinsic function upgraded to the same function");
989 
990  // Upgrade intrinsic attributes. This does not change the function.
// From here on F is only a local alias for whichever function gets the
// attributes; attributes are set only when getIntrinsicID() is non-zero.
991  if (NewFn)
992  F = NewFn;
993  if (Intrinsic::ID id = F->getIntrinsicID())
994  F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
995  return Upgraded;
996 }
997 
// NOTE(review): the declaration line (listing line 998) is missing from this
// extract; the body operates on a global variable `GV`.
//
// Returns nullptr unless GV is named "llvm.global_ctors" or
// "llvm.global_dtors", has an initializer, and has an array value type whose
// element is a struct with exactly two fields. For such a global it builds a
// replacement initializer whose element struct carries the two original field
// types plus an i8* third field, and returns a GlobalVariable allocated with
// `new` that reuses GV's linkage and name. No module is passed to that
// constructor here.
999  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1000  GV->getName() == "llvm.global_dtors")) ||
1001  !GV->hasInitializer())
1002  return nullptr;
1003  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1004  if (!ATy)
1005  return nullptr;
1006  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1007  if (!STy || STy->getNumElements() != 2)
1008  return nullptr;
1009 
1010  LLVMContext &C = GV->getContext();
1011  IRBuilder<> IRB(C);
// New element type: the two existing fields followed by an i8*.
1012  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1013  IRB.getInt8PtrTy());
1014  Constant *Init = GV->getInitializer();
1015  unsigned N = Init->getNumOperands();
1016  std::vector<Constant *> NewCtors(N);
// Rebuild every entry of the old initializer with the widened struct type.
1017  for (unsigned i = 0; i != N; ++i) {
1018  auto Ctor = cast<Constant>(Init->getOperand(i));
1019  NewCtors[i] = ConstantStruct::get(
1020  EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
// NOTE(review): listing line 1021, which supplies the value of the third
// field and closes this call, is missing from the extract.
1022  }
1023  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1024 
1025  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1026  NewInit, GV->getName());
1027 }
1028 
1029 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1030 // to byte shuffles.
//
// NOTE(review): the first line of the signature (listing line 1031), which
// carries the function name, return type and the `Builder` parameter, is
// missing from this extract.
//
// Op is the vector being shifted (must be a fixed-width vector) and Shift is
// the shift amount in bytes. The result has the same type as Op; a Shift of 16
// or more yields an all-zero vector.
1032  Value *Op, unsigned Shift) {
1033  auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Number of bytes in the vector, assuming the 64-bit elements mentioned below.
1034  unsigned NumElts = ResultTy->getNumElements() * 8;
1035 
1036  // Bitcast from a 64-bit element type to a byte element type.
1037  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1038  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1039 
1040  // We'll be shuffling in zeroes.
1041  Value *Res = Constant::getNullValue(VecTy);
1042 
1043  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1044  // we'll just return the zero vector.
1045  if (Shift < 16) {
// The index buffer holds 64 entries, so NumElts must not exceed 64 bytes.
1046  int Idxs[64];
1047  // 256/512-bit version is split into 2/4 16-byte lanes.
1048  for (unsigned l = 0; l != NumElts; l += 16)
1049  for (unsigned i = 0; i != 16; ++i) {
// The shuffle operands are (zero vector, Op): an index of NumElts or more
// picks a byte of Op, a smaller index picks a zero byte.
1050  unsigned Idx = NumElts + i - Shift;
1051  if (Idx < NumElts)
1052  Idx -= NumElts - 16; // end of lane, switch operand.
1053  Idxs[l + i] = Idx + l;
1054  }
1055 
1056  Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1057  }
1058 
1059  // Bitcast back to a 64-bit element type.
1060  return Builder.CreateBitCast(Res, ResultTy, "cast");
1061 }
1062 
1063 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1064 // to byte shuffles.
//
// NOTE(review): the first line of the signature (listing line 1065), which
// carries the function name, return type and the leading parameters
// (`Builder` and `Op` are used below), is missing from this extract.
//
// Shift is the shift amount in bytes. The result has the same type as Op; a
// Shift of 16 or more yields an all-zero vector.
1066  unsigned Shift) {
1067  auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Number of bytes in the vector, assuming the 64-bit elements mentioned below.
1068  unsigned NumElts = ResultTy->getNumElements() * 8;
1069 
1070  // Bitcast from a 64-bit element type to a byte element type.
1071  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1072  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1073 
1074  // We'll be shuffling in zeroes.
1075  Value *Res = Constant::getNullValue(VecTy);
1076 
1077  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1078  // we'll just return the zero vector.
1079  if (Shift < 16) {
// The index buffer holds 64 entries, so NumElts must not exceed 64 bytes.
1080  int Idxs[64];
1081  // 256/512-bit version is split into 2/4 16-byte lanes.
1082  for (unsigned l = 0; l != NumElts; l += 16)
1083  for (unsigned i = 0; i != 16; ++i) {
// The shuffle operands are (Op, zero vector): once the index runs past the
// 16-byte lane it is moved into the second operand so a zero byte is read.
1084  unsigned Idx = i + Shift;
1085  if (Idx >= 16)
1086  Idx += NumElts - 16; // end of lane, switch operand.
1087  Idxs[l + i] = Idx + l;
1088  }
1089 
1090  Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1091  }
1092 
1093  // Bitcast back to a 64-bit element type.
1094  return Builder.CreateBitCast(Res, ResultTy, "cast");
1095 }
1096 
// Converts an integer mask into a vector of i1 and, for masks describing at
// most four elements, narrows that vector to the first NumElts entries.
//
// NOTE(review): the first signature line (listing line 1097) is missing from
// this extract. Call sites in this file invoke it as
// getX86MaskVec(Builder, Mask, NumElts).
1098  unsigned NumElts) {
1099  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
// NOTE(review): the start of the MaskTy declaration (listing line 1100) is
// missing. The visible tail passes i1 and the bit width of Mask's integer
// type, so Mask must have an integer type.
1101  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1102  Mask = Builder.CreateBitCast(Mask, MaskTy);
1103 
1104  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1105  // i8 and we need to extract down to the right number of elements.
1106  if (NumElts <= 4) {
1107  int Indices[4];
1108  for (unsigned i = 0; i != NumElts; ++i)
1109  Indices[i] = i;
// Identity shuffle over the first NumElts lanes drops the unused upper bits.
1110  Mask = Builder.CreateShuffleVector(
1111  Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1112  }
1113 
1114  return Mask;
1115 }
1116 
// Emits a select between Op0 and Op1 controlled by Mask. When Mask is a
// constant with all bits set, Op0 is returned directly and nothing is emitted.
//
// NOTE(review): the first signature line (listing line 1117) is missing from
// this extract. Call sites in this file invoke it as
// EmitX86Select(Builder, Mask, Op0, Op1).
1118  Value *Op0, Value *Op1) {
1119  // If the mask is all ones just emit the first operation.
1120  if (const auto *C = dyn_cast<Constant>(Mask))
1121  if (C->isAllOnesValue())
1122  return Op0;
1123 
// NOTE(review): listing line 1124 is missing. The visible tail passes the
// element count of Op0's fixed vector type, presumably to a call that turns
// Mask into a vector condition for the select below; confirm against upstream.
1125  cast<FixedVectorType>(Op0->getType())->getNumElements());
1126  return Builder.CreateSelect(Mask, Op0, Op1);
1127 }
1128 
// Emits a select between Op0 and Op1 controlled by element 0 of Mask after
// Mask is bitcast to a vector of i1. When Mask is a constant with all bits
// set, Op0 is returned directly and nothing is emitted.
//
// NOTE(review): the first signature line (listing line 1129), which carries
// the function name, return type and the `Builder` and `Mask` parameters, is
// missing from this extract.
1130  Value *Op0, Value *Op1) {
1131  // If the mask is all ones just emit the first operation.
1132  if (const auto *C = dyn_cast<Constant>(Mask))
1133  if (C->isAllOnesValue())
1134  return Op0;
1135 
// Reinterpret the integer mask as one i1 per bit, then keep only element 0.
1136  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1137  Mask->getType()->getIntegerBitWidth());
1138  Mask = Builder.CreateBitCast(Mask, MaskTy);
1139  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1140  return Builder.CreateSelect(Mask, Op0, Op1);
1141 }
1142 
1143 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1144 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1145 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
//
// NOTE(review): the first signature line (listing line 1146), which carries
// the function name, return type and the `Builder` and `Op0` parameters, is
// missing from this extract.
//
// Shift must be a ConstantInt (it is cast unconditionally). The result is a
// shuffle of (Op1, Op0) blended with Passthru under Mask via EmitX86Select,
// or an all-zero vector of Op0's type when the shift amount is 32 or more.
1147  Value *Op1, Value *Shift,
1148  Value *Passthru, Value *Mask,
1149  bool IsVALIGN) {
1150  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1151 
1152  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1153  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1154  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1155  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1156 
1157  // Mask the immediate for VALIGN.
1158  if (IsVALIGN)
1159  ShiftVal &= (NumElts - 1);
1160 
1161  // If palignr is shifting the pair of vectors more than the size of two
1162  // lanes, emit zero.
1163  if (ShiftVal >= 32)
1164  return llvm::Constant::getNullValue(Op0->getType());
1165 
1166  // If palignr is shifting the pair of input vectors more than one lane,
1167  // but less than two lanes, convert to shifting in zeroes.
1168  if (ShiftVal > 16) {
1169  ShiftVal -= 16;
1170  Op1 = Op0;
1171  Op0 = llvm::Constant::getNullValue(Op0->getType());
1172  }
1173 
// Each outer iteration writes 16 indices; only the first NumElts are used.
1174  int Indices[64];
1175  // 256-bit palignr operates on 128-bit lanes so we need to handle that
1176  for (unsigned l = 0; l < NumElts; l += 16) {
1177  for (unsigned i = 0; i != 16; ++i) {
1178  unsigned Idx = ShiftVal + i;
1179  if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1180  Idx += NumElts - 16; // End of lane, switch operand.
1181  Indices[l + i] = Idx + l;
1182  }
1183  }
1184 
1185  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1186  makeArrayRef(Indices, NumElts),
1187  "palignr");
1188 
1189  return EmitX86Select(Builder, Mask, Align, Passthru);
1190 }
1191 
// Replaces a masked permute call with the matching
// x86_avx512_vpermi2var_* intrinsic followed by a select on the mask.
//
// NOTE(review): the first signature line (listing line 1192), which carries
// the function name, return type and the `Builder` and `CI` parameters, is
// missing from this extract.
//
// ZeroMask selects an all-zero pass-through instead of argument 1 of the
// call. IndexForm tells whether arguments 0 and 1 are already in the order the
// new intrinsic expects; when false they are swapped.
1193  bool ZeroMask, bool IndexForm) {
1194  Type *Ty = CI.getType();
1195  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1196  unsigned EltWidth = Ty->getScalarSizeInBits();
1197  bool IsFloat = Ty->isFPOrFPVectorTy();
// Pick the intrinsic from total width, element width and (for 32/64-bit
// elements) whether the element type is floating point.
1198  Intrinsic::ID IID;
1199  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1200  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1201  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1202  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1203  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1204  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1205  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1206  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1207  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1208  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1209  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1210  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1211  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1212  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1213  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1214  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1215  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1216  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1217  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1218  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1219  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1220  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1221  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1222  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1223  else if (VecWidth == 128 && EltWidth == 16)
1224  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1225  else if (VecWidth == 256 && EltWidth == 16)
1226  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1227  else if (VecWidth == 512 && EltWidth == 16)
1228  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1229  else if (VecWidth == 128 && EltWidth == 8)
1230  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1231  else if (VecWidth == 256 && EltWidth == 8)
1232  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1233  else if (VecWidth == 512 && EltWidth == 8)
1234  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1235  else
1236  llvm_unreachable("Unexpected intrinsic");
1237 
1238  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1239  CI.getArgOperand(2) };
1240 
1241  // If this isn't index form we need to swap operand 0 and 1.
1242  if (!IndexForm)
1243  std::swap(Args[0], Args[1]);
1244 
1245  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1246  Args);
// The pass-through reads argument 1 of the original call, not the possibly
// swapped Args array, and is bitcast to the result type.
1247  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1248  : Builder.CreateBitCast(CI.getArgOperand(1),
1249  Ty);
1250  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1251 }
1252 
// Replaces a two-operand call with the intrinsic IID, overloaded on the
// call's result type. When the original call has four arguments, the result is
// blended with operand 2 under the mask in operand 3 via EmitX86Select.
//
// NOTE(review): the first signature line (listing line 1253), which carries
// the function name, return type and the `Builder` and `CI` parameters, is
// missing from this extract.
1254  Intrinsic::ID IID) {
1255  Type *Ty = CI.getType();
1256  Value *Op0 = CI.getOperand(0);
1257  Value *Op1 = CI.getOperand(1);
1258  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1259  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1260 
1261  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1262  Value *VecSrc = CI.getOperand(2);
1263  Value *Mask = CI.getOperand(3);
1264  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1265  }
1266  return Res;
1267 }
1268 
// Lowers a rotate call to a funnel shift (fshr when IsRotateRight, otherwise
// fshl) whose two data inputs are both argument 0. When the original call has
// four arguments, the result is blended with operand 2 under the mask in
// operand 3 via EmitX86Select.
//
// NOTE(review): the first signature line (listing line 1269), which carries
// the function name, return type and the `Builder` and `CI` parameters, is
// missing from this extract.
1270  bool IsRotateRight) {
1271  Type *Ty = CI.getType();
1272  Value *Src = CI.getArgOperand(0);
1273  Value *Amt = CI.getArgOperand(1);
1274 
1275  // Amount may be scalar immediate, in which case create a splat vector.
1276  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1277  // we only care about the lowest log2 bits anyway.
1278  if (Amt->getType() != Ty) {
1279  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned int-cast to the element type before broadcasting to every lane.
1280  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1281  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1282  }
1283 
1284  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1285  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1286  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1287 
1288  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1289  Value *VecSrc = CI.getOperand(2);
1290  Value *Mask = CI.getOperand(3);
1291  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1292  }
1293  return Res;
1294 }
1295 
1296 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1297  bool IsSigned) {
1298  Type *Ty = CI.getType();
1299  Value *LHS = CI.getArgOperand(0);
1300  Value *RHS = CI.getArgOperand(1);
1301 
1302  CmpInst::Predicate Pred;
1303  switch (Imm) {
1304  case 0x0:
1305  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1306  break;
1307  case 0x1:
1308  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1309  break;
1310  case 0x2:
1311  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1312  break;
1313  case 0x3:
1314  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1315  break;
1316  case 0x4:
1317  Pred = ICmpInst::ICMP_EQ;
1318  break;
1319  case 0x5:
1320  Pred = ICmpInst::ICMP_NE;
1321  break;
1322  case 0x6:
1323  return Constant::getNullValue(Ty); // FALSE
1324  case 0x7:
1325  return Constant::getAllOnesValue(Ty); // TRUE
1326  default:
1327  llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1328  }
1329 
1330  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1331  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1332  return Ext;
1333 }
1334 
// Lowers a concatenate-and-shift call to a funnel shift: fshr with the two
// data operands swapped when IsShiftRight, otherwise fshl. Calls with four or
// more arguments are treated as masked and the result is blended under the
// last operand via EmitX86Select.
//
// NOTE(review): the first signature line (listing line 1335), which carries
// the function name, return type and the `Builder` and `CI` parameters, is
// missing from this extract.
1336  bool IsShiftRight, bool ZeroMask) {
1337  Type *Ty = CI.getType();
1338  Value *Op0 = CI.getArgOperand(0);
1339  Value *Op1 = CI.getArgOperand(1);
1340  Value *Amt = CI.getArgOperand(2);
1341 
1342  if (IsShiftRight)
1343  std::swap(Op0, Op1);
1344 
1345  // Amount may be scalar immediate, in which case create a splat vector.
1346  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1347  // we only care about the lowest log2 bits anyway.
1348  if (Amt->getType() != Ty) {
1349  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1350  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1351  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1352  }
1353 
1354  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1355  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1356  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1357 
1358  unsigned NumArgs = CI.getNumArgOperands();
1359  if (NumArgs >= 4) { // For masked intrinsics.
// Pass-through: argument 3 for five-argument calls; otherwise zero when
// ZeroMask is set, else argument 0 of the original call (read before any swap).
1360  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1361  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1362  CI.getArgOperand(0);
1363  Value *Mask = CI.getOperand(NumArgs - 1);
1364  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1365  }
1366  return Res;
1367 }
1368 
// Emits a store of Data through Ptr: a plain aligned store when Mask is an
// all-ones constant, otherwise a masked store driven by Mask converted with
// getX86MaskVec.
//
// NOTE(review): the first signature line (listing line 1369), which carries
// the function name, return type and the `Builder` parameter, is missing from
// this extract.
//
// Aligned selects an alignment equal to Data's size in bytes; otherwise the
// alignment is 1.
1370  Value *Ptr, Value *Data, Value *Mask,
1371  bool Aligned) {
1372  // Cast the pointer to the right type.
1373  Ptr = Builder.CreateBitCast(Ptr,
1374  llvm::PointerType::getUnqual(Data->getType()));
1375  const Align Alignment =
1376  Aligned
1377  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1378  : Align(1);
1379 
1380  // If the mask is all ones just emit a regular store.
1381  if (const auto *C = dyn_cast<Constant>(Mask))
1382  if (C->isAllOnesValue())
1383  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1384 
1385  // Convert the mask from an integer type to a vector of i1.
1386  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1387  Mask = getX86MaskVec(Builder, Mask, NumElts);
1388  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1389 }
1390 
// Emits a load through Ptr of Passthru's type: a plain aligned load when Mask
// is an all-ones constant, otherwise a masked load that uses Passthru for the
// disabled lanes and Mask converted with getX86MaskVec.
//
// NOTE(review): the first signature line (listing line 1391), which carries
// the function name, return type and the `Builder` parameter, is missing from
// this extract.
//
// Aligned selects an alignment equal to the loaded type's size in bytes;
// otherwise the alignment is 1.
1392  Value *Ptr, Value *Passthru, Value *Mask,
1393  bool Aligned) {
1394  Type *ValTy = Passthru->getType();
1395  // Cast the pointer to the right type.
1396  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1397  const Align Alignment =
1398  Aligned
1399  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1400  8)
1401  : Align(1);
1402 
1403  // If the mask is all ones just emit a regular load.
1404  if (const auto *C = dyn_cast<Constant>(Mask))
1405  if (C->isAllOnesValue())
1406  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1407 
1408  // Convert the mask from an integer type to a vector of i1.
1409  unsigned NumElts =
1410  cast<FixedVectorType>(Passthru->getType())->getNumElements();
1411  Mask = getX86MaskVec(Builder, Mask, NumElts);
1412  return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
1413 }
1414 
// Replaces a call with a call to `F` on argument 0 plus a constant i1 false.
// When the original call has three arguments, the result is blended with
// argument 1 under the mask in argument 2 via EmitX86Select.
//
// NOTE(review): two lines are missing from this extract: the signature
// (listing line 1415) and the declaration of `F` (listing line 1418). Which
// function `F` refers to is therefore not visible here, and `Ty` has no
// visible use.
1416  Type *Ty = CI.getType();
1417  Value *Op0 = CI.getArgOperand(0);
1419  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1420  if (CI.getNumArgOperands() == 3)
1421  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1422  return Res;
1423 }
1424 
1425 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1426  Type *Ty = CI.getType();
1427 
1428  // Arguments have a vXi32 type so cast to vXi64.
1429  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1430  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1431 
1432  if (IsSigned) {
1433  // Shift left then arithmetic shift right.
1434  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1435  LHS = Builder.CreateShl(LHS, ShiftAmt);
1436  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1437  RHS = Builder.CreateShl(RHS, ShiftAmt);
1438  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1439  } else {
1440  // Clear the upper bits.
1441  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1442  LHS = Builder.CreateAnd(LHS, Mask);
1443  RHS = Builder.CreateAnd(RHS, Mask);
1444  }
1445 
1446  Value *Res = Builder.CreateMul(LHS, RHS);
1447 
1448  if (CI.getNumArgOperands() == 4)
1449  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1450 
1451  return Res;
1452 }
1453 
1454 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
//
// NOTE(review): the first signature line (listing line 1455) is missing from
// this extract. A call site in this file invokes it as
// ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask).
//
// Mask may be null, in which case no AND is emitted. The AND is also skipped
// when Mask is an all-ones constant. The return value is Vec bitcast to an
// integer of max(NumElts, 8) bits.
1456  Value *Mask) {
1457  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1458  if (Mask) {
1459  const auto *C = dyn_cast<Constant>(Mask);
1460  if (!C || !C->isAllOnesValue())
1461  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1462  }
1463 
// Pad vectors shorter than 8 lanes up to 8 lanes. The padding indices are
// all NumElts or larger, so they read from the second shuffle operand.
1464  if (NumElts < 8) {
1465  int Indices[8];
1466  for (unsigned i = 0; i != NumElts; ++i)
1467  Indices[i] = i;
1468  for (unsigned i = NumElts; i != 8; ++i)
1469  Indices[i] = NumElts + i % NumElts;
1470  Vec = Builder.CreateShuffleVector(Vec,
// NOTE(review): listing line 1471, which supplies the second shuffle operand,
// is missing from this extract.
1472  Indices);
1473  }
1474  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1475 }
1476 
// Lowers a masked integer compare call to an icmp (or a constant vector for
// condition codes 3 and 7) and packs the result with
// ApplyX86MaskOn1BitsVec, using the call's last argument as the mask.
//
// NOTE(review): the first signature line (listing line 1477), which carries
// the function name, return type and the `Builder` and `CI` parameters, is
// missing from this extract.
//
// CC is the condition code; Signed chooses between the signed and unsigned
// form of the ordered predicates.
1478  unsigned CC, bool Signed) {
1479  Value *Op0 = CI.getArgOperand(0);
1480  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1481 
1482  Value *Cmp;
1483  if (CC == 3) {
// Condition code 3 yields an all-false i1 vector.
1484  Cmp = Constant::getNullValue(
1485  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1486  } else if (CC == 7) {
// NOTE(review): listing line 1487, the start of this assignment, is missing;
// only the i1 vector type argument is visible. Presumably the all-true
// counterpart of the branch above; confirm against upstream.
1488  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1489  } else {
1490  ICmpInst::Predicate Pred;
1491  switch (CC) {
1492  default: llvm_unreachable("Unknown condition code");
1493  case 0: Pred = ICmpInst::ICMP_EQ; break;
1494  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1495  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1496  case 4: Pred = ICmpInst::ICMP_NE; break;
1497  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1498  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1499  }
1500  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1501  }
1502 
1503  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1504 
1505  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1506 }
1507 
1508 // Replace a masked intrinsic with an older unmasked intrinsic.
//
// NOTE(review): the first signature line (listing line 1509), which carries
// the function name, return type and the `Builder` and `CI` parameters, is
// missing from this extract.
//
// Calls the intrinsic IID with arguments 0 and 1 of the original call, then
// blends the result with argument 2 under the mask in argument 3 via
// EmitX86Select.
1510  Intrinsic::ID IID) {
1511  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1512  Value *Rep = Builder.CreateCall(Intrin,
1513  { CI.getArgOperand(0), CI.getArgOperand(1) });
1514  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1515 }
1516 
// Expands a masked scalar move: element 0 of the result is element 0 of B
// when bit 0 of Mask is set and element 0 of Src otherwise; every other
// element comes from A.
//
// NOTE(review): the signature line (listing line 1517) is missing from this
// extract; `Builder` and `CI` are used below.
1518  Value* A = CI.getArgOperand(0);
1519  Value* B = CI.getArgOperand(1);
1520  Value* Src = CI.getArgOperand(2);
1521  Value* Mask = CI.getArgOperand(3);
1522 
// Only bit 0 of the mask takes part in the decision; the constant used in
// the AND is 8 bits wide.
1523  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1524  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1525  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1526  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1527  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1528  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1529 }
1530 
1531 
// Expands a mask-to-vector conversion: argument 0 is turned into a vector of
// i1 with one lane per result element (via getX86MaskVec) and sign-extended to
// the call's result type. The result must be a fixed-width vector.
//
// NOTE(review): the signature line (listing line 1532) is missing from this
// extract; `Builder` and `CI` are used below.
1533  Value* Op = CI.getArgOperand(0);
1534  Type* ReturnOp = CI.getType();
1535  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1536  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1537  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1538 }
1539 
1540 // Replace intrinsic with unmasked version and a select.
//
// Maps a legacy masked intrinsic name onto the ID of an unmasked intrinsic,
// emits a call to that intrinsic with the last two call operands removed, and
// passes the result through EmitX86Select together with those two operands.
//
// Name - intrinsic name; its first 12 characters ("avx512.mask.") are dropped
//        before matching.
// CI   - the call being upgraded; its result type supplies the vector width
//        and element width used to choose the replacement.
// Rep  - receives the replacement value when the function returns true.
//
// Returns false, without touching Rep, when the stripped name matches none of
// the handled names; returns true otherwise. A handled name whose result type
// has an unexpected width reaches llvm_unreachable.
// NOTE(review): the first line of the function header (which declares Name
// and Builder) is not visible in this listing.
1542  CallInst &CI, Value *&Rep) {
1543  Name = Name.substr(12); // Remove avx512.mask.
1544 
// Total bit width of the result vector and bit width of one element.
1545  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1546  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
// IID is set on every path that reaches the call at the bottom: each branch
// either assigns it, hits llvm_unreachable, or the final else returns false.
1547  Intrinsic::ID IID;
1548  if (Name.startswith("max.p")) {
1549  if (VecWidth == 128 && EltWidth == 32)
1550  IID = Intrinsic::x86_sse_max_ps;
1551  else if (VecWidth == 128 && EltWidth == 64)
1552  IID = Intrinsic::x86_sse2_max_pd;
1553  else if (VecWidth == 256 && EltWidth == 32)
1554  IID = Intrinsic::x86_avx_max_ps_256;
1555  else if (VecWidth == 256 && EltWidth == 64)
1556  IID = Intrinsic::x86_avx_max_pd_256;
1557  else
1558  llvm_unreachable("Unexpected intrinsic");
1559  } else if (Name.startswith("min.p")) {
1560  if (VecWidth == 128 && EltWidth == 32)
1561  IID = Intrinsic::x86_sse_min_ps;
1562  else if (VecWidth == 128 && EltWidth == 64)
1563  IID = Intrinsic::x86_sse2_min_pd;
1564  else if (VecWidth == 256 && EltWidth == 32)
1565  IID = Intrinsic::x86_avx_min_ps_256;
1566  else if (VecWidth == 256 && EltWidth == 64)
1567  IID = Intrinsic::x86_avx_min_pd_256;
1568  else
1569  llvm_unreachable("Unexpected intrinsic");
1570  } else if (Name.startswith("pshuf.b.")) {
// From here through "packusdw." the choice depends on the vector width only.
1571  if (VecWidth == 128)
1572  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1573  else if (VecWidth == 256)
1574  IID = Intrinsic::x86_avx2_pshuf_b;
1575  else if (VecWidth == 512)
1576  IID = Intrinsic::x86_avx512_pshuf_b_512;
1577  else
1578  llvm_unreachable("Unexpected intrinsic");
1579  } else if (Name.startswith("pmul.hr.sw.")) {
1580  if (VecWidth == 128)
1581  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1582  else if (VecWidth == 256)
1583  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1584  else if (VecWidth == 512)
1585  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1586  else
1587  llvm_unreachable("Unexpected intrinsic");
1588  } else if (Name.startswith("pmulh.w.")) {
1589  if (VecWidth == 128)
1590  IID = Intrinsic::x86_sse2_pmulh_w;
1591  else if (VecWidth == 256)
1592  IID = Intrinsic::x86_avx2_pmulh_w;
1593  else if (VecWidth == 512)
1594  IID = Intrinsic::x86_avx512_pmulh_w_512;
1595  else
1596  llvm_unreachable("Unexpected intrinsic");
1597  } else if (Name.startswith("pmulhu.w.")) {
1598  if (VecWidth == 128)
1599  IID = Intrinsic::x86_sse2_pmulhu_w;
1600  else if (VecWidth == 256)
1601  IID = Intrinsic::x86_avx2_pmulhu_w;
1602  else if (VecWidth == 512)
1603  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1604  else
1605  llvm_unreachable("Unexpected intrinsic");
1606  } else if (Name.startswith("pmaddw.d.")) {
1607  if (VecWidth == 128)
1608  IID = Intrinsic::x86_sse2_pmadd_wd;
1609  else if (VecWidth == 256)
1610  IID = Intrinsic::x86_avx2_pmadd_wd;
1611  else if (VecWidth == 512)
1612  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1613  else
1614  llvm_unreachable("Unexpected intrinsic");
1615  } else if (Name.startswith("pmaddubs.w.")) {
1616  if (VecWidth == 128)
1617  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1618  else if (VecWidth == 256)
1619  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1620  else if (VecWidth == 512)
1621  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1622  else
1623  llvm_unreachable("Unexpected intrinsic");
1624  } else if (Name.startswith("packsswb.")) {
1625  if (VecWidth == 128)
1626  IID = Intrinsic::x86_sse2_packsswb_128;
1627  else if (VecWidth == 256)
1628  IID = Intrinsic::x86_avx2_packsswb;
1629  else if (VecWidth == 512)
1630  IID = Intrinsic::x86_avx512_packsswb_512;
1631  else
1632  llvm_unreachable("Unexpected intrinsic");
1633  } else if (Name.startswith("packssdw.")) {
1634  if (VecWidth == 128)
1635  IID = Intrinsic::x86_sse2_packssdw_128;
1636  else if (VecWidth == 256)
1637  IID = Intrinsic::x86_avx2_packssdw;
1638  else if (VecWidth == 512)
1639  IID = Intrinsic::x86_avx512_packssdw_512;
1640  else
1641  llvm_unreachable("Unexpected intrinsic");
1642  } else if (Name.startswith("packuswb.")) {
1643  if (VecWidth == 128)
1644  IID = Intrinsic::x86_sse2_packuswb_128;
1645  else if (VecWidth == 256)
1646  IID = Intrinsic::x86_avx2_packuswb;
1647  else if (VecWidth == 512)
1648  IID = Intrinsic::x86_avx512_packuswb_512;
1649  else
1650  llvm_unreachable("Unexpected intrinsic");
1651  } else if (Name.startswith("packusdw.")) {
1652  if (VecWidth == 128)
1653  IID = Intrinsic::x86_sse41_packusdw;
1654  else if (VecWidth == 256)
1655  IID = Intrinsic::x86_avx2_packusdw;
1656  else if (VecWidth == 512)
1657  IID = Intrinsic::x86_avx512_packusdw_512;
1658  else
1659  llvm_unreachable("Unexpected intrinsic");
1660  } else if (Name.startswith("vpermilvar.")) {
1661  if (VecWidth == 128 && EltWidth == 32)
1662  IID = Intrinsic::x86_avx_vpermilvar_ps;
1663  else if (VecWidth == 128 && EltWidth == 64)
1664  IID = Intrinsic::x86_avx_vpermilvar_pd;
1665  else if (VecWidth == 256 && EltWidth == 32)
1666  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1667  else if (VecWidth == 256 && EltWidth == 64)
1668  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1669  else if (VecWidth == 512 && EltWidth == 32)
1670  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1671  else if (VecWidth == 512 && EltWidth == 64)
1672  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1673  else
1674  llvm_unreachable("Unexpected intrinsic");
// The conversions below are matched by exact name, so no width check is made.
1675  } else if (Name == "cvtpd2dq.256") {
1676  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1677  } else if (Name == "cvtpd2ps.256") {
1678  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1679  } else if (Name == "cvttpd2dq.256") {
1680  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1681  } else if (Name == "cvttps2dq.128") {
1682  IID = Intrinsic::x86_sse2_cvttps2dq;
1683  } else if (Name == "cvttps2dq.256") {
1684  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1685  } else if (Name.startswith("permvar.")) {
// For 32- and 64-bit elements the floating-point and integer forms map to
// different intrinsics; 16- and 8-bit elements are selected by width alone.
1686  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1687  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1688  IID = Intrinsic::x86_avx2_permps;
1689  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1690  IID = Intrinsic::x86_avx2_permd;
1691  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1692  IID = Intrinsic::x86_avx512_permvar_df_256;
1693  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1694  IID = Intrinsic::x86_avx512_permvar_di_256;
1695  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1696  IID = Intrinsic::x86_avx512_permvar_sf_512;
1697  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1698  IID = Intrinsic::x86_avx512_permvar_si_512;
1699  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1700  IID = Intrinsic::x86_avx512_permvar_df_512;
1701  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1702  IID = Intrinsic::x86_avx512_permvar_di_512;
1703  else if (VecWidth == 128 && EltWidth == 16)
1704  IID = Intrinsic::x86_avx512_permvar_hi_128;
1705  else if (VecWidth == 256 && EltWidth == 16)
1706  IID = Intrinsic::x86_avx512_permvar_hi_256;
1707  else if (VecWidth == 512 && EltWidth == 16)
1708  IID = Intrinsic::x86_avx512_permvar_hi_512;
1709  else if (VecWidth == 128 && EltWidth == 8)
1710  IID = Intrinsic::x86_avx512_permvar_qi_128;
1711  else if (VecWidth == 256 && EltWidth == 8)
1712  IID = Intrinsic::x86_avx512_permvar_qi_256;
1713  else if (VecWidth == 512 && EltWidth == 8)
1714  IID = Intrinsic::x86_avx512_permvar_qi_512;
1715  else
1716  llvm_unreachable("Unexpected intrinsic");
1717  } else if (Name.startswith("dbpsadbw.")) {
1718  if (VecWidth == 128)
1719  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1720  else if (VecWidth == 256)
1721  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1722  else if (VecWidth == 512)
1723  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1724  else
1725  llvm_unreachable("Unexpected intrinsic");
1726  } else if (Name.startswith("pmultishift.qb.")) {
1727  if (VecWidth == 128)
1728  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1729  else if (VecWidth == 256)
1730  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1731  else if (VecWidth == 512)
1732  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1733  else
1734  llvm_unreachable("Unexpected intrinsic");
1735  } else if (Name.startswith("conflict.")) {
// Name[9] is the character right after the 9-character "conflict." prefix;
// 'd' and 'q' select between the two intrinsic families.
1736  if (Name[9] == 'd' && VecWidth == 128)
1737  IID = Intrinsic::x86_avx512_conflict_d_128;
1738  else if (Name[9] == 'd' && VecWidth == 256)
1739  IID = Intrinsic::x86_avx512_conflict_d_256;
1740  else if (Name[9] == 'd' && VecWidth == 512)
1741  IID = Intrinsic::x86_avx512_conflict_d_512;
1742  else if (Name[9] == 'q' && VecWidth == 128)
1743  IID = Intrinsic::x86_avx512_conflict_q_128;
1744  else if (Name[9] == 'q' && VecWidth == 256)
1745  IID = Intrinsic::x86_avx512_conflict_q_256;
1746  else if (Name[9] == 'q' && VecWidth == 512)
1747  IID = Intrinsic::x86_avx512_conflict_q_512;
1748  else
1749  llvm_unreachable("Unexpected intrinsic");
1750  } else if (Name.startswith("pavg.")) {
// Name[5] is the character right after the 5-character "pavg." prefix;
// 'b' and 'w' select between the two intrinsic families.
1751  if (Name[5] == 'b' && VecWidth == 128)
1752  IID = Intrinsic::x86_sse2_pavg_b;
1753  else if (Name[5] == 'b' && VecWidth == 256)
1754  IID = Intrinsic::x86_avx2_pavg_b;
1755  else if (Name[5] == 'b' && VecWidth == 512)
1756  IID = Intrinsic::x86_avx512_pavg_b_512;
1757  else if (Name[5] == 'w' && VecWidth == 128)
1758  IID = Intrinsic::x86_sse2_pavg_w;
1759  else if (Name[5] == 'w' && VecWidth == 256)
1760  IID = Intrinsic::x86_avx2_pavg_w;
1761  else if (Name[5] == 'w' && VecWidth == 512)
1762  IID = Intrinsic::x86_avx512_pavg_w_512;
1763  else
1764  llvm_unreachable("Unexpected intrinsic");
1765  } else
1766  return false;
1767 
1769  CI.arg_operands().end());
// NOTE(review): the line that declares Args is not visible in this listing;
// only the end of its initializer (CI.arg_operands().end()) is. Presumably
// Args starts as a copy of all call operands - confirm against the full file.
// The two trailing operands are removed before calling the unmasked
// intrinsic; the same two operands are handed to EmitX86Select below.
1770  Args.pop_back();
1771  Args.pop_back();
1772  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1773  Args);
1774  unsigned NumArgs = CI.getNumArgOperands();
// The last operand goes in EmitX86Select's second parameter and the
// second-to-last in its fourth (presumably the mask and the pass-through
// value respectively - confirm against EmitX86Select).
1775  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1776  CI.getArgOperand(NumArgs - 2));
1777  return true;
1778 }
1779 
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// The string is rewritten only when all of the following hold:
///   1. it begins with "mov\tfp",
///   2. it contains "objc_retainAutoreleaseReturnValue", and
///   3. it contains "# marker".
/// In that case the '#' of the first "# marker" is replaced by ';'. Any other
/// string is left untouched.
///
/// \param AsmStr inline asm string, updated in place. It is dereferenced
///               unconditionally, so it must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind(..., 0) can only match at position 0, so a string with a different
  // prefix is rejected immediately. find(...) == 0 gives the same answer but
  // scans the entire string before reporting a miss.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";"); // Only the single '#' character changes.
}
1790 
1791 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1792 /// provided to seamlessly integrate with existing context.
1794  Function *F = CI->getCalledFunction();
1795  LLVMContext &C = CI->getContext();
1797  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1798 
1799  assert(F && "Intrinsic call is not direct?");
1800 
1801  if (!NewFn) {
1802  // Get the Function's name.
1803  StringRef Name = F->getName();
1804 
1805  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1806  Name = Name.substr(5);
1807 
1808  bool IsX86 = Name.startswith("x86.");
1809  if (IsX86)
1810  Name = Name.substr(4);
1811  bool IsNVVM = Name.startswith("nvvm.");
1812  if (IsNVVM)
1813  Name = Name.substr(5);
1814 
1815  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1816  Module *M = F->getParent();
1818  Elts.push_back(
1820  MDNode *Node = MDNode::get(C, Elts);
1821 
1822  Value *Arg0 = CI->getArgOperand(0);
1823  Value *Arg1 = CI->getArgOperand(1);
1824 
1825  // Nontemporal (unaligned) store of the 0'th element of the float/double
1826  // vector.
1827  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1828  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1829  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1830  Value *Extract =
1831  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1832 
1833  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1834  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1835 
1836  // Remove intrinsic.
1837  CI->eraseFromParent();
1838  return;
1839  }
1840 
1841  if (IsX86 && (Name.startswith("avx.movnt.") ||
1842  Name.startswith("avx512.storent."))) {
1843  Module *M = F->getParent();
1845  Elts.push_back(
1847  MDNode *Node = MDNode::get(C, Elts);
1848 
1849  Value *Arg0 = CI->getArgOperand(0);
1850  Value *Arg1 = CI->getArgOperand(1);
1851 
1852  // Convert the type of the pointer to a pointer to the stored type.
1853  Value *BC = Builder.CreateBitCast(Arg0,
1855  "cast");
1856  StoreInst *SI = Builder.CreateAlignedStore(
1857  Arg1, BC,
1858  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1859  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1860 
1861  // Remove intrinsic.
1862  CI->eraseFromParent();
1863  return;
1864  }
1865 
1866  if (IsX86 && Name == "sse2.storel.dq") {
1867  Value *Arg0 = CI->getArgOperand(0);
1868  Value *Arg1 = CI->getArgOperand(1);
1869 
1870  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
1871  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1872  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1873  Value *BC = Builder.CreateBitCast(Arg0,
1875  "cast");
1876  Builder.CreateAlignedStore(Elt, BC, Align(1));
1877 
1878  // Remove intrinsic.
1879  CI->eraseFromParent();
1880  return;
1881  }
1882 
1883  if (IsX86 && (Name.startswith("sse.storeu.") ||
1884  Name.startswith("sse2.storeu.") ||
1885  Name.startswith("avx.storeu."))) {
1886  Value *Arg0 = CI->getArgOperand(0);
1887  Value *Arg1 = CI->getArgOperand(1);
1888 
1889  Arg0 = Builder.CreateBitCast(Arg0,
1891  "cast");
1892  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1893 
1894  // Remove intrinsic.
1895  CI->eraseFromParent();
1896  return;
1897  }
1898 
1899  if (IsX86 && Name == "avx512.mask.store.ss") {
1900  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1902  Mask, false);
1903 
1904  // Remove intrinsic.
1905  CI->eraseFromParent();
1906  return;
1907  }
1908 
1909  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1910  // "avx512.mask.storeu." or "avx512.mask.store."
1911  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1913  CI->getArgOperand(2), Aligned);
1914 
1915  // Remove intrinsic.
1916  CI->eraseFromParent();
1917  return;
1918  }
1919 
1920  Value *Rep;
1921  // Upgrade packed integer vector compare intrinsics to compare instructions.
1922  if (IsX86 && (Name.startswith("sse2.pcmp") ||
1923  Name.startswith("avx2.pcmp"))) {
1924  // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1925  bool CmpEq = Name[9] == 'e';
1926  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1927  CI->getArgOperand(0), CI->getArgOperand(1));
1928  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1929  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1930  Type *ExtTy = Type::getInt32Ty(C);
1931  if (CI->getOperand(0)->getType()->isIntegerTy(8))
1932  ExtTy = Type::getInt64Ty(C);
1933  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1934  ExtTy->getPrimitiveSizeInBits();
1935  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1936  Rep = Builder.CreateVectorSplat(NumElts, Rep);
1937  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1938  Name == "sse2.sqrt.sd")) {
1939  Value *Vec = CI->getArgOperand(0);
1940  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1941  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1942  Intrinsic::sqrt, Elt0->getType());
1943  Elt0 = Builder.CreateCall(Intr, Elt0);
1944  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1945  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1946  Name.startswith("sse2.sqrt.p") ||
1947  Name.startswith("sse.sqrt.p"))) {
1948  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1949  Intrinsic::sqrt,
1950  CI->getType()),
1951  {CI->getArgOperand(0)});
1952  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1953  if (CI->getNumArgOperands() == 4 &&
1954  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1955  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1956  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1957  : Intrinsic::x86_avx512_sqrt_pd_512;
1958 
1959  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1960  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1961  IID), Args);
1962  } else {
1963  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1964  Intrinsic::sqrt,
1965  CI->getType()),
1966  {CI->getArgOperand(0)});
1967  }
1968  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1969  CI->getArgOperand(1));
1970  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1971  Name.startswith("avx512.ptestnm"))) {
1972  Value *Op0 = CI->getArgOperand(0);
1973  Value *Op1 = CI->getArgOperand(1);
1974  Value *Mask = CI->getArgOperand(2);
1975  Rep = Builder.CreateAnd(Op0, Op1);
1976  llvm::Type *Ty = Op0->getType();
1977  Value *Zero = llvm::Constant::getNullValue(Ty);
1978  ICmpInst::Predicate Pred =
1979  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1980  Rep = Builder.CreateICmp(Pred, Rep, Zero);
1981  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1982  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1983  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
1984  ->getNumElements();
1985  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1986  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1987  CI->getArgOperand(1));
1988  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1989  unsigned NumElts = CI->getType()->getScalarSizeInBits();
1990  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1991  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1992  int Indices[64];
1993  for (unsigned i = 0; i != NumElts; ++i)
1994  Indices[i] = i;
1995 
1996  // First extract half of each vector. This gives better codegen than
1997  // doing it in a single shuffle.
1998  LHS = Builder.CreateShuffleVector(LHS, LHS,
1999  makeArrayRef(Indices, NumElts / 2));
2000  RHS = Builder.CreateShuffleVector(RHS, RHS,
2001  makeArrayRef(Indices, NumElts / 2));
2002  // Concat the vectors.
2003  // NOTE: Operands have to be swapped to match intrinsic definition.
2004  Rep = Builder.CreateShuffleVector(RHS, LHS,
2005  makeArrayRef(Indices, NumElts));
2006  Rep = Builder.CreateBitCast(Rep, CI->getType());
2007  } else if (IsX86 && Name == "avx512.kand.w") {
2008  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2009  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2010  Rep = Builder.CreateAnd(LHS, RHS);
2011  Rep = Builder.CreateBitCast(Rep, CI->getType());
2012  } else if (IsX86 && Name == "avx512.kandn.w") {
2013  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2014  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2015  LHS = Builder.CreateNot(LHS);
2016  Rep = Builder.CreateAnd(LHS, RHS);
2017  Rep = Builder.CreateBitCast(Rep, CI->getType());
2018  } else if (IsX86 && Name == "avx512.kor.w") {
2019  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2020  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2021  Rep = Builder.CreateOr(LHS, RHS);
2022  Rep = Builder.CreateBitCast(Rep, CI->getType());
2023  } else if (IsX86 && Name == "avx512.kxor.w") {
2024  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2025  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2026  Rep = Builder.CreateXor(LHS, RHS);
2027  Rep = Builder.CreateBitCast(Rep, CI->getType());
2028  } else if (IsX86 && Name == "avx512.kxnor.w") {
2029  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2030  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2031  LHS = Builder.CreateNot(LHS);
2032  Rep = Builder.CreateXor(LHS, RHS);
2033  Rep = Builder.CreateBitCast(Rep, CI->getType());
2034  } else if (IsX86 && Name == "avx512.knot.w") {
2035  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2036  Rep = Builder.CreateNot(Rep);
2037  Rep = Builder.CreateBitCast(Rep, CI->getType());
2038  } else if (IsX86 &&
2039  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2040  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2041  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2042  Rep = Builder.CreateOr(LHS, RHS);
2043  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2044  Value *C;
2045  if (Name[14] == 'c')
2046  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2047  else
2048  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2049  Rep = Builder.CreateICmpEQ(Rep, C);
2050  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2051  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2052  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2053  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2054  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2055  Type *I32Ty = Type::getInt32Ty(C);
2056  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2057  ConstantInt::get(I32Ty, 0));
2058  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2059  ConstantInt::get(I32Ty, 0));
2060  Value *EltOp;
2061  if (Name.contains(".add."))
2062  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2063  else if (Name.contains(".sub."))
2064  EltOp = Builder.CreateFSub(Elt0, Elt1);
2065  else if (Name.contains(".mul."))
2066  EltOp = Builder.CreateFMul(Elt0, Elt1);
2067  else
2068  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2069  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2070  ConstantInt::get(I32Ty, 0));
2071  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2072  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2073  bool CmpEq = Name[16] == 'e';
2074  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2075  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2076  Type *OpTy = CI->getArgOperand(0)->getType();
2077  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2078  Intrinsic::ID IID;
2079  switch (VecWidth) {
2080  default: llvm_unreachable("Unexpected intrinsic");
2081  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2082  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2083  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2084  }
2085 
2086  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2087  { CI->getOperand(0), CI->getArgOperand(1) });
2088  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2089  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2090  Type *OpTy = CI->getArgOperand(0)->getType();
2091  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2092  unsigned EltWidth = OpTy->getScalarSizeInBits();
2093  Intrinsic::ID IID;
2094  if (VecWidth == 128 && EltWidth == 32)
2095  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2096  else if (VecWidth == 256 && EltWidth == 32)
2097  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2098  else if (VecWidth == 512 && EltWidth == 32)
2099  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2100  else if (VecWidth == 128 && EltWidth == 64)
2101  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2102  else if (VecWidth == 256 && EltWidth == 64)
2103  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2104  else if (VecWidth == 512 && EltWidth == 64)
2105  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2106  else
2107  llvm_unreachable("Unexpected intrinsic");
2108 
2109  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2110  { CI->getOperand(0), CI->getArgOperand(1) });
2111  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2112  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2114  CI->arg_operands().end());
2115  Type *OpTy = Args[0]->getType();
2116  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2117  unsigned EltWidth = OpTy->getScalarSizeInBits();
2118  Intrinsic::ID IID;
2119  if (VecWidth == 128 && EltWidth == 32)
2120  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2121  else if (VecWidth == 256 && EltWidth == 32)
2122  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2123  else if (VecWidth == 512 && EltWidth == 32)
2124  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2125  else if (VecWidth == 128 && EltWidth == 64)
2126  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2127  else if (VecWidth == 256 && EltWidth == 64)
2128  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2129  else if (VecWidth == 512 && EltWidth == 64)
2130  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2131  else
2132  llvm_unreachable("Unexpected intrinsic");
2133 
2135  if (VecWidth == 512)
2136  std::swap(Mask, Args.back());
2137  Args.push_back(Mask);
2138 
2139  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2140  Args);
2141  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2142  // Integer compare intrinsics.
2143  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2144  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2145  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2146  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2147  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2148  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2149  Name.startswith("avx512.cvtw2mask.") ||
2150  Name.startswith("avx512.cvtd2mask.") ||
2151  Name.startswith("avx512.cvtq2mask."))) {
2152  Value *Op = CI->getArgOperand(0);
2153  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2154  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2155  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2156  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2157  Name == "ssse3.pabs.w.128" ||
2158  Name == "ssse3.pabs.d.128" ||
2159  Name.startswith("avx2.pabs") ||
2160  Name.startswith("avx512.mask.pabs"))) {
2161  Rep = upgradeAbs(Builder, *CI);
2162  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2163  Name == "sse2.pmaxs.w" ||
2164  Name == "sse41.pmaxsd" ||
2165  Name.startswith("avx2.pmaxs") ||
2166  Name.startswith("avx512.mask.pmaxs"))) {
2168  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2169  Name == "sse41.pmaxuw" ||
2170  Name == "sse41.pmaxud" ||
2171  Name.startswith("avx2.pmaxu") ||
2172  Name.startswith("avx512.mask.pmaxu"))) {
2174  } else if (IsX86 && (Name == "sse41.pminsb" ||
2175  Name == "sse2.pmins.w" ||
2176  Name == "sse41.pminsd" ||
2177  Name.startswith("avx2.pmins") ||
2178  Name.startswith("avx512.mask.pmins"))) {
2180  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2181  Name == "sse41.pminuw" ||
2182  Name == "sse41.pminud" ||
2183  Name.startswith("avx2.pminu") ||
2184  Name.startswith("avx512.mask.pminu"))) {
2186  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2187  Name == "avx2.pmulu.dq" ||
2188  Name == "avx512.pmulu.dq.512" ||
2189  Name.startswith("avx512.mask.pmulu.dq."))) {
2190  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2191  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2192  Name == "avx2.pmul.dq" ||
2193  Name == "avx512.pmul.dq.512" ||
2194  Name.startswith("avx512.mask.pmul.dq."))) {
2195  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2196  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2197  Name == "sse2.cvtsi2sd" ||
2198  Name == "sse.cvtsi642ss" ||
2199  Name == "sse2.cvtsi642sd")) {
2200  Rep = Builder.CreateSIToFP(
2201  CI->getArgOperand(1),
2202  cast<VectorType>(CI->getType())->getElementType());
2203  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2204  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2205  Rep = Builder.CreateUIToFP(
2206  CI->getArgOperand(1),
2207  cast<VectorType>(CI->getType())->getElementType());
2208  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2209  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2210  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2211  Rep = Builder.CreateFPExt(
2212  Rep, cast<VectorType>(CI->getType())->getElementType());
2213  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2214  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2215  Name == "sse2.cvtdq2ps" ||
2216  Name == "avx.cvtdq2.pd.256" ||
2217  Name == "avx.cvtdq2.ps.256" ||
2218  Name.startswith("avx512.mask.cvtdq2pd.") ||
2219  Name.startswith("avx512.mask.cvtudq2pd.") ||
2220  Name.startswith("avx512.mask.cvtdq2ps.") ||
2221  Name.startswith("avx512.mask.cvtudq2ps.") ||
2222  Name.startswith("avx512.mask.cvtqq2pd.") ||
2223  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2224  Name == "avx512.mask.cvtqq2ps.256" ||
2225  Name == "avx512.mask.cvtqq2ps.512" ||
2226  Name == "avx512.mask.cvtuqq2ps.256" ||
2227  Name == "avx512.mask.cvtuqq2ps.512" ||
2228  Name == "sse2.cvtps2pd" ||
2229  Name == "avx.cvt.ps2.pd.256" ||
2230  Name == "avx512.mask.cvtps2pd.128" ||
2231  Name == "avx512.mask.cvtps2pd.256")) {
2232  auto *DstTy = cast<FixedVectorType>(CI->getType());
2233  Rep = CI->getArgOperand(0);
2234  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2235 
2236  unsigned NumDstElts = DstTy->getNumElements();
2237  if (NumDstElts < SrcTy->getNumElements()) {
2238  assert(NumDstElts == 2 && "Unexpected vector size");
2239  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2240  }
2241 
2242  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2243  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2244  if (IsPS2PD)
2245  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2246  else if (CI->getNumArgOperands() == 4 &&
2247  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2248  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2249  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2250  : Intrinsic::x86_avx512_sitofp_round;
2252  { DstTy, SrcTy });
2253  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2254  } else {
2255  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2256  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2257  }
2258 
2259  if (CI->getNumArgOperands() >= 3)
2260  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2261  CI->getArgOperand(1));
2262  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2263  Name.startswith("vcvtph2ps."))) {
2264  auto *DstTy = cast<FixedVectorType>(CI->getType());
2265  Rep = CI->getArgOperand(0);
2266  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2267  unsigned NumDstElts = DstTy->getNumElements();
2268  if (NumDstElts != SrcTy->getNumElements()) {
2269  assert(NumDstElts == 4 && "Unexpected vector size");
2270  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2271  }
2272  Rep = Builder.CreateBitCast(
2273  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2274  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2275  if (CI->getNumArgOperands() >= 3)
2276  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2277  CI->getArgOperand(1));
2278  } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2279  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2280  CI->getArgOperand(1), CI->getArgOperand(2),
2281  /*Aligned*/false);
2282  } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2283  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2284  CI->getArgOperand(1),CI->getArgOperand(2),
2285  /*Aligned*/true);
2286  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2287  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2288  Type *PtrTy = ResultTy->getElementType();
2289 
2290  // Cast the pointer to element type.
2291  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2293 
2294  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2295  ResultTy->getNumElements());
2296 
2297  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2298  Intrinsic::masked_expandload,
2299  ResultTy);
2300  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2301  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2302  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2303  Type *PtrTy = ResultTy->getElementType();
2304 
2305  // Cast the pointer to element type.
2306  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2308 
2309  Value *MaskVec =
2311  cast<FixedVectorType>(ResultTy)->getNumElements());
2312 
2313  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2314  Intrinsic::masked_compressstore,
2315  ResultTy);
2316  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2317  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2318  Name.startswith("avx512.mask.expand."))) {
2319  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2320 
2321  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2322  ResultTy->getNumElements());
2323 
2324  bool IsCompress = Name[12] == 'c';
2325  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2326  : Intrinsic::x86_avx512_mask_expand;
2327  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2328  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2329  MaskVec });
2330  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2331  bool IsSigned;
2332  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2333  Name.endswith("uq"))
2334  IsSigned = false;
2335  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2336  Name.endswith("q"))
2337  IsSigned = true;
2338  else
2339  llvm_unreachable("Unknown suffix");
2340 
2341  unsigned Imm;
2342  if (CI->getNumArgOperands() == 3) {
2343  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2344  } else {
2345  Name = Name.substr(9); // strip off "xop.vpcom"
2346  if (Name.startswith("lt"))
2347  Imm = 0;
2348  else if (Name.startswith("le"))
2349  Imm = 1;
2350  else if (Name.startswith("gt"))
2351  Imm = 2;
2352  else if (Name.startswith("ge"))
2353  Imm = 3;
2354  else if (Name.startswith("eq"))
2355  Imm = 4;
2356  else if (Name.startswith("ne"))
2357  Imm = 5;
2358  else if (Name.startswith("false"))
2359  Imm = 6;
2360  else if (Name.startswith("true"))
2361  Imm = 7;
2362  else
2363  llvm_unreachable("Unknown condition");
2364  }
2365 
2366  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2367  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2368  Value *Sel = CI->getArgOperand(2);
2369  Value *NotSel = Builder.CreateNot(Sel);
2370  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2371  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2372  Rep = Builder.CreateOr(Sel0, Sel1);
2373  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2374  Name.startswith("avx512.prol") ||
2375  Name.startswith("avx512.mask.prol"))) {
2376  Rep = upgradeX86Rotate(Builder, *CI, false);
2377  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2378  Name.startswith("avx512.mask.pror"))) {
2379  Rep = upgradeX86Rotate(Builder, *CI, true);
2380  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2381  Name.startswith("avx512.mask.vpshld") ||
2382  Name.startswith("avx512.maskz.vpshld"))) {
2383  bool ZeroMask = Name[11] == 'z';
2384  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2385  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2386  Name.startswith("avx512.mask.vpshrd") ||
2387  Name.startswith("avx512.maskz.vpshrd"))) {
2388  bool ZeroMask = Name[11] == 'z';
2389  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2390  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2391  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2392  Intrinsic::x86_sse42_crc32_32_8);
2393  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2394  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2395  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2396  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2397  Name.startswith("avx512.vbroadcast.s"))) {
2398  // Replace broadcasts with a series of insertelements.
2399  auto *VecTy = cast<FixedVectorType>(CI->getType());
2400  Type *EltTy = VecTy->getElementType();
2401  unsigned EltNum = VecTy->getNumElements();
2402  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2403  EltTy->getPointerTo());
2404  Value *Load = Builder.CreateLoad(EltTy, Cast);
2405  Type *I32Ty = Type::getInt32Ty(C);
2406  Rep = UndefValue::get(VecTy);
2407  for (unsigned I = 0; I < EltNum; ++I)
2408  Rep = Builder.CreateInsertElement(Rep, Load,
2409  ConstantInt::get(I32Ty, I));
2410  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2411  Name.startswith("sse41.pmovzx") ||
2412  Name.startswith("avx2.pmovsx") ||
2413  Name.startswith("avx2.pmovzx") ||
2414  Name.startswith("avx512.mask.pmovsx") ||
2415  Name.startswith("avx512.mask.pmovzx"))) {
2416  auto *DstTy = cast<FixedVectorType>(CI->getType());
2417  unsigned NumDstElts = DstTy->getNumElements();
2418 
2419  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2420  SmallVector<int, 8> ShuffleMask(NumDstElts);
2421  for (unsigned i = 0; i != NumDstElts; ++i)
2422  ShuffleMask[i] = i;
2423 
2424  Value *SV =
2425  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2426 
2427  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2428  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2429  : Builder.CreateZExt(SV, DstTy);
2430  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2431  if (CI->getNumArgOperands() == 3)
2432  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2433  CI->getArgOperand(1));
2434  } else if (Name == "avx512.mask.pmov.qd.256" ||
2435  Name == "avx512.mask.pmov.qd.512" ||
2436  Name == "avx512.mask.pmov.wb.256" ||
2437  Name == "avx512.mask.pmov.wb.512") {
2438  Type *Ty = CI->getArgOperand(1)->getType();
2439  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2440  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2441  CI->getArgOperand(1));
2442  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2443  Name == "avx2.vbroadcasti128")) {
2444  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2445  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2446  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2447  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2448  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2450  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2451  if (NumSrcElts == 2)
2452  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2453  else
2454  Rep = Builder.CreateShuffleVector(
2455  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2456  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2457  Name.startswith("avx512.mask.shuf.f"))) {
2458  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2459  Type *VT = CI->getType();
2460  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2461  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2462  unsigned ControlBitsMask = NumLanes - 1;
2463  unsigned NumControlBits = NumLanes / 2;
2464  SmallVector<int, 8> ShuffleMask(0);
2465 
2466  for (unsigned l = 0; l != NumLanes; ++l) {
2467  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2468  // We actually need the other source.
2469  if (l >= NumLanes / 2)
2470  LaneMask += NumLanes;
2471  for (unsigned i = 0; i != NumElementsInLane; ++i)
2472  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2473  }
2474  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2475  CI->getArgOperand(1), ShuffleMask);
2476  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2477  CI->getArgOperand(3));
2478  }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2479  Name.startswith("avx512.mask.broadcasti"))) {
2480  unsigned NumSrcElts =
2481  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2482  ->getNumElements();
2483  unsigned NumDstElts =
2484  cast<FixedVectorType>(CI->getType())->getNumElements();
2485 
2486  SmallVector<int, 8> ShuffleMask(NumDstElts);
2487  for (unsigned i = 0; i != NumDstElts; ++i)
2488  ShuffleMask[i] = i % NumSrcElts;
2489 
2490  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2491  CI->getArgOperand(0),
2492  ShuffleMask);
2493  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2494  CI->getArgOperand(1));
2495  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2496  Name.startswith("avx2.vbroadcast") ||
2497  Name.startswith("avx512.pbroadcast") ||
2498  Name.startswith("avx512.mask.broadcast.s"))) {
2499  // Replace vp?broadcasts with a vector shuffle.
2500  Value *Op = CI->getArgOperand(0);
2501  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2502  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2505  Rep = Builder.CreateShuffleVector(Op, M);
2506 
2507  if (CI->getNumArgOperands() == 3)
2508  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2509  CI->getArgOperand(1));
2510  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2511  Name.startswith("avx2.padds.") ||
2512  Name.startswith("avx512.padds.") ||
2513  Name.startswith("avx512.mask.padds."))) {
2514  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2515  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2516  Name.startswith("avx2.psubs.") ||
2517  Name.startswith("avx512.psubs.") ||
2518  Name.startswith("avx512.mask.psubs."))) {
2519  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2520  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2521  Name.startswith("avx2.paddus.") ||
2522  Name.startswith("avx512.mask.paddus."))) {
2523  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2524  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2525  Name.startswith("avx2.psubus.") ||
2526  Name.startswith("avx512.mask.psubus."))) {
2527  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2528  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2530  CI->getArgOperand(1),
2531  CI->getArgOperand(2),
2532  CI->getArgOperand(3),
2533  CI->getArgOperand(4),
2534  false);
2535  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2537  CI->getArgOperand(1),
2538  CI->getArgOperand(2),
2539  CI->getArgOperand(3),
2540  CI->getArgOperand(4),
2541  true);
2542  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2543  Name == "avx2.psll.dq")) {
2544  // 128/256-bit shift left specified in bits.
2545  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2547  Shift / 8); // Shift is in bits.
2548  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2549  Name == "avx2.psrl.dq")) {
2550  // 128/256-bit shift right specified in bits.
2551  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2553  Shift / 8); // Shift is in bits.
2554  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2555  Name == "avx2.psll.dq.bs" ||
2556  Name == "avx512.psll.dq.512")) {
2557  // 128/256/512-bit shift left specified in bytes.
2558  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2560  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2561  Name == "avx2.psrl.dq.bs" ||
2562  Name == "avx512.psrl.dq.512")) {
2563  // 128/256/512-bit shift right specified in bytes.
2564  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2566  } else if (IsX86 && (Name == "sse41.pblendw" ||
2567  Name.startswith("sse41.blendp") ||
2568  Name.startswith("avx.blend.p") ||
2569  Name == "avx2.pblendw" ||
2570  Name.startswith("avx2.pblendd."))) {
2571  Value *Op0 = CI->getArgOperand(0);
2572  Value *Op1 = CI->getArgOperand(1);
2573  unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2574  auto *VecTy = cast<FixedVectorType>(CI->getType());
2575  unsigned NumElts = VecTy->getNumElements();
2576 
2577  SmallVector<int, 16> Idxs(NumElts);
2578  for (unsigned i = 0; i != NumElts; ++i)
2579  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2580 
2581  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2582  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2583  Name == "avx2.vinserti128" ||
2584  Name.startswith("avx512.mask.insert"))) {
2585  Value *Op0 = CI->getArgOperand(0);
2586  Value *Op1 = CI->getArgOperand(1);
2587  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2588  unsigned DstNumElts =
2589  cast<FixedVectorType>(CI->getType())->getNumElements();
2590  unsigned SrcNumElts =
2591  cast<FixedVectorType>(Op1->getType())->getNumElements();
2592  unsigned Scale = DstNumElts / SrcNumElts;
2593 
2594  // Mask off the high bits of the immediate value; hardware ignores those.
2595  Imm = Imm % Scale;
2596 
2597  // Extend the second operand into a vector the size of the destination.
2598  SmallVector<int, 8> Idxs(DstNumElts);
2599  for (unsigned i = 0; i != SrcNumElts; ++i)
2600  Idxs[i] = i;
2601  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2602  Idxs[i] = SrcNumElts;
2603  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2604 
2605  // Insert the second operand into the first operand.
2606 
2607  // Note that there is no guarantee that instruction lowering will actually
2608  // produce a vinsertf128 instruction for the created shuffles. In
2609  // particular, the 0 immediate case involves no lane changes, so it can
2610  // be handled as a blend.
2611 
2612  // Example of shuffle mask for 32-bit elements:
2613  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2614  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2615 
2616  // First fill with identity mask.
2617  for (unsigned i = 0; i != DstNumElts; ++i)
2618  Idxs[i] = i;
2619  // Then replace the elements where we need to insert.
2620  for (unsigned i = 0; i != SrcNumElts; ++i)
2621  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2622  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2623 
2624  // If the intrinsic has a mask operand, handle that.
2625  if (CI->getNumArgOperands() == 5)
2626  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2627  CI->getArgOperand(3));
2628  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2629  Name == "avx2.vextracti128" ||
2630  Name.startswith("avx512.mask.vextract"))) {
2631  Value *Op0 = CI->getArgOperand(0);
2632  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2633  unsigned DstNumElts =
2634  cast<FixedVectorType>(CI->getType())->getNumElements();
2635  unsigned SrcNumElts =
2636  cast<FixedVectorType>(Op0->getType())->getNumElements();
2637  unsigned Scale = SrcNumElts / DstNumElts;
2638 
2639  // Mask off the high bits of the immediate value; hardware ignores those.
2640  Imm = Imm % Scale;
2641 
2642  // Get indexes for the subvector of the input vector.
2643  SmallVector<int, 8> Idxs(DstNumElts);
2644  for (unsigned i = 0; i != DstNumElts; ++i) {
2645  Idxs[i] = i + (Imm * DstNumElts);
2646  }
2647  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2648 
2649  // If the intrinsic has a mask operand, handle that.
2650  if (CI->getNumArgOperands() == 4)
2651  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2652  CI->getArgOperand(2));
2653  } else if (!IsX86 && Name == "stackprotectorcheck") {
2654  Rep = nullptr;
2655  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2656  Name.startswith("avx512.mask.perm.di."))) {
2657  Value *Op0 = CI->getArgOperand(0);
2658  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2659  auto *VecTy = cast<FixedVectorType>(CI->getType());
2660  unsigned NumElts = VecTy->getNumElements();
2661 
2662  SmallVector<int, 8> Idxs(NumElts);
2663  for (unsigned i = 0; i != NumElts; ++i)
2664  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2665 
2666  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2667 
2668  if (CI->getNumArgOperands() == 4)
2669  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2670  CI->getArgOperand(2));
2671  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2672  Name == "avx2.vperm2i128")) {
2673  // The immediate permute control byte looks like this:
2674  // [1:0] - select 128 bits from sources for low half of destination
2675  // [2] - ignore
2676  // [3] - zero low half of destination
2677  // [5:4] - select 128 bits from sources for high half of destination
2678  // [6] - ignore
2679  // [7] - zero high half of destination
2680 
2681  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2682 
2683  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2684  unsigned HalfSize = NumElts / 2;
2685  SmallVector<int, 8> ShuffleMask(NumElts);
2686 
2687  // Determine which operand(s) are actually in use for this instruction.
2688  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2689  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2690 
2691  // If needed, replace operands based on zero mask.
2692  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2693  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2694 
2695  // Permute low half of result.
2696  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2697  for (unsigned i = 0; i < HalfSize; ++i)
2698  ShuffleMask[i] = StartIndex + i;
2699 
2700  // Permute high half of result.
2701  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2702  for (unsigned i = 0; i < HalfSize; ++i)
2703  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2704 
2705  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2706 
2707  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2708  Name == "sse2.pshuf.d" ||
2709  Name.startswith("avx512.mask.vpermil.p") ||
2710  Name.startswith("avx512.mask.pshuf.d."))) {
2711  Value *Op0 = CI->getArgOperand(0);
2712  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2713  auto *VecTy = cast<FixedVectorType>(CI->getType());
2714  unsigned NumElts = VecTy->getNumElements();
2715  // Calculate the size of each index in the immediate.
2716  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2717  unsigned IdxMask = ((1 << IdxSize) - 1);
2718 
2719  SmallVector<int, 8> Idxs(NumElts);
2720  // Lookup the bits for this element, wrapping around the immediate every
2721  // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2722  // to offset by the first index of each group.
2723  for (unsigned i = 0; i != NumElts; ++i)
2724  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2725 
2726  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2727 
2728  if (CI->getNumArgOperands() == 4)
2729  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2730  CI->getArgOperand(2));
2731  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2732  Name.startswith("avx512.mask.pshufl.w."))) {
2733  Value *Op0 = CI->getArgOperand(0);
2734  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2735  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2736 
2737  SmallVector<int, 16> Idxs(NumElts);
2738  for (unsigned l = 0; l != NumElts; l += 8) {
2739  for (unsigned i = 0; i != 4; ++i)
2740  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2741  for (unsigned i = 4; i != 8; ++i)
2742  Idxs[i + l] = i + l;
2743  }
2744 
2745  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2746 
2747  if (CI->getNumArgOperands() == 4)
2748  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2749  CI->getArgOperand(2));
2750  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2751  Name.startswith("avx512.mask.pshufh.w."))) {
2752  Value *Op0 = CI->getArgOperand(0);
2753  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2754  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2755 
2756  SmallVector<int, 16> Idxs(NumElts);
2757  for (unsigned l = 0; l != NumElts; l += 8) {
2758  for (unsigned i = 0; i != 4; ++i)
2759  Idxs[i + l] = i + l;
2760  for (unsigned i = 0; i != 4; ++i)
2761  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2762  }
2763 
2764  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2765 
2766  if (CI->getNumArgOperands() == 4)
2767  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2768  CI->getArgOperand(2));
2769  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2770  Value *Op0 = CI->getArgOperand(0);
2771  Value *Op1 = CI->getArgOperand(1);
2772  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2773  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2774 
2775  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2776  unsigned HalfLaneElts = NumLaneElts / 2;
2777 
2778  SmallVector<int, 16> Idxs(NumElts);
2779  for (unsigned i = 0; i != NumElts; ++i) {
2780  // Base index is the starting element of the lane.
2781  Idxs[i] = i - (i % NumLaneElts);
2782  // If we are half way through the lane switch to the other source.
2783  if ((i % NumLaneElts) >= HalfLaneElts)
2784  Idxs[i] += NumElts;
2785  // Now select the specific element by adding HalfLaneElts bits from
2786  // the immediate, wrapping around the immediate every 8 bits.
2787  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2788  }
2789 
2790  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2791 
2792  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2793  CI->getArgOperand(3));
2794  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2795  Name.startswith("avx512.mask.movshdup") ||
2796  Name.startswith("avx512.mask.movsldup"))) {
2797  Value *Op0 = CI->getArgOperand(0);
2798  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2799  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2800 
2801  unsigned Offset = 0;
2802  if (Name.startswith("avx512.mask.movshdup."))
2803  Offset = 1;
2804 
2805  SmallVector<int, 16> Idxs(NumElts);
2806  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2807  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2808  Idxs[i + l + 0] = i + l + Offset;
2809  Idxs[i + l + 1] = i + l + Offset;
2810  }
2811 
2812  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2813 
2814  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2815  CI->getArgOperand(1));
2816  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2817  Name.startswith("avx512.mask.unpckl."))) {
2818  Value *Op0 = CI->getArgOperand(0);
2819  Value *Op1 = CI->getArgOperand(1);
2820  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2821  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2822 
2823  SmallVector<int, 64> Idxs(NumElts);
2824  for (int l = 0; l != NumElts; l += NumLaneElts)
2825  for (int i = 0; i != NumLaneElts; ++i)
2826  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2827 
2828  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2829 
2830  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2831  CI->getArgOperand(2));
2832  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2833  Name.startswith("avx512.mask.unpckh."))) {
2834  Value *Op0 = CI->getArgOperand(0);
2835  Value *Op1 = CI->getArgOperand(1);
2836  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2837  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2838 
2839  SmallVector<int, 64> Idxs(NumElts);
2840  for (int l = 0; l != NumElts; l += NumLaneElts)
2841  for (int i = 0; i != NumLaneElts; ++i)
2842  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2843 
2844  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2845 
2846  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2847  CI->getArgOperand(2));
2848  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2849  Name.startswith("avx512.mask.pand."))) {
2850  VectorType *FTy = cast<VectorType>(CI->getType());
2851  VectorType *ITy = VectorType::getInteger(FTy);
2852  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2853  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2854  Rep = Builder.CreateBitCast(Rep, FTy);
2855  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2856  CI->getArgOperand(2));
2857  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2858  Name.startswith("avx512.mask.pandn."))) {
2859  VectorType *FTy = cast<VectorType>(CI->getType());
2860  VectorType *ITy = VectorType::getInteger(FTy);
2861  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2862  Rep = Builder.CreateAnd(Rep,
2863  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2864  Rep = Builder.CreateBitCast(Rep, FTy);
2865  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2866  CI->getArgOperand(2));
2867  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2868  Name.startswith("avx512.mask.por."))) {
2869  VectorType *FTy = cast<VectorType>(CI->getType());
2870  VectorType *ITy = VectorType::getInteger(FTy);
2871  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2872  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2873  Rep = Builder.CreateBitCast(Rep, FTy);
2874  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2875  CI->getArgOperand(2));
2876  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2877  Name.startswith("avx512.mask.pxor."))) {
2878  VectorType *FTy = cast<VectorType>(CI->getType());
2879  VectorType *ITy = VectorType::getInteger(FTy);
2880  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2881  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2882  Rep = Builder.CreateBitCast(Rep, FTy);
2883  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2884  CI->getArgOperand(2));
2885  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2886  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2887  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2888  CI->getArgOperand(2));
2889  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2890  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2891  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2892  CI->getArgOperand(2));
2893  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2894  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2895  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2896  CI->getArgOperand(2));
2897  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2898  if (Name.endswith(".512")) {
2899  Intrinsic::ID IID;
2900  if (Name[17] == 's')
2901  IID = Intrinsic::x86_avx512_add_ps_512;
2902  else
2903  IID = Intrinsic::x86_avx512_add_pd_512;
2904 
2905  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2906  { CI->getArgOperand(0), CI->getArgOperand(1),
2907  CI->getArgOperand(4) });
2908  } else {
2909  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2910  }
2911  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2912  CI->getArgOperand(2));
2913  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2914  if (Name.endswith(".512")) {
2915  Intrinsic::ID IID;
2916  if (Name[17] == 's')
2917  IID = Intrinsic::x86_avx512_div_ps_512;
2918  else
2919  IID = Intrinsic::x86_avx512_div_pd_512;
2920 
2921  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2922  { CI->getArgOperand(0), CI->getArgOperand(1),
2923  CI->getArgOperand(4) });
2924  } else {
2925  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2926  }
2927  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2928  CI->getArgOperand(2));
2929  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2930  if (Name.endswith(".512")) {
2931  Intrinsic::ID IID;
2932  if (Name[17] == 's')
2933  IID = Intrinsic::x86_avx512_mul_ps_512;
2934  else
2935  IID = Intrinsic::x86_avx512_mul_pd_512;
2936 
2937  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2938  { CI->getArgOperand(0), CI->getArgOperand(1),
2939  CI->getArgOperand(4) });
2940  } else {
2941  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2942  }
2943  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2944  CI->getArgOperand(2));
2945  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2946  if (Name.endswith(".512")) {
2947  Intrinsic::ID IID;
2948  if (Name[17] == 's')
2949  IID = Intrinsic::x86_avx512_sub_ps_512;
2950  else
2951  IID = Intrinsic::x86_avx512_sub_pd_512;
2952 
2953  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2954  { CI->getArgOperand(0), CI->getArgOperand(1),
2955  CI->getArgOperand(4) });
2956  } else {
2957  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2958  }
2959  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2960  CI->getArgOperand(2));
2961  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2962  Name.startswith("avx512.mask.min.p")) &&
2963  Name.drop_front(18) == ".512") {
2964  bool IsDouble = Name[17] == 'd';
2965  bool IsMin = Name[13] == 'i';
2966  static const Intrinsic::ID MinMaxTbl[2][2] = {
2967  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2968  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2969  };
2970  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2971 
2972  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2973  { CI->getArgOperand(0), CI->getArgOperand(1),
2974  CI->getArgOperand(4) });
2975  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2976  CI->getArgOperand(2));
2977  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2978  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2979  Intrinsic::ctlz,
2980  CI->getType()),
2981  { CI->getArgOperand(0), Builder.getInt1(false) });
2982  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2983  CI->getArgOperand(1));
2984  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2985  bool IsImmediate = Name[16] == 'i' ||
2986  (Name.size() > 18 && Name[18] == 'i');
2987  bool IsVariable = Name[16] == 'v';
2988  char Size = Name[16] == '.' ? Name[17] :
2989  Name[17] == '.' ? Name[18] :
2990  Name[18] == '.' ? Name[19] :
2991  Name[20];
2992 
2993  Intrinsic::ID IID;
2994  if (IsVariable && Name[17] != '.') {
2995  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2996  IID = Intrinsic::x86_avx2_psllv_q;
2997  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2998  IID = Intrinsic::x86_avx2_psllv_q_256;
2999  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3000  IID = Intrinsic::x86_avx2_psllv_d;
3001  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3002  IID = Intrinsic::x86_avx2_psllv_d_256;
3003  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3004  IID = Intrinsic::x86_avx512_psllv_w_128;
3005  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3006  IID = Intrinsic::x86_avx512_psllv_w_256;
3007  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3008  IID = Intrinsic::x86_avx512_psllv_w_512;
3009  else
3010  llvm_unreachable("Unexpected size");
3011  } else if (Name.endswith(".128")) {
3012  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3013  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3014  : Intrinsic::x86_sse2_psll_d;
3015  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3016  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3017  : Intrinsic::x86_sse2_psll_q;
3018  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3019  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3020  : Intrinsic::x86_sse2_psll_w;
3021  else
3022  llvm_unreachable("Unexpected size");
3023  } else if (Name.endswith(".256")) {
3024  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3025  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3026  : Intrinsic::x86_avx2_psll_d;
3027  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3028  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3029  : Intrinsic::x86_avx2_psll_q;
3030  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3031  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3032  : Intrinsic::x86_avx2_psll_w;
3033  else
3034  llvm_unreachable("Unexpected size");
3035  } else {
3036  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3037  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3038  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3039  Intrinsic::x86_avx512_psll_d_512;
3040  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3041  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3042  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3043  Intrinsic::x86_avx512_psll_q_512;
3044  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3045  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3046  : Intrinsic::x86_avx512_psll_w_512;
3047  else
3048  llvm_unreachable("Unexpected size");
3049  }
3050 
3051  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3052  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3053  bool IsImmediate = Name[16] == 'i' ||
3054  (Name.size() > 18 && Name[18] == 'i');
3055  bool IsVariable = Name[16] == 'v';
3056  char Size = Name[16] == '.' ? Name[17] :
3057  Name[17] == '.' ? Name[18] :
3058  Name[18] == '.' ? Name[19] :
3059  Name[20];
3060 
3061  Intrinsic::ID IID;
3062  if (IsVariable && Name[17] != '.') {
3063  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3064  IID = Intrinsic::x86_avx2_psrlv_q;
3065  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3066  IID = Intrinsic::x86_avx2_psrlv_q_256;
3067  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3068  IID = Intrinsic::x86_avx2_psrlv_d;
3069  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3070  IID = Intrinsic::x86_avx2_psrlv_d_256;
3071  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3072  IID = Intrinsic::x86_avx512_psrlv_w_128;
3073  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3074  IID = Intrinsic::x86_avx512_psrlv_w_256;
3075  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3076  IID = Intrinsic::x86_avx512_psrlv_w_512;
3077  else
3078  llvm_unreachable("Unexpected size");
3079  } else if (Name.endswith(".128")) {
3080  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3081  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3082  : Intrinsic::x86_sse2_psrl_d;
3083  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3084  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3085  : Intrinsic::x86_sse2_psrl_q;
3086  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3087  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3088  : Intrinsic::x86_sse2_psrl_w;
3089  else
3090  llvm_unreachable("Unexpected size");
3091  } else if (Name.endswith(".256")) {
3092  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3093  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3094  : Intrinsic::x86_avx2_psrl_d;
3095  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3096  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3097  : Intrinsic::x86_avx2_psrl_q;
3098  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3099  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3100  : Intrinsic::x86_avx2_psrl_w;
3101  else
3102  llvm_unreachable("Unexpected size");
3103  } else {
3104  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3105  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3106  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3107  Intrinsic::x86_avx512_psrl_d_512;
3108  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3109  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3110  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3111  Intrinsic::x86_avx512_psrl_q_512;
3112  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3113  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3114  : Intrinsic::x86_avx512_psrl_w_512;
3115  else
3116  llvm_unreachable("Unexpected size");
3117  }
3118 
3119  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3120  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3121  bool IsImmediate = Name[16] == 'i' ||
3122  (Name.size() > 18 && Name[18] == 'i');
3123  bool IsVariable = Name[16] == 'v';
3124  char Size = Name[16] == '.' ? Name[17] :
3125  Name[17] == '.' ? Name[18] :
3126  Name[18] == '.' ? Name[19] :
3127  Name[20];
3128 
3129  Intrinsic::ID IID;
3130  if (IsVariable && Name[17] != '.') {
3131  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3132  IID = Intrinsic::x86_avx2_psrav_d;
3133  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3134  IID = Intrinsic::x86_avx2_psrav_d_256;
3135  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3136  IID = Intrinsic::x86_avx512_psrav_w_128;
3137  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3138  IID = Intrinsic::x86_avx512_psrav_w_256;
3139  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3140  IID = Intrinsic::x86_avx512_psrav_w_512;
3141  else
3142  llvm_unreachable("Unexpected size");
3143  } else if (Name.endswith(".128")) {
3144  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3145  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3146  : Intrinsic::x86_sse2_psra_d;
3147  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3148  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3149  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3150  Intrinsic::x86_avx512_psra_q_128;
3151  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3152  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3153  : Intrinsic::x86_sse2_psra_w;
3154  else
3155  llvm_unreachable("Unexpected size");
3156  } else if (Name.endswith(".256")) {
3157  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3158  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3159  : Intrinsic::x86_avx2_psra_d;
3160  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3161  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3162  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3163  Intrinsic::x86_avx512_psra_q_256;
3164  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3165  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3166  : Intrinsic::x86_avx2_psra_w;
3167  else
3168  llvm_unreachable("Unexpected size");
3169  } else {
3170  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3171  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3172  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3173  Intrinsic::x86_avx512_psra_d_512;
3174  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3175  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3176  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3177  Intrinsic::x86_avx512_psra_q_512;
3178  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3179  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3180  : Intrinsic::x86_avx512_psra_w_512;
3181  else
3182  llvm_unreachable("Unexpected size");
3183  }
3184 
3185  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3186  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3187  Rep = upgradeMaskedMove(Builder, *CI);
3188  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3189  Rep = UpgradeMaskToInt(Builder, *CI);
3190  } else if (IsX86 && Name.endswith(".movntdqa")) {
3191  Module *M = F->getParent();
3192  MDNode *Node = MDNode::get(
3194 
3195  Value *Ptr = CI->getArgOperand(0);
3196 
3197  // Convert the type of the pointer to a pointer to the stored type.
3198  Value *BC = Builder.CreateBitCast(
3199  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3200  LoadInst *LI = Builder.CreateAlignedLoad(
3201  CI->getType(), BC,
3203  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3204  Rep = LI;
3205  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3206  Name.startswith("fma.vfmsub.") ||
3207  Name.startswith("fma.vfnmadd.") ||
3208  Name.startswith("fma.vfnmsub."))) {
3209  bool NegMul = Name[6] == 'n';
3210  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3211  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3212 
3213  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3214  CI->getArgOperand(2) };
3215 
3216  if (IsScalar) {
3217  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3218  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3219  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3220  }
3221 
3222  if (NegMul && !IsScalar)
3223  Ops[0] = Builder.CreateFNeg(Ops[0]);
3224  if (NegMul && IsScalar)
3225  Ops[1] = Builder.CreateFNeg(Ops[1]);
3226  if (NegAcc)
3227  Ops[2] = Builder.CreateFNeg(Ops[2]);
3228 
3229  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3230  Intrinsic::fma,
3231  Ops[0]->getType()),
3232  Ops);
3233 
3234  if (IsScalar)
3235  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3236  (uint64_t)0);
3237  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3238  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3239  CI->getArgOperand(2) };
3240 
3241  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3242  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3243  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3244 
3245  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3246  Intrinsic::fma,
3247  Ops[0]->getType()),
3248  Ops);
3249 
3250  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3251  Rep, (uint64_t)0);
3252  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3253  Name.startswith("avx512.maskz.vfmadd.s") ||
3254  Name.startswith("avx512.mask3.vfmadd.s") ||
3255  Name.startswith("avx512.mask3.vfmsub.s") ||
3256  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3257  bool IsMask3 = Name[11] == '3';
3258  bool IsMaskZ = Name[11] == 'z';
3259  // Drop the "avx512.mask." to make it easier.
3260  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3261  bool NegMul = Name[2] == 'n';
3262  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3263 
3264  Value *A = CI->getArgOperand(0);
3265  Value *B = CI->getArgOperand(1);
3266  Value *C = CI->getArgOperand(2);
3267 
3268  if (NegMul && (IsMask3 || IsMaskZ))
3269  A = Builder.CreateFNeg(A);
3270  if (NegMul && !(IsMask3 || IsMaskZ))
3271  B = Builder.CreateFNeg(B);
3272  if (NegAcc)
3273  C = Builder.CreateFNeg(C);
3274 
3275  A = Builder.CreateExtractElement(A, (uint64_t)0);
3276  B = Builder.CreateExtractElement(B, (uint64_t)0);
3277  C = Builder.CreateExtractElement(C, (uint64_t)0);
3278 
3279  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3280  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3281  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3282 
3283  Intrinsic::ID IID;
3284  if (Name.back() == 'd')
3285  IID = Intrinsic::x86_avx512_vfmadd_f64;
3286  else
3287  IID = Intrinsic::x86_avx512_vfmadd_f32;
3289  Rep = Builder.CreateCall(FMA, Ops);
3290  } else {
3292  Intrinsic::fma,
3293  A->getType());
3294  Rep = Builder.CreateCall(FMA, { A, B, C });
3295  }
3296 
3297  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3298  IsMask3 ? C : A;
3299 
3300  // For Mask3 with NegAcc, we need to create a new extractelement that
3301  // avoids the negation above.
3302  if (NegAcc && IsMask3)
3303  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3304  (uint64_t)0);
3305 
3307  Rep, PassThru);
3308  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3309  Rep, (uint64_t)0);
3310  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3311  Name.startswith("avx512.mask.vfnmadd.p") ||
3312  Name.startswith("avx512.mask.vfnmsub.p") ||
3313  Name.startswith("avx512.mask3.vfmadd.p") ||
3314  Name.startswith("avx512.mask3.vfmsub.p") ||
3315  Name.startswith("avx512.mask3.vfnmsub.p") ||
3316  Name.startswith("avx512.maskz.vfmadd.p"))) {
3317  bool IsMask3 = Name[11] == '3';
3318  bool IsMaskZ = Name[11] == 'z';
3319  // Drop the "avx512.mask." to make it easier.
3320  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3321  bool NegMul = Name[2] == 'n';
3322  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3323 
3324  Value *A = CI->getArgOperand(0);
3325  Value *B = CI->getArgOperand(1);
3326  Value *C = CI->getArgOperand(2);
3327 
3328  if (NegMul && (IsMask3 || IsMaskZ))
3329  A = Builder.CreateFNeg(A);
3330  if (NegMul && !(IsMask3 || IsMaskZ))
3331  B = Builder.CreateFNeg(B);
3332  if (NegAcc)
3333  C = Builder.CreateFNeg(C);
3334 
3335  if (CI->getNumArgOperands() == 5 &&
3336  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3337  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3338  Intrinsic::ID IID;
3339  // Check the character before ".512" in string.
3340  if (Name[Name.size()-5] == 's')
3341  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3342  else
3343  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3344 
3345  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3346  { A, B, C, CI->getArgOperand(4) });
3347  } else {
3349  Intrinsic::fma,
3350  A->getType());
3351  Rep = Builder.CreateCall(FMA, { A, B, C });
3352  }
3353 
3354  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3355  IsMask3 ? CI->getArgOperand(2) :
3356  CI->getArgOperand(0);
3357 
3358  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3359  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3360  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3361  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3362  Intrinsic::ID IID;
3363  if (VecWidth == 128 && EltWidth == 32)
3364  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3365  else if (VecWidth == 256 && EltWidth == 32)
3366  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3367  else if (VecWidth == 128 && EltWidth == 64)
3368  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3369  else if (VecWidth == 256 && EltWidth == 64)
3370  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3371  else
3372  llvm_unreachable("Unexpected intrinsic");
3373 
3374  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3375  CI->getArgOperand(2) };
3376  Ops[2] = Builder.CreateFNeg(Ops[2]);
3377  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3378  Ops);
3379  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3380  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3381  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3382  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3383  bool IsMask3 = Name[11] == '3';
3384  bool IsMaskZ = Name[11] == 'z';
3385  // Drop the "avx512.mask." to make it easier.
3386  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3387  bool IsSubAdd = Name[3] == 's';
3388  if (CI->getNumArgOperands() == 5) {
3389  Intrinsic::ID IID;
3390  // Check the character before ".512" in string.
3391  if (Name[Name.size()-5] == 's')
3392  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3393  else
3394  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3395 
3396  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3397  CI->getArgOperand(2), CI->getArgOperand(4) };
3398  if (IsSubAdd)
3399  Ops[2] = Builder.CreateFNeg(Ops[2]);
3400 
3401  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3402  Ops);
3403  } else {
3404  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3405 
3406  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3407  CI->getArgOperand(2) };
3408 
3409  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3410  Ops[0]->getType());
3411  Value *Odd = Builder.CreateCall(FMA, Ops);
3412  Ops[2] = Builder.CreateFNeg(Ops[2]);
3413  Value *Even = Builder.CreateCall(FMA, Ops);
3414 
3415  if (IsSubAdd)
3416  std::swap(Even, Odd);
3417 
3418  SmallVector<int, 32> Idxs(NumElts);
3419  for (int i = 0; i != NumElts; ++i)
3420  Idxs[i] = i + (i % 2) * NumElts;
3421 
3422  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3423  }
3424 
3425  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3426  IsMask3 ? CI->getArgOperand(2) :
3427  CI->getArgOperand(0);
3428 
3429  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3430  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3431  Name.startswith("avx512.maskz.pternlog."))) {
3432  bool ZeroMask = Name[11] == 'z';
3433  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3434  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3435  Intrinsic::ID IID;
3436  if (VecWidth == 128 && EltWidth == 32)
3437  IID = Intrinsic::x86_avx512_pternlog_d_128;
3438  else if (VecWidth == 256 && EltWidth == 32)
3439  IID = Intrinsic::x86_avx512_pternlog_d_256;
3440  else if (VecWidth == 512 && EltWidth == 32)
3441  IID = Intrinsic::x86_avx512_pternlog_d_512;
3442  else if (VecWidth == 128 && EltWidth == 64)
3443  IID = Intrinsic::x86_avx512_pternlog_q_128;
3444  else if (VecWidth == 256 && EltWidth == 64)
3445  IID = Intrinsic::x86_avx512_pternlog_q_256;
3446  else if (VecWidth == 512 && EltWidth == 64)
3447  IID = Intrinsic::x86_avx512_pternlog_q_512;
3448  else
3449  llvm_unreachable("Unexpected intrinsic");
3450 
3451  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3452  CI->getArgOperand(2), CI->getArgOperand(3) };
3453  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3454  Args);
3455  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3456  : CI->getArgOperand(0);
3457  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3458  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3459  Name.startswith("avx512.maskz.vpmadd52"))) {
3460  bool ZeroMask = Name[11] == 'z';
3461  bool High = Name[20] == 'h' || Name[21] == 'h';
3462  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3463  Intrinsic::ID IID;
3464  if (VecWidth == 128 && !High)
3465  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3466  else if (VecWidth == 256 && !High)
3467  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3468  else if (VecWidth == 512 && !High)
3469  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3470  else if (VecWidth == 128 && High)
3471  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3472  else if (VecWidth == 256 && High)
3473  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3474  else if (VecWidth == 512 && High)
3475  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3476  else
3477  llvm_unreachable("Unexpected intrinsic");
3478 
3479  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3480  CI->getArgOperand(2) };
3481  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3482  Args);
3483  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3484  : CI->getArgOperand(0);
3485  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3486  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3487  Name.startswith("avx512.mask.vpermt2var.") ||
3488  Name.startswith("avx512.maskz.vpermt2var."))) {
3489  bool ZeroMask = Name[11] == 'z';
3490  bool IndexForm = Name[17] == 'i';
3491  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3492  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3493  Name.startswith("avx512.maskz.vpdpbusd.") ||
3494  Name.startswith("avx512.mask.vpdpbusds.") ||
3495  Name.startswith("avx512.maskz.vpdpbusds."))) {
3496  bool ZeroMask = Name[11] == 'z';
3497  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3498  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3499  Intrinsic::ID IID;
3500  if (VecWidth == 128 && !IsSaturating)
3501  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3502  else if (VecWidth == 256 && !IsSaturating)
3503  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3504  else if (VecWidth == 512 && !IsSaturating)
3505  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3506  else if (VecWidth == 128 && IsSaturating)
3507  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3508  else if (VecWidth == 256 && IsSaturating)
3509  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3510  else if (VecWidth == 512 && IsSaturating)
3511  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3512  else
3513  llvm_unreachable("Unexpected intrinsic");
3514 
3515  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3516  CI->getArgOperand(2) };
3517  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3518  Args);
3519  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3520  : CI->getArgOperand(0);
3521  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3522  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3523  Name.startswith("avx512.maskz.vpdpwssd.") ||
3524  Name.startswith("avx512.mask.vpdpwssds.") ||
3525  Name.startswith("avx512.maskz.vpdpwssds."))) {
3526  bool ZeroMask = Name[11] == 'z';
3527  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3528  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3529  Intrinsic::ID IID;
3530  if (VecWidth == 128 && !IsSaturating)
3531  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3532  else if (VecWidth == 256 && !IsSaturating)
3533  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3534  else if (VecWidth == 512 && !IsSaturating)
3535  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3536  else if (VecWidth == 128 && IsSaturating)
3537  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3538  else if (VecWidth == 256 && IsSaturating)
3539  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3540  else if (VecWidth == 512 && IsSaturating)
3541  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3542  else
3543  llvm_unreachable("Unexpected intrinsic");
3544 
3545  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3546  CI->getArgOperand(2) };
3547  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3548  Args);
3549  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3550  : CI->getArgOperand(0);
3551  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3552  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3553  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3554  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3555  Intrinsic::ID IID;
3556  if (Name[0] == 'a' && Name.back() == '2')
3557  IID = Intrinsic::x86_addcarry_32;
3558  else if (Name[0] == 'a' && Name.back() == '4')
3559  IID = Intrinsic::x86_addcarry_64;
3560  else if (Name[0] == 's' && Name.back() == '2')
3561  IID = Intrinsic::x86_subborrow_32;
3562  else if (Name[0] == 's' && Name.back() == '4')
3563  IID = Intrinsic::x86_subborrow_64;
3564  else
3565  llvm_unreachable("Unexpected intrinsic");
3566 
3567  // Make a call with 3 operands.
3568  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3569  CI->getArgOperand(2)};
3570  Value *NewCall = Builder.CreateCall(
3572  Args);
3573 
3574  // Extract the second result and store it.
3575  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3576  // Cast the pointer to the right type.
3577  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3578  llvm::PointerType::getUnqual(Data->getType()));
3579  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3580  // Replace the original call result with the first result of the new call.
3581  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3582 
3583  CI->replaceAllUsesWith(CF);
3584  Rep = nullptr;
3585  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3586  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3587  // Rep will be updated by the call in the condition.
3588  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3589  Value *Arg = CI->getArgOperand(0);
3590  Value *Neg = Builder.CreateNeg(Arg, "neg");
3591  Value *Cmp = Builder.CreateICmpSGE(
3592  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3593  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3594  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3595  Name.startswith("atomic.load.add.f64.p"))) {
3596  Value *Ptr = CI->getArgOperand(0);
3597  Value *Val = CI->getArgOperand(1);
3598  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3600  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3601  Name == "max.ui" || Name == "max.ull")) {
3602  Value *Arg0 = CI->getArgOperand(0);
3603  Value *Arg1 = CI->getArgOperand(1);
3604  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3605  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3606  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3607  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3608  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3609  Name == "min.ui" || Name == "min.ull")) {
3610  Value *Arg0 = CI->getArgOperand(0);
3611  Value *Arg1 = CI->getArgOperand(1);
3612  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3613  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3614  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3615  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3616  } else if (IsNVVM && Name == "clz.ll") {
3617  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3618  Value *Arg = CI->getArgOperand(0);
3619  Value *Ctlz = Builder.CreateCall(
3620  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3621  {Arg->getType()}),
3622  {Arg, Builder.getFalse()}, "ctlz");
3623  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3624  } else if (IsNVVM && Name == "popc.ll") {
3625  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3626  // i64.
3627  Value *Arg = CI->getArgOperand(0);
3628  Value *Popc = Builder.CreateCall(
3629  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3630  {Arg->getType()}),
3631  Arg, "ctpop");
3632  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3633  } else if (IsNVVM && Name == "h2f") {
3634  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3635  F->getParent(), Intrinsic::convert_from_fp16,
3636  {Builder.getFloatTy()}),
3637  CI->getArgOperand(0), "h2f");
3638  } else {
3639  llvm_unreachable("Unknown function for CallInst upgrade.");
3640  }
3641 
3642  if (Rep)
3643  CI->replaceAllUsesWith(Rep);
3644  CI->eraseFromParent();
3645  return;
3646  }
3647 
3648  const auto &DefaultCase = [&NewFn, &CI]() -> void {
3649  // Handle generic mangling change, but nothing else
3650  assert(
3651  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3652  "Unknown function for CallInst upgrade and isn't just a name change");
3653  CI->setCalledFunction(NewFn);
3654  };
3655  CallInst *NewCall = nullptr;
3656  switch (NewFn->getIntrinsicID()) {
3657  default: {
3658  DefaultCase();
3659  return;
3660  }
3661  case Intrinsic::arm_neon_vld1:
3662  case Intrinsic::arm_neon_vld2:
3663  case Intrinsic::arm_neon_vld3:
3664  case Intrinsic::arm_neon_vld4:
3665  case Intrinsic::arm_neon_vld2lane:
3666  case Intrinsic::arm_neon_vld3lane:
3667  case Intrinsic::arm_neon_vld4lane:
3668  case Intrinsic::arm_neon_vst1:
3669  case Intrinsic::arm_neon_vst2:
3670  case Intrinsic::arm_neon_vst3:
3671  case Intrinsic::arm_neon_vst4:
3672  case Intrinsic::arm_neon_vst2lane:
3673  case Intrinsic::arm_neon_vst3lane:
3674  case Intrinsic::arm_neon_vst4lane: {
3676  CI->arg_operands().end());
3677  NewCall = Builder.CreateCall(NewFn, Args);
3678  break;
3679  }
3680 
3681  case Intrinsic::arm_neon_bfdot:
3682  case Intrinsic::arm_neon_bfmmla:
3683  case Intrinsic::arm_neon_bfmlalb:
3684  case Intrinsic::arm_neon_bfmlalt:
3685  case Intrinsic::aarch64_neon_bfdot:
3686  case Intrinsic::aarch64_neon_bfmmla:
3687  case Intrinsic::aarch64_neon_bfmlalb:
3688  case Intrinsic::aarch64_neon_bfmlalt: {
3690  assert(CI->getNumArgOperands() == 3 &&
3691  "Mismatch between function args and call args");
3692  size_t OperandWidth =
3694  assert((OperandWidth == 64 || OperandWidth == 128) &&
3695  "Unexpected operand width");
3696  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3697  auto Iter = CI->arg_operands().begin();
3698  Args.push_back(*Iter++);
3699  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3700  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3701  NewCall = Builder.CreateCall(NewFn, Args);
3702  break;
3703  }
3704 
3705  case Intrinsic::bitreverse:
3706  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3707  break;
3708 
3709  case Intrinsic::ctlz:
3710  case Intrinsic::cttz:
3711  assert(CI->getNumArgOperands() == 1 &&
3712  "Mismatch between function args and call args");
3713  NewCall =
3714  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3715  break;
3716 
3717  case Intrinsic::objectsize: {
3718  Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3719  ? Builder.getFalse()
3720  : CI->getArgOperand(2);
3721  Value *Dynamic =
3722  CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3723  NewCall = Builder.CreateCall(
3724  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3725  break;
3726  }
3727 
3728  case Intrinsic::ctpop:
3729  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3730  break;
3731 
3732  case Intrinsic::convert_from_fp16:
3733  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3734  break;
3735 
3736  case Intrinsic::dbg_value:
3737  // Upgrade from the old version that had an extra offset argument.
3738  assert(CI->getNumArgOperands() == 4);
3739  // Drop nonzero offsets instead of attempting to upgrade them.
3740  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3741  if (Offset->isZeroValue()) {
3742  NewCall = Builder.CreateCall(
3743  NewFn,
3744  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3745  break;
3746  }
3747  CI->eraseFromParent();
3748  return;
3749 
3750  case Intrinsic::ptr_annotation:
3751  // Upgrade from versions that lacked the annotation attribute argument.
3752  assert(CI->getNumArgOperands() == 4 &&
3753  "Before LLVM 12.0 this intrinsic took four arguments");
3754  // Create a new call with an added null annotation attribute argument.
3755  NewCall = Builder.CreateCall(
3756  NewFn,
3757  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3758  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3759  NewCall->takeName(CI);
3760  CI->replaceAllUsesWith(NewCall);
3761  CI->eraseFromParent();
3762  return;
3763 
3764  case Intrinsic::var_annotation:
3765  // Upgrade from versions that lacked the annotation attribute argument.
3766  assert(CI->getNumArgOperands() == 4 &&
3767  "Before LLVM 12.0 this intrinsic took four arguments");
3768  // Create a new call with an added null annotation attribute argument.
3769  NewCall = Builder.CreateCall(
3770  NewFn,
3771  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3772  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3773  CI->eraseFromParent();
3774  return;
3775 
3776  case Intrinsic::x86_xop_vfrcz_ss:
3777  case Intrinsic::x86_xop_vfrcz_sd:
3778  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3779  break;
3780 
3781  case Intrinsic::x86_xop_vpermil2pd:
3782  case Intrinsic::x86_xop_vpermil2ps:
3783  case Intrinsic::x86_xop_vpermil2pd_256:
3784  case Intrinsic::x86_xop_vpermil2ps_256: {
3786  CI->arg_operands().end());
3787  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3788  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3789  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3790  NewCall = Builder.CreateCall(NewFn, Args);
3791  break;
3792  }
3793 
3794  case Intrinsic::x86_sse41_ptestc:
3795  case Intrinsic::x86_sse41_ptestz:
3796  case Intrinsic::x86_sse41_ptestnzc: {
3797  // The arguments for these intrinsics used to be v4f32, and changed
3798  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3799  // So, the only thing required is a bitcast for both arguments.
3800  // First, check the arguments have the old type.
3801  Value *Arg0 = CI->getArgOperand(0);
3802  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3803  return;
3804 
3805  // Old intrinsic, add bitcasts
3806  Value *Arg1 = CI->getArgOperand(1);
3807 
3808  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3809 
3810  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3811  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3812 
3813  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3814  break;
3815  }
3816 
3817  case Intrinsic::x86_rdtscp: {
3818  // This used to take 1 argument. If we have no arguments, it is already
3819  // upgraded.
3820  if (CI->getNumOperands() == 0)
3821  return;
3822 
3823  NewCall = Builder.CreateCall(NewFn);
3824  // Extract the second result and store it.
3825  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3826  // Cast the pointer to the right type.
3827  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3828  llvm::PointerType::getUnqual(Data->getType()));
3829  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3830  // Replace the original call result with the first result of the new call.
3831  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3832 
3833  NewCall->takeName(CI);
3834  CI->replaceAllUsesWith(TSC);
3835  CI->eraseFromParent();
3836  return;
3837  }
3838 
3839  case Intrinsic::x86_sse41_insertps:
3840  case Intrinsic::x86_sse41_dppd:
3841  case Intrinsic::x86_sse41_dpps:
3842  case Intrinsic::x86_sse41_mpsadbw:
3843  case Intrinsic::x86_avx_dp_ps_256:
3844  case Intrinsic::x86_avx2_mpsadbw: {
3845  // Need to truncate the last argument from i32 to i8 -- this argument models
3846  // an inherently 8-bit immediate operand to these x86 instructions.
3848  CI->arg_operands().end());
3849 
3850  // Replace the last argument with a trunc.
3851  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3852  NewCall = Builder.CreateCall(NewFn, Args);
3853  break;
3854  }
3855 
3856  case Intrinsic::x86_avx512_mask_cmp_pd_128:
3857  case Intrinsic::x86_avx512_mask_cmp_pd_256:
3858  case Intrinsic::x86_avx512_mask_cmp_pd_512:
3859  case Intrinsic::x86_avx512_mask_cmp_ps_128:
3860  case Intrinsic::x86_avx512_mask_cmp_ps_256:
3861  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3863  CI->arg_operands().end());
3864  unsigned NumElts =
3865  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3866  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
3867 
3868  NewCall = Builder.CreateCall(NewFn, Args);
3869  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
3870 
3871  NewCall->takeName(CI);
3872  CI->replaceAllUsesWith(Res);
3873  CI->eraseFromParent();
3874  return;
3875  }
3876 
3877  case Intrinsic::thread_pointer: {
3878  NewCall = Builder.CreateCall(NewFn, {});
3879  break;
3880  }
3881 
3882  case Intrinsic::invariant_start:
3883  case Intrinsic::invariant_end:
3884  case Intrinsic::masked_load:
3885  case Intrinsic::masked_store:
3886  case Intrinsic::masked_gather:
3887  case Intrinsic::masked_scatter: {
3889  CI->arg_operands().end());
3890  NewCall = Builder.CreateCall(NewFn, Args);
3891  break;
3892  }
3893 
3894  case Intrinsic::memcpy:
3895  case Intrinsic::memmove:
3896  case Intrinsic::memset: {
3897  // We have to make sure that the call signature is what we're expecting.
3898  // We only want to change the old signatures by removing the alignment arg:
3899  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
3900  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
3901  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3902  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3903  // Note: i8*'s in the above can be any pointer type
3904  if (CI->getNumArgOperands() != 5) {
3905  DefaultCase();
3906  return;
3907  }
3908  // Remove alignment argument (3), and add alignment attributes to the
3909  // dest/src pointers.
3910  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3911  CI->getArgOperand(2), CI->getArgOperand(4)};
3912  NewCall = Builder.CreateCall(NewFn, Args);
3913  auto *MemCI = cast<MemIntrinsic>(NewCall);
3914  // All mem intrinsics support dest alignment.
3915  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3916  MemCI->setDestAlignment(Align->getMaybeAlignValue());
3917  // Memcpy/Memmove also support source alignment.
3918  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3919  MTI->setSourceAlignment(Align->getMaybeAlignValue());
3920  break;
3921  }
3922  }
3923  assert(NewCall && "Should have either set this variable or returned through "
3924  "the default case");
3925  NewCall->takeName(CI);
3926  CI->replaceAllUsesWith(NewCall);
3927  CI->eraseFromParent();
3928 }
3929 
// Rewrites every call to the intrinsic declaration F and then deletes F, but
// only when UpgradeIntrinsicFunction reports that F needs upgrading.
3931  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3932 
3933  // Check if this function should be upgraded and get the replacement function
3934  // if there is one.
3935  Function *NewFn;
3936  if (UpgradeIntrinsicFunction(F, NewFn)) {
3937  // Replace all users of the old function with the new function or new
3938  // instructions. An early-increment range is used because the call is deleted
// while the user list is being walked. Users that are not CallInsts are
// skipped by the dyn_cast below.
3939  for (User *U : make_early_inc_range(F->users()))
3940  if (CallInst *CI = dyn_cast<CallInst>(U))
3941  UpgradeIntrinsicCall(CI, NewFn);
3942 
3943  // Remove old function, no longer used, from the module.
3944  F->eraseFromParent();
3945  }
3946 }
3947 
// Converts a TBAA tag node MD to the struct-path aware form, returning MD
// itself when it is already in that form and a newly built MDNode otherwise.
// NOTE(review): getOperand(0) is evaluated before the operand count is checked;
// presumably callers never pass a node with zero operands -- confirm.
3949  // Check if the tag uses struct-path aware TBAA format.
3950  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3951  return &MD;
3952 
3953  auto &Context = MD.getContext();
// A three-operand tag: operands 0 and 1 become a separate scalar type node,
// and operand 2 is carried over as the last field of the new tag.
3954  if (MD.getNumOperands() == 3) {
3955  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3956  MDNode *ScalarType = MDNode::get(Context, Elts);
3957  // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3958  Metadata *Elts2[] = {ScalarType, ScalarType,
3961  MD.getOperand(2)};
3962  return MDNode::get(Context, Elts2);
3963  }
// Any other operand count: MD itself serves as both type fields of the tag.
3964  // Create a MDNode <MD, MD, offset 0>
3967  return MDNode::get(Context, Elts);
3968 }
3969 
3970 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3971  Instruction *&Temp) {
3972  if (Opc != Instruction::BitCast)
3973  return nullptr;
3974 
3975  Temp = nullptr;
3976  Type *SrcTy = V->getType();
3977  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3978  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3979  LLVMContext &Context = V->getContext();
3980 
3981  // We have no information about target data layout, so we assume that
3982  // the maximum pointer size is 64bit.
3983  Type *MidTy = Type::getInt64Ty(Context);
3984  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3985 
3986  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3987  }
3988 
3989  return nullptr;
3990 }
3991 
/// Constant counterpart of UpgradeBitCastInst: examines a bitcast of the
/// constant C to DestTy and returns a replacement value only when both types
/// are pointers (or pointer vectors) in different address spaces. Returns
/// nullptr for any other opcode or type combination.
3992 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3993  if (Opc != Instruction::BitCast)
3994  return nullptr;
3995 
3996  Type *SrcTy = C->getType();
3997  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3998  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3999  LLVMContext &Context = C->getContext();
4000 
4001  // We have no information about target data layout, so we assume that
4002  // the maximum pointer size is 64bit.
4003  Type *MidTy = Type::getInt64Ty(Context);
4004 
// NOTE(review): the head of the return expression is not visible here;
// presumably it routes C through MidTy to DestTy, mirroring
// UpgradeBitCastInst -- confirm.
4006  DestTy);
4007  }
4008 
4009  return nullptr;
4010 }
4011 
4012 /// Check the debug info version number, if it is out-dated, drop the debug
4013 /// info. Return true if module is modified.
// A module that fails verification outright is a fatal error; a module whose
// only problem is broken debug info is diagnosed and then falls through to the
// strip below.
4017  bool BrokenDebugInfo = false;
4018  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4019  report_fatal_error("Broken module found, compilation aborted!");
4020  if (!BrokenDebugInfo)
4021  // Everything is ok.
4022  return false;
4023  else {
4024  // Diagnose malformed debug info.
4026  M.getContext().diagnose(Diag);
4027  }
4028  }
// The return value is whatever StripDebugInfo reports for the module.
4029  bool Modified = StripDebugInfo(M);
4031  // Diagnose a version mismatch.
4033  M.getContext().diagnose(DiagVersion);
4034  }
4035  return Modified;
4036 }
4037 
4038 /// This checks for objc retain release marker which should be upgraded. It
4039 /// returns true if module is modified.
///
/// The old marker is looked up as named metadata under the key returned by
/// objcarc::getRVMarkerModuleFlagStr(). When it holds an MDString, that string
/// is re-added as a module flag with Module::Error behavior under the same key
/// and the named metadata is erased.
4041  bool Changed = false;
4042  const char *MarkerKey = objcarc::getRVMarkerModuleFlagStr();
4043  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4044  if (ModRetainReleaseMarker) {
// NOTE(review): operand 0 is read without checking the operand count;
// presumably the named metadata always has at least one operand -- confirm.
4045  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4046  if (Op) {
4047  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4048  if (ID) {
// A value with exactly one '#' separator ("a#b") is rewritten to "a;b"; any
// other value is carried over unchanged.
4049  SmallVector<StringRef, 4> ValueComp;
4050  ID->getString().split(ValueComp, "#");
4051  if (ValueComp.size() == 2) {
4052  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4053  ID = MDString::get(M.getContext(), NewValue);
4054  }
4055  M.addModuleFlag(Module::Error, MarkerKey, ID);
4056  M.eraseNamedMetadata(ModRetainReleaseMarker);
4057  Changed = true;
4058  }
4059  }
4060  }
4061  return Changed;
4062 }
4063 
4065  // This lambda converts normal function calls to ARC runtime functions to
4066  // intrinsic calls.
// It does nothing when the module has no function named OldFunc.
4067  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4068  llvm::Intrinsic::ID IntrinsicFunc) {
4069  Function *Fn = M.getFunction(OldFunc);
4070 
4071  if (!Fn)
4072  return;
4073 
4074  Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4075 
// Early-increment iteration: the call being visited is erased at the end of
// the loop body.
4076  for (User *U : make_early_inc_range(Fn->users())) {
// Only call instructions whose callee is Fn itself are rewritten; every other
// user is left alone.
4077  CallInst *CI = dyn_cast<CallInst>(U);
4078  if (!CI || CI->getCalledFunction() != Fn)
4079  continue;
4080 
// New instructions are inserted immediately before the old call.
4081  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4082  FunctionType *NewFuncTy = NewFn->getFunctionType();
4084 
4085  // Skip this call when the return types differ and a bitcast between the old
4086  // call's type and the intrinsic's return type is not valid.
4087  if (NewFuncTy->getReturnType() != CI->getType() &&
4088  !CastInst::castIsValid(Instruction::BitCast, CI,
4089  NewFuncTy->getReturnType()))
4090  continue;
4091 
4092  bool InvalidCast = false;
4093 
4094  for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
4095  Value *Arg = CI->getArgOperand(I);
4096 
4097  // Bitcast argument to the parameter type of the new function if it's
4098  // not a variadic argument.
4099  if (I < NewFuncTy->getNumParams()) {
4100  // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4101  // to the parameter type of the new function.
4102  if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4103  NewFuncTy->getParamType(I))) {
4104  InvalidCast = true;
4105  break;
4106  }
4107  Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4108  }
4109  Args.push_back(Arg);
4110  }
4111 
// At least one argument could not be cast: leave the original call in place.
4112  if (InvalidCast)
4113  continue;
4114 
4115  // Create a call instruction that calls the new function.
// The tail-call kind and the name of the old call are carried over.
4116  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4117  NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4118  NewCall->takeName(CI);
4119 
4120  // Bitcast the return value back to the type of the old call.
4121  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4122 
4123  if (!CI->use_empty())
4124  CI->replaceAllUsesWith(NewRetVal);
4125  CI->eraseFromParent();
4126  }
4127 
// The old function is removed only once nothing refers to it any more; calls
// that were skipped above keep it alive.
4128  if (Fn->use_empty())
4129  Fn->eraseFromParent();
4130  };
4131 
4132  // Unconditionally convert a call to "clang.arc.use" to a call to
4133  // "llvm.objc.clang.arc.use".
4134  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4135 
4136  // Upgrade the retain release marker. If there is no need to upgrade
4137  // the marker, that means either the module is already new enough to contain
4138  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4140  return;
4141 
// Table pairing each ARC runtime function name with the intrinsic that
// replaces it; every entry is passed to UpgradeToIntrinsic below.
4142  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4143  {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4144  {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4145  {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4146  {"objc_autoreleaseReturnValue",
4147  llvm::Intrinsic::objc_autoreleaseReturnValue},
4148  {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4149  {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4150  {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4151  {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4152  {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4153  {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4154  {"objc_release", llvm::Intrinsic::objc_release},
4155  {"objc_retain", llvm::Intrinsic::objc_retain},
4156  {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4157  {"objc_retainAutoreleaseReturnValue",
4158  llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4159  {"objc_retainAutoreleasedReturnValue",
4160  llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4161  {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4162  {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4163  {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4164  {"objc_unsafeClaimAutoreleasedReturnValue",
4165  llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4166  {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4167  {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4168  {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4169  {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4170  {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4171  {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4172  {"objc_arc_annotation_topdown_bbstart",
4173  llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4174  {"objc_arc_annotation_topdown_bbend",
4175  llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4176  {"objc_arc_annotation_bottomup_bbstart",
4177  llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4178  {"objc_arc_annotation_bottomup_bbend",
4179  llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4180 
4181  for (auto &I : RuntimeFuncs)
4182  UpgradeToIntrinsic(I.first, I.second);
4183 }
4184 
4186  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4187  if (!ModFlags)
4188  return false;
4189 
4190  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4191  bool HasSwiftVersionFlag = false;
4192  uint8_t SwiftMajorVersion, SwiftMinorVersion;
4193  uint32_t SwiftABIVersion;
4194  auto Int8Ty = Type::getInt8Ty(M.getContext());
4195  auto Int32Ty = Type::getInt32Ty(M.getContext());
4196 
4197  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4198  MDNode *Op = ModFlags->getOperand(I);
4199  if (Op->getNumOperands() != 3)
4200  continue;
4201  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4202  if (!ID)
4203  continue;
4204  if (ID->getString() == "Objective-C Image Info Version")
4205  HasObjCFlag = true;
4206  if (ID->getString() == "Objective-C Class Properties")
4207  HasClassProperties = true;
4208  // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4209  // field was Error and now they are Max.
4210  if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4211  if (auto *Behavior =
4212  mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4213  if (Behavior->getLimitedValue() == Module::Error) {
4214  Type *Int32Ty = Type::getInt32Ty(M.getContext());
4215  Metadata *Ops[3] = {
4217  MDString::get(M.getContext(), ID->getString()),
4218  Op->getOperand(2)};
4219  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4220  Changed = true;
4221  }
4222  }
4223  }
4224  // Upgrade Objective-C Image Info Section. Remove the whitespace in the
4225  // section name so that llvm-lto will not complain about mismatching
4226  // module flags that are functionally the same.
4227  if (ID->getString() == "Objective-C Image Info Section") {
4228  if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4229  SmallVector<StringRef, 4> ValueComp;
4230  Value->getString().split(ValueComp, " ");
4231  if (ValueComp.size() != 1) {
4232  std::string NewValue;
4233  for (auto &S : ValueComp)
4234  NewValue += S.str();
4235  Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4236  MDString::get(M.getContext(), NewValue)};
4237  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4238  Changed = true;
4239  }
4240  }
4241  }
4242 
4243  // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
4244  // If the higher bits are set, it adds new module flag for swift info.
4245  if (ID->getString() == "Objective-C Garbage Collection") {
4246  auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4247  if (Md) {
4248  assert(Md->getValue() && "Expected non-empty metadata");
4249  auto Type = Md->getValue()->getType();
4250  if (Type == Int8Ty)
4251  continue;
4252  unsigned