LLVM  14.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37 
38 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 
40 // Upgrade the declaration of an SSE4.1 ptest intrinsic whose arguments have
41 // changed their type from v4f32 to v2i64. Returns true (with NewFn set) only when F is the old form.
43  Function *&NewFn) {
44  // An old-style declaration is recognised by its first parameter still being
45  // a fixed vector of 4 floats.
46  Type *Arg0Type = F->getFunctionType()->getParamType(0);
47  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48  return false; // First parameter is not <4 x float>: nothing to upgrade.
49 
50  // Old signature: move F aside (".old" suffix) and fetch the declaration for IID.
51  rename(F);
52  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53  return true;
54 }
55 
56 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57 // arguments have changed their type from i32 to i8. Returns true (with NewFn set) only for the i32 form.
59  Function *&NewFn) {
60  // The old form is identified by its last parameter being an i32.
61  Type *LastArgType = F->getFunctionType()->getParamType(
62  F->getFunctionType()->getNumParams() - 1); // NOTE(review): assumes F declares at least one parameter.
63  if (!LastArgType->isIntegerTy(32))
64  return false; // Last parameter is not i32: nothing to upgrade.
65 
66  // Move this function aside (".old" suffix) and map to the declaration for IID.
67  rename(F);
68  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69  return true;
70 }
71 
72 // Upgrade the declaration of fp compare intrinsics that change return type
73 // from scalar to vXi1 mask. Returns true (with NewFn set) only when F still returns a non-vector.
75  Function *&NewFn) {
76  // A vector return type means this declaration needs no upgrade.
77  if (F->getReturnType()->isVectorTy())
78  return false;
79 
80  rename(F); // Move the old declaration aside (".old" suffix).
81  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82  return true;
83 }
84 
86  // Every intrinsic name matched below should be annotated with the LLVM
87  // version that started auto-upgrading it. At some point in the future we would
88  // like to use this information to remove upgrade code for some older
89  // intrinsics. It is currently undecided how we will determine that future
90  // point.
91  if (Name == "addcarryx.u32" || // Added in 8.0
92  Name == "addcarryx.u64" || // Added in 8.0
93  Name == "addcarry.u32" || // Added in 8.0
94  Name == "addcarry.u64" || // Added in 8.0
95  Name == "subborrow.u32" || // Added in 8.0
96  Name == "subborrow.u64" || // Added in 8.0
97  Name.startswith("sse2.padds.") || // Added in 8.0
98  Name.startswith("sse2.psubs.") || // Added in 8.0
99  Name.startswith("sse2.paddus.") || // Added in 8.0
100  Name.startswith("sse2.psubus.") || // Added in 8.0
101  Name.startswith("avx2.padds.") || // Added in 8.0
102  Name.startswith("avx2.psubs.") || // Added in 8.0
103  Name.startswith("avx2.paddus.") || // Added in 8.0
104  Name.startswith("avx2.psubus.") || // Added in 8.0
105  Name.startswith("avx512.padds.") || // Added in 8.0
106  Name.startswith("avx512.psubs.") || // Added in 8.0
107  Name.startswith("avx512.mask.padds.") || // Added in 8.0
108  Name.startswith("avx512.mask.psubs.") || // Added in 8.0
109  Name.startswith("avx512.mask.paddus.") || // Added in 8.0
110  Name.startswith("avx512.mask.psubus.") || // Added in 8.0
111  Name=="ssse3.pabs.b.128" || // Added in 6.0
112  Name=="ssse3.pabs.w.128" || // Added in 6.0
113  Name=="ssse3.pabs.d.128" || // Added in 6.0
114  Name.startswith("fma4.vfmadd.s") || // Added in 7.0
115  Name.startswith("fma.vfmadd.") || // Added in 7.0
116  Name.startswith("fma.vfmsub.") || // Added in 7.0
117  Name.startswith("fma.vfmsubadd.") || // Added in 7.0
118  Name.startswith("fma.vfnmadd.") || // Added in 7.0
119  Name.startswith("fma.vfnmsub.") || // Added in 7.0
120  Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
121  Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
122  Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
123  Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
124  Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
125  Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
126  Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
127  Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
128  Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
129  Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
130  Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
131  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
132  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
133  Name.startswith("avx512.kunpck") || // Added in 6.0
134  Name.startswith("avx2.pabs.") || // Added in 6.0
135  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
136  Name.startswith("avx512.broadcastm") || // Added in 6.0
137  Name == "sse.sqrt.ss" || // Added in 7.0
138  Name == "sse2.sqrt.sd" || // Added in 7.0
139  Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
140  Name.startswith("avx.sqrt.p") || // Added in 7.0
141  Name.startswith("sse2.sqrt.p") || // Added in 7.0
142  Name.startswith("sse.sqrt.p") || // Added in 7.0
143  Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
144  Name.startswith("sse2.pcmpeq.") || // Added in 3.1
145  Name.startswith("sse2.pcmpgt.") || // Added in 3.1
146  Name.startswith("avx2.pcmpeq.") || // Added in 3.1
147  Name.startswith("avx2.pcmpgt.") || // Added in 3.1
148  Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
149  Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
150  Name.startswith("avx.vperm2f128.") || // Added in 6.0
151  Name == "avx2.vperm2i128" || // Added in 6.0
152  Name == "sse.add.ss" || // Added in 4.0
153  Name == "sse2.add.sd" || // Added in 4.0
154  Name == "sse.sub.ss" || // Added in 4.0
155  Name == "sse2.sub.sd" || // Added in 4.0
156  Name == "sse.mul.ss" || // Added in 4.0
157  Name == "sse2.mul.sd" || // Added in 4.0
158  Name == "sse.div.ss" || // Added in 4.0
159  Name == "sse2.div.sd" || // Added in 4.0
160  Name == "sse41.pmaxsb" || // Added in 3.9
161  Name == "sse2.pmaxs.w" || // Added in 3.9
162  Name == "sse41.pmaxsd" || // Added in 3.9
163  Name == "sse2.pmaxu.b" || // Added in 3.9
164  Name == "sse41.pmaxuw" || // Added in 3.9
165  Name == "sse41.pmaxud" || // Added in 3.9
166  Name == "sse41.pminsb" || // Added in 3.9
167  Name == "sse2.pmins.w" || // Added in 3.9
168  Name == "sse41.pminsd" || // Added in 3.9
169  Name == "sse2.pminu.b" || // Added in 3.9
170  Name == "sse41.pminuw" || // Added in 3.9
171  Name == "sse41.pminud" || // Added in 3.9
172  Name == "avx512.kand.w" || // Added in 7.0
173  Name == "avx512.kandn.w" || // Added in 7.0
174  Name == "avx512.knot.w" || // Added in 7.0
175  Name == "avx512.kor.w" || // Added in 7.0
176  Name == "avx512.kxor.w" || // Added in 7.0
177  Name == "avx512.kxnor.w" || // Added in 7.0
178  Name == "avx512.kortestc.w" || // Added in 7.0
179  Name == "avx512.kortestz.w" || // Added in 7.0
180  Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
181  Name.startswith("avx2.pmax") || // Added in 3.9
182  Name.startswith("avx2.pmin") || // Added in 3.9
183  Name.startswith("avx512.mask.pmax") || // Added in 4.0
184  Name.startswith("avx512.mask.pmin") || // Added in 4.0
185  Name.startswith("avx2.vbroadcast") || // Added in 3.8
186  Name.startswith("avx2.pbroadcast") || // Added in 3.8
187  Name.startswith("avx.vpermil.") || // Added in 3.1
188  Name.startswith("sse2.pshuf") || // Added in 3.9
189  Name.startswith("avx512.pbroadcast") || // Added in 3.9
190  Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
191  Name.startswith("avx512.mask.movddup") || // Added in 3.9
192  Name.startswith("avx512.mask.movshdup") || // Added in 3.9
193  Name.startswith("avx512.mask.movsldup") || // Added in 3.9
194  Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
195  Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
196  Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
197  Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
198  Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
199  Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
200  Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
201  Name.startswith("avx512.mask.punpckl") || // Added in 3.9
202  Name.startswith("avx512.mask.punpckh") || // Added in 3.9
203  Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
204  Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
205  Name.startswith("avx512.mask.pand.") || // Added in 3.9
206  Name.startswith("avx512.mask.pandn.") || // Added in 3.9
207  Name.startswith("avx512.mask.por.") || // Added in 3.9
208  Name.startswith("avx512.mask.pxor.") || // Added in 3.9
209  Name.startswith("avx512.mask.and.") || // Added in 3.9
210  Name.startswith("avx512.mask.andn.") || // Added in 3.9
211  Name.startswith("avx512.mask.or.") || // Added in 3.9
212  Name.startswith("avx512.mask.xor.") || // Added in 3.9
213  Name.startswith("avx512.mask.padd.") || // Added in 4.0
214  Name.startswith("avx512.mask.psub.") || // Added in 4.0
215  Name.startswith("avx512.mask.pmull.") || // Added in 4.0
216  Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
217  Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
218  Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
219  Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
220  Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
221  Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
222  Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
223  Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
224  Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
225  Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
226  Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
227  Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
228  Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
229  Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
230  Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
231  Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
232  Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
233  Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
234  Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
235  Name == "avx512.cvtusi2sd" || // Added in 7.0
236  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
237  Name == "sse2.pmulu.dq" || // Added in 7.0
238  Name == "sse41.pmuldq" || // Added in 7.0
239  Name == "avx2.pmulu.dq" || // Added in 7.0
240  Name == "avx2.pmul.dq" || // Added in 7.0
241  Name == "avx512.pmulu.dq.512" || // Added in 7.0
242  Name == "avx512.pmul.dq.512" || // Added in 7.0
243  Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
244  Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
245  Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
246  Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
247  Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
248  Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
249  Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
250  Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
251  Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
252  Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
253  Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
254  Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
255  Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
256  Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
257  Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
258  Name.startswith("avx512.cmp.p") || // Added in 12.0
259  Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
260  Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
261  Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
262  Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
263  Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
264  Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
265  Name.startswith("avx512.mask.psll.d") || // Added in 4.0
266  Name.startswith("avx512.mask.psll.q") || // Added in 4.0
267  Name.startswith("avx512.mask.psll.w") || // Added in 4.0
268  Name.startswith("avx512.mask.psra.d") || // Added in 4.0
269  Name.startswith("avx512.mask.psra.q") || // Added in 4.0
270  Name.startswith("avx512.mask.psra.w") || // Added in 4.0
271  Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
272  Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
273  Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
274  Name.startswith("avx512.mask.pslli") || // Added in 4.0
275  Name.startswith("avx512.mask.psrai") || // Added in 4.0
276  Name.startswith("avx512.mask.psrli") || // Added in 4.0
277  Name.startswith("avx512.mask.psllv") || // Added in 4.0
278  Name.startswith("avx512.mask.psrav") || // Added in 4.0
279  Name.startswith("avx512.mask.psrlv") || // Added in 4.0
280  Name.startswith("sse41.pmovsx") || // Added in 3.8
281  Name.startswith("sse41.pmovzx") || // Added in 3.9
282  Name.startswith("avx2.pmovsx") || // Added in 3.9
283  Name.startswith("avx2.pmovzx") || // Added in 3.9
284  Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
285  Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
286  Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
287  Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
288  Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
289  Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
290  Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
291  Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
292  Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
293  Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
294  Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
295  Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
296  Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
297  Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
298  Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
299  Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
300  Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
301  Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
302  Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
303  Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
304  Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
305  Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
306  Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
307  Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
308  Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
309  Name.startswith("avx512.vpshld.") || // Added in 8.0
310  Name.startswith("avx512.vpshrd.") || // Added in 8.0
311  Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
312  Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
313  Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
314  Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
315  Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
316  Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
317  Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
318  Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
319  Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
320  Name.startswith("avx512.mask.conflict.") || // Added in 9.0
321  Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
322  Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
323  Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
324  Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
325  Name == "sse.cvtsi2ss" || // Added in 7.0
326  Name == "sse.cvtsi642ss" || // Added in 7.0
327  Name == "sse2.cvtsi2sd" || // Added in 7.0
328  Name == "sse2.cvtsi642sd" || // Added in 7.0
329  Name == "sse2.cvtss2sd" || // Added in 7.0
330  Name == "sse2.cvtdq2pd" || // Added in 3.9
331  Name == "sse2.cvtdq2ps" || // Added in 7.0
332  Name == "sse2.cvtps2pd" || // Added in 3.9
333  Name == "avx.cvtdq2.pd.256" || // Added in 3.9
334  Name == "avx.cvtdq2.ps.256" || // Added in 7.0
335  Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
336  Name.startswith("vcvtph2ps.") || // Added in 11.0
337  Name.startswith("avx.vinsertf128.") || // Added in 3.7
338  Name == "avx2.vinserti128" || // Added in 3.7
339  Name.startswith("avx512.mask.insert") || // Added in 4.0
340  Name.startswith("avx.vextractf128.") || // Added in 3.7
341  Name == "avx2.vextracti128" || // Added in 3.7
342  Name.startswith("avx512.mask.vextract") || // Added in 4.0
343  Name.startswith("sse4a.movnt.") || // Added in 3.9
344  Name.startswith("avx.movnt.") || // Added in 3.2
345  Name.startswith("avx512.storent.") || // Added in 3.9
346  Name == "sse41.movntdqa" || // Added in 5.0
347  Name == "avx2.movntdqa" || // Added in 5.0
348  Name == "avx512.movntdqa" || // Added in 5.0
349  Name == "sse2.storel.dq" || // Added in 3.9
350  Name.startswith("sse.storeu.") || // Added in 3.9
351  Name.startswith("sse2.storeu.") || // Added in 3.9
352  Name.startswith("avx.storeu.") || // Added in 3.9
353  Name.startswith("avx512.mask.storeu.") || // Added in 3.9
354  Name.startswith("avx512.mask.store.p") || // Added in 3.9
355  Name.startswith("avx512.mask.store.b.") || // Added in 3.9
356  Name.startswith("avx512.mask.store.w.") || // Added in 3.9
357  Name.startswith("avx512.mask.store.d.") || // Added in 3.9
358  Name.startswith("avx512.mask.store.q.") || // Added in 3.9
359  Name == "avx512.mask.store.ss" || // Added in 7.0
360  Name.startswith("avx512.mask.loadu.") || // Added in 3.9
361  Name.startswith("avx512.mask.load.") || // Added in 3.9
362  Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
363  Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
364  Name.startswith("avx512.mask.expand.b") || // Added in 9.0
365  Name.startswith("avx512.mask.expand.w") || // Added in 9.0
366  Name.startswith("avx512.mask.expand.d") || // Added in 9.0
367  Name.startswith("avx512.mask.expand.q") || // Added in 9.0
368  Name.startswith("avx512.mask.expand.p") || // Added in 9.0
369  Name.startswith("avx512.mask.compress.b") || // Added in 9.0
370  Name.startswith("avx512.mask.compress.w") || // Added in 9.0
371  Name.startswith("avx512.mask.compress.d") || // Added in 9.0
372  Name.startswith("avx512.mask.compress.q") || // Added in 9.0
373  Name.startswith("avx512.mask.compress.p") || // Added in 9.0
374  Name == "sse42.crc32.64.8" || // Added in 3.4
375  Name.startswith("avx.vbroadcast.s") || // Added in 3.5
376  Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
377  Name.startswith("avx512.mask.palignr.") || // Added in 3.9
378  Name.startswith("avx512.mask.valign.") || // Added in 4.0
379  Name.startswith("sse2.psll.dq") || // Added in 3.7
380  Name.startswith("sse2.psrl.dq") || // Added in 3.7
381  Name.startswith("avx2.psll.dq") || // Added in 3.7
382  Name.startswith("avx2.psrl.dq") || // Added in 3.7
383  Name.startswith("avx512.psll.dq") || // Added in 3.9
384  Name.startswith("avx512.psrl.dq") || // Added in 3.9
385  Name == "sse41.pblendw" || // Added in 3.7
386  Name.startswith("sse41.blendp") || // Added in 3.7
387  Name.startswith("avx.blend.p") || // Added in 3.7
388  Name == "avx2.pblendw" || // Added in 3.7
389  Name.startswith("avx2.pblendd.") || // Added in 3.7
390  Name.startswith("avx.vbroadcastf128") || // Added in 4.0
391  Name == "avx2.vbroadcasti128" || // Added in 3.7
392  Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
393  Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
394  Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
395  Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
396  Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
397  Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
398  Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
399  Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
400  Name == "xop.vpcmov" || // Added in 3.8
401  Name == "xop.vpcmov.256" || // Added in 5.0
402  Name.startswith("avx512.mask.move.s") || // Added in 4.0
403  Name.startswith("avx512.cvtmask2") || // Added in 5.0
404  Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
405  Name.startswith("xop.vprot") || // Added in 8.0
406  Name.startswith("avx512.prol") || // Added in 8.0
407  Name.startswith("avx512.pror") || // Added in 8.0
408  Name.startswith("avx512.mask.prorv.") || // Added in 8.0
409  Name.startswith("avx512.mask.pror.") || // Added in 8.0
410  Name.startswith("avx512.mask.prolv.") || // Added in 8.0
411  Name.startswith("avx512.mask.prol.") || // Added in 8.0
412  Name.startswith("avx512.ptestm") || // Added in 6.0
413  Name.startswith("avx512.ptestnm") || // Added in 6.0
414  Name.startswith("avx512.mask.pavg")) // Added in 6.0
415  return true;
416 
417  return false; // Name matched none of the entries above.
418 }
419 
421  Function *&NewFn) {
422  // Only handle intrinsics that start with "x86.".
423  if (!Name.startswith("x86."))
424  return false;
425  // Remove "x86." prefix; every comparison below is against the remainder.
426  Name = Name.substr(4);
427 
429  NewFn = nullptr; // NOTE(review): the `if` opening this block (listing line 428) is missing from this extract.
430  return true;
431  }
432 
433  if (Name == "rdtscp") { // Added in 8.0
434  // If this intrinsic has 0 operands, it's the new version.
435  if (F->getFunctionType()->getNumParams() == 0)
436  return false;
437 
438  rename(F); // Move the old declaration aside (".old" suffix).
439  NewFn = Intrinsic::getDeclaration(F->getParent(),
440  Intrinsic::x86_rdtscp);
441  return true;
442  }
443 
444  // SSE4.1 ptest functions may have an old signature.
445  if (Name.startswith("sse41.ptest")) { // Added in 3.2
446  if (Name.substr(11) == "c") // 11 is the length of "sse41.ptest".
447  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
448  if (Name.substr(11) == "z")
449  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
450  if (Name.substr(11) == "nzc")
451  return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
452  }
453  // Several blend and other instructions with masks used the wrong number of
454  // bits.
455  if (Name == "sse41.insertps") // Added in 3.6
456  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
457  NewFn);
458  if (Name == "sse41.dppd") // Added in 3.6
459  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
460  NewFn);
461  if (Name == "sse41.dpps") // Added in 3.6
462  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
463  NewFn);
464  if (Name == "sse41.mpsadbw") // Added in 3.6
465  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
466  NewFn);
467  if (Name == "avx.dp.ps.256") // Added in 3.6
468  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
469  NewFn);
470  if (Name == "avx2.mpsadbw") // Added in 3.6
471  return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
472  NewFn);
473  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
474  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
475  NewFn);
476  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
477  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
478  NewFn);
479  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
480  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
481  NewFn);
482  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
483  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
484  NewFn);
485  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
486  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
487  NewFn);
488  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
489  return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
490  NewFn);
491 
492  // frcz.ss/sd may need to have an argument dropped: only the two-argument form is upgraded. Added in 3.2
493  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
494  rename(F);
495  NewFn = Intrinsic::getDeclaration(F->getParent(),
496  Intrinsic::x86_xop_vfrcz_ss);
497  return true;
498  }
499  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
500  rename(F);
501  NewFn = Intrinsic::getDeclaration(F->getParent(),
502  Intrinsic::x86_xop_vfrcz_sd);
503  return true;
504  }
505  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
506  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
507  auto Idx = F->getFunctionType()->getParamType(2); // Type of the third parameter (the index operand).
508  if (Idx->isFPOrFPVectorTy()) {
509  rename(F);
510  unsigned IdxSize = Idx->getPrimitiveSizeInBits(); // Total width of the index operand.
511  unsigned EltSize = Idx->getScalarSizeInBits(); // Width of one element.
512  Intrinsic::ID Permil2ID;
513  if (EltSize == 64 && IdxSize == 128)
514  Permil2ID = Intrinsic::x86_xop_vpermil2pd;
515  else if (EltSize == 32 && IdxSize == 128)
516  Permil2ID = Intrinsic::x86_xop_vpermil2ps;
517  else if (EltSize == 64 && IdxSize == 256)
518  Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
519  else // Every remaining size combination maps to the 256-bit ps variant.
520  Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
521  NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
522  return true;
523  }
524  }
525 
526  if (Name == "seh.recoverfp") { // Mapped to eh_recoverfp; note F is not renamed on this path.
527  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
528  return true;
529  }
530 
531  return false; // No x86 upgrade rule matched this name.
532 }
533 
535  assert(F && "Illegal to upgrade a non-existent Function.");
536 
537  // Quickly eliminate it, if it's not a candidate.
538  StringRef Name = F->getName();
539  if (Name.size() <= 8 || !Name.startswith("llvm."))
540  return false;
541  Name = Name.substr(5); // Strip off "llvm."
542 
543  switch (Name[0]) {
544  default: break;
545  case 'a': {
546  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
547  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
548  F->arg_begin()->getType());
549  return true;
550  }
551  if (Name.startswith("aarch64.neon.frintn")) {
552  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
553  F->arg_begin()->getType());
554  return true;
555  }
556  if (Name.startswith("aarch64.neon.rbit")) {
557  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
558  F->arg_begin()->getType());
559  return true;
560  }
561  if (Name.startswith("arm.neon.vclz")) {
562  Type* args[2] = {
563  F->arg_begin()->getType(),
564  Type::getInt1Ty(F->getContext())
565  };
566  // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
567  // the end of the name. Change name from llvm.arm.neon.vclz.* to
568  // llvm.ctlz.*
569  FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
570  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
571  "llvm.ctlz." + Name.substr(14), F->getParent());
572  return true;
573  }
574  if (Name.startswith("arm.neon.vcnt")) {
575  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
576  F->arg_begin()->getType());
577  return true;
578  }
579  static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
580  if (vldRegex.match(Name)) {
581  auto fArgs = F->getFunctionType()->params();
582  SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
583  // Can't use Intrinsic::getDeclaration here as the return types might
584  // then only be structurally equal.
585  FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
586  StringRef Suffix =
587  F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
588  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
589  "llvm." + Name + "." + Suffix, F->getParent());
590  return true;
591  }
592  static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
593  if (vstRegex.match(Name)) {
594  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
595  Intrinsic::arm_neon_vst2,
596  Intrinsic::arm_neon_vst3,
597  Intrinsic::arm_neon_vst4};
598 
599  static const Intrinsic::ID StoreLaneInts[] = {
600  Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
601  Intrinsic::arm_neon_vst4lane
602  };
603 
604  auto fArgs = F->getFunctionType()->params();
605  Type *Tys[] = {fArgs[0], fArgs[1]};
606  if (Name.find("lane") == StringRef::npos)
607  NewFn = Intrinsic::getDeclaration(F->getParent(),
608  StoreInts[fArgs.size() - 3], Tys);
609  else
610  NewFn = Intrinsic::getDeclaration(F->getParent(),
611  StoreLaneInts[fArgs.size() - 5], Tys);
612  return true;
613  }
614  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
615  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
616  return true;
617  }
618  if (Name.startswith("arm.neon.vqadds.")) {
619  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
620  F->arg_begin()->getType());
621  return true;
622  }
623  if (Name.startswith("arm.neon.vqaddu.")) {
624  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
625  F->arg_begin()->getType());
626  return true;
627  }
628  if (Name.startswith("arm.neon.vqsubs.")) {
629  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
630  F->arg_begin()->getType());
631  return true;
632  }
633  if (Name.startswith("arm.neon.vqsubu.")) {
634  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
635  F->arg_begin()->getType());
636  return true;
637  }
638  if (Name.startswith("aarch64.neon.addp")) {
639  if (F->arg_size() != 2)
640  break; // Invalid IR.
641  VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
642  if (Ty && Ty->getElementType()->isFloatingPointTy()) {
643  NewFn = Intrinsic::getDeclaration(F->getParent(),
644  Intrinsic::aarch64_neon_faddp, Ty);
645  return true;
646  }
647  }
648 
649  // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
650  // respectively
651  if ((Name.startswith("arm.neon.bfdot.") ||
652  Name.startswith("aarch64.neon.bfdot.")) &&
653  Name.endswith("i8")) {
654  Intrinsic::ID IID =
656  .Cases("arm.neon.bfdot.v2f32.v8i8",
657  "arm.neon.bfdot.v4f32.v16i8",
658  Intrinsic::arm_neon_bfdot)
659  .Cases("aarch64.neon.bfdot.v2f32.v8i8",
660  "aarch64.neon.bfdot.v4f32.v16i8",
661  Intrinsic::aarch64_neon_bfdot)
663  if (IID == Intrinsic::not_intrinsic)
664  break;
665 
666  size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
667  assert((OperandWidth == 64 || OperandWidth == 128) &&
668  "Unexpected operand width");
669  LLVMContext &Ctx = F->getParent()->getContext();
670  std::array<Type *, 2> Tys {{
671  F->getReturnType(),
672  FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
673  }};
674  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
675  return true;
676  }
677 
678  // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
679  // and accept v8bf16 instead of v16i8
680  if ((Name.startswith("arm.neon.bfm") ||
681  Name.startswith("aarch64.neon.bfm")) &&
682  Name.endswith(".v4f32.v16i8")) {
683  Intrinsic::ID IID =
685  .Case("arm.neon.bfmmla.v4f32.v16i8",
686  Intrinsic::arm_neon_bfmmla)
687  .Case("arm.neon.bfmlalb.v4f32.v16i8",
688  Intrinsic::arm_neon_bfmlalb)
689  .Case("arm.neon.bfmlalt.v4f32.v16i8",
690  Intrinsic::arm_neon_bfmlalt)
691  .Case("aarch64.neon.bfmmla.v4f32.v16i8",
692  Intrinsic::aarch64_neon_bfmmla)
693  .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
694  Intrinsic::aarch64_neon_bfmlalb)
695  .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
696  Intrinsic::aarch64_neon_bfmlalt)
698  if (IID == Intrinsic::not_intrinsic)
699  break;
700 
701  std::array<Type *, 0> Tys;
702  NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
703  return true;
704  }
705  break;
706  }
707 
708  case 'c': {
709  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
710  rename(F);
711  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
712  F->arg_begin()->getType());
713  return true;
714  }
715  if (Name.startswith("cttz.") && F->arg_size() == 1) {
716  rename(F);
717  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
718  F->arg_begin()->getType());
719  return true;
720  }
721  break;
722  }
723  case 'd': {
724  if (Name == "dbg.value" && F->arg_size() == 4) {
725  rename(F);
726  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
727  return true;
728  }
729  break;
730  }
731  case 'e': {
733  static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
734  if (R.match(Name, &Groups)) {
737  .Case("add", Intrinsic::vector_reduce_add)
738  .Case("mul", Intrinsic::vector_reduce_mul)
739  .Case("and", Intrinsic::vector_reduce_and)
740  .Case("or", Intrinsic::vector_reduce_or)
741  .Case("xor", Intrinsic::vector_reduce_xor)
742  .Case("smax", Intrinsic::vector_reduce_smax)
743  .Case("smin", Intrinsic::vector_reduce_smin)
744  .Case("umax", Intrinsic::vector_reduce_umax)
745  .Case("umin", Intrinsic::vector_reduce_umin)
746  .Case("fmax", Intrinsic::vector_reduce_fmax)
747  .Case("fmin", Intrinsic::vector_reduce_fmin)
749  if (ID != Intrinsic::not_intrinsic) {
750  rename(F);
751  auto Args = F->getFunctionType()->params();
752  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
753  return true;
754  }
755  }
756  static const Regex R2(
757  "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
758  Groups.clear();
759  if (R2.match(Name, &Groups)) {
761  if (Groups[1] == "fadd")
762  ID = Intrinsic::vector_reduce_fadd;
763  if (Groups[1] == "fmul")
764  ID = Intrinsic::vector_reduce_fmul;
765  if (ID != Intrinsic::not_intrinsic) {
766  rename(F);
767  auto Args = F->getFunctionType()->params();
768  Type *Tys[] = {Args[1]};
769  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
770  return true;
771  }
772  }
773  break;
774  }
775  case 'i':
776  case 'l': {
777  bool IsLifetimeStart = Name.startswith("lifetime.start");
778  if (IsLifetimeStart || Name.startswith("invariant.start")) {
779  Intrinsic::ID ID = IsLifetimeStart ?
780  Intrinsic::lifetime_start : Intrinsic::invariant_start;
781  auto Args = F->getFunctionType()->params();
782  Type* ObjectPtr[1] = {Args[1]};
783  if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
784  rename(F);
785  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
786  return true;
787  }
788  }
789 
790  bool IsLifetimeEnd = Name.startswith("lifetime.end");
791  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
792  Intrinsic::ID ID = IsLifetimeEnd ?
793  Intrinsic::lifetime_end : Intrinsic::invariant_end;
794 
795  auto Args = F->getFunctionType()->params();
796  Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
797  if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
798  rename(F);
799  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
800  return true;
801  }
802  }
803  if (Name.startswith("invariant.group.barrier")) {
804  // Rename invariant.group.barrier to launder.invariant.group
805  auto Args = F->getFunctionType()->params();
806  Type* ObjectPtr[1] = {Args[0]};
807  rename(F);
808  NewFn = Intrinsic::getDeclaration(F->getParent(),
809  Intrinsic::launder_invariant_group, ObjectPtr);
810  return true;
811 
812  }
813 
814  break;
815  }
816  case 'm': {
817  if (Name.startswith("masked.load.")) {
818  Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
819  if (F->getName() !=
820  Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
821  rename(F);
822  NewFn = Intrinsic::getDeclaration(F->getParent(),
823  Intrinsic::masked_load,
824  Tys);
825  return true;
826  }
827  }
828  if (Name.startswith("masked.store.")) {
829  auto Args = F->getFunctionType()->params();
830  Type *Tys[] = { Args[0], Args[1] };
831  if (F->getName() !=
832  Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
833  rename(F);
834  NewFn = Intrinsic::getDeclaration(F->getParent(),
835  Intrinsic::masked_store,
836  Tys);
837  return true;
838  }
839  }
840  // Renaming gather/scatter intrinsics with no address space overloading
841  // to the new overload which includes an address space
842  if (Name.startswith("masked.gather.")) {
843  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
844  if (F->getName() !=
845  Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
846  rename(F);
847  NewFn = Intrinsic::getDeclaration(F->getParent(),
848  Intrinsic::masked_gather, Tys);
849  return true;
850  }
851  }
852  if (Name.startswith("masked.scatter.")) {
853  auto Args = F->getFunctionType()->params();
854  Type *Tys[] = {Args[0], Args[1]};
855  if (F->getName() !=
856  Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
857  rename(F);
858  NewFn = Intrinsic::getDeclaration(F->getParent(),
859  Intrinsic::masked_scatter, Tys);
860  return true;
861  }
862  }
863  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
864  // alignment parameter to embedding the alignment as an attribute of
865  // the pointer args.
866  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
867  rename(F);
868  // Get the types of dest, src, and len
869  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
870  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
871  ParamTypes);
872  return true;
873  }
874  if (Name.startswith("memmove.") && F->arg_size() == 5) {
875  rename(F);
876  // Get the types of dest, src, and len
877  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
878  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
879  ParamTypes);
880  return true;
881  }
882  if (Name.startswith("memset.") && F->arg_size() == 5) {
883  rename(F);
884  // Get the types of dest, and len
885  const auto *FT = F->getFunctionType();
886  Type *ParamTypes[2] = {
887  FT->getParamType(0), // Dest
888  FT->getParamType(2) // len
889  };
890  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
891  ParamTypes);
892  return true;
893  }
894  break;
895  }
896  case 'n': {
897  if (Name.startswith("nvvm.")) {
898  Name = Name.substr(5);
899 
900  // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
902  .Cases("brev32", "brev64", Intrinsic::bitreverse)
903  .Case("clz.i", Intrinsic::ctlz)
904  .Case("popc.i", Intrinsic::ctpop)
906  if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
907  NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
908  {F->getReturnType()});
909  return true;
910  }
911 
912  // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
913  // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
914  //
915  // TODO: We could add lohi.i2d.
916  bool Expand = StringSwitch<bool>(Name)
917  .Cases("abs.i", "abs.ll", true)
918  .Cases("clz.ll", "popc.ll", "h2f", true)
919  .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
920  .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
921  .StartsWith("atomic.load.add.f32.p", true)
922  .StartsWith("atomic.load.add.f64.p", true)
923  .Default(false);
924  if (Expand) {
925  NewFn = nullptr;
926  return true;
927  }
928  }
929  break;
930  }
931  case 'o':
932  // We only need to change the name to match the mangling including the
933  // address space.
934  if (Name.startswith("objectsize.")) {
935  Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
936  if (F->arg_size() == 2 || F->arg_size() == 3 ||
937  F->getName() !=
938  Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
939  rename(F);
940  NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
941  Tys);
942  return true;
943  }
944  }
945  break;
946 
947  case 'p':
948  if (Name == "prefetch") {
949  // Handle address space overloading.
950  Type *Tys[] = {F->arg_begin()->getType()};
951  if (F->getName() !=
952  Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
953  rename(F);
954  NewFn =
955  Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
956  return true;
957  }
958  } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
959  rename(F);
960  NewFn = Intrinsic::getDeclaration(F->getParent(),
961  Intrinsic::ptr_annotation,
962  F->arg_begin()->getType());
963  return true;
964  }
965  break;
966 
967  case 's':
968  if (Name == "stackprotectorcheck") {
969  NewFn = nullptr;
970  return true;
971  }
972  break;
973 
974  case 'v': {
975  if (Name == "var.annotation" && F->arg_size() == 4) {
976  rename(F);
977  NewFn = Intrinsic::getDeclaration(F->getParent(),
978  Intrinsic::var_annotation);
979  return true;
980  }
981  break;
982  }
983 
984  case 'x':
985  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
986  return true;
987  }
988  // Remangle our intrinsic since we upgrade the mangling
990  if (Result != None) {
991  NewFn = Result.getValue();
992  return true;
993  }
994 
995  // This may not belong here. This function is effectively being overloaded
996  // to both detect an intrinsic which needs upgrading, and to provide the
997  // upgraded form of the intrinsic. We should perhaps have two separate
998  // functions for this.
999  return false;
1000 }
1001 
// Wrapper around UpgradeIntrinsicFunction1. NewFn is reset to null before the
// check, so after the call it is non-null only if UpgradeIntrinsicFunction1
// stored a replacement in it. The value returned is exactly what
// UpgradeIntrinsicFunction1 reported.
1003  NewFn = nullptr;
1004  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
      // A replacement must be a different Function object than the input.
1005  assert(F != NewFn && "Intrinsic function upgraded to the same function");
1006 
1007  // Upgrade intrinsic attributes. This does not change the function.
      // The attributes are refreshed on the replacement when there is one,
      // otherwise on F itself, and only when getIntrinsicID() is non-zero.
1008  if (NewFn)
1009  F = NewFn;
1010  if (Intrinsic::ID id = F->getIntrinsicID())
1011  F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1012  return Upgraded;
1013 }
1014 
// Rebuilds an initialized llvm.global_ctors / llvm.global_dtors variable whose
// array elements are two-field structs as an array of three-field structs, the
// added third field having type i8*. Returns nullptr when GV is not such a
// variable; otherwise returns a newly allocated GlobalVariable with the
// rebuilt initializer and GV's linkage and name.
1016  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1017  GV->getName() == "llvm.global_dtors")) ||
1018  !GV->hasInitializer())
1019  return nullptr;
      // Only an array of structs with exactly two fields needs upgrading.
1020  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1021  if (!ATy)
1022  return nullptr;
1023  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1024  if (!STy || STy->getNumElements() != 2)
1025  return nullptr;
1026 
1027  LLVMContext &C = GV->getContext();
1028  IRBuilder<> IRB(C);
      // New element type: the two existing field types followed by i8*.
1029  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1030  IRB.getInt8PtrTy());
1031  Constant *Init = GV->getInitializer();
1032  unsigned N = Init->getNumOperands();
1033  std::vector<Constant *> NewCtors(N);
      // Copy fields 0 and 1 of every existing entry into a new three-field entry.
      // NOTE(review): the final ConstantStruct::get argument (the value of the
      // new third field) is not visible in this listing; presumably a null i8*
      // -- confirm against the upstream source.
1034  for (unsigned i = 0; i != N; ++i) {
1035  auto Ctor = cast<Constant>(Init->getOperand(i));
1036  NewCtors[i] = ConstantStruct::get(
1037  EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1039  }
1040  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1041 
      // The second constructor argument (false) is passed through as-is; the
      // result is created without a parent module argument.
1042  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1043  NewInit, GV->getName());
1044 }
1045 
1046 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1047 // to byte shuffles.
// Op is the vector being shifted and Shift the byte count. The returned value
// has Op's original type. Within every 16-byte lane, bytes move towards higher
// indices by Shift positions and zero bytes fill the vacated low positions.
1049  Value *Op, unsigned Shift) {
1050  auto *ResultTy = cast<FixedVectorType>(Op->getType());
      // Number of bytes in the vector: 8 per source element.
1051  unsigned NumElts = ResultTy->getNumElements() * 8;
1052 
1053  // Bitcast from a 64-bit element type to a byte element type.
1054  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1055  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1056 
1057  // We'll be shuffling in zeroes.
1058  Value *Res = Constant::getNullValue(VecTy);
1059 
1060  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1061  // we'll just return the zero vector.
1062  if (Shift < 16) {
      // 64 entries: enough for four 16-byte lanes.
1063  int Idxs[64];
1064  // 256/512-bit version is split into 2/4 16-byte lanes.
      // The shuffle operands are (zero vector, Op), so an index below NumElts
      // selects a zero byte and an index of NumElts or more selects a byte of
      // Op. Idx starts out pointing at byte (i - Shift) of Op; when i < Shift
      // it falls below NumElts and is rebased into the zero vector instead.
1065  for (unsigned l = 0; l != NumElts; l += 16)
1066  for (unsigned i = 0; i != 16; ++i) {
1067  unsigned Idx = NumElts + i - Shift;
1068  if (Idx < NumElts)
1069  Idx -= NumElts - 16; // end of lane, switch operand.
1070  Idxs[l + i] = Idx + l;
1071  }
1072 
1073  Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1074  }
1075 
1076  // Bitcast back to a 64-bit element type.
1077  return Builder.CreateBitCast(Res, ResultTy, "cast");
1078 }
1079 
1080 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1081 // to byte shuffles.
// Op is the vector being shifted and Shift the byte count. The returned value
// has Op's original type. Within every 16-byte lane, bytes move towards lower
// indices by Shift positions and zero bytes fill the vacated high positions.
1083  unsigned Shift) {
1084  auto *ResultTy = cast<FixedVectorType>(Op->getType());
      // Number of bytes in the vector: 8 per source element.
1085  unsigned NumElts = ResultTy->getNumElements() * 8;
1086 
1087  // Bitcast from a 64-bit element type to a byte element type.
1088  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1089  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1090 
1091  // We'll be shuffling in zeroes.
1092  Value *Res = Constant::getNullValue(VecTy);
1093 
1094  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1095  // we'll just return the zero vector.
1096  if (Shift < 16) {
      // 64 entries: enough for four 16-byte lanes.
1097  int Idxs[64];
1098  // 256/512-bit version is split into 2/4 16-byte lanes.
      // The shuffle operands are (Op, zero vector), so an index below NumElts
      // selects a byte of Op and an index of NumElts or more selects a zero
      // byte. Idx starts out as byte (i + Shift) of the lane; once that runs
      // past the lane's 16 bytes it is rebased into the zero vector.
1099  for (unsigned l = 0; l != NumElts; l += 16)
1100  for (unsigned i = 0; i != 16; ++i) {
1101  unsigned Idx = i + Shift;
1102  if (Idx >= 16)
1103  Idx += NumElts - 16; // end of lane, switch operand.
1104  Idxs[l + i] = Idx + l;
1105  }
1106 
1107  Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1108  }
1109 
1110  // Bitcast back to a 64-bit element type.
1111  return Builder.CreateBitCast(Res, ResultTy, "cast");
1112 }
1113 
// Reinterprets an integer Mask as a vector of i1 via a bitcast to MaskTy, and,
// when NumElts is 4 or fewer, keeps only the first NumElts lanes of it.
// NumElts must be a power of two (asserted).
1115  unsigned NumElts) {
1116  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
      // NOTE(review): the start of the statement that declares MaskTy is not
      // visible in this listing. The visible arguments are the i1 type and the
      // bit width of Mask's integer type (the cast<> requires an IntegerType).
1118  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1119  Mask = Builder.CreateBitCast(Mask, MaskTy);
1120 
1121  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1122  // i8 and we need to extract down to the right number of elements.
1123  if (NumElts <= 4) {
1124  int Indices[4];
      // Identity indices 0..NumElts-1: the shuffle just drops the upper lanes.
1125  for (unsigned i = 0; i != NumElts; ++i)
1126  Indices[i] = i;
1127  Mask = Builder.CreateShuffleVector(
1128  Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1129  }
1130 
1131  return Mask;
1132 }
1133 
// Returns Op0 directly when Mask is a constant all-ones value; otherwise emits
// a select that picks between Op0 and Op1 under Mask.
1135  Value *Op0, Value *Op1) {
1136  // If the mask is all ones just emit the first operation.
1137  if (const auto *C = dyn_cast<Constant>(Mask))
1138  if (C->isAllOnesValue())
1139  return Op0;
1140 
      // NOTE(review): the start of this statement is not visible in this
      // listing. The visible argument is the element count of Op0, which must
      // be a fixed vector; presumably Mask is converted to an i1 vector of that
      // length here -- confirm against the upstream source.
1142  cast<FixedVectorType>(Op0->getType())->getNumElements());
1143  return Builder.CreateSelect(Mask, Op0, Op1);
1144 }
1145 
// Select between Op0 and Op1 driven by a single mask bit. Returns Op0 directly
// when Mask is a constant all-ones value; otherwise only element 0 of the mask,
// viewed as a vector of i1, is used as the select condition.
1147  Value *Op0, Value *Op1) {
1148  // If the mask is all ones just emit the first operation.
1149  if (const auto *C = dyn_cast<Constant>(Mask))
1150  if (C->isAllOnesValue())
1151  return Op0;
1152 
      // View the integer mask as one i1 per bit, then take element 0.
1153  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1154  Mask->getType()->getIntegerBitWidth());
1155  Mask = Builder.CreateBitCast(Mask, MaskTy);
1156  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1157  return Builder.CreateSelect(Mask, Op0, Op1);
1158 }
1159 
1160 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1161 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1162 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Shift must be a ConstantInt (enforced by the cast<> below). The result is a
// shuffle of Op1 and Op0 merged with Passthru under Mask, or a zero vector when
// the shift amount is 32 or more.
1164  Value *Op1, Value *Shift,
1165  Value *Passthru, Value *Mask,
1166  bool IsVALIGN) {
1167  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1168 
1169  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1170  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1171  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1172  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1173 
1174  // Mask the immediate for VALIGN.
1175  if (IsVALIGN)
1176  ShiftVal &= (NumElts - 1);
1177 
1178  // If palignr is shifting the pair of vectors more than the size of two
1179  // lanes, emit zero.
1180  if (ShiftVal >= 32)
1181  return llvm::Constant::getNullValue(Op0->getType());
1182 
1183  // If palignr is shifting the pair of input vectors more than one lane,
1184  // but less than two lanes, convert to shifting in zeroes.
      // Op0 takes over Op1's role and a zero vector takes over Op0's, with the
      // shift amount reduced by one lane (16).
1185  if (ShiftVal > 16) {
1186  ShiftVal -= 16;
1187  Op1 = Op0;
1188  Op0 = llvm::Constant::getNullValue(Op0->getType());
1189  }
1190 
      // The shuffle below takes (Op1, Op0), so an index below NumElts selects
      // from Op1 and an index of NumElts or more selects from Op0. Only the
      // first NumElts entries of Indices are passed to the shuffle.
1191  int Indices[64];
1192  // 256-bit palignr operates on 128-bit lanes so we need to handle that
1193  for (unsigned l = 0; l < NumElts; l += 16) {
1194  for (unsigned i = 0; i != 16; ++i) {
1195  unsigned Idx = ShiftVal + i;
1196  if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1197  Idx += NumElts - 16; // End of lane, switch operand.
1198  Indices[l + i] = Idx + l;
1199  }
1200  }
1201 
1202  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1203  makeArrayRef(Indices, NumElts),
1204  "palignr");
1205 
      // Merge the shuffled value with Passthru under Mask.
1206  return EmitX86Select(Builder, Mask, Align, Passthru);
1207 }
1208 
// Rewrites the masked call CI as a call to one of the unmasked
// x86_avx512_vpermi2var_* intrinsics followed by a mask select.
//   ZeroMask  - use an all-zero passthru instead of CI's operand 1.
//   IndexForm - when false, operands 0 and 1 are swapped before the call.
// The intrinsic is chosen from CI's result type: total vector width, element
// width and, for 32/64-bit elements, whether the type is floating point.
1210  bool ZeroMask, bool IndexForm) {
1211  Type *Ty = CI.getType();
1212  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1213  unsigned EltWidth = Ty->getScalarSizeInBits();
1214  bool IsFloat = Ty->isFPOrFPVectorTy();
1215  Intrinsic::ID IID;
1216  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1217  IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1218  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1219  IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1220  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1221  IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1222  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1223  IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1224  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1225  IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1226  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1227  IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1228  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1229  IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1230  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1231  IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1232  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1233  IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1234  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1235  IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1236  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1237  IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1238  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1239  IID = Intrinsic::x86_avx512_vpermi2var_q_512;
      // 16-bit and 8-bit elements are selected by width alone.
1240  else if (VecWidth == 128 && EltWidth == 16)
1241  IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1242  else if (VecWidth == 256 && EltWidth == 16)
1243  IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1244  else if (VecWidth == 512 && EltWidth == 16)
1245  IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1246  else if (VecWidth == 128 && EltWidth == 8)
1247  IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1248  else if (VecWidth == 256 && EltWidth == 8)
1249  IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1250  else if (VecWidth == 512 && EltWidth == 8)
1251  IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1252  else
1253  llvm_unreachable("Unexpected intrinsic");
1254 
1255  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1256  CI.getArgOperand(2) };
1257 
1258  // If this isn't index form we need to swap operand 0 and 1.
1259  if (!IndexForm)
1260  std::swap(Args[0], Args[1]);
1261 
1262  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1263  Args);
      // The passthru is read from CI's own operand 1, not from the (possibly
      // swapped) Args array, and is bitcast to the result type.
1264  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1265  : Builder.CreateBitCast(CI.getArgOperand(1),
1266  Ty);
      // Operand 3 of CI is the mask.
1267  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1268 }
1269 
// Replaces CI with a call to the two-operand intrinsic IID, overloaded on CI's
// result type, applied to CI's first two operands. When CI has four arguments,
// operands 2 and 3 are treated as passthru and mask and the result is merged
// through EmitX86Select.
1271  Intrinsic::ID IID) {
1272  Type *Ty = CI.getType();
1273  Value *Op0 = CI.getOperand(0);
1274  Value *Op1 = CI.getOperand(1);
1275  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1276  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1277 
1278  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1279  Value *VecSrc = CI.getOperand(2);
1280  Value *Mask = CI.getOperand(3);
1281  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1282  }
1283  return Res;
1284 }
1285 
// Lowers a rotate to a funnel shift (fshr when IsRotateRight, otherwise fshl)
// that uses CI's operand 0 as both inputs and operand 1 as the amount. When CI
// has four arguments, operands 2 and 3 are treated as passthru and mask.
1287  bool IsRotateRight) {
1288  Type *Ty = CI.getType();
1289  Value *Src = CI.getArgOperand(0);
1290  Value *Amt = CI.getArgOperand(1);
1291 
1292  // Amount may be scalar immediate, in which case create a splat vector.
1293  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1294  // we only care about the lowest log2 bits anyway.
1295  if (Amt->getType() != Ty) {
1296  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
      // Unsigned cast (isSigned = false) to the element type, then splat.
1297  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1298  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1299  }
1300 
1301  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1302  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
      // Passing Src as both funnel-shift inputs makes the shift a rotate.
1303  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1304 
1305  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1306  Value *VecSrc = CI.getOperand(2);
1307  Value *Mask = CI.getOperand(3);
1308  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1309  }
1310  return Res;
1311 }
1312 
1313 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1314  bool IsSigned) {
1315  Type *Ty = CI.getType();
1316  Value *LHS = CI.getArgOperand(0);
1317  Value *RHS = CI.getArgOperand(1);
1318 
1319  CmpInst::Predicate Pred;
1320  switch (Imm) {
1321  case 0x0:
1322  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1323  break;
1324  case 0x1:
1325  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1326  break;
1327  case 0x2:
1328  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1329  break;
1330  case 0x3:
1331  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1332  break;
1333  case 0x4:
1334  Pred = ICmpInst::ICMP_EQ;
1335  break;
1336  case 0x5:
1337  Pred = ICmpInst::ICMP_NE;
1338  break;
1339  case 0x6:
1340  return Constant::getNullValue(Ty); // FALSE
1341  case 0x7:
1342  return Constant::getAllOnesValue(Ty); // TRUE
1343  default:
1344  llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1345  }
1346 
1347  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1348  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1349  return Ext;
1350 }
1351 
// Lowers a two-input shift to a funnel shift (fshr when IsShiftRight, otherwise
// fshl) over CI's operands 0 and 1 with operand 2 as the amount. With four or
// more arguments the result is merged under the mask held in the last operand.
1353  bool IsShiftRight, bool ZeroMask) {
1354  Type *Ty = CI.getType();
1355  Value *Op0 = CI.getArgOperand(0);
1356  Value *Op1 = CI.getArgOperand(1);
1357  Value *Amt = CI.getArgOperand(2);
1358 
      // For the right shift the two inputs are passed in the opposite order.
1359  if (IsShiftRight)
1360  std::swap(Op0, Op1);
1361 
1362  // Amount may be scalar immediate, in which case create a splat vector.
1363  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1364  // we only care about the lowest log2 bits anyway.
1365  if (Amt->getType() != Ty) {
1366  unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1367  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1368  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1369  }
1370 
1371  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1372  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1373  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1374 
1375  unsigned NumArgs = CI.getNumArgOperands();
1376  if (NumArgs >= 4) { // For masked intrinsics.
      // Passthru: operand 3 when there are five arguments; otherwise zero if
      // ZeroMask is set, else CI's operand 0 (read from CI, unaffected by the
      // swap above).
1377  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1378  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1379  CI.getArgOperand(0);
1380  Value *Mask = CI.getOperand(NumArgs - 1);
1381  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1382  }
1383  return Res;
1384 }
1385 
// Emits a store of Data through Ptr under Mask. A constant all-ones mask gives
// a plain aligned store; any other mask gives a masked store.
// Aligned selects the alignment: the byte size of Data's type when true,
// 1 when false.
1387  Value *Ptr, Value *Data, Value *Mask,
1388  bool Aligned) {
1389  // Cast the pointer to the right type.
1390  Ptr = Builder.CreateBitCast(Ptr,
1391  llvm::PointerType::getUnqual(Data->getType()));
1392  const Align Alignment =
1393  Aligned
1394  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1395  : Align(1);
1396 
1397  // If the mask is all ones just emit a regular store.
1398  if (const auto *C = dyn_cast<Constant>(Mask))
1399  if (C->isAllOnesValue())
1400  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1401 
1402  // Convert the mask from an integer type to a vector of i1.
1403  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1404  Mask = getX86MaskVec(Builder, Mask, NumElts);
1405  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1406 }
1407 
// Emits a load through Ptr under Mask. The loaded type is Passthru's type. A
// constant all-ones mask gives a plain aligned load; any other mask gives a
// masked load that uses Passthru for the masked-off lanes.
// Aligned selects the alignment: the byte size of the loaded type when true,
// 1 when false.
1409  Value *Ptr, Value *Passthru, Value *Mask,
1410  bool Aligned) {
1411  Type *ValTy = Passthru->getType();
1412  // Cast the pointer to the right type.
1413  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1414  const Align Alignment =
1415  Aligned
1416  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1417  8)
1418  : Align(1);
1419 
1420  // If the mask is all ones just emit a regular load.
1421  if (const auto *C = dyn_cast<Constant>(Mask))
1422  if (C->isAllOnesValue())
1423  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1424 
1425  // Convert the mask from an integer type to a vector of i1.
1426  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1427  Mask = getX86MaskVec(Builder, Mask, NumElts);
1428  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1429 }
1430 
// Calls F on CI's operand 0 with a constant-false i1 as the second argument.
// When CI has three arguments, operand 2 is the mask and operand 1 the
// passthru for a select over the result.
// NOTE(review): the line that declares F is not visible in this listing, so
// which function is called (and how Ty is used) cannot be confirmed here.
1432  Type *Ty = CI.getType();
1433  Value *Op0 = CI.getArgOperand(0);
1435  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1436  if (CI.getNumArgOperands() == 3)
1437  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1438  return Res;
1439 }
1440 
1441 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1442  Type *Ty = CI.getType();
1443 
1444  // Arguments have a vXi32 type so cast to vXi64.
1445  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1446  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1447 
1448  if (IsSigned) {
1449  // Shift left then arithmetic shift right.
1450  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1451  LHS = Builder.CreateShl(LHS, ShiftAmt);
1452  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1453  RHS = Builder.CreateShl(RHS, ShiftAmt);
1454  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1455  } else {
1456  // Clear the upper bits.
1457  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1458  LHS = Builder.CreateAnd(LHS, Mask);
1459  RHS = Builder.CreateAnd(RHS, Mask);
1460  }
1461 
1462  Value *Res = Builder.CreateMul(LHS, RHS);
1463 
1464  if (CI.getNumArgOperands() == 4)
1465  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1466 
1467  return Res;
1468 }
1469 
1470 // Applies a mask to a vector of i1 values and makes sure the result is at
// least 8 bits wide. The vector is returned bitcast to an integer of
// max(NumElts, 8) bits. Mask may be null, in which case no masking is done.
1472  Value *Mask) {
1473  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1474  if (Mask) {
      // The AND is skipped only for a constant all-ones mask.
1475  const auto *C = dyn_cast<Constant>(Mask);
1476  if (!C || !C->isAllOnesValue())
1477  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1478  }
1479 
1480  if (NumElts < 8) {
      // Widen to 8 lanes: the first NumElts indices keep Vec's lanes, the
      // remaining indices are all >= NumElts and so select lanes of the
      // shuffle's second operand.
      // NOTE(review): that second operand is on a line not visible in this
      // listing; presumably a zero vector -- confirm against upstream.
1481  int Indices[8];
1482  for (unsigned i = 0; i != NumElts; ++i)
1483  Indices[i] = i;
1484  for (unsigned i = NumElts; i != 8; ++i)
1485  Indices[i] = NumElts + i % NumElts;
1486  Vec = Builder.CreateShuffleVector(Vec,
1488  Indices);
1489  }
1490  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1491 }
1492 
// Lowers a masked integer compare. CC selects the comparison between CI's
// operands 0 and 1 (0 EQ, 1 LT, 2 LE, 4 NE, 5 GE, 6 GT; the ordered ones are
// signed or unsigned according to Signed). CC 3 and CC 7 produce a constant
// vector instead. The i1 vector is then combined with the mask taken from CI's
// last operand by ApplyX86MaskOn1BitsVec.
1494  unsigned CC, bool Signed) {
1495  Value *Op0 = CI.getArgOperand(0);
1496  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1497 
1498  Value *Cmp;
1499  if (CC == 3) {
      // Constant all-false result.
1500  Cmp = Constant::getNullValue(
1501  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1502  } else if (CC == 7) {
      // NOTE(review): the start of this statement is not visible in this
      // listing; presumably Cmp is set to an all-ones i1 vector -- confirm
      // against the upstream source.
1504  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1505  } else {
1506  ICmpInst::Predicate Pred;
1507  switch (CC) {
1508  default: llvm_unreachable("Unknown condition code");
1509  case 0: Pred = ICmpInst::ICMP_EQ; break;
1510  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1511  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1512  case 4: Pred = ICmpInst::ICMP_NE; break;
1513  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1514  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1515  }
1516  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1517  }
1518 
      // The mask is always the final argument of the call.
1519  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1520 
1521  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1522 }
1523 
1524 // Replace a masked intrinsic with an older unmasked intrinsic.
// The unmasked intrinsic IID is called on CI's operands 0 and 1; the result is
// then merged with operand 2 (passthru) under operand 3 (mask).
1526  Intrinsic::ID IID) {
1527  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1528  Value *Rep = Builder.CreateCall(Intrin,
1529  { CI.getArgOperand(0), CI.getArgOperand(1) });
1530  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1531 }
1532 
// Lowers a masked scalar move. Bit 0 of the mask (operand 3) chooses between
// element 0 of operand 1 (bit set) and element 0 of operand 2 (bit clear); the
// chosen scalar is inserted into element 0 of operand 0, which is returned.
1534  Value* A = CI.getArgOperand(0);
1535  Value* B = CI.getArgOperand(1);
1536  Value* Src = CI.getArgOperand(2);
1537  Value* Mask = CI.getArgOperand(3);
1538 
      // Isolate bit 0 of the mask with an 8-bit constant 1 and test it.
1539  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1540  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1541  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1542  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1543  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1544  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1545 }
1546 
1547 
// Expands the integer mask in CI's operand 0 into CI's vector result type: the
// mask is converted to an i1 vector with one lane per result element and each
// lane is sign-extended to the result element width.
1549  Value* Op = CI.getArgOperand(0);
1550  Type* ReturnOp = CI.getType();
1551  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1552  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1553  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1554 }
1555 
1556 // Replace intrinsic with unmasked version and a select.
     //
     // Name is matched (after its first 12 characters are dropped) against a
     // list of known intrinsic name prefixes / exact names. On a match the
     // unmasked intrinsic ID is chosen from the total bit width and the scalar
     // bit width of CI's result type, a call to it is emitted, and the result
     // is passed through EmitX86Select using CI's last operand in the mask
     // position and its second-to-last operand as the other value. The final
     // value is stored in Rep and true is returned. If Name matches nothing,
     // false is returned and Rep is left untouched.
     // NOTE(review): the first line of the signature (listing line 1557) is
     // not present in this listing.
1558  CallInst &CI, Value *&Rep) {
1559  Name = Name.substr(12); // Remove avx512.mask.
1560 
1561  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1562  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1563  Intrinsic::ID IID;
      // Each branch below either assigns IID or hits llvm_unreachable for a
      // width combination that is not expected for that name.
1564  if (Name.startswith("max.p")) {
1565  if (VecWidth == 128 && EltWidth == 32)
1566  IID = Intrinsic::x86_sse_max_ps;
1567  else if (VecWidth == 128 && EltWidth == 64)
1568  IID = Intrinsic::x86_sse2_max_pd;
1569  else if (VecWidth == 256 && EltWidth == 32)
1570  IID = Intrinsic::x86_avx_max_ps_256;
1571  else if (VecWidth == 256 && EltWidth == 64)
1572  IID = Intrinsic::x86_avx_max_pd_256;
1573  else
1574  llvm_unreachable("Unexpected intrinsic");
1575  } else if (Name.startswith("min.p")) {
1576  if (VecWidth == 128 && EltWidth == 32)
1577  IID = Intrinsic::x86_sse_min_ps;
1578  else if (VecWidth == 128 && EltWidth == 64)
1579  IID = Intrinsic::x86_sse2_min_pd;
1580  else if (VecWidth == 256 && EltWidth == 32)
1581  IID = Intrinsic::x86_avx_min_ps_256;
1582  else if (VecWidth == 256 && EltWidth == 64)
1583  IID = Intrinsic::x86_avx_min_pd_256;
1584  else
1585  llvm_unreachable("Unexpected intrinsic");
1586  } else if (Name.startswith("pshuf.b.")) {
1587  if (VecWidth == 128)
1588  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1589  else if (VecWidth == 256)
1590  IID = Intrinsic::x86_avx2_pshuf_b;
1591  else if (VecWidth == 512)
1592  IID = Intrinsic::x86_avx512_pshuf_b_512;
1593  else
1594  llvm_unreachable("Unexpected intrinsic");
1595  } else if (Name.startswith("pmul.hr.sw.")) {
1596  if (VecWidth == 128)
1597  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1598  else if (VecWidth == 256)
1599  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1600  else if (VecWidth == 512)
1601  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1602  else
1603  llvm_unreachable("Unexpected intrinsic");
1604  } else if (Name.startswith("pmulh.w.")) {
1605  if (VecWidth == 128)
1606  IID = Intrinsic::x86_sse2_pmulh_w;
1607  else if (VecWidth == 256)
1608  IID = Intrinsic::x86_avx2_pmulh_w;
1609  else if (VecWidth == 512)
1610  IID = Intrinsic::x86_avx512_pmulh_w_512;
1611  else
1612  llvm_unreachable("Unexpected intrinsic");
1613  } else if (Name.startswith("pmulhu.w.")) {
1614  if (VecWidth == 128)
1615  IID = Intrinsic::x86_sse2_pmulhu_w;
1616  else if (VecWidth == 256)
1617  IID = Intrinsic::x86_avx2_pmulhu_w;
1618  else if (VecWidth == 512)
1619  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1620  else
1621  llvm_unreachable("Unexpected intrinsic");
1622  } else if (Name.startswith("pmaddw.d.")) {
1623  if (VecWidth == 128)
1624  IID = Intrinsic::x86_sse2_pmadd_wd;
1625  else if (VecWidth == 256)
1626  IID = Intrinsic::x86_avx2_pmadd_wd;
1627  else if (VecWidth == 512)
1628  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1629  else
1630  llvm_unreachable("Unexpected intrinsic");
1631  } else if (Name.startswith("pmaddubs.w.")) {
1632  if (VecWidth == 128)
1633  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1634  else if (VecWidth == 256)
1635  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1636  else if (VecWidth == 512)
1637  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1638  else
1639  llvm_unreachable("Unexpected intrinsic");
1640  } else if (Name.startswith("packsswb.")) {
1641  if (VecWidth == 128)
1642  IID = Intrinsic::x86_sse2_packsswb_128;
1643  else if (VecWidth == 256)
1644  IID = Intrinsic::x86_avx2_packsswb;
1645  else if (VecWidth == 512)
1646  IID = Intrinsic::x86_avx512_packsswb_512;
1647  else
1648  llvm_unreachable("Unexpected intrinsic");
1649  } else if (Name.startswith("packssdw.")) {
1650  if (VecWidth == 128)
1651  IID = Intrinsic::x86_sse2_packssdw_128;
1652  else if (VecWidth == 256)
1653  IID = Intrinsic::x86_avx2_packssdw;
1654  else if (VecWidth == 512)
1655  IID = Intrinsic::x86_avx512_packssdw_512;
1656  else
1657  llvm_unreachable("Unexpected intrinsic");
1658  } else if (Name.startswith("packuswb.")) {
1659  if (VecWidth == 128)
1660  IID = Intrinsic::x86_sse2_packuswb_128;
1661  else if (VecWidth == 256)
1662  IID = Intrinsic::x86_avx2_packuswb;
1663  else if (VecWidth == 512)
1664  IID = Intrinsic::x86_avx512_packuswb_512;
1665  else
1666  llvm_unreachable("Unexpected intrinsic");
1667  } else if (Name.startswith("packusdw.")) {
1668  if (VecWidth == 128)
1669  IID = Intrinsic::x86_sse41_packusdw;
1670  else if (VecWidth == 256)
1671  IID = Intrinsic::x86_avx2_packusdw;
1672  else if (VecWidth == 512)
1673  IID = Intrinsic::x86_avx512_packusdw_512;
1674  else
1675  llvm_unreachable("Unexpected intrinsic");
1676  } else if (Name.startswith("vpermilvar.")) {
1677  if (VecWidth == 128 && EltWidth == 32)
1678  IID = Intrinsic::x86_avx_vpermilvar_ps;
1679  else if (VecWidth == 128 && EltWidth == 64)
1680  IID = Intrinsic::x86_avx_vpermilvar_pd;
1681  else if (VecWidth == 256 && EltWidth == 32)
1682  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1683  else if (VecWidth == 256 && EltWidth == 64)
1684  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1685  else if (VecWidth == 512 && EltWidth == 32)
1686  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1687  else if (VecWidth == 512 && EltWidth == 64)
1688  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1689  else
1690  llvm_unreachable("Unexpected intrinsic");
      // The conversion intrinsics below are matched by exact name, so no
      // width check is needed.
1691  } else if (Name == "cvtpd2dq.256") {
1692  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1693  } else if (Name == "cvtpd2ps.256") {
1694  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1695  } else if (Name == "cvttpd2dq.256") {
1696  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1697  } else if (Name == "cvttps2dq.128") {
1698  IID = Intrinsic::x86_sse2_cvttps2dq;
1699  } else if (Name == "cvttps2dq.256") {
1700  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1701  } else if (Name.startswith("permvar.")) {
      // For 32- and 64-bit elements the float and integer forms map to
      // different intrinsics; 16- and 8-bit elements have a single form.
1702  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1703  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1704  IID = Intrinsic::x86_avx2_permps;
1705  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1706  IID = Intrinsic::x86_avx2_permd;
1707  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1708  IID = Intrinsic::x86_avx512_permvar_df_256;
1709  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1710  IID = Intrinsic::x86_avx512_permvar_di_256;
1711  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1712  IID = Intrinsic::x86_avx512_permvar_sf_512;
1713  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1714  IID = Intrinsic::x86_avx512_permvar_si_512;
1715  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1716  IID = Intrinsic::x86_avx512_permvar_df_512;
1717  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1718  IID = Intrinsic::x86_avx512_permvar_di_512;
1719  else if (VecWidth == 128 && EltWidth == 16)
1720  IID = Intrinsic::x86_avx512_permvar_hi_128;
1721  else if (VecWidth == 256 && EltWidth == 16)
1722  IID = Intrinsic::x86_avx512_permvar_hi_256;
1723  else if (VecWidth == 512 && EltWidth == 16)
1724  IID = Intrinsic::x86_avx512_permvar_hi_512;
1725  else if (VecWidth == 128 && EltWidth == 8)
1726  IID = Intrinsic::x86_avx512_permvar_qi_128;
1727  else if (VecWidth == 256 && EltWidth == 8)
1728  IID = Intrinsic::x86_avx512_permvar_qi_256;
1729  else if (VecWidth == 512 && EltWidth == 8)
1730  IID = Intrinsic::x86_avx512_permvar_qi_512;
1731  else
1732  llvm_unreachable("Unexpected intrinsic");
1733  } else if (Name.startswith("dbpsadbw.")) {
1734  if (VecWidth == 128)
1735  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1736  else if (VecWidth == 256)
1737  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1738  else if (VecWidth == 512)
1739  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1740  else
1741  llvm_unreachable("Unexpected intrinsic");
1742  } else if (Name.startswith("pmultishift.qb.")) {
1743  if (VecWidth == 128)
1744  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1745  else if (VecWidth == 256)
1746  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1747  else if (VecWidth == 512)
1748  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1749  else
1750  llvm_unreachable("Unexpected intrinsic");
1751  } else if (Name.startswith("conflict.")) {
      // Name[9] is the character right after "conflict."; 'd' and 'q' select
      // the _d_ and _q_ intrinsic variants respectively.
1752  if (Name[9] == 'd' && VecWidth == 128)
1753  IID = Intrinsic::x86_avx512_conflict_d_128;
1754  else if (Name[9] == 'd' && VecWidth == 256)
1755  IID = Intrinsic::x86_avx512_conflict_d_256;
1756  else if (Name[9] == 'd' && VecWidth == 512)
1757  IID = Intrinsic::x86_avx512_conflict_d_512;
1758  else if (Name[9] == 'q' && VecWidth == 128)
1759  IID = Intrinsic::x86_avx512_conflict_q_128;
1760  else if (Name[9] == 'q' && VecWidth == 256)
1761  IID = Intrinsic::x86_avx512_conflict_q_256;
1762  else if (Name[9] == 'q' && VecWidth == 512)
1763  IID = Intrinsic::x86_avx512_conflict_q_512;
1764  else
1765  llvm_unreachable("Unexpected intrinsic");
1766  } else if (Name.startswith("pavg.")) {
      // Name[5] is the character right after "pavg."; 'b' and 'w' select the
      // _b and _w intrinsic variants respectively.
1767  if (Name[5] == 'b' && VecWidth == 128)
1768  IID = Intrinsic::x86_sse2_pavg_b;
1769  else if (Name[5] == 'b' && VecWidth == 256)
1770  IID = Intrinsic::x86_avx2_pavg_b;
1771  else if (Name[5] == 'b' && VecWidth == 512)
1772  IID = Intrinsic::x86_avx512_pavg_b_512;
1773  else if (Name[5] == 'w' && VecWidth == 128)
1774  IID = Intrinsic::x86_sse2_pavg_w;
1775  else if (Name[5] == 'w' && VecWidth == 256)
1776  IID = Intrinsic::x86_avx2_pavg_w;
1777  else if (Name[5] == 'w' && VecWidth == 512)
1778  IID = Intrinsic::x86_avx512_pavg_w_512;
1779  else
1780  llvm_unreachable("Unexpected intrinsic");
1781  } else
      // Not one of the names handled here; the caller has to deal with it.
1782  return false;
1783 
      // NOTE(review): the declaration of Args (listing line 1784) is not
      // present in this listing; presumably it is initialised from CI's
      // argument operands - confirm.
      // Drop the last two entries so they are not passed to the unmasked call.
1785  Args.pop_back();
1786  Args.pop_back();
1787  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1788  Args);
      // The last operand of CI goes in the mask position and the one before
      // it is the alternative value given to EmitX86Select.
1789  unsigned NumArgs = CI.getNumArgOperands();
1790  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1791  CI.getArgOperand(NumArgs - 2));
1792  return true;
1793 }
1794 
1795 /// Upgrade comment in call to inline asm that represents an objc retain release
1796 /// marker.
1797 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1798  size_t Pos;
1799  if (AsmStr->find("mov\tfp") == 0 &&
1800  AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1801  (Pos = AsmStr->find("# marker")) != std::string::npos) {
1802  AsmStr->replace(Pos, 1, ";");
1803  }
1804 }
1805 
1806 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1807 /// provided to seamlessly integrate with existing context.
1809  Function *F = CI->getCalledFunction();
1810  LLVMContext &C = CI->getContext();
1812  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1813 
1814  assert(F && "Intrinsic call is not direct?");
1815 
1816  if (!NewFn) {
1817  // Get the Function's name.
1818  StringRef Name = F->getName();
1819 
1820  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1821  Name = Name.substr(5);
1822 
1823  bool IsX86 = Name.startswith("x86.");
1824  if (IsX86)
1825  Name = Name.substr(4);
1826  bool IsNVVM = Name.startswith("nvvm.");
1827  if (IsNVVM)
1828  Name = Name.substr(5);
1829 
1830  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1831  Module *M = F->getParent();
1833  Elts.push_back(
1835  MDNode *Node = MDNode::get(C, Elts);
1836 
1837  Value *Arg0 = CI->getArgOperand(0);
1838  Value *Arg1 = CI->getArgOperand(1);
1839 
1840  // Nontemporal (unaligned) store of the 0'th element of the float/double
1841  // vector.
1842  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1843  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1844  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1845  Value *Extract =
1846  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1847 
1848  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1849  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1850 
1851  // Remove intrinsic.
1852  CI->eraseFromParent();
1853  return;
1854  }
1855 
1856  if (IsX86 && (Name.startswith("avx.movnt.") ||
1857  Name.startswith("avx512.storent."))) {
1858  Module *M = F->getParent();
1860  Elts.push_back(
1862  MDNode *Node = MDNode::get(C, Elts);
1863 
1864  Value *Arg0 = CI->getArgOperand(0);
1865  Value *Arg1 = CI->getArgOperand(1);
1866 
1867  // Convert the type of the pointer to a pointer to the stored type.
1868  Value *BC = Builder.CreateBitCast(Arg0,
1870  "cast");
1871  StoreInst *SI = Builder.CreateAlignedStore(
1872  Arg1, BC,
1873  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1874  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1875 
1876  // Remove intrinsic.
1877  CI->eraseFromParent();
1878  return;
1879  }
1880 
1881  if (IsX86 && Name == "sse2.storel.dq") {
1882  Value *Arg0 = CI->getArgOperand(0);
1883  Value *Arg1 = CI->getArgOperand(1);
1884 
1885  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
1886  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1887  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1888  Value *BC = Builder.CreateBitCast(Arg0,
1890  "cast");
1891  Builder.CreateAlignedStore(Elt, BC, Align(1));
1892 
1893  // Remove intrinsic.
1894  CI->eraseFromParent();
1895  return;
1896  }
1897 
1898  if (IsX86 && (Name.startswith("sse.storeu.") ||
1899  Name.startswith("sse2.storeu.") ||
1900  Name.startswith("avx.storeu."))) {
1901  Value *Arg0 = CI->getArgOperand(0);
1902  Value *Arg1 = CI->getArgOperand(1);
1903 
1904  Arg0 = Builder.CreateBitCast(Arg0,
1906  "cast");
1907  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1908 
1909  // Remove intrinsic.
1910  CI->eraseFromParent();
1911  return;
1912  }
1913 
1914  if (IsX86 && Name == "avx512.mask.store.ss") {
1915  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1917  Mask, false);
1918 
1919  // Remove intrinsic.
1920  CI->eraseFromParent();
1921  return;
1922  }
1923 
1924  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1925  // "avx512.mask.storeu." or "avx512.mask.store."
1926  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1928  CI->getArgOperand(2), Aligned);
1929 
1930  // Remove intrinsic.
1931  CI->eraseFromParent();
1932  return;
1933  }
1934 
1935  Value *Rep;
1936  // Upgrade packed integer vector compare intrinsics to compare instructions.
1937  if (IsX86 && (Name.startswith("sse2.pcmp") ||
1938  Name.startswith("avx2.pcmp"))) {
1939  // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1940  bool CmpEq = Name[9] == 'e';
1941  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1942  CI->getArgOperand(0), CI->getArgOperand(1));
1943  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1944  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1945  Type *ExtTy = Type::getInt32Ty(C);
1946  if (CI->getOperand(0)->getType()->isIntegerTy(8))
1947  ExtTy = Type::getInt64Ty(C);
1948  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1949  ExtTy->getPrimitiveSizeInBits();
1950  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1951  Rep = Builder.CreateVectorSplat(NumElts, Rep);
1952  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1953  Name == "sse2.sqrt.sd")) {
1954  Value *Vec = CI->getArgOperand(0);
1955  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1956  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1957  Intrinsic::sqrt, Elt0->getType());
1958  Elt0 = Builder.CreateCall(Intr, Elt0);
1959  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1960  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1961  Name.startswith("sse2.sqrt.p") ||
1962  Name.startswith("sse.sqrt.p"))) {
1963  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1964  Intrinsic::sqrt,
1965  CI->getType()),
1966  {CI->getArgOperand(0)});
1967  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1968  if (CI->getNumArgOperands() == 4 &&
1969  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1970  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1971  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1972  : Intrinsic::x86_avx512_sqrt_pd_512;
1973 
1974  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1975  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1976  IID), Args);
1977  } else {
1978  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1979  Intrinsic::sqrt,
1980  CI->getType()),
1981  {CI->getArgOperand(0)});
1982  }
1983  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1984  CI->getArgOperand(1));
1985  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1986  Name.startswith("avx512.ptestnm"))) {
1987  Value *Op0 = CI->getArgOperand(0);
1988  Value *Op1 = CI->getArgOperand(1);
1989  Value *Mask = CI->getArgOperand(2);
1990  Rep = Builder.CreateAnd(Op0, Op1);
1991  llvm::Type *Ty = Op0->getType();
1992  Value *Zero = llvm::Constant::getNullValue(Ty);
1993  ICmpInst::Predicate Pred =
1994  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1995  Rep = Builder.CreateICmp(Pred, Rep, Zero);
1996  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1997  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1998  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
1999  ->getNumElements();
2000  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2001  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2002  CI->getArgOperand(1));
2003  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2004  unsigned NumElts = CI->getType()->getScalarSizeInBits();
2005  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2006  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2007  int Indices[64];
2008  for (unsigned i = 0; i != NumElts; ++i)
2009  Indices[i] = i;
2010 
2011  // First extract half of each vector. This gives better codegen than
2012  // doing it in a single shuffle.
2013  LHS = Builder.CreateShuffleVector(LHS, LHS,
2014  makeArrayRef(Indices, NumElts / 2));
2015  RHS = Builder.CreateShuffleVector(RHS, RHS,
2016  makeArrayRef(Indices, NumElts / 2));
2017  // Concat the vectors.
2018  // NOTE: Operands have to be swapped to match intrinsic definition.
2019  Rep = Builder.CreateShuffleVector(RHS, LHS,
2020  makeArrayRef(Indices, NumElts));
2021  Rep = Builder.CreateBitCast(Rep, CI->getType());
2022  } else if (IsX86 && Name == "avx512.kand.w") {
2023  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2024  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2025  Rep = Builder.CreateAnd(LHS, RHS);
2026  Rep = Builder.CreateBitCast(Rep, CI->getType());
2027  } else if (IsX86 && Name == "avx512.kandn.w") {
2028  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2029  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2030  LHS = Builder.CreateNot(LHS);
2031  Rep = Builder.CreateAnd(LHS, RHS);
2032  Rep = Builder.CreateBitCast(Rep, CI->getType());
2033  } else if (IsX86 && Name == "avx512.kor.w") {
2034  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2035  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2036  Rep = Builder.CreateOr(LHS, RHS);
2037  Rep = Builder.CreateBitCast(Rep, CI->getType());
2038  } else if (IsX86 && Name == "avx512.kxor.w") {
2039  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2040  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2041  Rep = Builder.CreateXor(LHS, RHS);
2042  Rep = Builder.CreateBitCast(Rep, CI->getType());
2043  } else if (IsX86 && Name == "avx512.kxnor.w") {
2044  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2045  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2046  LHS = Builder.CreateNot(LHS);
2047  Rep = Builder.CreateXor(LHS, RHS);
2048  Rep = Builder.CreateBitCast(Rep, CI->getType());
2049  } else if (IsX86 && Name == "avx512.knot.w") {
2050  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2051  Rep = Builder.CreateNot(Rep);
2052  Rep = Builder.CreateBitCast(Rep, CI->getType());
2053  } else if (IsX86 &&
2054  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2055  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2056  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2057  Rep = Builder.CreateOr(LHS, RHS);
2058  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2059  Value *C;
2060  if (Name[14] == 'c')
2061  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2062  else
2063  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2064  Rep = Builder.CreateICmpEQ(Rep, C);
2065  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2066  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2067  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2068  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2069  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2070  Type *I32Ty = Type::getInt32Ty(C);
2071  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2072  ConstantInt::get(I32Ty, 0));
2073  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2074  ConstantInt::get(I32Ty, 0));
2075  Value *EltOp;
2076  if (Name.contains(".add."))
2077  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2078  else if (Name.contains(".sub."))
2079  EltOp = Builder.CreateFSub(Elt0, Elt1);
2080  else if (Name.contains(".mul."))
2081  EltOp = Builder.CreateFMul(Elt0, Elt1);
2082  else
2083  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2084  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2085  ConstantInt::get(I32Ty, 0));
2086  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2087  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2088  bool CmpEq = Name[16] == 'e';
2089  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2090  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2091  Type *OpTy = CI->getArgOperand(0)->getType();
2092  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2093  Intrinsic::ID IID;
2094  switch (VecWidth) {
2095  default: llvm_unreachable("Unexpected intrinsic");
2096  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2097  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2098  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2099  }
2100 
2101  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2102  { CI->getOperand(0), CI->getArgOperand(1) });
2103  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2104  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2105  Type *OpTy = CI->getArgOperand(0)->getType();
2106  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2107  unsigned EltWidth = OpTy->getScalarSizeInBits();
2108  Intrinsic::ID IID;
2109  if (VecWidth == 128 && EltWidth == 32)
2110  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2111  else if (VecWidth == 256 && EltWidth == 32)
2112  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2113  else if (VecWidth == 512 && EltWidth == 32)
2114  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2115  else if (VecWidth == 128 && EltWidth == 64)
2116  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2117  else if (VecWidth == 256 && EltWidth == 64)
2118  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2119  else if (VecWidth == 512 && EltWidth == 64)
2120  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2121  else
2122  llvm_unreachable("Unexpected intrinsic");
2123 
2124  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2125  { CI->getOperand(0), CI->getArgOperand(1) });
2126  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2127  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2129  Type *OpTy = Args[0]->getType();
2130  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2131  unsigned EltWidth = OpTy->getScalarSizeInBits();
2132  Intrinsic::ID IID;
2133  if (VecWidth == 128 && EltWidth == 32)
2134  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2135  else if (VecWidth == 256 && EltWidth == 32)
2136  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2137  else if (VecWidth == 512 && EltWidth == 32)
2138  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2139  else if (VecWidth == 128 && EltWidth == 64)
2140  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2141  else if (VecWidth == 256 && EltWidth == 64)
2142  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2143  else if (VecWidth == 512 && EltWidth == 64)
2144  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2145  else
2146  llvm_unreachable("Unexpected intrinsic");
2147 
2149  if (VecWidth == 512)
2150  std::swap(Mask, Args.back());
2151  Args.push_back(Mask);
2152 
2153  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2154  Args);
2155  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2156  // Integer compare intrinsics.
2157  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2158  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2159  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2160  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2161  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2162  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2163  Name.startswith("avx512.cvtw2mask.") ||
2164  Name.startswith("avx512.cvtd2mask.") ||
2165  Name.startswith("avx512.cvtq2mask."))) {
2166  Value *Op = CI->getArgOperand(0);
2167  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2168  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2169  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2170  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2171  Name == "ssse3.pabs.w.128" ||
2172  Name == "ssse3.pabs.d.128" ||
2173  Name.startswith("avx2.pabs") ||
2174  Name.startswith("avx512.mask.pabs"))) {
2175  Rep = upgradeAbs(Builder, *CI);
2176  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2177  Name == "sse2.pmaxs.w" ||
2178  Name == "sse41.pmaxsd" ||
2179  Name.startswith("avx2.pmaxs") ||
2180  Name.startswith("avx512.mask.pmaxs"))) {
2182  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2183  Name == "sse41.pmaxuw" ||
2184  Name == "sse41.pmaxud" ||
2185  Name.startswith("avx2.pmaxu") ||
2186  Name.startswith("avx512.mask.pmaxu"))) {
2188  } else if (IsX86 && (Name == "sse41.pminsb" ||
2189  Name == "sse2.pmins.w" ||
2190  Name == "sse41.pminsd" ||
2191  Name.startswith("avx2.pmins") ||
2192  Name.startswith("avx512.mask.pmins"))) {
2194  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2195  Name == "sse41.pminuw" ||
2196  Name == "sse41.pminud" ||
2197  Name.startswith("avx2.pminu") ||
2198  Name.startswith("avx512.mask.pminu"))) {
2200  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2201  Name == "avx2.pmulu.dq" ||
2202  Name == "avx512.pmulu.dq.512" ||
2203  Name.startswith("avx512.mask.pmulu.dq."))) {
2204  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2205  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2206  Name == "avx2.pmul.dq" ||
2207  Name == "avx512.pmul.dq.512" ||
2208  Name.startswith("avx512.mask.pmul.dq."))) {
2209  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2210  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2211  Name == "sse2.cvtsi2sd" ||
2212  Name == "sse.cvtsi642ss" ||
2213  Name == "sse2.cvtsi642sd")) {
2214  Rep = Builder.CreateSIToFP(
2215  CI->getArgOperand(1),
2216  cast<VectorType>(CI->getType())->getElementType());
2217  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2218  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2219  Rep = Builder.CreateUIToFP(
2220  CI->getArgOperand(1),
2221  cast<VectorType>(CI->getType())->getElementType());
2222  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2223  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2224  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2225  Rep = Builder.CreateFPExt(
2226  Rep, cast<VectorType>(CI->getType())->getElementType());
2227  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2228  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2229  Name == "sse2.cvtdq2ps" ||
2230  Name == "avx.cvtdq2.pd.256" ||
2231  Name == "avx.cvtdq2.ps.256" ||
2232  Name.startswith("avx512.mask.cvtdq2pd.") ||
2233  Name.startswith("avx512.mask.cvtudq2pd.") ||
2234  Name.startswith("avx512.mask.cvtdq2ps.") ||
2235  Name.startswith("avx512.mask.cvtudq2ps.") ||
2236  Name.startswith("avx512.mask.cvtqq2pd.") ||
2237  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2238  Name == "avx512.mask.cvtqq2ps.256" ||
2239  Name == "avx512.mask.cvtqq2ps.512" ||
2240  Name == "avx512.mask.cvtuqq2ps.256" ||
2241  Name == "avx512.mask.cvtuqq2ps.512" ||
2242  Name == "sse2.cvtps2pd" ||
2243  Name == "avx.cvt.ps2.pd.256" ||
2244  Name == "avx512.mask.cvtps2pd.128" ||
2245  Name == "avx512.mask.cvtps2pd.256")) {
2246  auto *DstTy = cast<FixedVectorType>(CI->getType());
2247  Rep = CI->getArgOperand(0);
2248  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2249 
2250  unsigned NumDstElts = DstTy->getNumElements();
2251  if (NumDstElts < SrcTy->getNumElements()) {
2252  assert(NumDstElts == 2 && "Unexpected vector size");
2253  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2254  }
2255 
2256  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2257  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2258  if (IsPS2PD)
2259  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2260  else if (CI->getNumArgOperands() == 4 &&
2261  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2262  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2263  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2264  : Intrinsic::x86_avx512_sitofp_round;
2266  { DstTy, SrcTy });
2267  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2268  } else {
2269  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2270  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2271  }
2272 
2273  if (CI->getNumArgOperands() >= 3)
2274  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2275  CI->getArgOperand(1));
2276  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2277  Name.startswith("vcvtph2ps."))) {
2278  auto *DstTy = cast<FixedVectorType>(CI->getType());
2279  Rep = CI->getArgOperand(0);
2280  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2281  unsigned NumDstElts = DstTy->getNumElements();
2282  if (NumDstElts != SrcTy->getNumElements()) {
2283  assert(NumDstElts == 4 && "Unexpected vector size");
2284  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2285  }
2286  Rep = Builder.CreateBitCast(
2287  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2288  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2289  if (CI->getNumArgOperands() >= 3)
2290  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2291  CI->getArgOperand(1));
2292  } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2293  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2294  CI->getArgOperand(1), CI->getArgOperand(2),
2295  /*Aligned*/false);
2296  } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2297  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2298  CI->getArgOperand(1),CI->getArgOperand(2),
2299  /*Aligned*/true);
2300  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2301  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2302  Type *PtrTy = ResultTy->getElementType();
2303 
2304  // Cast the pointer to element type.
2305  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2307 
2308  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2309  ResultTy->getNumElements());
2310 
2311  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2312  Intrinsic::masked_expandload,
2313  ResultTy);
2314  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2315  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2316  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2317  Type *PtrTy = ResultTy->getElementType();
2318 
2319  // Cast the pointer to element type.
2320  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2322 
2323  Value *MaskVec =
2325  cast<FixedVectorType>(ResultTy)->getNumElements());
2326 
2327  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2328  Intrinsic::masked_compressstore,
2329  ResultTy);
2330  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2331  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2332  Name.startswith("avx512.mask.expand."))) {
2333  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2334 
2335  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2336  ResultTy->getNumElements());
2337 
2338  bool IsCompress = Name[12] == 'c';
2339  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2340  : Intrinsic::x86_avx512_mask_expand;
2341  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2342  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2343  MaskVec });
2344  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2345  bool IsSigned;
2346  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2347  Name.endswith("uq"))
2348  IsSigned = false;
2349  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2350  Name.endswith("q"))
2351  IsSigned = true;
2352  else
2353  llvm_unreachable("Unknown suffix");
2354 
2355  unsigned Imm;
2356  if (CI->getNumArgOperands() == 3) {
2357  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2358  } else {
2359  Name = Name.substr(9); // strip off "xop.vpcom"
2360  if (Name.startswith("lt"))
2361  Imm = 0;
2362  else if (Name.startswith("le"))
2363  Imm = 1;
2364  else if (Name.startswith("gt"))
2365  Imm = 2;
2366  else if (Name.startswith("ge"))
2367  Imm = 3;
2368  else if (Name.startswith("eq"))
2369  Imm = 4;
2370  else if (Name.startswith("ne"))
2371  Imm = 5;
2372  else if (Name.startswith("false"))
2373  Imm = 6;
2374  else if (Name.startswith("true"))
2375  Imm = 7;
2376  else
2377  llvm_unreachable("Unknown condition");
2378  }
2379 
2380  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2381  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2382  Value *Sel = CI->getArgOperand(2);
2383  Value *NotSel = Builder.CreateNot(Sel);
2384  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2385  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2386  Rep = Builder.CreateOr(Sel0, Sel1);
2387  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2388  Name.startswith("avx512.prol") ||
2389  Name.startswith("avx512.mask.prol"))) {
2390  Rep = upgradeX86Rotate(Builder, *CI, false);
2391  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2392  Name.startswith("avx512.mask.pror"))) {
2393  Rep = upgradeX86Rotate(Builder, *CI, true);
2394  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2395  Name.startswith("avx512.mask.vpshld") ||
2396  Name.startswith("avx512.maskz.vpshld"))) {
2397  bool ZeroMask = Name[11] == 'z';
2398  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2399  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2400  Name.startswith("avx512.mask.vpshrd") ||
2401  Name.startswith("avx512.maskz.vpshrd"))) {
2402  bool ZeroMask = Name[11] == 'z';
2403  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2404  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2405  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2406  Intrinsic::x86_sse42_crc32_32_8);
2407  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2408  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2409  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2410  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2411  Name.startswith("avx512.vbroadcast.s"))) {
2412  // Replace broadcasts with a series of insertelements.
2413  auto *VecTy = cast<FixedVectorType>(CI->getType());
2414  Type *EltTy = VecTy->getElementType();
2415  unsigned EltNum = VecTy->getNumElements();
2416  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2417  EltTy->getPointerTo());
2418  Value *Load = Builder.CreateLoad(EltTy, Cast);
2419  Type *I32Ty = Type::getInt32Ty(C);
2420  Rep = UndefValue::get(VecTy);
2421  for (unsigned I = 0; I < EltNum; ++I)
2422  Rep = Builder.CreateInsertElement(Rep, Load,
2423  ConstantInt::get(I32Ty, I));
2424  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2425  Name.startswith("sse41.pmovzx") ||
2426  Name.startswith("avx2.pmovsx") ||
2427  Name.startswith("avx2.pmovzx") ||
2428  Name.startswith("avx512.mask.pmovsx") ||
2429  Name.startswith("avx512.mask.pmovzx"))) {
2430  auto *DstTy = cast<FixedVectorType>(CI->getType());
2431  unsigned NumDstElts = DstTy->getNumElements();
2432 
2433  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2434  SmallVector<int, 8> ShuffleMask(NumDstElts);
2435  for (unsigned i = 0; i != NumDstElts; ++i)
2436  ShuffleMask[i] = i;
2437 
2438  Value *SV =
2439  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2440 
2441  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2442  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2443  : Builder.CreateZExt(SV, DstTy);
2444  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2445  if (CI->getNumArgOperands() == 3)
2446  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2447  CI->getArgOperand(1));
2448  } else if (Name == "avx512.mask.pmov.qd.256" ||
2449  Name == "avx512.mask.pmov.qd.512" ||
2450  Name == "avx512.mask.pmov.wb.256" ||
2451  Name == "avx512.mask.pmov.wb.512") {
2452  Type *Ty = CI->getArgOperand(1)->getType();
2453  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2454  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2455  CI->getArgOperand(1));
2456  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2457  Name == "avx2.vbroadcasti128")) {
2458  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2459  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2460  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2461  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2462  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2464  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2465  if (NumSrcElts == 2)
2466  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2467  else
2468  Rep = Builder.CreateShuffleVector(
2469  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2470  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2471  Name.startswith("avx512.mask.shuf.f"))) {
2472  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2473  Type *VT = CI->getType();
2474  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2475  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2476  unsigned ControlBitsMask = NumLanes - 1;
2477  unsigned NumControlBits = NumLanes / 2;
2478  SmallVector<int, 8> ShuffleMask(0);
2479 
2480  for (unsigned l = 0; l != NumLanes; ++l) {
2481  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2482  // We actually need the other source.
2483  if (l >= NumLanes / 2)
2484  LaneMask += NumLanes;
2485  for (unsigned i = 0; i != NumElementsInLane; ++i)
2486  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2487  }
2488  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2489  CI->getArgOperand(1), ShuffleMask);
2490  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2491  CI->getArgOperand(3));
2492  }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2493  Name.startswith("avx512.mask.broadcasti"))) {
2494  unsigned NumSrcElts =
2495  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2496  ->getNumElements();
2497  unsigned NumDstElts =
2498  cast<FixedVectorType>(CI->getType())->getNumElements();
2499 
2500  SmallVector<int, 8> ShuffleMask(NumDstElts);
2501  for (unsigned i = 0; i != NumDstElts; ++i)
2502  ShuffleMask[i] = i % NumSrcElts;
2503 
2504  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2505  CI->getArgOperand(0),
2506  ShuffleMask);
2507  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2508  CI->getArgOperand(1));
2509  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2510  Name.startswith("avx2.vbroadcast") ||
2511  Name.startswith("avx512.pbroadcast") ||
2512  Name.startswith("avx512.mask.broadcast.s"))) {
2513  // Replace vp?broadcasts with a vector shuffle.
2514  Value *Op = CI->getArgOperand(0);
2515  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2516  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2519  Rep = Builder.CreateShuffleVector(Op, M);
2520 
2521  if (CI->getNumArgOperands() == 3)
2522  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2523  CI->getArgOperand(1));
2524  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2525  Name.startswith("avx2.padds.") ||
2526  Name.startswith("avx512.padds.") ||
2527  Name.startswith("avx512.mask.padds."))) {
2528  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2529  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2530  Name.startswith("avx2.psubs.") ||
2531  Name.startswith("avx512.psubs.") ||
2532  Name.startswith("avx512.mask.psubs."))) {
2533  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2534  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2535  Name.startswith("avx2.paddus.") ||
2536  Name.startswith("avx512.mask.paddus."))) {
2537  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2538  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2539  Name.startswith("avx2.psubus.") ||
2540  Name.startswith("avx512.mask.psubus."))) {
2541  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2542  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2544  CI->getArgOperand(1),
2545  CI->getArgOperand(2),
2546  CI->getArgOperand(3),
2547  CI->getArgOperand(4),
2548  false);
2549  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2551  CI->getArgOperand(1),
2552  CI->getArgOperand(2),
2553  CI->getArgOperand(3),
2554  CI->getArgOperand(4),
2555  true);
2556  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2557  Name == "avx2.psll.dq")) {
2558  // 128/256-bit shift left specified in bits.
2559  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2561  Shift / 8); // Shift is in bits.
2562  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2563  Name == "avx2.psrl.dq")) {
2564  // 128/256-bit shift right specified in bits.
2565  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2567  Shift / 8); // Shift is in bits.
2568  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2569  Name == "avx2.psll.dq.bs" ||
2570  Name == "avx512.psll.dq.512")) {
2571  // 128/256/512-bit shift left specified in bytes.
2572  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2574  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2575  Name == "avx2.psrl.dq.bs" ||
2576  Name == "avx512.psrl.dq.512")) {
2577  // 128/256/512-bit shift right specified in bytes.
2578  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2580  } else if (IsX86 && (Name == "sse41.pblendw" ||
2581  Name.startswith("sse41.blendp") ||
2582  Name.startswith("avx.blend.p") ||
2583  Name == "avx2.pblendw" ||
2584  Name.startswith("avx2.pblendd."))) {
2585  Value *Op0 = CI->getArgOperand(0);
2586  Value *Op1 = CI->getArgOperand(1);
2587  unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2588  auto *VecTy = cast<FixedVectorType>(CI->getType());
2589  unsigned NumElts = VecTy->getNumElements();
2590 
2591  SmallVector<int, 16> Idxs(NumElts);
2592  for (unsigned i = 0; i != NumElts; ++i)
2593  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2594 
2595  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2596  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2597  Name == "avx2.vinserti128" ||
2598  Name.startswith("avx512.mask.insert"))) {
2599  Value *Op0 = CI->getArgOperand(0);
2600  Value *Op1 = CI->getArgOperand(1);
2601  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2602  unsigned DstNumElts =
2603  cast<FixedVectorType>(CI->getType())->getNumElements();
2604  unsigned SrcNumElts =
2605  cast<FixedVectorType>(Op1->getType())->getNumElements();
2606  unsigned Scale = DstNumElts / SrcNumElts;
2607 
2608  // Mask off the high bits of the immediate value; hardware ignores those.
2609  Imm = Imm % Scale;
2610 
2611  // Extend the second operand into a vector the size of the destination.
2612  SmallVector<int, 8> Idxs(DstNumElts);
2613  for (unsigned i = 0; i != SrcNumElts; ++i)
2614  Idxs[i] = i;
2615  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2616  Idxs[i] = SrcNumElts;
2617  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2618 
2619  // Insert the second operand into the first operand.
2620 
2621  // Note that there is no guarantee that instruction lowering will actually
2622  // produce a vinsertf128 instruction for the created shuffles. In
2623  // particular, the 0 immediate case involves no lane changes, so it can
2624  // be handled as a blend.
2625 
2626  // Example of shuffle mask for 32-bit elements:
2627  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2628  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2629 
2630  // First fill with identity mask.
2631  for (unsigned i = 0; i != DstNumElts; ++i)
2632  Idxs[i] = i;
2633  // Then replace the elements where we need to insert.
2634  for (unsigned i = 0; i != SrcNumElts; ++i)
2635  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2636  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2637 
2638  // If the intrinsic has a mask operand, handle that.
2639  if (CI->getNumArgOperands() == 5)
2640  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2641  CI->getArgOperand(3));
2642  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2643  Name == "avx2.vextracti128" ||
2644  Name.startswith("avx512.mask.vextract"))) {
2645  Value *Op0 = CI->getArgOperand(0);
2646  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2647  unsigned DstNumElts =
2648  cast<FixedVectorType>(CI->getType())->getNumElements();
2649  unsigned SrcNumElts =
2650  cast<FixedVectorType>(Op0->getType())->getNumElements();
2651  unsigned Scale = SrcNumElts / DstNumElts;
2652 
2653  // Mask off the high bits of the immediate value; hardware ignores those.
2654  Imm = Imm % Scale;
2655 
2656  // Get indexes for the subvector of the input vector.
2657  SmallVector<int, 8> Idxs(DstNumElts);
2658  for (unsigned i = 0; i != DstNumElts; ++i) {
2659  Idxs[i] = i + (Imm * DstNumElts);
2660  }
2661  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2662 
2663  // If the intrinsic has a mask operand, handle that.
2664  if (CI->getNumArgOperands() == 4)
2665  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2666  CI->getArgOperand(2));
2667  } else if (!IsX86 && Name == "stackprotectorcheck") {
2668  Rep = nullptr;
2669  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2670  Name.startswith("avx512.mask.perm.di."))) {
2671  Value *Op0 = CI->getArgOperand(0);
2672  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2673  auto *VecTy = cast<FixedVectorType>(CI->getType());
2674  unsigned NumElts = VecTy->getNumElements();
2675 
2676  SmallVector<int, 8> Idxs(NumElts);
2677  for (unsigned i = 0; i != NumElts; ++i)
2678  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2679 
2680  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2681 
2682  if (CI->getNumArgOperands() == 4)
2683  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2684  CI->getArgOperand(2));
2685  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2686  Name == "avx2.vperm2i128")) {
2687  // The immediate permute control byte looks like this:
2688  // [1:0] - select 128 bits from sources for low half of destination
2689  // [2] - ignore
2690  // [3] - zero low half of destination
2691  // [5:4] - select 128 bits from sources for high half of destination
2692  // [6] - ignore
2693  // [7] - zero high half of destination
2694 
2695  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2696 
2697  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2698  unsigned HalfSize = NumElts / 2;
2699  SmallVector<int, 8> ShuffleMask(NumElts);
2700 
2701  // Determine which operand(s) are actually in use for this instruction.
2702  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2703  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2704 
2705  // If needed, replace operands based on zero mask.
2706  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2707  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2708 
2709  // Permute low half of result.
2710  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2711  for (unsigned i = 0; i < HalfSize; ++i)
2712  ShuffleMask[i] = StartIndex + i;
2713 
2714  // Permute high half of result.
2715  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2716  for (unsigned i = 0; i < HalfSize; ++i)
2717  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2718 
2719  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2720 
2721  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2722  Name == "sse2.pshuf.d" ||
2723  Name.startswith("avx512.mask.vpermil.p") ||
2724  Name.startswith("avx512.mask.pshuf.d."))) {
2725  Value *Op0 = CI->getArgOperand(0);
2726  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2727  auto *VecTy = cast<FixedVectorType>(CI->getType());
2728  unsigned NumElts = VecTy->getNumElements();
2729  // Calculate the size of each index in the immediate.
2730  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2731  unsigned IdxMask = ((1 << IdxSize) - 1);
2732 
2733  SmallVector<int, 8> Idxs(NumElts);
2734  // Lookup the bits for this element, wrapping around the immediate every
2735  // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2736  // to offset by the first index of each group.
2737  for (unsigned i = 0; i != NumElts; ++i)
2738  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2739 
2740  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2741 
2742  if (CI->getNumArgOperands() == 4)
2743  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2744  CI->getArgOperand(2));
2745  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2746  Name.startswith("avx512.mask.pshufl.w."))) {
2747  Value *Op0 = CI->getArgOperand(0);
2748  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2749  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2750 
2751  SmallVector<int, 16> Idxs(NumElts);
2752  for (unsigned l = 0; l != NumElts; l += 8) {
2753  for (unsigned i = 0; i != 4; ++i)
2754  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2755  for (unsigned i = 4; i != 8; ++i)
2756  Idxs[i + l] = i + l;
2757  }
2758 
2759  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2760 
2761  if (CI->getNumArgOperands() == 4)
2762  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2763  CI->getArgOperand(2));
2764  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2765  Name.startswith("avx512.mask.pshufh.w."))) {
2766  Value *Op0 = CI->getArgOperand(0);
2767  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2768  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2769 
2770  SmallVector<int, 16> Idxs(NumElts);
2771  for (unsigned l = 0; l != NumElts; l += 8) {
2772  for (unsigned i = 0; i != 4; ++i)
2773  Idxs[i + l] = i + l;
2774  for (unsigned i = 0; i != 4; ++i)
2775  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2776  }
2777 
2778  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2779 
2780  if (CI->getNumArgOperands() == 4)
2781  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2782  CI->getArgOperand(2));
2783  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2784  Value *Op0 = CI->getArgOperand(0);
2785  Value *Op1 = CI->getArgOperand(1);
2786  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2787  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2788 
2789  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2790  unsigned HalfLaneElts = NumLaneElts / 2;
2791 
2792  SmallVector<int, 16> Idxs(NumElts);
2793  for (unsigned i = 0; i != NumElts; ++i) {
2794  // Base index is the starting element of the lane.
2795  Idxs[i] = i - (i % NumLaneElts);
2796  // If we are half way through the lane switch to the other source.
2797  if ((i % NumLaneElts) >= HalfLaneElts)
2798  Idxs[i] += NumElts;
2799  // Now select the specific element. By adding HalfLaneElts bits from
2800  // the immediate. Wrapping around the immediate every 8-bits.
2801  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2802  }
2803 
2804  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2805 
2806  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2807  CI->getArgOperand(3));
2808  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2809  Name.startswith("avx512.mask.movshdup") ||
2810  Name.startswith("avx512.mask.movsldup"))) {
2811  Value *Op0 = CI->getArgOperand(0);
2812  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2813  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2814 
2815  unsigned Offset = 0;
2816  if (Name.startswith("avx512.mask.movshdup."))
2817  Offset = 1;
2818 
2819  SmallVector<int, 16> Idxs(NumElts);
2820  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2821  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2822  Idxs[i + l + 0] = i + l + Offset;
2823  Idxs[i + l + 1] = i + l + Offset;
2824  }
2825 
2826  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2827 
2828  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2829  CI->getArgOperand(1));
2830  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2831  Name.startswith("avx512.mask.unpckl."))) {
2832  Value *Op0 = CI->getArgOperand(0);
2833  Value *Op1 = CI->getArgOperand(1);
2834  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2835  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2836 
2837  SmallVector<int, 64> Idxs(NumElts);
2838  for (int l = 0; l != NumElts; l += NumLaneElts)
2839  for (int i = 0; i != NumLaneElts; ++i)
2840  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2841 
2842  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2843 
2844  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2845  CI->getArgOperand(2));
2846  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2847  Name.startswith("avx512.mask.unpckh."))) {
2848  Value *Op0 = CI->getArgOperand(0);
2849  Value *Op1 = CI->getArgOperand(1);
2850  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2851  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2852 
2853  SmallVector<int, 64> Idxs(NumElts);
2854  for (int l = 0; l != NumElts; l += NumLaneElts)
2855  for (int i = 0; i != NumLaneElts; ++i)
2856  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2857 
2858  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2859 
2860  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2861  CI->getArgOperand(2));
2862  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2863  Name.startswith("avx512.mask.pand."))) {
2864  VectorType *FTy = cast<VectorType>(CI->getType());
2865  VectorType *ITy = VectorType::getInteger(FTy);
2866  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2867  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2868  Rep = Builder.CreateBitCast(Rep, FTy);
2869  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2870  CI->getArgOperand(2));
2871  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2872  Name.startswith("avx512.mask.pandn."))) {
2873  VectorType *FTy = cast<VectorType>(CI->getType());
2874  VectorType *ITy = VectorType::getInteger(FTy);
2875  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2876  Rep = Builder.CreateAnd(Rep,
2877  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2878  Rep = Builder.CreateBitCast(Rep, FTy);
2879  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2880  CI->getArgOperand(2));
2881  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2882  Name.startswith("avx512.mask.por."))) {
2883  VectorType *FTy = cast<VectorType>(CI->getType());
2884  VectorType *ITy = VectorType::getInteger(FTy);
2885  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2886  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2887  Rep = Builder.CreateBitCast(Rep, FTy);
2888  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2889  CI->getArgOperand(2));
2890  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2891  Name.startswith("avx512.mask.pxor."))) {
2892  VectorType *FTy = cast<VectorType>(CI->getType());
2893  VectorType *ITy = VectorType::getInteger(FTy);
2894  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2895  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2896  Rep = Builder.CreateBitCast(Rep, FTy);
2897  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2898  CI->getArgOperand(2));
2899  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2900  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2901  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2902  CI->getArgOperand(2));
2903  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2904  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2905  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2906  CI->getArgOperand(2));
2907  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2908  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2909  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2910  CI->getArgOperand(2));
2911  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2912  if (Name.endswith(".512")) {
2913  Intrinsic::ID IID;
2914  if (Name[17] == 's')
2915  IID = Intrinsic::x86_avx512_add_ps_512;
2916  else
2917  IID = Intrinsic::x86_avx512_add_pd_512;
2918 
2919  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2920  { CI->getArgOperand(0), CI->getArgOperand(1),
2921  CI->getArgOperand(4) });
2922  } else {
2923  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2924  }
2925  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2926  CI->getArgOperand(2));
2927  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2928  if (Name.endswith(".512")) {
2929  Intrinsic::ID IID;
2930  if (Name[17] == 's')
2931  IID = Intrinsic::x86_avx512_div_ps_512;
2932  else
2933  IID = Intrinsic::x86_avx512_div_pd_512;
2934 
2935  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2936  { CI->getArgOperand(0), CI->getArgOperand(1),
2937  CI->getArgOperand(4) });
2938  } else {
2939  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2940  }
2941  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2942  CI->getArgOperand(2));
2943  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2944  if (Name.endswith(".512")) {
2945  Intrinsic::ID IID;
2946  if (Name[17] == 's')
2947  IID = Intrinsic::x86_avx512_mul_ps_512;
2948  else
2949  IID = Intrinsic::x86_avx512_mul_pd_512;
2950 
2951  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2952  { CI->getArgOperand(0), CI->getArgOperand(1),
2953  CI->getArgOperand(4) });
2954  } else {
2955  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2956  }
2957  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2958  CI->getArgOperand(2));
2959  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2960  if (Name.endswith(".512")) {
2961  Intrinsic::ID IID;
2962  if (Name[17] == 's')
2963  IID = Intrinsic::x86_avx512_sub_ps_512;
2964  else
2965  IID = Intrinsic::x86_avx512_sub_pd_512;
2966 
2967  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2968  { CI->getArgOperand(0), CI->getArgOperand(1),
2969  CI->getArgOperand(4) });
2970  } else {
2971  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2972  }
2973  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2974  CI->getArgOperand(2));
2975  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2976  Name.startswith("avx512.mask.min.p")) &&
2977  Name.drop_front(18) == ".512") {
2978  bool IsDouble = Name[17] == 'd';
2979  bool IsMin = Name[13] == 'i';
2980  static const Intrinsic::ID MinMaxTbl[2][2] = {
2981  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2982  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2983  };
2984  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2985 
2986  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2987  { CI->getArgOperand(0), CI->getArgOperand(1),
2988  CI->getArgOperand(4) });
2989  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2990  CI->getArgOperand(2));
2991  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2992  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2993  Intrinsic::ctlz,
2994  CI->getType()),
2995  { CI->getArgOperand(0), Builder.getInt1(false) });
2996  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2997  CI->getArgOperand(1));
2998  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2999  bool IsImmediate = Name[16] == 'i' ||
3000  (Name.size() > 18 && Name[18] == 'i');
3001  bool IsVariable = Name[16] == 'v';
3002  char Size = Name[16] == '.' ? Name[17] :
3003  Name[17] == '.' ? Name[18] :
3004  Name[18] == '.' ? Name[19] :
3005  Name[20];
3006 
3007  Intrinsic::ID IID;
3008  if (IsVariable && Name[17] != '.') {
3009  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3010  IID = Intrinsic::x86_avx2_psllv_q;
3011  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3012  IID = Intrinsic::x86_avx2_psllv_q_256;
3013  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3014  IID = Intrinsic::x86_avx2_psllv_d;
3015  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3016  IID = Intrinsic::x86_avx2_psllv_d_256;
3017  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3018  IID = Intrinsic::x86_avx512_psllv_w_128;
3019  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3020  IID = Intrinsic::x86_avx512_psllv_w_256;
3021  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3022  IID = Intrinsic::x86_avx512_psllv_w_512;
3023  else
3024  llvm_unreachable("Unexpected size");
3025  } else if (Name.endswith(".128")) {
3026  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3027  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3028  : Intrinsic::x86_sse2_psll_d;
3029  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3030  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3031  : Intrinsic::x86_sse2_psll_q;
3032  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3033  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3034  : Intrinsic::x86_sse2_psll_w;
3035  else
3036  llvm_unreachable("Unexpected size");
3037  } else if (Name.endswith(".256")) {
3038  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3039  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3040  : Intrinsic::x86_avx2_psll_d;
3041  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3042  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3043  : Intrinsic::x86_avx2_psll_q;
3044  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3045  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3046  : Intrinsic::x86_avx2_psll_w;
3047  else
3048  llvm_unreachable("Unexpected size");
3049  } else {
3050  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3051  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3052  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3053  Intrinsic::x86_avx512_psll_d_512;
3054  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3055  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3056  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3057  Intrinsic::x86_avx512_psll_q_512;
3058  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3059  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3060  : Intrinsic::x86_avx512_psll_w_512;
3061  else
3062  llvm_unreachable("Unexpected size");
3063  }
3064 
3065  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3066  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3067  bool IsImmediate = Name[16] == 'i' ||
3068  (Name.size() > 18 && Name[18] == 'i');
3069  bool IsVariable = Name[16] == 'v';
3070  char Size = Name[16] == '.' ? Name[17] :
3071  Name[17] == '.' ? Name[18] :
3072  Name[18] == '.' ? Name[19] :
3073  Name[20];
3074 
3075  Intrinsic::ID IID;
3076  if (IsVariable && Name[17] != '.') {
3077  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3078  IID = Intrinsic::x86_avx2_psrlv_q;
3079  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3080  IID = Intrinsic::x86_avx2_psrlv_q_256;
3081  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3082  IID = Intrinsic::x86_avx2_psrlv_d;
3083  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3084  IID = Intrinsic::x86_avx2_psrlv_d_256;
3085  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3086  IID = Intrinsic::x86_avx512_psrlv_w_128;
3087  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3088  IID = Intrinsic::x86_avx512_psrlv_w_256;
3089  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3090  IID = Intrinsic::x86_avx512_psrlv_w_512;
3091  else
3092  llvm_unreachable("Unexpected size");
3093  } else if (Name.endswith(".128")) {
3094  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3095  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3096  : Intrinsic::x86_sse2_psrl_d;
3097  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3098  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3099  : Intrinsic::x86_sse2_psrl_q;
3100  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3101  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3102  : Intrinsic::x86_sse2_psrl_w;
3103  else
3104  llvm_unreachable("Unexpected size");
3105  } else if (Name.endswith(".256")) {
3106  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3107  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3108  : Intrinsic::x86_avx2_psrl_d;
3109  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3110  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3111  : Intrinsic::x86_avx2_psrl_q;
3112  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3113  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3114  : Intrinsic::x86_avx2_psrl_w;
3115  else
3116  llvm_unreachable("Unexpected size");
3117  } else {
3118  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3119  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3120  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3121  Intrinsic::x86_avx512_psrl_d_512;
3122  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3123  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3124  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3125  Intrinsic::x86_avx512_psrl_q_512;
3126  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3127  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3128  : Intrinsic::x86_avx512_psrl_w_512;
3129  else
3130  llvm_unreachable("Unexpected size");
3131  }
3132 
3133  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3134  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3135  bool IsImmediate = Name[16] == 'i' ||
3136  (Name.size() > 18 && Name[18] == 'i');
3137  bool IsVariable = Name[16] == 'v';
3138  char Size = Name[16] == '.' ? Name[17] :
3139  Name[17] == '.' ? Name[18] :
3140  Name[18] == '.' ? Name[19] :
3141  Name[20];
3142 
3143  Intrinsic::ID IID;
3144  if (IsVariable && Name[17] != '.') {
3145  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3146  IID = Intrinsic::x86_avx2_psrav_d;
3147  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3148  IID = Intrinsic::x86_avx2_psrav_d_256;
3149  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3150  IID = Intrinsic::x86_avx512_psrav_w_128;
3151  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3152  IID = Intrinsic::x86_avx512_psrav_w_256;
3153  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3154  IID = Intrinsic::x86_avx512_psrav_w_512;
3155  else
3156  llvm_unreachable("Unexpected size");
3157  } else if (Name.endswith(".128")) {
3158  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3159  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3160  : Intrinsic::x86_sse2_psra_d;
3161  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3162  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3163  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3164  Intrinsic::x86_avx512_psra_q_128;
3165  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3166  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3167  : Intrinsic::x86_sse2_psra_w;
3168  else
3169  llvm_unreachable("Unexpected size");
3170  } else if (Name.endswith(".256")) {
3171  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3172  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3173  : Intrinsic::x86_avx2_psra_d;
3174  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3175  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3176  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3177  Intrinsic::x86_avx512_psra_q_256;
3178  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3179  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3180  : Intrinsic::x86_avx2_psra_w;
3181  else
3182  llvm_unreachable("Unexpected size");
3183  } else {
3184  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3185  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3186  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3187  Intrinsic::x86_avx512_psra_d_512;
3188  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3189  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3190  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3191  Intrinsic::x86_avx512_psra_q_512;
3192  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3193  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3194  : Intrinsic::x86_avx512_psra_w_512;
3195  else
3196  llvm_unreachable("Unexpected size");
3197  }
3198 
3199  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3200  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3201  Rep = upgradeMaskedMove(Builder, *CI);
3202  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3203  Rep = UpgradeMaskToInt(Builder, *CI);
3204  } else if (IsX86 && Name.endswith(".movntdqa")) {
3205  Module *M = F->getParent();
3206  MDNode *Node = MDNode::get(
3208 
3209  Value *Ptr = CI->getArgOperand(0);
3210 
3211  // Convert the type of the pointer to a pointer to the stored type.
3212  Value *BC = Builder.CreateBitCast(
3213  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3214  LoadInst *LI = Builder.CreateAlignedLoad(
3215  CI->getType(), BC,
3217  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3218  Rep = LI;
3219  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3220  Name.startswith("fma.vfmsub.") ||
3221  Name.startswith("fma.vfnmadd.") ||
3222  Name.startswith("fma.vfnmsub."))) {
3223  bool NegMul = Name[6] == 'n';
3224  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3225  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3226 
3227  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3228  CI->getArgOperand(2) };
3229 
3230  if (IsScalar) {
3231  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3232  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3233  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3234  }
3235 
3236  if (NegMul && !IsScalar)
3237  Ops[0] = Builder.CreateFNeg(Ops[0]);
3238  if (NegMul && IsScalar)
3239  Ops[1] = Builder.CreateFNeg(Ops[1]);
3240  if (NegAcc)
3241  Ops[2] = Builder.CreateFNeg(Ops[2]);
3242 
3243  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3244  Intrinsic::fma,
3245  Ops[0]->getType()),
3246  Ops);
3247 
3248  if (IsScalar)
3249  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3250  (uint64_t)0);
3251  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3252  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3253  CI->getArgOperand(2) };
3254 
3255  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3256  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3257  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3258 
3259  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3260  Intrinsic::fma,
3261  Ops[0]->getType()),
3262  Ops);
3263 
3264  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3265  Rep, (uint64_t)0);
3266  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3267  Name.startswith("avx512.maskz.vfmadd.s") ||
3268  Name.startswith("avx512.mask3.vfmadd.s") ||
3269  Name.startswith("avx512.mask3.vfmsub.s") ||
3270  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3271  bool IsMask3 = Name[11] == '3';
3272  bool IsMaskZ = Name[11] == 'z';
3273  // Drop the "avx512.mask." to make it easier.
3274  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3275  bool NegMul = Name[2] == 'n';
3276  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3277 
3278  Value *A = CI->getArgOperand(0);
3279  Value *B = CI->getArgOperand(1);
3280  Value *C = CI->getArgOperand(2);
3281 
3282  if (NegMul && (IsMask3 || IsMaskZ))
3283  A = Builder.CreateFNeg(A);
3284  if (NegMul && !(IsMask3 || IsMaskZ))
3285  B = Builder.CreateFNeg(B);
3286  if (NegAcc)
3287  C = Builder.CreateFNeg(C);
3288 
3289  A = Builder.CreateExtractElement(A, (uint64_t)0);
3290  B = Builder.CreateExtractElement(B, (uint64_t)0);
3291  C = Builder.CreateExtractElement(C, (uint64_t)0);
3292 
3293  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3294  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3295  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3296 
3297  Intrinsic::ID IID;
3298  if (Name.back() == 'd')
3299  IID = Intrinsic::x86_avx512_vfmadd_f64;
3300  else
3301  IID = Intrinsic::x86_avx512_vfmadd_f32;
3303  Rep = Builder.CreateCall(FMA, Ops);
3304  } else {
3306  Intrinsic::fma,
3307  A->getType());
3308  Rep = Builder.CreateCall(FMA, { A, B, C });
3309  }
3310 
3311  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3312  IsMask3 ? C : A;
3313 
3314  // For Mask3 with NegAcc, we need to create a new extractelement that
3315  // avoids the negation above.
3316  if (NegAcc && IsMask3)
3317  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3318  (uint64_t)0);
3319 
3321  Rep, PassThru);
3322  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3323  Rep, (uint64_t)0);
3324  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3325  Name.startswith("avx512.mask.vfnmadd.p") ||
3326  Name.startswith("avx512.mask.vfnmsub.p") ||
3327  Name.startswith("avx512.mask3.vfmadd.p") ||
3328  Name.startswith("avx512.mask3.vfmsub.p") ||
3329  Name.startswith("avx512.mask3.vfnmsub.p") ||
3330  Name.startswith("avx512.maskz.vfmadd.p"))) {
3331  bool IsMask3 = Name[11] == '3';
3332  bool IsMaskZ = Name[11] == 'z';
3333  // Drop the "avx512.mask." to make it easier.
3334  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3335  bool NegMul = Name[2] == 'n';
3336  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3337 
3338  Value *A = CI->getArgOperand(0);
3339  Value *B = CI->getArgOperand(1);
3340  Value *C = CI->getArgOperand(2);
3341 
3342  if (NegMul && (IsMask3 || IsMaskZ))
3343  A = Builder.CreateFNeg(A);
3344  if (NegMul && !(IsMask3 || IsMaskZ))
3345  B = Builder.CreateFNeg(B);
3346  if (NegAcc)
3347  C = Builder.CreateFNeg(C);
3348 
3349  if (CI->getNumArgOperands() == 5 &&
3350  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3351  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3352  Intrinsic::ID IID;
3353  // Check the character before ".512" in string.
3354  if (Name[Name.size()-5] == 's')
3355  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3356  else
3357  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3358 
3359  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3360  { A, B, C, CI->getArgOperand(4) });
3361  } else {
3363  Intrinsic::fma,
3364  A->getType());
3365  Rep = Builder.CreateCall(FMA, { A, B, C });
3366  }
3367 
3368  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3369  IsMask3 ? CI->getArgOperand(2) :
3370  CI->getArgOperand(0);
3371 
3372  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3373  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3374  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3375  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3376  Intrinsic::ID IID;
3377  if (VecWidth == 128 && EltWidth == 32)
3378  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3379  else if (VecWidth == 256 && EltWidth == 32)
3380  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3381  else if (VecWidth == 128 && EltWidth == 64)
3382  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3383  else if (VecWidth == 256 && EltWidth == 64)
3384  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3385  else
3386  llvm_unreachable("Unexpected intrinsic");
3387 
3388  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3389  CI->getArgOperand(2) };
3390  Ops[2] = Builder.CreateFNeg(Ops[2]);
3391  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3392  Ops);
3393  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3394  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3395  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3396  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3397  bool IsMask3 = Name[11] == '3';
3398  bool IsMaskZ = Name[11] == 'z';
3399  // Drop the "avx512.mask." to make it easier.
3400  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3401  bool IsSubAdd = Name[3] == 's';
3402  if (CI->getNumArgOperands() == 5) {
3403  Intrinsic::ID IID;
3404  // Check the character before ".512" in string.
3405  if (Name[Name.size()-5] == 's')
3406  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3407  else
3408  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3409 
3410  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3411  CI->getArgOperand(2), CI->getArgOperand(4) };
3412  if (IsSubAdd)
3413  Ops[2] = Builder.CreateFNeg(Ops[2]);
3414 
3415  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3416  Ops);
3417  } else {
3418  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3419 
3420  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3421  CI->getArgOperand(2) };
3422 
3423  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3424  Ops[0]->getType());
3425  Value *Odd = Builder.CreateCall(FMA, Ops);
3426  Ops[2] = Builder.CreateFNeg(Ops[2]);
3427  Value *Even = Builder.CreateCall(FMA, Ops);
3428 
3429  if (IsSubAdd)
3430  std::swap(Even, Odd);
3431 
3432  SmallVector<int, 32> Idxs(NumElts);
3433  for (int i = 0; i != NumElts; ++i)
3434  Idxs[i] = i + (i % 2) * NumElts;
3435 
3436  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3437  }
3438 
3439  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3440  IsMask3 ? CI->getArgOperand(2) :
3441  CI->getArgOperand(0);
3442 
3443  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3444  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3445  Name.startswith("avx512.maskz.pternlog."))) {
3446  bool ZeroMask = Name[11] == 'z';
3447  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3448  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3449  Intrinsic::ID IID;
3450  if (VecWidth == 128 && EltWidth == 32)
3451  IID = Intrinsic::x86_avx512_pternlog_d_128;
3452  else if (VecWidth == 256 && EltWidth == 32)
3453  IID = Intrinsic::x86_avx512_pternlog_d_256;
3454  else if (VecWidth == 512 && EltWidth == 32)
3455  IID = Intrinsic::x86_avx512_pternlog_d_512;
3456  else if (VecWidth == 128 && EltWidth == 64)
3457  IID = Intrinsic::x86_avx512_pternlog_q_128;
3458  else if (VecWidth == 256 && EltWidth == 64)
3459  IID = Intrinsic::x86_avx512_pternlog_q_256;
3460  else if (VecWidth == 512 && EltWidth == 64)
3461  IID = Intrinsic::x86_avx512_pternlog_q_512;
3462  else
3463  llvm_unreachable("Unexpected intrinsic");
3464 
3465  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3466  CI->getArgOperand(2), CI->getArgOperand(3) };
3467  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3468  Args);
3469  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3470  : CI->getArgOperand(0);
3471  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3472  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3473  Name.startswith("avx512.maskz.vpmadd52"))) {
3474  bool ZeroMask = Name[11] == 'z';
3475  bool High = Name[20] == 'h' || Name[21] == 'h';
3476  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3477  Intrinsic::ID IID;
3478  if (VecWidth == 128 && !High)
3479  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3480  else if (VecWidth == 256 && !High)
3481  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3482  else if (VecWidth == 512 && !High)
3483  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3484  else if (VecWidth == 128 && High)
3485  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3486  else if (VecWidth == 256 && High)
3487  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3488  else if (VecWidth == 512 && High)
3489  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3490  else
3491  llvm_unreachable("Unexpected intrinsic");
3492 
3493  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3494  CI->getArgOperand(2) };
3495  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3496  Args);
3497  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3498  : CI->getArgOperand(0);
3499  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3500  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3501  Name.startswith("avx512.mask.vpermt2var.") ||
3502  Name.startswith("avx512.maskz.vpermt2var."))) {
3503  bool ZeroMask = Name[11] == 'z';
3504  bool IndexForm = Name[17] == 'i';
3505  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3506  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3507  Name.startswith("avx512.maskz.vpdpbusd.") ||
3508  Name.startswith("avx512.mask.vpdpbusds.") ||
3509  Name.startswith("avx512.maskz.vpdpbusds."))) {
3510  bool ZeroMask = Name[11] == 'z';
3511  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3512  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3513  Intrinsic::ID IID;
3514  if (VecWidth == 128 && !IsSaturating)
3515  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3516  else if (VecWidth == 256 && !IsSaturating)
3517  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3518  else if (VecWidth == 512 && !IsSaturating)
3519  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3520  else if (VecWidth == 128 && IsSaturating)
3521  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3522  else if (VecWidth == 256 && IsSaturating)
3523  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3524  else if (VecWidth == 512 && IsSaturating)
3525  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3526  else
3527  llvm_unreachable("Unexpected intrinsic");
3528 
3529  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3530  CI->getArgOperand(2) };
3531  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3532  Args);
3533  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3534  : CI->getArgOperand(0);
3535  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3536  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3537  Name.startswith("avx512.maskz.vpdpwssd.") ||
3538  Name.startswith("avx512.mask.vpdpwssds.") ||
3539  Name.startswith("avx512.maskz.vpdpwssds."))) {
3540  bool ZeroMask = Name[11] == 'z';
3541  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3542  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3543  Intrinsic::ID IID;
3544  if (VecWidth == 128 && !IsSaturating)
3545  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3546  else if (VecWidth == 256 && !IsSaturating)
3547  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3548  else if (VecWidth == 512 && !IsSaturating)
3549  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3550  else if (VecWidth == 128 && IsSaturating)
3551  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3552  else if (VecWidth == 256 && IsSaturating)
3553  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3554  else if (VecWidth == 512 && IsSaturating)
3555  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3556  else
3557  llvm_unreachable("Unexpected intrinsic");
3558 
3559  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3560  CI->getArgOperand(2) };
3561  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3562  Args);
3563  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3564  : CI->getArgOperand(0);
3565  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3566  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3567  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3568  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3569  Intrinsic::ID IID;
3570  if (Name[0] == 'a' && Name.back() == '2')
3571  IID = Intrinsic::x86_addcarry_32;
3572  else if (Name[0] == 'a' && Name.back() == '4')
3573  IID = Intrinsic::x86_addcarry_64;
3574  else if (Name[0] == 's' && Name.back() == '2')
3575  IID = Intrinsic::x86_subborrow_32;
3576  else if (Name[0] == 's' && Name.back() == '4')
3577  IID = Intrinsic::x86_subborrow_64;
3578  else
3579  llvm_unreachable("Unexpected intrinsic");
3580 
3581  // Make a call with 3 operands.
3582  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3583  CI->getArgOperand(2)};
3584  Value *NewCall = Builder.CreateCall(
3586  Args);
3587 
3588  // Extract the second result and store it.
3589  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3590  // Cast the pointer to the right type.
3591  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3592  llvm::PointerType::getUnqual(Data->getType()));
3593  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3594  // Replace the original call result with the first result of the new call.
3595  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3596 
3597  CI->replaceAllUsesWith(CF);
3598  Rep = nullptr;
3599  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3600  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3601  // Rep will be updated by the call in the condition.
3602  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3603  Value *Arg = CI->getArgOperand(0);
3604  Value *Neg = Builder.CreateNeg(Arg, "neg");
3605  Value *Cmp = Builder.CreateICmpSGE(
3606  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3607  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3608  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3609  Name.startswith("atomic.load.add.f64.p"))) {
3610  Value *Ptr = CI->getArgOperand(0);
3611  Value *Val = CI->getArgOperand(1);
3612  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3614  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3615  Name == "max.ui" || Name == "max.ull")) {
3616  Value *Arg0 = CI->getArgOperand(0);
3617  Value *Arg1 = CI->getArgOperand(1);
3618  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3619  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3620  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3621  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3622  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3623  Name == "min.ui" || Name == "min.ull")) {
3624  Value *Arg0 = CI->getArgOperand(0);
3625  Value *Arg1 = CI->getArgOperand(1);
3626  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3627  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3628  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3629  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3630  } else if (IsNVVM && Name == "clz.ll") {
3631  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3632  Value *Arg = CI->getArgOperand(0);
3633  Value *Ctlz = Builder.CreateCall(
3634  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3635  {Arg->getType()}),
3636  {Arg, Builder.getFalse()}, "ctlz");
3637  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3638  } else if (IsNVVM && Name == "popc.ll") {
3639  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3640  // i64.
3641  Value *Arg = CI->getArgOperand(0);
3642  Value *Popc = Builder.CreateCall(
3643  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3644  {Arg->getType()}),
3645  Arg, "ctpop");
3646  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3647  } else if (IsNVVM && Name == "h2f") {
3648  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3649  F->getParent(), Intrinsic::convert_from_fp16,
3650  {Builder.getFloatTy()}),
3651  CI->getArgOperand(0), "h2f");
3652  } else {
3653  llvm_unreachable("Unknown function for CallInst upgrade.");
3654  }
3655 
3656  if (Rep)
3657  CI->replaceAllUsesWith(Rep);
3658  CI->eraseFromParent();
3659  return;
3660  }
3661 
3662  const auto &DefaultCase = [&NewFn, &CI]() -> void {
3663  // Handle generic mangling change, but nothing else
3664  assert(
3665  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3666  "Unknown function for CallInst upgrade and isn't just a name change");
3667  CI->setCalledFunction(NewFn);
3668  };
3669  CallInst *NewCall = nullptr;
3670  switch (NewFn->getIntrinsicID()) {
3671  default: {
3672  DefaultCase();
3673  return;
3674  }
3675  case Intrinsic::arm_neon_vld1:
3676  case Intrinsic::arm_neon_vld2:
3677  case Intrinsic::arm_neon_vld3:
3678  case Intrinsic::arm_neon_vld4:
3679  case Intrinsic::arm_neon_vld2lane:
3680  case Intrinsic::arm_neon_vld3lane:
3681  case Intrinsic::arm_neon_vld4lane:
3682  case Intrinsic::arm_neon_vst1:
3683  case Intrinsic::arm_neon_vst2:
3684  case Intrinsic::arm_neon_vst3:
3685  case Intrinsic::arm_neon_vst4:
3686  case Intrinsic::arm_neon_vst2lane:
3687  case Intrinsic::arm_neon_vst3lane:
3688  case Intrinsic::arm_neon_vst4lane: {
3690  NewCall = Builder.CreateCall(NewFn, Args);
3691  break;
3692  }
3693 
3694  case Intrinsic::arm_neon_bfdot:
3695  case Intrinsic::arm_neon_bfmmla:
3696  case Intrinsic::arm_neon_bfmlalb:
3697  case Intrinsic::arm_neon_bfmlalt:
3698  case Intrinsic::aarch64_neon_bfdot:
3699  case Intrinsic::aarch64_neon_bfmmla:
3700  case Intrinsic::aarch64_neon_bfmlalb:
3701  case Intrinsic::aarch64_neon_bfmlalt: {
3703  assert(CI->getNumArgOperands() == 3 &&
3704  "Mismatch between function args and call args");
3705  size_t OperandWidth =
3707  assert((OperandWidth == 64 || OperandWidth == 128) &&
3708  "Unexpected operand width");
3709  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3710  auto Iter = CI->arg_operands().begin();
3711  Args.push_back(*Iter++);
3712  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3713  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3714  NewCall = Builder.CreateCall(NewFn, Args);
3715  break;
3716  }
3717 
3718  case Intrinsic::bitreverse:
3719  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3720  break;
3721 
3722  case Intrinsic::ctlz:
3723  case Intrinsic::cttz:
3724  assert(CI->getNumArgOperands() == 1 &&
3725  "Mismatch between function args and call args");
3726  NewCall =
3727  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3728  break;
3729 
3730  case Intrinsic::objectsize: {
3731  Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3732  ? Builder.getFalse()
3733  : CI->getArgOperand(2);
3734  Value *Dynamic =
3735  CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3736  NewCall = Builder.CreateCall(
3737  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3738  break;
3739  }
3740 
3741  case Intrinsic::ctpop:
3742  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3743  break;
3744 
3745  case Intrinsic::convert_from_fp16:
3746  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3747  break;
3748 
3749  case Intrinsic::dbg_value:
3750  // Upgrade from the old version that had an extra offset argument.
3751  assert(CI->getNumArgOperands() == 4);
3752  // Drop nonzero offsets instead of attempting to upgrade them.
3753  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3754  if (Offset->isZeroValue()) {
3755  NewCall = Builder.CreateCall(
3756  NewFn,
3757  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3758  break;
3759  }
3760  CI->eraseFromParent();
3761  return;
3762 
3763  case Intrinsic::ptr_annotation:
3764  // Upgrade from versions that lacked the annotation attribute argument.
3765  assert(CI->getNumArgOperands() == 4 &&
3766  "Before LLVM 12.0 this intrinsic took four arguments");
3767  // Create a new call with an added null annotation attribute argument.
3768  NewCall = Builder.CreateCall(
3769  NewFn,
3770  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3771  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3772  NewCall->takeName(CI);
3773  CI->replaceAllUsesWith(NewCall);
3774  CI->eraseFromParent();
3775  return;
3776 
3777  case Intrinsic::var_annotation:
3778  // Upgrade from versions that lacked the annotation attribute argument.
3779  assert(CI->getNumArgOperands() == 4 &&
3780  "Before LLVM 12.0 this intrinsic took four arguments");
3781  // Create a new call with an added null annotation attribute argument.
3782  NewCall = Builder.CreateCall(
3783  NewFn,
3784  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3785  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3786  CI->eraseFromParent();
3787  return;
3788 
3789  case Intrinsic::x86_xop_vfrcz_ss:
3790  case Intrinsic::x86_xop_vfrcz_sd:
3791  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3792  break;
3793 
3794  case Intrinsic::x86_xop_vpermil2pd:
3795  case Intrinsic::x86_xop_vpermil2ps:
3796  case Intrinsic::x86_xop_vpermil2pd_256:
3797  case Intrinsic::x86_xop_vpermil2ps_256: {
3799  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3800  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3801  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3802  NewCall = Builder.CreateCall(NewFn, Args);
3803  break;
3804  }
3805 
3806  case Intrinsic::x86_sse41_ptestc:
3807  case Intrinsic::x86_sse41_ptestz:
3808  case Intrinsic::x86_sse41_ptestnzc: {
3809  // The arguments for these intrinsics used to be v4f32, and changed
3810  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3811  // So, the only thing required is a bitcast for both arguments.
3812  // First, check the arguments have the old type.
3813  Value *Arg0 = CI->getArgOperand(0);
3814  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3815  return;
3816 
3817  // Old intrinsic, add bitcasts
3818  Value *Arg1 = CI->getArgOperand(1);
3819 
3820  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3821 
3822  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3823  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3824 
3825  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3826  break;
3827  }
3828 
3829  case Intrinsic::x86_rdtscp: {
3830  // This used to take 1 arguments. If we have no arguments, it is already
3831  // upgraded.
3832  if (CI->getNumOperands() == 0)
3833  return;
3834 
3835  NewCall = Builder.CreateCall(NewFn);
3836  // Extract the second result and store it.
3837  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3838  // Cast the pointer to the right type.
3839  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3840  llvm::PointerType::getUnqual(Data->getType()));
3841  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3842  // Replace the original call result with the first result of the new call.
3843  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3844 
3845  NewCall->takeName(CI);
3846  CI->replaceAllUsesWith(TSC);
3847  CI->eraseFromParent();
3848  return;
3849  }
3850 
3851  case Intrinsic::x86_sse41_insertps:
3852  case Intrinsic::x86_sse41_dppd:
3853  case Intrinsic::x86_sse41_dpps:
3854  case Intrinsic::x86_sse41_mpsadbw:
3855  case Intrinsic::x86_avx_dp_ps_256:
3856  case Intrinsic::x86_avx2_mpsadbw: {
3857  // Need to truncate the last argument from i32 to i8 -- this argument models
3858  // an inherently 8-bit immediate operand to these x86 instructions.
3860 
3861  // Replace the last argument with a trunc.
3862  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3863  NewCall = Builder.CreateCall(NewFn, Args);
3864  break;
3865  }
3866 
3867  case Intrinsic::x86_avx512_mask_cmp_pd_128:
3868  case Intrinsic::x86_avx512_mask_cmp_pd_256:
3869  case Intrinsic::x86_avx512_mask_cmp_pd_512:
3870  case Intrinsic::x86_avx512_mask_cmp_ps_128:
3871  case Intrinsic::x86_avx512_mask_cmp_ps_256:
3872  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3874  unsigned NumElts =
3875  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3876  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
3877 
3878  NewCall = Builder.CreateCall(NewFn, Args);
3879  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
3880 
3881  NewCall->takeName(CI);
3882  CI->replaceAllUsesWith(Res);
3883  CI->eraseFromParent();
3884  return;
3885  }
3886 
3887  case Intrinsic::thread_pointer: {
3888  NewCall = Builder.CreateCall(NewFn, {});
3889  break;
3890  }
3891 
3892  case Intrinsic::invariant_start:
3893  case Intrinsic::invariant_end: {
3895  NewCall = Builder.CreateCall(NewFn, Args);
3896  break;
3897  }
3898  case Intrinsic::masked_load:
3899  case Intrinsic::masked_store:
3900  case Intrinsic::masked_gather:
3901  case Intrinsic::masked_scatter: {
3903  NewCall = Builder.CreateCall(NewFn, Args);
3904  NewCall->copyMetadata(*CI);
3905  break;
3906  }
3907 
3908  case Intrinsic::memcpy:
3909  case Intrinsic::memmove:
3910  case Intrinsic::memset: {
3911  // We have to make sure that the call signature is what we're expecting.
3912  // We only want to change the old signatures by removing the alignment arg:
3913  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3914  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3915  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3916  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3917  // Note: i8*'s in the above can be any pointer type
3918  if (CI->getNumArgOperands() != 5) {
3919  DefaultCase();
3920  return;
3921  }
3922  // Remove alignment argument (3), and add alignment attributes to the
3923  // dest/src pointers.
3924  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3925  CI->getArgOperand(2), CI->getArgOperand(4)};
3926  NewCall = Builder.CreateCall(NewFn, Args);
3927  auto *MemCI = cast<MemIntrinsic>(NewCall);
3928  // All mem intrinsics support dest alignment.
3929  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3930  MemCI->setDestAlignment(Align->getMaybeAlignValue());
3931  // Memcpy/Memmove also support source alignment.
3932  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3933  MTI->setSourceAlignment(Align->getMaybeAlignValue());
3934  break;
3935  }
3936  }
3937  assert(NewCall && "Should have either set this variable or returned through "
3938  "the default case");
3939  NewCall->takeName(CI);
3940  CI->replaceAllUsesWith(NewCall);
3941  CI->eraseFromParent();
3942 }
3943 
 // Upgrades the calls to F: asks UpgradeIntrinsicFunction whether F needs
 // upgrading and, if so, hands every CallInst user of F to
 // UpgradeIntrinsicCall together with the replacement, then erases F.
 // NOTE(review): the signature line of this function is not present in this
 // listing (the embedded numbering skips 3944).
3945  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3946 
3947  // Check if this function should be upgraded and get the replacement function
3948  // if there is one.
 // NewFn is declared without an initializer and is only read below when
 // UpgradeIntrinsicFunction returned true (presumably that call assigns it
 // through its second parameter -- confirm against its definition).
3949  Function *NewFn;
3950  if (UpgradeIntrinsicFunction(F, NewFn)) {
3951  // Replace all users of the old function with the new function or new
3952  // instructions. The users are walked through make_early_inc_range because
 // the call is deleted while iterating. Users that are not CallInsts are
 // skipped by the dyn_cast below.
3953  for (User *U : make_early_inc_range(F->users()))
3954  if (CallInst *CI = dyn_cast<CallInst>(U))
3955  UpgradeIntrinsicCall(CI, NewFn);
3956 
3957  // Remove old function, no longer used, from the module.
3958  F->eraseFromParent();
3959  }
3960 }
3961 
 // Upgrades the TBAA tag node MD. A node whose first operand is itself an
 // MDNode and that has at least three operands is returned unchanged;
 // otherwise a replacement node is built with MDNode::get in MD's context.
 // NOTE(review): this listing omits some original lines (the embedded
 // numbering skips 3962, 3973-3974 and 3979-3980), so the signature, part of
 // the Elts2 initializer and the declaration of the Elts used by the final
 // return are not visible here.
3963  // Check if the tag uses struct-path aware TBAA format.
3964  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3965  return &MD;
3966 
3967  auto &Context = MD.getContext();
3968  if (MD.getNumOperands() == 3) {
 // Wrap the first two operands of MD in a node of their own; that node is
 // then used for both leading elements of the new tag, and MD's third
 // operand is carried over as the last element.
3969  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3970  MDNode *ScalarType = MDNode::get(Context, Elts);
3971  // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3972  Metadata *Elts2[] = {ScalarType, ScalarType,
3975  MD.getOperand(2)};
3976  return MDNode::get(Context, Elts2);
3977  }
 // Any other operand count reaches the fallback below.
3978  // Create a MDNode <MD, MD, offset 0>
3981  return MDNode::get(Context, Elts);
3982 }
3983 
3984 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3985  Instruction *&Temp) {
3986  if (Opc != Instruction::BitCast)
3987  return nullptr;
3988 
3989  Temp = nullptr;
3990  Type *SrcTy = V->getType();
3991  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3992  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3993  LLVMContext &Context = V->getContext();
3994 
3995  // We have no information about target data layout, so we assume that
3996  // the maximum pointer size is 64bit.
3997  Type *MidTy = Type::getInt64Ty(Context);
3998  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3999 
4000  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4001  }
4002 
4003  return nullptr;
4004 }
4005 
4006 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4007  if (Opc != Instruction::BitCast)
4008  return nullptr;
4009 
4010  Type *SrcTy = C->getType();
4011  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4012  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4013  LLVMContext &Context = C->getContext();
4014 
4015  // We have no information about target data layout, so we assume that
4016  // the maximum pointer size is 64bit.
4017  Type *MidTy = Type::getInt64Ty(Context);
4018 
4020  DestTy);
4021  }
4022 
4023  return nullptr;
4024 }
4025 
4026 /// Check the debug info version number, if it is out-dated, drop the debug
4027 /// info. Return true if module is modified.
 // NOTE(review): this listing omits several original lines (the embedded
 // numbering skips 4028-4030, 4039, 4044 and 4046), so the signature, the
 // condition guarding the verification below, and the declarations of Diag
 // and DiagVersion are not visible here.
4031  bool BrokenDebugInfo = false;
 // A true result from verifyModule is treated as fatal. BrokenDebugInfo is
 // passed by address and read afterwards to decide whether to continue.
4032  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4033  report_fatal_error("Broken module found, compilation aborted!");
4034  if (!BrokenDebugInfo)
4035  // Everything is ok.
4036  return false;
4037  else {
4038  // Diagnose malformed debug info.
4040  M.getContext().diagnose(Diag);
4041  }
4042  }
 // Reached whenever the early return above was not taken: strip the debug
 // info and remember whether StripDebugInfo reported a change.
4043  bool Modified = StripDebugInfo(M);
4045  // Diagnose a version mismatch.
4047  M.getContext().diagnose(DiagVersion);
4048  }
4049  return Modified;
4050 }
4051 
4052 /// This checks for objc retain release marker which should be upgraded. It
4053 /// returns true if module is modified.
 // The marker is looked up as named metadata under MarkerKey. When its first
 // operand's first operand is an MDString, that string is re-registered as a
 // module flag under the same key and the named metadata is erased.
 // NOTE(review): the signature line of this function is not present in this
 // listing (the embedded numbering skips 4054).
4055  bool Changed = false;
4056  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4057  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4058  if (ModRetainReleaseMarker) {
 // Only operand 0 of the named node is inspected.
4059  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4060  if (Op) {
4061  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4062  if (ID) {
 // A string made of exactly two '#'-separated pieces is rebuilt with ';' as
 // the separator; any other string is carried over unchanged.
4063  SmallVector<StringRef, 4> ValueComp;
4064  ID->getString().split(ValueComp, "#");
4065  if (ValueComp.size() == 2) {
4066  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4067  ID = MDString::get(M.getContext(), NewValue);
4068  }
 // The module flag is added and the named metadata erased whether or not the
 // string was rewritten above.
4069  M.addModuleFlag(Module::Error, MarkerKey, ID);
4070  M.eraseNamedMetadata(ModRetainReleaseMarker);
4071  Changed = true;
4072  }
4073  }
4074  }
4075  return Changed;
4076 }
4077 
 // Rewrites direct calls to the named ARC runtime functions in M into calls
 // to the corresponding intrinsics. "clang.arc.use" is always converted; the
 // RuntimeFuncs table further down is processed only if the early return
 // after the marker comment is not taken.
 // NOTE(review): this listing omits some original lines (the embedded
 // numbering skips 4078, 4097 and 4153), so the signature, the declaration of
 // Args and the condition guarding that early return are not visible here.
4079  // This lambda converts normal function calls to ARC runtime functions to
4080  // intrinsic calls.
4081  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4082  llvm::Intrinsic::ID IntrinsicFunc) {
4083  Function *Fn = M.getFunction(OldFunc);
4084 
 // Nothing to do when the module has no function named OldFunc.
4085  if (!Fn)
4086  return;
4087 
4088  Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4089 
 // Early-inc iteration: the call being visited is erased at the end of the
 // loop body.
4090  for (User *U : make_early_inc_range(Fn->users())) {
 // Only direct calls whose callee is Fn are rewritten; every other user is
 // left in place.
4091  CallInst *CI = dyn_cast<CallInst>(U);
4092  if (!CI || CI->getCalledFunction() != Fn)
4093  continue;
4094 
 // New instructions are inserted immediately before the old call.
4095  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4096  FunctionType *NewFuncTy = NewFn->getFunctionType();
4098 
4099  // Don't upgrade the intrinsic if it's not valid to bitcast the return
4100  // value to the return type of the old function.
4101  if (NewFuncTy->getReturnType() != CI->getType() &&
4102  !CastInst::castIsValid(Instruction::BitCast, CI,
4103  NewFuncTy->getReturnType()))
4104  continue;
4105 
4106  bool InvalidCast = false;
4107 
4108  for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
4109  Value *Arg = CI->getArgOperand(I);
4110 
4111  // Bitcast argument to the parameter type of the new function if it's
4112  // not a variadic argument.
4113  if (I < NewFuncTy->getNumParams()) {
4114  // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4115  // to the parameter type of the new function.
4116  if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4117  NewFuncTy->getParamType(I))) {
4118  InvalidCast = true;
4119  break;
4120  }
4121  Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4122  }
4123  Args.push_back(Arg);
4124  }
4125 
 // At least one argument could not be cast: leave this call untouched.
4126  if (InvalidCast)
4127  continue;
4128 
4129  // Create a call instruction that calls the new function.
4130  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4131  NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4132  NewCall->takeName(CI);
4133 
4134  // Bitcast the return value back to the type of the old call.
4135  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4136 
4137  if (!CI->use_empty())
4138  CI->replaceAllUsesWith(NewRetVal);
4139  CI->eraseFromParent();
4140  }
4141 
 // Users skipped above still reference Fn, so it is erased only once no
 // uses remain.
4142  if (Fn->use_empty())
4143  Fn->eraseFromParent();
4144  };
4145 
4146  // Unconditionally convert a call to "clang.arc.use" to a call to
4147  // "llvm.objc.clang.arc.use".
4148  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4149 
4150  // Upgrade the retain release marker. If there is no need to upgrade
4151  // the marker, that means either the module is already new enough to contain
4152  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4154  return;
4155 
 // Each entry pairs a runtime function name with the intrinsic ID that is
 // passed to UpgradeToIntrinsic for it.
4156  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4157  {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4158  {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4159  {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4160  {"objc_autoreleaseReturnValue",
4161  llvm::Intrinsic::objc_autoreleaseReturnValue},
4162  {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4163  {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4164  {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4165  {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4166  {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4167  {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4168  {"objc_release", llvm::Intrinsic::objc_release},
4169  {"objc_retain", llvm::Intrinsic::objc_retain},
4170  {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4171  {"objc_retainAutoreleaseReturnValue",
4172  llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4173  {"objc_retainAutoreleasedReturnValue",
4174  llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4175  {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4176  {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4177  {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4178  {"objc_unsafeClaimAutoreleasedReturnValue",
4179  llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4180  {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4181  {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4182  {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4183  {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4184  {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4185  {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4186  {"objc_arc_annotation_topdown_bbstart",
4187  llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4188  {"objc_arc_annotation_topdown_bbend",
4189  llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4190  {"objc_arc_annotation_bottomup_bbstart",
4191  llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4192  {"objc_arc_annotation_bottomup_bbend",
4193  llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4194 
4195  for (auto &I : RuntimeFuncs)
4196  UpgradeToIntrinsic(I.first, I.second);
4197 }
4198 
4200  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4201  if (!ModFlags)
4202  return false;
4203 
4204  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4205  bool HasSwiftVersionFlag = false;
4206  uint8_t SwiftMajorVersion, SwiftMinorVersion;
4207  uint32_t SwiftABIVersion;
4208  auto Int8Ty = Type::getInt8Ty(M.getContext());
4209  auto Int32Ty = Type::getInt32Ty(M.getContext());
4210 
4211  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4212  MDNode *Op = ModFlags->getOperand(I);
4213  if (Op->getNumOperands() != 3)
4214  continue;
4215  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4216  if (!ID)
4217  continue;
4218  if (ID->getString() == "Objective-C Image Info Version")
4219  HasObjCFlag = true;
4220  if (ID->getString() == "Objective-C Class Properties")
4221  HasClassProperties = true;
4222  // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4223  // field was Error and now they are Max.
4224  if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4225  if (auto *Behavior =
4226  mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4227  if (Behavior->getLimitedValue() == Module::Error) {
4228  Type *Int32Ty = Type::getInt32Ty(M.getContext());
4229  Metadata *Ops[3] = {
4231  MDString::get(M.getContext(), ID->getString()),
4232  Op->getOperand(2)};
4233  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4234  Changed = true;
4235  }
4236  }
4237  }
4238  // Upgrade Objective-C Image Info Section. Removed the whitespce in the
4239  // section name so that llvm-lto will not complain about mismatching
4240  // module flags that is functionally the same.
4241  if (ID->getString() == "Objective-C Image Info Section") {
4242  if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4243  SmallVector<StringRef, 4> ValueComp;
4244  Value->getString().split(ValueComp, " ");
4245  if (ValueComp.size() != 1) {
4246  std::string NewValue;
4247  for (auto &