//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
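// For example, the old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is upgraded to
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)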
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
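// For example, the trailing i32 mask of the old
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// becomes an i8 in the current declaration.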
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
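// For example, llvm.x86.avx512.mask.cmp.ps.512 used to return the result as
// an i16 scalar and now returns <16 x i1>.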
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
      Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.cmp.p") || // Added in 12.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("vcvtph2ps.") || // Added in 11.0
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
                                     NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.frintn")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8, respectively.
    if ((Name.startswith("arm.neon.bfdot.") ||
         Name.startswith("aarch64.neon.bfdot.")) &&
        Name.endswith("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      std::array<Type *, 2> Tys {{
        F->getReturnType(),
        FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are no longer polymorphic
    // and accept v8bf16 instead of v16i8.
    if ((Name.startswith("arm.neon.bfm") ||
         Name.startswith("aarch64.neon.bfm")) &&
        Name.endswith(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
        return true;
      }
    }
    static const Regex R2(
        "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
    Groups.clear();
    if (R2.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::vector_reduce_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::vector_reduce_fmul;
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Rename gather/scatter intrinsics that lack address-space overloading to
    // the new overloads that include an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Upgrade the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute on the
    // pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len.
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .StartsWith("atomic.load.add.f32.p", true)
                        .StartsWith("atomic.load.add.f64.p", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name == "prefetch") {
      // Handle address space overloading.
      Type *Tys[] = {F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
        return true;
      }
    } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::ptr_annotation,
                                        F->arg_begin()->getType());
      return true;
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::var_annotation);
      return true;
    }
    break;
  }

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
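// For example, with Shift == 3 each 16-byte lane of the result is
//   Res[i] = (i < 3) ? 0 : Op[i - 3]
// i.e. the bytes move toward higher indices with zeroes shifted in.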
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
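// For example, with Shift == 3 each 16-byte lane of the result is
//   Res[i] = (i + 3 < 16) ? Op[i + 3] : 0
// i.e. the bytes move toward lower indices with zeroes shifted in.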
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

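// Bitcast an integer mask to a vector of i1 elements, extracting the low
// NumElts elements when the mask is wider than needed (an i8 mask used with
// a 2- or 4-element vector, for instance).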
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(
        Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
  }

  return Mask;
}

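// Emit a vector select between Op0 and Op1 controlled by an integer bitmask,
// one mask bit per element. A constant all-ones mask simply yields Op0.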
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

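// Emit a select that is controlled only by bit 0 of the mask, as the scalar
// (low-element) masked intrinsics require.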
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
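// For example, PALIGNR with ShiftVal == 4 computes, for each 16-byte lane,
//   Res[i] = (i + 4 < 16) ? Op1[i + 4] : Op0[i - 12]
// i.e. the concatenation Op0:Op1 (with Op1 in the low bytes) shifted right
// by the immediate.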
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

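// Upgrade the masked VPERMI2/VPERMT2 variable-permute intrinsics to the
// unmasked llvm.x86.avx512.vpermi2var.* intrinsics followed by a select.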
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

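// Replace a call to an old two-operand X86 intrinsic with a call to the given
// generic intrinsic, applying the mask operand if one is present.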
static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                         Intrinsic::ID IID) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

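// Upgrade the X86 rotate intrinsics to the generic funnel-shift intrinsics;
// a rotate is simply a funnel shift with both inputs equal.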
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}

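// Upgrade the X86 VPSHLD/VPSHRD concat-shift intrinsics to the generic
// funnel-shift intrinsics, applying the mask operand if one is present.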
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

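// Upgrade a masked store intrinsic to the generic llvm.masked.store, or to a
// plain store when the mask is known to be all ones.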
1379  Value *Ptr, Value *Data, Value *Mask,
1380  bool Aligned) {
1381  // Cast the pointer to the right type.
1382  Ptr = Builder.CreateBitCast(Ptr,
1383  llvm::PointerType::getUnqual(Data->getType()));
1384  const Align Alignment =
1385  Aligned
1386  ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1387  : Align(1);
1388 
1389  // If the mask is all ones just emit a regular store.
1390  if (const auto *C = dyn_cast<Constant>(Mask))
1391  if (C->isAllOnesValue())
1392  return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1393 
1394  // Convert the mask from an integer type to a vector of i1.
1395  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1396  Mask = getX86MaskVec(Builder, Mask, NumElts);
1397  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1398 }
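// Minimal sketch of the conversion (element count assumed): for a
// <4 x float> store with an i8 mask, the low 4 bits become a <4 x i1> vector
// and the old intrinsic turns into
//   call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %data,
//             <4 x float>* %ptr, i32 1, <4 x i1> %mask)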
1399 
1400 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1401  Value *Ptr, Value *Passthru, Value *Mask,
1402  bool Aligned) {
1403  Type *ValTy = Passthru->getType();
1404  // Cast the pointer to the right type.
1405  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1406  const Align Alignment =
1407  Aligned
1408  ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1409  8)
1410  : Align(1);
1411 
1412  // If the mask is all ones just emit a regular load.
1413  if (const auto *C = dyn_cast<Constant>(Mask))
1414  if (C->isAllOnesValue())
1415  return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1416 
1417  // Convert the mask from an integer type to a vector of i1.
1418  unsigned NumElts =
1419  cast<FixedVectorType>(Passthru->getType())->getNumElements();
1420  Mask = getX86MaskVec(Builder, Mask, NumElts);
1421  return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
1422 }
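// Likewise for loads, a minimal sketch (types assumed): the integer mask is
// narrowed to <4 x i1> and the call becomes
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %ptr,
//             i32 1, <4 x i1> %mask, <4 x float> %passthru)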
1423 
1424 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1425  Type *Ty = CI.getType();
1426  Value *Op0 = CI.getArgOperand(0);
1427  Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1428  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1429  if (CI.getNumArgOperands() == 3)
1430  Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1431  return Res;
1432 }
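// For example (illustrative only),
//   %r = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %x)
// becomes the generic absolute value with the INT_MIN-is-poison flag off:
//   %r = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %x, i1 false)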
1433 
1434 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1435  Type *Ty = CI.getType();
1436 
1437  // Arguments have a vXi32 type so cast to vXi64.
1438  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1439  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1440 
1441  if (IsSigned) {
1442  // Shift left then arithmetic shift right.
1443  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1444  LHS = Builder.CreateShl(LHS, ShiftAmt);
1445  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1446  RHS = Builder.CreateShl(RHS, ShiftAmt);
1447  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1448  } else {
1449  // Clear the upper bits.
1450  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1451  LHS = Builder.CreateAnd(LHS, Mask);
1452  RHS = Builder.CreateAnd(RHS, Mask);
1453  }
1454 
1455  Value *Res = Builder.CreateMul(LHS, RHS);
1456 
1457  if (CI.getNumArgOperands() == 4)
1458  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1459 
1460  return Res;
1461 }
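// Sketch of the expansion (types assumed): the signed form
//   %r = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a, <4 x i32> %b)
// is emitted as bitcasts to <2 x i64>, shl/ashr pairs by 32 that sign-extend
// each lane's low 32 bits in place, and a single 64-bit multiply.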
1462 
1463 // Apply a mask to a vector of i1s, ensuring the result is at least 8 bits wide.
1464 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1465  Value *Mask) {
1466  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1467  if (Mask) {
1468  const auto *C = dyn_cast<Constant>(Mask);
1469  if (!C || !C->isAllOnesValue())
1470  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1471  }
1472 
1473  if (NumElts < 8) {
1474  int Indices[8];
1475  for (unsigned i = 0; i != NumElts; ++i)
1476  Indices[i] = i;
1477  for (unsigned i = NumElts; i != 8; ++i)
1478  Indices[i] = NumElts + i % NumElts;
1479  Vec = Builder.CreateShuffleVector(Vec,
1480  Constant::getNullValue(Vec->getType()),
1481  Indices);
1482  }
1483  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1484 }
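// Example (element counts assumed): a <4 x i1> compare result is padded with
// zero elements via the shuffle above and then bitcast to a scalar:
//   %m8 = shufflevector <4 x i1> %m4, <4 x i1> zeroinitializer,
//                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                                  i32 4, i32 5, i32 6, i32 7>
//   %r  = bitcast <8 x i1> %m8 to i8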
1485 
1486 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1487  unsigned CC, bool Signed) {
1488  Value *Op0 = CI.getArgOperand(0);
1489  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1490 
1491  Value *Cmp;
1492  if (CC == 3) {
1493  Cmp = Constant::getNullValue(
1494  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1495  } else if (CC == 7) {
1496  Cmp = Constant::getAllOnesValue(
1497  FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1498  } else {
1499  ICmpInst::Predicate Pred;
1500  switch (CC) {
1501  default: llvm_unreachable("Unknown condition code");
1502  case 0: Pred = ICmpInst::ICMP_EQ; break;
1503  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1504  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1505  case 4: Pred = ICmpInst::ICMP_NE; break;
1506  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1507  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1508  }
1509  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1510  }
1511 
1512  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1513 
1514  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1515 }
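// Putting the pieces together, a masked integer compare such as
// (illustrative)
//   %r = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a,
//                                                   <4 x i32> %b, i8 %m)
// becomes an icmp eq, an and with the <4 x i1> form of %m, zero padding to
// <8 x i1>, and a bitcast back to i8.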
1516 
1517 // Replace a masked intrinsic with an older unmasked intrinsic.
1518 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1519  Intrinsic::ID IID) {
1520  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1521  Value *Rep = Builder.CreateCall(Intrin,
1522  { CI.getArgOperand(0), CI.getArgOperand(1) });
1523  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1524 }
1525 
1526 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1527  Value* A = CI.getArgOperand(0);
1528  Value* B = CI.getArgOperand(1);
1529  Value* Src = CI.getArgOperand(2);
1530  Value* Mask = CI.getArgOperand(3);
1531 
1532  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1533  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1534  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1535  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1536  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1537  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1538 }
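// Sketch of the semantics (operand roles assumed): only bit 0 of the mask
// matters; the result is A with lane 0 replaced by lane 0 of either B (mask
// bit set) or Src (mask bit clear), matching the scalar masked-move
// instructions the old intrinsic modeled.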
1539 
1540 
1541 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1542  Value* Op = CI.getArgOperand(0);
1543  Type* ReturnOp = CI.getType();
1544  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1545  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1546  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1547 }
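// For example (types assumed), an i8 mask becomes <8 x i1> and is then
// sign extended, so every set mask bit yields an all-ones element:
//   %r = sext <8 x i1> %mask to <8 x i16>   ; "vpmovm2"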
1548 
1549 // Replace intrinsic with unmasked version and a select.
1550 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1551  CallInst &CI, Value *&Rep) {
1552  Name = Name.substr(12); // Remove avx512.mask.
1553 
1554  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1555  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1556  Intrinsic::ID IID;
1557  if (Name.startswith("max.p")) {
1558  if (VecWidth == 128 && EltWidth == 32)
1559  IID = Intrinsic::x86_sse_max_ps;
1560  else if (VecWidth == 128 && EltWidth == 64)
1561  IID = Intrinsic::x86_sse2_max_pd;
1562  else if (VecWidth == 256 && EltWidth == 32)
1563  IID = Intrinsic::x86_avx_max_ps_256;
1564  else if (VecWidth == 256 && EltWidth == 64)
1565  IID = Intrinsic::x86_avx_max_pd_256;
1566  else
1567  llvm_unreachable("Unexpected intrinsic");
1568  } else if (Name.startswith("min.p")) {
1569  if (VecWidth == 128 && EltWidth == 32)
1570  IID = Intrinsic::x86_sse_min_ps;
1571  else if (VecWidth == 128 && EltWidth == 64)
1572  IID = Intrinsic::x86_sse2_min_pd;
1573  else if (VecWidth == 256 && EltWidth == 32)
1574  IID = Intrinsic::x86_avx_min_ps_256;
1575  else if (VecWidth == 256 && EltWidth == 64)
1576  IID = Intrinsic::x86_avx_min_pd_256;
1577  else
1578  llvm_unreachable("Unexpected intrinsic");
1579  } else if (Name.startswith("pshuf.b.")) {
1580  if (VecWidth == 128)
1581  IID = Intrinsic::x86_ssse3_pshuf_b_128;
1582  else if (VecWidth == 256)
1583  IID = Intrinsic::x86_avx2_pshuf_b;
1584  else if (VecWidth == 512)
1585  IID = Intrinsic::x86_avx512_pshuf_b_512;
1586  else
1587  llvm_unreachable("Unexpected intrinsic");
1588  } else if (Name.startswith("pmul.hr.sw.")) {
1589  if (VecWidth == 128)
1590  IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1591  else if (VecWidth == 256)
1592  IID = Intrinsic::x86_avx2_pmul_hr_sw;
1593  else if (VecWidth == 512)
1594  IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1595  else
1596  llvm_unreachable("Unexpected intrinsic");
1597  } else if (Name.startswith("pmulh.w.")) {
1598  if (VecWidth == 128)
1599  IID = Intrinsic::x86_sse2_pmulh_w;
1600  else if (VecWidth == 256)
1601  IID = Intrinsic::x86_avx2_pmulh_w;
1602  else if (VecWidth == 512)
1603  IID = Intrinsic::x86_avx512_pmulh_w_512;
1604  else
1605  llvm_unreachable("Unexpected intrinsic");
1606  } else if (Name.startswith("pmulhu.w.")) {
1607  if (VecWidth == 128)
1608  IID = Intrinsic::x86_sse2_pmulhu_w;
1609  else if (VecWidth == 256)
1610  IID = Intrinsic::x86_avx2_pmulhu_w;
1611  else if (VecWidth == 512)
1612  IID = Intrinsic::x86_avx512_pmulhu_w_512;
1613  else
1614  llvm_unreachable("Unexpected intrinsic");
1615  } else if (Name.startswith("pmaddw.d.")) {
1616  if (VecWidth == 128)
1617  IID = Intrinsic::x86_sse2_pmadd_wd;
1618  else if (VecWidth == 256)
1619  IID = Intrinsic::x86_avx2_pmadd_wd;
1620  else if (VecWidth == 512)
1621  IID = Intrinsic::x86_avx512_pmaddw_d_512;
1622  else
1623  llvm_unreachable("Unexpected intrinsic");
1624  } else if (Name.startswith("pmaddubs.w.")) {
1625  if (VecWidth == 128)
1626  IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1627  else if (VecWidth == 256)
1628  IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1629  else if (VecWidth == 512)
1630  IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1631  else
1632  llvm_unreachable("Unexpected intrinsic");
1633  } else if (Name.startswith("packsswb.")) {
1634  if (VecWidth == 128)
1635  IID = Intrinsic::x86_sse2_packsswb_128;
1636  else if (VecWidth == 256)
1637  IID = Intrinsic::x86_avx2_packsswb;
1638  else if (VecWidth == 512)
1639  IID = Intrinsic::x86_avx512_packsswb_512;
1640  else
1641  llvm_unreachable("Unexpected intrinsic");
1642  } else if (Name.startswith("packssdw.")) {
1643  if (VecWidth == 128)
1644  IID = Intrinsic::x86_sse2_packssdw_128;
1645  else if (VecWidth == 256)
1646  IID = Intrinsic::x86_avx2_packssdw;
1647  else if (VecWidth == 512)
1648  IID = Intrinsic::x86_avx512_packssdw_512;
1649  else
1650  llvm_unreachable("Unexpected intrinsic");
1651  } else if (Name.startswith("packuswb.")) {
1652  if (VecWidth == 128)
1653  IID = Intrinsic::x86_sse2_packuswb_128;
1654  else if (VecWidth == 256)
1655  IID = Intrinsic::x86_avx2_packuswb;
1656  else if (VecWidth == 512)
1657  IID = Intrinsic::x86_avx512_packuswb_512;
1658  else
1659  llvm_unreachable("Unexpected intrinsic");
1660  } else if (Name.startswith("packusdw.")) {
1661  if (VecWidth == 128)
1662  IID = Intrinsic::x86_sse41_packusdw;
1663  else if (VecWidth == 256)
1664  IID = Intrinsic::x86_avx2_packusdw;
1665  else if (VecWidth == 512)
1666  IID = Intrinsic::x86_avx512_packusdw_512;
1667  else
1668  llvm_unreachable("Unexpected intrinsic");
1669  } else if (Name.startswith("vpermilvar.")) {
1670  if (VecWidth == 128 && EltWidth == 32)
1671  IID = Intrinsic::x86_avx_vpermilvar_ps;
1672  else if (VecWidth == 128 && EltWidth == 64)
1673  IID = Intrinsic::x86_avx_vpermilvar_pd;
1674  else if (VecWidth == 256 && EltWidth == 32)
1675  IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1676  else if (VecWidth == 256 && EltWidth == 64)
1677  IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1678  else if (VecWidth == 512 && EltWidth == 32)
1679  IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1680  else if (VecWidth == 512 && EltWidth == 64)
1681  IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1682  else
1683  llvm_unreachable("Unexpected intrinsic");
1684  } else if (Name == "cvtpd2dq.256") {
1685  IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1686  } else if (Name == "cvtpd2ps.256") {
1687  IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1688  } else if (Name == "cvttpd2dq.256") {
1689  IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1690  } else if (Name == "cvttps2dq.128") {
1691  IID = Intrinsic::x86_sse2_cvttps2dq;
1692  } else if (Name == "cvttps2dq.256") {
1693  IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1694  } else if (Name.startswith("permvar.")) {
1695  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1696  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1697  IID = Intrinsic::x86_avx2_permps;
1698  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1699  IID = Intrinsic::x86_avx2_permd;
1700  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1701  IID = Intrinsic::x86_avx512_permvar_df_256;
1702  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1703  IID = Intrinsic::x86_avx512_permvar_di_256;
1704  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1705  IID = Intrinsic::x86_avx512_permvar_sf_512;
1706  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1707  IID = Intrinsic::x86_avx512_permvar_si_512;
1708  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1709  IID = Intrinsic::x86_avx512_permvar_df_512;
1710  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1711  IID = Intrinsic::x86_avx512_permvar_di_512;
1712  else if (VecWidth == 128 && EltWidth == 16)
1713  IID = Intrinsic::x86_avx512_permvar_hi_128;
1714  else if (VecWidth == 256 && EltWidth == 16)
1715  IID = Intrinsic::x86_avx512_permvar_hi_256;
1716  else if (VecWidth == 512 && EltWidth == 16)
1717  IID = Intrinsic::x86_avx512_permvar_hi_512;
1718  else if (VecWidth == 128 && EltWidth == 8)
1719  IID = Intrinsic::x86_avx512_permvar_qi_128;
1720  else if (VecWidth == 256 && EltWidth == 8)
1721  IID = Intrinsic::x86_avx512_permvar_qi_256;
1722  else if (VecWidth == 512 && EltWidth == 8)
1723  IID = Intrinsic::x86_avx512_permvar_qi_512;
1724  else
1725  llvm_unreachable("Unexpected intrinsic");
1726  } else if (Name.startswith("dbpsadbw.")) {
1727  if (VecWidth == 128)
1728  IID = Intrinsic::x86_avx512_dbpsadbw_128;
1729  else if (VecWidth == 256)
1730  IID = Intrinsic::x86_avx512_dbpsadbw_256;
1731  else if (VecWidth == 512)
1732  IID = Intrinsic::x86_avx512_dbpsadbw_512;
1733  else
1734  llvm_unreachable("Unexpected intrinsic");
1735  } else if (Name.startswith("pmultishift.qb.")) {
1736  if (VecWidth == 128)
1737  IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1738  else if (VecWidth == 256)
1739  IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1740  else if (VecWidth == 512)
1741  IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1742  else
1743  llvm_unreachable("Unexpected intrinsic");
1744  } else if (Name.startswith("conflict.")) {
1745  if (Name[9] == 'd' && VecWidth == 128)
1746  IID = Intrinsic::x86_avx512_conflict_d_128;
1747  else if (Name[9] == 'd' && VecWidth == 256)
1748  IID = Intrinsic::x86_avx512_conflict_d_256;
1749  else if (Name[9] == 'd' && VecWidth == 512)
1750  IID = Intrinsic::x86_avx512_conflict_d_512;
1751  else if (Name[9] == 'q' && VecWidth == 128)
1752  IID = Intrinsic::x86_avx512_conflict_q_128;
1753  else if (Name[9] == 'q' && VecWidth == 256)
1754  IID = Intrinsic::x86_avx512_conflict_q_256;
1755  else if (Name[9] == 'q' && VecWidth == 512)
1756  IID = Intrinsic::x86_avx512_conflict_q_512;
1757  else
1758  llvm_unreachable("Unexpected intrinsic");
1759  } else if (Name.startswith("pavg.")) {
1760  if (Name[5] == 'b' && VecWidth == 128)
1761  IID = Intrinsic::x86_sse2_pavg_b;
1762  else if (Name[5] == 'b' && VecWidth == 256)
1763  IID = Intrinsic::x86_avx2_pavg_b;
1764  else if (Name[5] == 'b' && VecWidth == 512)
1765  IID = Intrinsic::x86_avx512_pavg_b_512;
1766  else if (Name[5] == 'w' && VecWidth == 128)
1767  IID = Intrinsic::x86_sse2_pavg_w;
1768  else if (Name[5] == 'w' && VecWidth == 256)
1769  IID = Intrinsic::x86_avx2_pavg_w;
1770  else if (Name[5] == 'w' && VecWidth == 512)
1771  IID = Intrinsic::x86_avx512_pavg_w_512;
1772  else
1773  llvm_unreachable("Unexpected intrinsic");
1774  } else
1775  return false;
1776 
1777  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1778  CI.arg_operands().end());
1779  Args.pop_back();
1780  Args.pop_back();
1781  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1782  Args);
1783  unsigned NumArgs = CI.getNumArgOperands();
1784  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1785  CI.getArgOperand(NumArgs - 2));
1786  return true;
1787 }
1788 
1789 /// Upgrade the comment in a call to inline asm that represents an ObjC
1790 /// retain/release marker.
1791 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1792  size_t Pos;
1793  if (AsmStr->find("mov\tfp") == 0 &&
1794  AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1795  (Pos = AsmStr->find("# marker")) != std::string::npos) {
1796  AsmStr->replace(Pos, 1, ";");
1797  }
1798 }
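// For example (string assumed), in a marker string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// the '#' that introduces the marker comment is replaced with ';'.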
1799 
1800 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1801 /// provided to seamlessly integrate with existing context.
1802 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1803  Function *F = CI->getCalledFunction();
1804  LLVMContext &C = CI->getContext();
1805  IRBuilder<> Builder(C);
1806  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1807 
1808  assert(F && "Intrinsic call is not direct?");
1809 
1810  if (!NewFn) {
1811  // Get the Function's name.
1812  StringRef Name = F->getName();
1813 
1814  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1815  Name = Name.substr(5);
1816 
1817  bool IsX86 = Name.startswith("x86.");
1818  if (IsX86)
1819  Name = Name.substr(4);
1820  bool IsNVVM = Name.startswith("nvvm.");
1821  if (IsNVVM)
1822  Name = Name.substr(5);
1823 
1824  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1825  Module *M = F->getParent();
1826  SmallVector<Metadata *, 1> Elts;
1827  Elts.push_back(
1828  ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1829  MDNode *Node = MDNode::get(C, Elts);
1830 
1831  Value *Arg0 = CI->getArgOperand(0);
1832  Value *Arg1 = CI->getArgOperand(1);
1833 
1834  // Nontemporal (unaligned) store of the 0th element of the float/double
1835  // vector.
1836  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1837  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1838  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1839  Value *Extract =
1840  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1841 
1842  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1843  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1844 
1845  // Remove intrinsic.
1846  CI->eraseFromParent();
1847  return;
1848  }
1849 
1850  if (IsX86 && (Name.startswith("avx.movnt.") ||
1851  Name.startswith("avx512.storent."))) {
1852  Module *M = F->getParent();
1853  SmallVector<Metadata *, 1> Elts;
1854  Elts.push_back(
1855  ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1856  MDNode *Node = MDNode::get(C, Elts);
1857 
1858  Value *Arg0 = CI->getArgOperand(0);
1859  Value *Arg1 = CI->getArgOperand(1);
1860 
1861  // Convert the type of the pointer to a pointer to the stored type.
1862  Value *BC = Builder.CreateBitCast(Arg0,
1864  "cast");
1865  StoreInst *SI = Builder.CreateAlignedStore(
1866  Arg1, BC,
1867  Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
1868  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1869 
1870  // Remove intrinsic.
1871  CI->eraseFromParent();
1872  return;
1873  }
1874 
1875  if (IsX86 && Name == "sse2.storel.dq") {
1876  Value *Arg0 = CI->getArgOperand(0);
1877  Value *Arg1 = CI->getArgOperand(1);
1878 
1879  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
1880  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1881  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1882  Value *BC = Builder.CreateBitCast(Arg0,
1884  "cast");
1885  Builder.CreateAlignedStore(Elt, BC, Align(1));
1886 
1887  // Remove intrinsic.
1888  CI->eraseFromParent();
1889  return;
1890  }
1891 
1892  if (IsX86 && (Name.startswith("sse.storeu.") ||
1893  Name.startswith("sse2.storeu.") ||
1894  Name.startswith("avx.storeu."))) {
1895  Value *Arg0 = CI->getArgOperand(0);
1896  Value *Arg1 = CI->getArgOperand(1);
1897 
1898  Arg0 = Builder.CreateBitCast(Arg0,
1900  "cast");
1901  Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1902 
1903  // Remove intrinsic.
1904  CI->eraseFromParent();
1905  return;
1906  }
1907 
1908  if (IsX86 && Name == "avx512.mask.store.ss") {
1909  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1910  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1911  Mask, false);
1912 
1913  // Remove intrinsic.
1914  CI->eraseFromParent();
1915  return;
1916  }
1917 
1918  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1919  // "avx512.mask.storeu." or "avx512.mask.store."
1920  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1921  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1922  CI->getArgOperand(2), Aligned);
1923 
1924  // Remove intrinsic.
1925  CI->eraseFromParent();
1926  return;
1927  }
1928 
1929  Value *Rep;
1930  // Upgrade packed integer vector compare intrinsics to compare instructions.
1931  if (IsX86 && (Name.startswith("sse2.pcmp") ||
1932  Name.startswith("avx2.pcmp"))) {
1933  // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1934  bool CmpEq = Name[9] == 'e';
1935  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1936  CI->getArgOperand(0), CI->getArgOperand(1));
1937  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
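    // Illustrative example (operands assumed):
    //   %r = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a, <16 x i8> %b)
    // becomes
    //   %c = icmp sgt <16 x i8> %a, %b
    //   %r = sext <16 x i1> %c to <16 x i8>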
1938  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1939  Type *ExtTy = Type::getInt32Ty(C);
1940  if (CI->getOperand(0)->getType()->isIntegerTy(8))
1941  ExtTy = Type::getInt64Ty(C);
1942  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1943  ExtTy->getPrimitiveSizeInBits();
1944  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1945  Rep = Builder.CreateVectorSplat(NumElts, Rep);
1946  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1947  Name == "sse2.sqrt.sd")) {
1948  Value *Vec = CI->getArgOperand(0);
1949  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1950  Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1951  Intrinsic::sqrt, Elt0->getType());
1952  Elt0 = Builder.CreateCall(Intr, Elt0);
1953  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1954  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1955  Name.startswith("sse2.sqrt.p") ||
1956  Name.startswith("sse.sqrt.p"))) {
1957  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1958  Intrinsic::sqrt,
1959  CI->getType()),
1960  {CI->getArgOperand(0)});
1961  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1962  if (CI->getNumArgOperands() == 4 &&
1963  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1964  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1965  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1966  : Intrinsic::x86_avx512_sqrt_pd_512;
1967 
1968  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1969  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1970  IID), Args);
1971  } else {
1972  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1973  Intrinsic::sqrt,
1974  CI->getType()),
1975  {CI->getArgOperand(0)});
1976  }
1977  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1978  CI->getArgOperand(1));
1979  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1980  Name.startswith("avx512.ptestnm"))) {
1981  Value *Op0 = CI->getArgOperand(0);
1982  Value *Op1 = CI->getArgOperand(1);
1983  Value *Mask = CI->getArgOperand(2);
1984  Rep = Builder.CreateAnd(Op0, Op1);
1985  llvm::Type *Ty = Op0->getType();
1986  Value *Zero = llvm::Constant::getNullValue(Ty);
1987  ICmpInst::Predicate Pred =
1988  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1989  Rep = Builder.CreateICmp(Pred, Rep, Zero);
1990  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1991  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1992  unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
1993  ->getNumElements();
1994  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1995  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1996  CI->getArgOperand(1));
1997  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1998  unsigned NumElts = CI->getType()->getScalarSizeInBits();
1999  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2000  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2001  int Indices[64];
2002  for (unsigned i = 0; i != NumElts; ++i)
2003  Indices[i] = i;
2004 
2005  // First extract half of each vector. This gives better codegen than
2006  // doing it in a single shuffle.
2007  LHS = Builder.CreateShuffleVector(LHS, LHS,
2008  makeArrayRef(Indices, NumElts / 2));
2009  RHS = Builder.CreateShuffleVector(RHS, RHS,
2010  makeArrayRef(Indices, NumElts / 2));
2011  // Concat the vectors.
2012  // NOTE: Operands have to be swapped to match intrinsic definition.
2013  Rep = Builder.CreateShuffleVector(RHS, LHS,
2014  makeArrayRef(Indices, NumElts));
2015  Rep = Builder.CreateBitCast(Rep, CI->getType());
2016  } else if (IsX86 && Name == "avx512.kand.w") {
2017  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2018  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2019  Rep = Builder.CreateAnd(LHS, RHS);
2020  Rep = Builder.CreateBitCast(Rep, CI->getType());
2021  } else if (IsX86 && Name == "avx512.kandn.w") {
2022  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2023  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2024  LHS = Builder.CreateNot(LHS);
2025  Rep = Builder.CreateAnd(LHS, RHS);
2026  Rep = Builder.CreateBitCast(Rep, CI->getType());
2027  } else if (IsX86 && Name == "avx512.kor.w") {
2028  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2029  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2030  Rep = Builder.CreateOr(LHS, RHS);
2031  Rep = Builder.CreateBitCast(Rep, CI->getType());
2032  } else if (IsX86 && Name == "avx512.kxor.w") {
2033  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2034  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2035  Rep = Builder.CreateXor(LHS, RHS);
2036  Rep = Builder.CreateBitCast(Rep, CI->getType());
2037  } else if (IsX86 && Name == "avx512.kxnor.w") {
2038  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2039  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2040  LHS = Builder.CreateNot(LHS);
2041  Rep = Builder.CreateXor(LHS, RHS);
2042  Rep = Builder.CreateBitCast(Rep, CI->getType());
2043  } else if (IsX86 && Name == "avx512.knot.w") {
2044  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2045  Rep = Builder.CreateNot(Rep);
2046  Rep = Builder.CreateBitCast(Rep, CI->getType());
2047  } else if (IsX86 &&
2048  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2049  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2050  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2051  Rep = Builder.CreateOr(LHS, RHS);
2052  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2053  Value *C;
2054  if (Name[14] == 'c')
2055  C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2056  else
2057  C = ConstantInt::getNullValue(Builder.getInt16Ty());
2058  Rep = Builder.CreateICmpEQ(Rep, C);
2059  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2060  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2061  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2062  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2063  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2064  Type *I32Ty = Type::getInt32Ty(C);
2065  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2066  ConstantInt::get(I32Ty, 0));
2067  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2068  ConstantInt::get(I32Ty, 0));
2069  Value *EltOp;
2070  if (Name.contains(".add."))
2071  EltOp = Builder.CreateFAdd(Elt0, Elt1);
2072  else if (Name.contains(".sub."))
2073  EltOp = Builder.CreateFSub(Elt0, Elt1);
2074  else if (Name.contains(".mul."))
2075  EltOp = Builder.CreateFMul(Elt0, Elt1);
2076  else
2077  EltOp = Builder.CreateFDiv(Elt0, Elt1);
2078  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2079  ConstantInt::get(I32Ty, 0));
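    // E.g. (illustrative) "sse.add.ss" on <4 x float> %a, %b extracts
    // element 0 of each operand, performs one scalar fadd, and reinserts the
    // result into %a, leaving lanes 1-3 of %a untouched.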
2080  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2081  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2082  bool CmpEq = Name[16] == 'e';
2083  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2084  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2085  Type *OpTy = CI->getArgOperand(0)->getType();
2086  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2087  Intrinsic::ID IID;
2088  switch (VecWidth) {
2089  default: llvm_unreachable("Unexpected intrinsic");
2090  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2091  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2092  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2093  }
2094 
2095  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2096  { CI->getOperand(0), CI->getArgOperand(1) });
2097  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2098  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2099  Type *OpTy = CI->getArgOperand(0)->getType();
2100  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2101  unsigned EltWidth = OpTy->getScalarSizeInBits();
2102  Intrinsic::ID IID;
2103  if (VecWidth == 128 && EltWidth == 32)
2104  IID = Intrinsic::x86_avx512_fpclass_ps_128;
2105  else if (VecWidth == 256 && EltWidth == 32)
2106  IID = Intrinsic::x86_avx512_fpclass_ps_256;
2107  else if (VecWidth == 512 && EltWidth == 32)
2108  IID = Intrinsic::x86_avx512_fpclass_ps_512;
2109  else if (VecWidth == 128 && EltWidth == 64)
2110  IID = Intrinsic::x86_avx512_fpclass_pd_128;
2111  else if (VecWidth == 256 && EltWidth == 64)
2112  IID = Intrinsic::x86_avx512_fpclass_pd_256;
2113  else if (VecWidth == 512 && EltWidth == 64)
2114  IID = Intrinsic::x86_avx512_fpclass_pd_512;
2115  else
2116  llvm_unreachable("Unexpected intrinsic");
2117 
2118  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2119  { CI->getOperand(0), CI->getArgOperand(1) });
2120  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2121  } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2122  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2123  CI->arg_operands().end());
2124  Type *OpTy = Args[0]->getType();
2125  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2126  unsigned EltWidth = OpTy->getScalarSizeInBits();
2127  Intrinsic::ID IID;
2128  if (VecWidth == 128 && EltWidth == 32)
2129  IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2130  else if (VecWidth == 256 && EltWidth == 32)
2131  IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2132  else if (VecWidth == 512 && EltWidth == 32)
2133  IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2134  else if (VecWidth == 128 && EltWidth == 64)
2135  IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2136  else if (VecWidth == 256 && EltWidth == 64)
2137  IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2138  else if (VecWidth == 512 && EltWidth == 64)
2139  IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2140  else
2141  llvm_unreachable("Unexpected intrinsic");
2142 
2143  Value *Mask = Constant::getAllOnesValue(CI->getType());
2144  if (VecWidth == 512)
2145  std::swap(Mask, Args.back());
2146  Args.push_back(Mask);
2147 
2148  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2149  Args);
2150  } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2151  // Integer compare intrinsics.
2152  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2153  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2154  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2155  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2156  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2157  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2158  Name.startswith("avx512.cvtw2mask.") ||
2159  Name.startswith("avx512.cvtd2mask.") ||
2160  Name.startswith("avx512.cvtq2mask."))) {
2161  Value *Op = CI->getArgOperand(0);
2162  Value *Zero = llvm::Constant::getNullValue(Op->getType());
2163  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2164  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2165  } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
2166  Name == "ssse3.pabs.w.128" ||
2167  Name == "ssse3.pabs.d.128" ||
2168  Name.startswith("avx2.pabs") ||
2169  Name.startswith("avx512.mask.pabs"))) {
2170  Rep = upgradeAbs(Builder, *CI);
2171  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2172  Name == "sse2.pmaxs.w" ||
2173  Name == "sse41.pmaxsd" ||
2174  Name.startswith("avx2.pmaxs") ||
2175  Name.startswith("avx512.mask.pmaxs"))) {
2177  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2178  Name == "sse41.pmaxuw" ||
2179  Name == "sse41.pmaxud" ||
2180  Name.startswith("avx2.pmaxu") ||
2181  Name.startswith("avx512.mask.pmaxu"))) {
2183  } else if (IsX86 && (Name == "sse41.pminsb" ||
2184  Name == "sse2.pmins.w" ||
2185  Name == "sse41.pminsd" ||
2186  Name.startswith("avx2.pmins") ||
2187  Name.startswith("avx512.mask.pmins"))) {
2189  } else if (IsX86 && (Name == "sse2.pminu.b" ||
2190  Name == "sse41.pminuw" ||
2191  Name == "sse41.pminud" ||
2192  Name.startswith("avx2.pminu") ||
2193  Name.startswith("avx512.mask.pminu"))) {
2195  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2196  Name == "avx2.pmulu.dq" ||
2197  Name == "avx512.pmulu.dq.512" ||
2198  Name.startswith("avx512.mask.pmulu.dq."))) {
2199  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2200  } else if (IsX86 && (Name == "sse41.pmuldq" ||
2201  Name == "avx2.pmul.dq" ||
2202  Name == "avx512.pmul.dq.512" ||
2203  Name.startswith("avx512.mask.pmul.dq."))) {
2204  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2205  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2206  Name == "sse2.cvtsi2sd" ||
2207  Name == "sse.cvtsi642ss" ||
2208  Name == "sse2.cvtsi642sd")) {
2209  Rep = Builder.CreateSIToFP(
2210  CI->getArgOperand(1),
2211  cast<VectorType>(CI->getType())->getElementType());
2212  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2213  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2214  Rep = Builder.CreateUIToFP(
2215  CI->getArgOperand(1),
2216  cast<VectorType>(CI->getType())->getElementType());
2217  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2218  } else if (IsX86 && Name == "sse2.cvtss2sd") {
2219  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2220  Rep = Builder.CreateFPExt(
2221  Rep, cast<VectorType>(CI->getType())->getElementType());
2222  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2223  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2224  Name == "sse2.cvtdq2ps" ||
2225  Name == "avx.cvtdq2.pd.256" ||
2226  Name == "avx.cvtdq2.ps.256" ||
2227  Name.startswith("avx512.mask.cvtdq2pd.") ||
2228  Name.startswith("avx512.mask.cvtudq2pd.") ||
2229  Name.startswith("avx512.mask.cvtdq2ps.") ||
2230  Name.startswith("avx512.mask.cvtudq2ps.") ||
2231  Name.startswith("avx512.mask.cvtqq2pd.") ||
2232  Name.startswith("avx512.mask.cvtuqq2pd.") ||
2233  Name == "avx512.mask.cvtqq2ps.256" ||
2234  Name == "avx512.mask.cvtqq2ps.512" ||
2235  Name == "avx512.mask.cvtuqq2ps.256" ||
2236  Name == "avx512.mask.cvtuqq2ps.512" ||
2237  Name == "sse2.cvtps2pd" ||
2238  Name == "avx.cvt.ps2.pd.256" ||
2239  Name == "avx512.mask.cvtps2pd.128" ||
2240  Name == "avx512.mask.cvtps2pd.256")) {
2241  auto *DstTy = cast<FixedVectorType>(CI->getType());
2242  Rep = CI->getArgOperand(0);
2243  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2244 
2245  unsigned NumDstElts = DstTy->getNumElements();
2246  if (NumDstElts < SrcTy->getNumElements()) {
2247  assert(NumDstElts == 2 && "Unexpected vector size");
2248  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2249  }
2250 
2251  bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2252  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2253  if (IsPS2PD)
2254  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2255  else if (CI->getNumArgOperands() == 4 &&
2256  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2257  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2258  Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2259  : Intrinsic::x86_avx512_sitofp_round;
2260  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2261  { DstTy, SrcTy });
2262  Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2263  } else {
2264  Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2265  : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2266  }
2267 
2268  if (CI->getNumArgOperands() >= 3)
2269  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2270  CI->getArgOperand(1));
2271  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2272  Name.startswith("vcvtph2ps."))) {
2273  auto *DstTy = cast<FixedVectorType>(CI->getType());
2274  Rep = CI->getArgOperand(0);
2275  auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2276  unsigned NumDstElts = DstTy->getNumElements();
2277  if (NumDstElts != SrcTy->getNumElements()) {
2278  assert(NumDstElts == 4 && "Unexpected vector size");
2279  Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2280  }
2281  Rep = Builder.CreateBitCast(
2282  Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2283  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2284  if (CI->getNumArgOperands() >= 3)
2285  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2286  CI->getArgOperand(1));
2287  } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2288  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2289  CI->getArgOperand(1), CI->getArgOperand(2),
2290  /*Aligned*/false);
2291  } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2292  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2293  CI->getArgOperand(1),CI->getArgOperand(2),
2294  /*Aligned*/true);
2295  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2296  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2297  Type *PtrTy = ResultTy->getElementType();
2298 
2299  // Cast the pointer to element type.
2300  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2301  llvm::PointerType::getUnqual(PtrTy));
2302 
2303  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2304  ResultTy->getNumElements());
2305 
2306  Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2307  Intrinsic::masked_expandload,
2308  ResultTy);
2309  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2310  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2311  auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2312  Type *PtrTy = ResultTy->getElementType();
2313 
2314  // Cast the pointer to element type.
2315  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2316  llvm::PointerType::getUnqual(PtrTy));
2317 
2318  Value *MaskVec =
2319  getX86MaskVec(Builder, CI->getArgOperand(2),
2320  cast<FixedVectorType>(ResultTy)->getNumElements());
2321 
2322  Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2323  Intrinsic::masked_compressstore,
2324  ResultTy);
2325  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2326  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2327  Name.startswith("avx512.mask.expand."))) {
2328  auto *ResultTy = cast<FixedVectorType>(CI->getType());
2329 
2330  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2331  ResultTy->getNumElements());
2332 
2333  bool IsCompress = Name[12] == 'c';
2334  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2335  : Intrinsic::x86_avx512_mask_expand;
2336  Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2337  Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2338  MaskVec });
2339  } else if (IsX86 && Name.startswith("xop.vpcom")) {
2340  bool IsSigned;
2341  if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2342  Name.endswith("uq"))
2343  IsSigned = false;
2344  else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2345  Name.endswith("q"))
2346  IsSigned = true;
2347  else
2348  llvm_unreachable("Unknown suffix");
2349 
2350  unsigned Imm;
2351  if (CI->getNumArgOperands() == 3) {
2352  Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2353  } else {
2354  Name = Name.substr(9); // strip off "xop.vpcom"
2355  if (Name.startswith("lt"))
2356  Imm = 0;
2357  else if (Name.startswith("le"))
2358  Imm = 1;
2359  else if (Name.startswith("gt"))
2360  Imm = 2;
2361  else if (Name.startswith("ge"))
2362  Imm = 3;
2363  else if (Name.startswith("eq"))
2364  Imm = 4;
2365  else if (Name.startswith("ne"))
2366  Imm = 5;
2367  else if (Name.startswith("false"))
2368  Imm = 6;
2369  else if (Name.startswith("true"))
2370  Imm = 7;
2371  else
2372  llvm_unreachable("Unknown condition");
2373  }
2374 
2375  Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2376  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2377  Value *Sel = CI->getArgOperand(2);
2378  Value *NotSel = Builder.CreateNot(Sel);
2379  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2380  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2381  Rep = Builder.CreateOr(Sel0, Sel1);
2382  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2383  Name.startswith("avx512.prol") ||
2384  Name.startswith("avx512.mask.prol"))) {
2385  Rep = upgradeX86Rotate(Builder, *CI, false);
2386  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2387  Name.startswith("avx512.mask.pror"))) {
2388  Rep = upgradeX86Rotate(Builder, *CI, true);
2389  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2390  Name.startswith("avx512.mask.vpshld") ||
2391  Name.startswith("avx512.maskz.vpshld"))) {
2392  bool ZeroMask = Name[11] == 'z';
2393  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2394  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2395  Name.startswith("avx512.mask.vpshrd") ||
2396  Name.startswith("avx512.maskz.vpshrd"))) {
2397  bool ZeroMask = Name[11] == 'z';
2398  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2399  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2400  Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2401  Intrinsic::x86_sse42_crc32_32_8);
2402  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2403  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2404  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2405  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2406  Name.startswith("avx512.vbroadcast.s"))) {
2407  // Replace broadcasts with a series of insertelements.
2408  auto *VecTy = cast<FixedVectorType>(CI->getType());
2409  Type *EltTy = VecTy->getElementType();
2410  unsigned EltNum = VecTy->getNumElements();
2411  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2412  EltTy->getPointerTo());
2413  Value *Load = Builder.CreateLoad(EltTy, Cast);
2414  Type *I32Ty = Type::getInt32Ty(C);
2415  Rep = UndefValue::get(VecTy);
2416  for (unsigned I = 0; I < EltNum; ++I)
2417  Rep = Builder.CreateInsertElement(Rep, Load,
2418  ConstantInt::get(I32Ty, I));
2419  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2420  Name.startswith("sse41.pmovzx") ||
2421  Name.startswith("avx2.pmovsx") ||
2422  Name.startswith("avx2.pmovzx") ||
2423  Name.startswith("avx512.mask.pmovsx") ||
2424  Name.startswith("avx512.mask.pmovzx"))) {
2425  auto *DstTy = cast<FixedVectorType>(CI->getType());
2426  unsigned NumDstElts = DstTy->getNumElements();
2427 
2428  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2429  SmallVector<int, 8> ShuffleMask(NumDstElts);
2430  for (unsigned i = 0; i != NumDstElts; ++i)
2431  ShuffleMask[i] = i;
2432 
2433  Value *SV =
2434  Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2435 
2436  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2437  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2438  : Builder.CreateZExt(SV, DstTy);
2439  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2440  if (CI->getNumArgOperands() == 3)
2441  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2442  CI->getArgOperand(1));
2443  } else if (Name == "avx512.mask.pmov.qd.256" ||
2444  Name == "avx512.mask.pmov.qd.512" ||
2445  Name == "avx512.mask.pmov.wb.256" ||
2446  Name == "avx512.mask.pmov.wb.512") {
2447  Type *Ty = CI->getArgOperand(1)->getType();
2448  Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2449  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2450  CI->getArgOperand(1));
2451  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2452  Name == "avx2.vbroadcasti128")) {
2453  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2454  Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2455  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2456  auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2457  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2458  PointerType::getUnqual(VT));
2459  Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2460  if (NumSrcElts == 2)
2461  Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2462  else
2463  Rep = Builder.CreateShuffleVector(
2464  Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
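    // Sketch (float case assumed): the 128-bit source is loaded as
    // <4 x float> and widened with shuffle mask <0,1,2,3,0,1,2,3> so the
    // same 128 bits fill both halves of the 256-bit destination.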
2465  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2466  Name.startswith("avx512.mask.shuf.f"))) {
2467  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2468  Type *VT = CI->getType();
2469  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2470  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2471  unsigned ControlBitsMask = NumLanes - 1;
2472  unsigned NumControlBits = NumLanes / 2;
2473  SmallVector<int, 8> ShuffleMask(0);
2474 
2475  for (unsigned l = 0; l != NumLanes; ++l) {
2476  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2477  // We actually need the other source.
2478  if (l >= NumLanes / 2)
2479  LaneMask += NumLanes;
2480  for (unsigned i = 0; i != NumElementsInLane; ++i)
2481  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2482  }
2483  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2484  CI->getArgOperand(1), ShuffleMask);
2485  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2486  CI->getArgOperand(3));
2487  } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2488  Name.startswith("avx512.mask.broadcasti"))) {
2489  unsigned NumSrcElts =
2490  cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2491  ->getNumElements();
2492  unsigned NumDstElts =
2493  cast<FixedVectorType>(CI->getType())->getNumElements();
2494 
2495  SmallVector<int, 8> ShuffleMask(NumDstElts);
2496  for (unsigned i = 0; i != NumDstElts; ++i)
2497  ShuffleMask[i] = i % NumSrcElts;
2498 
2499  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2500  CI->getArgOperand(0),
2501  ShuffleMask);
2502  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2503  CI->getArgOperand(1));
2504  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2505  Name.startswith("avx2.vbroadcast") ||
2506  Name.startswith("avx512.pbroadcast") ||
2507  Name.startswith("avx512.mask.broadcast.s"))) {
2508  // Replace vp?broadcasts with a vector shuffle.
2509  Value *Op = CI->getArgOperand(0);
2510  ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2511  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2512  SmallVector<int, 8> M;
2513  ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2514  Rep = Builder.CreateShuffleVector(Op, M);
2515 
2516  if (CI->getNumArgOperands() == 3)
2517  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2518  CI->getArgOperand(1));
2519  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2520  Name.startswith("avx2.padds.") ||
2521  Name.startswith("avx512.padds.") ||
2522  Name.startswith("avx512.mask.padds."))) {
2523  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2524  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2525  Name.startswith("avx2.psubs.") ||
2526  Name.startswith("avx512.psubs.") ||
2527  Name.startswith("avx512.mask.psubs."))) {
2528  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2529  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2530  Name.startswith("avx2.paddus.") ||
2531  Name.startswith("avx512.mask.paddus."))) {
2532  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2533  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2534  Name.startswith("avx2.psubus.") ||
2535  Name.startswith("avx512.mask.psubus."))) {
2536  Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2537  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2538  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2539  CI->getArgOperand(1),
2540  CI->getArgOperand(2),
2541  CI->getArgOperand(3),
2542  CI->getArgOperand(4),
2543  false);
2544  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2545  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2546  CI->getArgOperand(1),
2547  CI->getArgOperand(2),
2548  CI->getArgOperand(3),
2549  CI->getArgOperand(4),
2550  true);
2551  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2552  Name == "avx2.psll.dq")) {
2553  // 128/256-bit shift left specified in bits.
2554  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2555  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2556  Shift / 8); // Shift is in bits.
2557  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2558  Name == "avx2.psrl.dq")) {
2559  // 128/256-bit shift right specified in bits.
2560  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2561  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2562  Shift / 8); // Shift is in bits.
2563  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2564  Name == "avx2.psll.dq.bs" ||
2565  Name == "avx512.psll.dq.512")) {
2566  // 128/256/512-bit shift left specified in bytes.
2567  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2568  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2569  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2570  Name == "avx2.psrl.dq.bs" ||
2571  Name == "avx512.psrl.dq.512")) {
2572  // 128/256/512-bit shift right specified in bytes.
2573  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2574  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2575  } else if (IsX86 && (Name == "sse41.pblendw" ||
2576  Name.startswith("sse41.blendp") ||
2577  Name.startswith("avx.blend.p") ||
2578  Name == "avx2.pblendw" ||
2579  Name.startswith("avx2.pblendd."))) {
2580  Value *Op0 = CI->getArgOperand(0);
2581  Value *Op1 = CI->getArgOperand(1);
2582  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2583  auto *VecTy = cast<FixedVectorType>(CI->getType());
2584  unsigned NumElts = VecTy->getNumElements();
2585 
2586  SmallVector<int, 16> Idxs(NumElts);
2587  for (unsigned i = 0; i != NumElts; ++i)
2588  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2589 
2590  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
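    // Illustrative mask for "sse41.pblendw" with Imm = 0x0F on <8 x i16>:
    //   <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
    // i.e. the low four elements come from Op1 and the rest from Op0.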
2591  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2592  Name == "avx2.vinserti128" ||
2593  Name.startswith("avx512.mask.insert"))) {
2594  Value *Op0 = CI->getArgOperand(0);
2595  Value *Op1 = CI->getArgOperand(1);
2596  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2597  unsigned DstNumElts =
2598  cast<FixedVectorType>(CI->getType())->getNumElements();
2599  unsigned SrcNumElts =
2600  cast<FixedVectorType>(Op1->getType())->getNumElements();
2601  unsigned Scale = DstNumElts / SrcNumElts;
2602 
2603  // Mask off the high bits of the immediate value; hardware ignores those.
2604  Imm = Imm % Scale;
2605 
2606  // Extend the second operand into a vector the size of the destination.
2607  SmallVector<int, 8> Idxs(DstNumElts);
2608  for (unsigned i = 0; i != SrcNumElts; ++i)
2609  Idxs[i] = i;
2610  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2611  Idxs[i] = SrcNumElts;
2612  Rep = Builder.CreateShuffleVector(Op1, Idxs);
2613 
2614  // Insert the second operand into the first operand.
2615 
2616  // Note that there is no guarantee that instruction lowering will actually
2617  // produce a vinsertf128 instruction for the created shuffles. In
2618  // particular, the 0 immediate case involves no lane changes, so it can
2619  // be handled as a blend.
2620 
2621  // Example of shuffle mask for 32-bit elements:
2622  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2623  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2624 
2625  // First fill with the identity mask.
2626  for (unsigned i = 0; i != DstNumElts; ++i)
2627  Idxs[i] = i;
2628  // Then replace the elements where we need to insert.
2629  for (unsigned i = 0; i != SrcNumElts; ++i)
2630  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2631  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2632 
2633  // If the intrinsic has a mask operand, handle that.
2634  if (CI->getNumArgOperands() == 5)
2635  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2636  CI->getArgOperand(3));
2637  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2638  Name == "avx2.vextracti128" ||
2639  Name.startswith("avx512.mask.vextract"))) {
2640  Value *Op0 = CI->getArgOperand(0);
2641  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2642  unsigned DstNumElts =
2643  cast<FixedVectorType>(CI->getType())->getNumElements();
2644  unsigned SrcNumElts =
2645  cast<FixedVectorType>(Op0->getType())->getNumElements();
2646  unsigned Scale = SrcNumElts / DstNumElts;
2647 
2648  // Mask off the high bits of the immediate value; hardware ignores those.
2649  Imm = Imm % Scale;
2650 
2651  // Get indexes for the subvector of the input vector.
2652  SmallVector<int, 8> Idxs(DstNumElts);
2653  for (unsigned i = 0; i != DstNumElts; ++i) {
2654  Idxs[i] = i + (Imm * DstNumElts);
2655  }
2656  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2657 
2658  // If the intrinsic has a mask operand, handle that.
2659  if (CI->getNumArgOperands() == 4)
2660  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2661  CI->getArgOperand(2));
2662  } else if (!IsX86 && Name == "stackprotectorcheck") {
2663  Rep = nullptr;
2664  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2665  Name.startswith("avx512.mask.perm.di."))) {
2666  Value *Op0 = CI->getArgOperand(0);
2667  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2668  auto *VecTy = cast<FixedVectorType>(CI->getType());
2669  unsigned NumElts = VecTy->getNumElements();
2670 
2671  SmallVector<int, 8> Idxs(NumElts);
2672  for (unsigned i = 0; i != NumElts; ++i)
2673  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2674 
2675  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2676 
2677  if (CI->getNumArgOperands() == 4)
2678  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2679  CI->getArgOperand(2));
2680  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2681  Name == "avx2.vperm2i128")) {
2682  // The immediate permute control byte looks like this:
2683  // [1:0] - select 128 bits from sources for low half of destination
2684  // [2] - ignore
2685  // [3] - zero low half of destination
2686  // [5:4] - select 128 bits from sources for high half of destination
2687  // [6] - ignore
2688  // [7] - zero high half of destination
2689 
2690  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2691 
2692  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2693  unsigned HalfSize = NumElts / 2;
2694  SmallVector<int, 8> ShuffleMask(NumElts);
2695 
2696  // Determine which operand(s) are actually in use for this instruction.
2697  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2698  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2699 
2700  // If needed, replace operands based on zero mask.
2701  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2702  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2703 
2704  // Permute low half of result.
2705  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2706  for (unsigned i = 0; i < HalfSize; ++i)
2707  ShuffleMask[i] = StartIndex + i;
2708 
2709  // Permute high half of result.
2710  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2711  for (unsigned i = 0; i < HalfSize; ++i)
2712  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2713 
2714  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2715 
2716  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2717  Name == "sse2.pshuf.d" ||
2718  Name.startswith("avx512.mask.vpermil.p") ||
2719  Name.startswith("avx512.mask.pshuf.d."))) {
2720  Value *Op0 = CI->getArgOperand(0);
2721  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2722  auto *VecTy = cast<FixedVectorType>(CI->getType());
2723  unsigned NumElts = VecTy->getNumElements();
2724  // Calculate the size of each index in the immediate.
2725  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2726  unsigned IdxMask = ((1 << IdxSize) - 1);
2727 
2728  SmallVector<int, 8> Idxs(NumElts);
2729  // Look up the bits for this element, wrapping around the immediate every
2730  // 8 bits. Elements are grouped into sets of 2 or 4 elements so we need
2731  // to offset by the first index of each group.
2732  for (unsigned i = 0; i != NumElts; ++i)
2733  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2734 
2735  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2736 
2737  if (CI->getNumArgOperands() == 4)
2738  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2739  CI->getArgOperand(2));
2740  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2741  Name.startswith("avx512.mask.pshufl.w."))) {
2742  Value *Op0 = CI->getArgOperand(0);
2743  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2744  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2745 
2746  SmallVector<int, 16> Idxs(NumElts);
2747  for (unsigned l = 0; l != NumElts; l += 8) {
2748  for (unsigned i = 0; i != 4; ++i)
2749  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2750  for (unsigned i = 4; i != 8; ++i)
2751  Idxs[i + l] = i + l;
2752  }
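  // Illustrative example (not from the original source): for sse2.pshufl.w
  // with Imm = 0x1B on a v8i16, only the low four words are permuted:
  // Idxs = <3, 2, 1, 0, 4, 5, 6, 7>.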
2753 
2754  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2755 
2756  if (CI->getNumArgOperands() == 4)
2757  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2758  CI->getArgOperand(2));
2759  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2760  Name.startswith("avx512.mask.pshufh.w."))) {
2761  Value *Op0 = CI->getArgOperand(0);
2762  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2763  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2764 
2765  SmallVector<int, 16> Idxs(NumElts);
2766  for (unsigned l = 0; l != NumElts; l += 8) {
2767  for (unsigned i = 0; i != 4; ++i)
2768  Idxs[i + l] = i + l;
2769  for (unsigned i = 0; i != 4; ++i)
2770  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2771  }
2772 
2773  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2774 
2775  if (CI->getNumArgOperands() == 4)
2776  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2777  CI->getArgOperand(2));
2778  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2779  Value *Op0 = CI->getArgOperand(0);
2780  Value *Op1 = CI->getArgOperand(1);
2781  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2782  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2783 
2784  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2785  unsigned HalfLaneElts = NumLaneElts / 2;
2786 
2787  SmallVector<int, 16> Idxs(NumElts);
2788  for (unsigned i = 0; i != NumElts; ++i) {
2789  // Base index is the starting element of the lane.
2790  Idxs[i] = i - (i % NumLaneElts);
2791  // If we are halfway through the lane, switch to the other source.
2792  if ((i % NumLaneElts) >= HalfLaneElts)
2793  Idxs[i] += NumElts;
2794  // Now select the specific element by adding HalfLaneElts bits from
2795  // the immediate, wrapping around the immediate every 8 bits.
2796  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2797  }
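  // Illustrative example (not from the original source): for a v4f32
  // avx512.mask.shuf.ps with Imm = 0x4E, the mask is <2, 3, 4, 5>, taking
  // two elements from Op0 and two from Op1 within the single 128-bit lane.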
2798 
2799  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2800 
2801  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2802  CI->getArgOperand(3));
2803  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2804  Name.startswith("avx512.mask.movshdup") ||
2805  Name.startswith("avx512.mask.movsldup"))) {
2806  Value *Op0 = CI->getArgOperand(0);
2807  unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2808  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2809 
2810  unsigned Offset = 0;
2811  if (Name.startswith("avx512.mask.movshdup."))
2812  Offset = 1;
2813 
2814  SmallVector<int, 16> Idxs(NumElts);
2815  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2816  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2817  Idxs[i + l + 0] = i + l + Offset;
2818  Idxs[i + l + 1] = i + l + Offset;
2819  }
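  // Illustrative example (not from the original source): on a v4f32,
  // movsldup (Offset = 0) yields <0, 0, 2, 2> and movshdup (Offset = 1)
  // yields <1, 1, 3, 3>.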
2820 
2821  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2822 
2823  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2824  CI->getArgOperand(1));
2825  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2826  Name.startswith("avx512.mask.unpckl."))) {
2827  Value *Op0 = CI->getArgOperand(0);
2828  Value *Op1 = CI->getArgOperand(1);
2829  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2830  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2831 
2832  SmallVector<int, 64> Idxs(NumElts);
2833  for (int l = 0; l != NumElts; l += NumLaneElts)
2834  for (int i = 0; i != NumLaneElts; ++i)
2835  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
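  // Illustrative example (not from the original source): for a v4i32 this
  // computes <0, 4, 1, 5>, interleaving the low halves of Op0 and Op1.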
2836 
2837  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2838 
2839  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2840  CI->getArgOperand(2));
2841  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2842  Name.startswith("avx512.mask.unpckh."))) {
2843  Value *Op0 = CI->getArgOperand(0);
2844  Value *Op1 = CI->getArgOperand(1);
2845  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2846  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2847 
2848  SmallVector<int, 64> Idxs(NumElts);
2849  for (int l = 0; l != NumElts; l += NumLaneElts)
2850  for (int i = 0; i != NumLaneElts; ++i)
2851  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
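  // Illustrative example (not from the original source): for a v4i32 this
  // computes <2, 6, 3, 7>, interleaving the high halves of Op0 and Op1.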
2852 
2853  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2854 
2855  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2856  CI->getArgOperand(2));
2857  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2858  Name.startswith("avx512.mask.pand."))) {
2859  VectorType *FTy = cast<VectorType>(CI->getType());
2860  VectorType *ITy = VectorType::getInteger(FTy);
2861  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2862  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2863  Rep = Builder.CreateBitCast(Rep, FTy);
2864  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2865  CI->getArgOperand(2));
2866  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2867  Name.startswith("avx512.mask.pandn."))) {
2868  VectorType *FTy = cast<VectorType>(CI->getType());
2869  VectorType *ITy = VectorType::getInteger(FTy);
2870  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2871  Rep = Builder.CreateAnd(Rep,
2872  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2873  Rep = Builder.CreateBitCast(Rep, FTy);
2874  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2875  CI->getArgOperand(2));
2876  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2877  Name.startswith("avx512.mask.por."))) {
2878  VectorType *FTy = cast<VectorType>(CI->getType());
2879  VectorType *ITy = VectorType::getInteger(FTy);
2880  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2881  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2882  Rep = Builder.CreateBitCast(Rep, FTy);
2883  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2884  CI->getArgOperand(2));
2885  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2886  Name.startswith("avx512.mask.pxor."))) {
2887  VectorType *FTy = cast<VectorType>(CI->getType());
2888  VectorType *ITy = VectorType::getInteger(FTy);
2889  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2890  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2891  Rep = Builder.CreateBitCast(Rep, FTy);
2892  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2893  CI->getArgOperand(2));
2894  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2895  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2896  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2897  CI->getArgOperand(2));
2898  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2899  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2900  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2901  CI->getArgOperand(2));
2902  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2903  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2904  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2905  CI->getArgOperand(2));
2906  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2907  if (Name.endswith(".512")) {
2908  Intrinsic::ID IID;
2909  if (Name[17] == 's')
2910  IID = Intrinsic::x86_avx512_add_ps_512;
2911  else
2912  IID = Intrinsic::x86_avx512_add_pd_512;
2913 
2914  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2915  { CI->getArgOperand(0), CI->getArgOperand(1),
2916  CI->getArgOperand(4) });
2917  } else {
2918  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2919  }
2920  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2921  CI->getArgOperand(2));
2922  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2923  if (Name.endswith(".512")) {
2924  Intrinsic::ID IID;
2925  if (Name[17] == 's')
2926  IID = Intrinsic::x86_avx512_div_ps_512;
2927  else
2928  IID = Intrinsic::x86_avx512_div_pd_512;
2929 
2930  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2931  { CI->getArgOperand(0), CI->getArgOperand(1),
2932  CI->getArgOperand(4) });
2933  } else {
2934  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2935  }
2936  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2937  CI->getArgOperand(2));
2938  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2939  if (Name.endswith(".512")) {
2940  Intrinsic::ID IID;
2941  if (Name[17] == 's')
2942  IID = Intrinsic::x86_avx512_mul_ps_512;
2943  else
2944  IID = Intrinsic::x86_avx512_mul_pd_512;
2945 
2946  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2947  { CI->getArgOperand(0), CI->getArgOperand(1),
2948  CI->getArgOperand(4) });
2949  } else {
2950  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2951  }
2952  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2953  CI->getArgOperand(2));
2954  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2955  if (Name.endswith(".512")) {
2956  Intrinsic::ID IID;
2957  if (Name[17] == 's')
2958  IID = Intrinsic::x86_avx512_sub_ps_512;
2959  else
2960  IID = Intrinsic::x86_avx512_sub_pd_512;
2961 
2962  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2963  { CI->getArgOperand(0), CI->getArgOperand(1),
2964  CI->getArgOperand(4) });
2965  } else {
2966  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2967  }
2968  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2969  CI->getArgOperand(2));
2970  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2971  Name.startswith("avx512.mask.min.p")) &&
2972  Name.drop_front(18) == ".512") {
2973  bool IsDouble = Name[17] == 'd';
2974  bool IsMin = Name[13] == 'i';
2975  static const Intrinsic::ID MinMaxTbl[2][2] = {
2976  { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2977  { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2978  };
2979  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2980 
2981  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2982  { CI->getArgOperand(0), CI->getArgOperand(1),
2983  CI->getArgOperand(4) });
2984  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2985  CI->getArgOperand(2));
2986  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2987  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2988  Intrinsic::ctlz,
2989  CI->getType()),
2990  { CI->getArgOperand(0), Builder.getInt1(false) });
2991  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2992  CI->getArgOperand(1));
2993  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2994  bool IsImmediate = Name[16] == 'i' ||
2995  (Name.size() > 18 && Name[18] == 'i');
2996  bool IsVariable = Name[16] == 'v';
2997  char Size = Name[16] == '.' ? Name[17] :
2998  Name[17] == '.' ? Name[18] :
2999  Name[18] == '.' ? Name[19] :
3000  Name[20];
3001 
3002  Intrinsic::ID IID;
3003  if (IsVariable && Name[17] != '.') {
3004  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3005  IID = Intrinsic::x86_avx2_psllv_q;
3006  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3007  IID = Intrinsic::x86_avx2_psllv_q_256;
3008  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3009  IID = Intrinsic::x86_avx2_psllv_d;
3010  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3011  IID = Intrinsic::x86_avx2_psllv_d_256;
3012  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3013  IID = Intrinsic::x86_avx512_psllv_w_128;
3014  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3015  IID = Intrinsic::x86_avx512_psllv_w_256;
3016  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3017  IID = Intrinsic::x86_avx512_psllv_w_512;
3018  else
3019  llvm_unreachable("Unexpected size");
3020  } else if (Name.endswith(".128")) {
3021  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3022  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3023  : Intrinsic::x86_sse2_psll_d;
3024  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3025  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3026  : Intrinsic::x86_sse2_psll_q;
3027  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3028  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3029  : Intrinsic::x86_sse2_psll_w;
3030  else
3031  llvm_unreachable("Unexpected size");
3032  } else if (Name.endswith(".256")) {
3033  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3034  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3035  : Intrinsic::x86_avx2_psll_d;
3036  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3037  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3038  : Intrinsic::x86_avx2_psll_q;
3039  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3040  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3041  : Intrinsic::x86_avx2_psll_w;
3042  else
3043  llvm_unreachable("Unexpected size");
3044  } else {
3045  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3046  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3047  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3048  Intrinsic::x86_avx512_psll_d_512;
3049  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3050  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3051  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3052  Intrinsic::x86_avx512_psll_q_512;
3053  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3054  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3055  : Intrinsic::x86_avx512_psll_w_512;
3056  else
3057  llvm_unreachable("Unexpected size");
3058  }
3059 
3060  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3061  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3062  bool IsImmediate = Name[16] == 'i' ||
3063  (Name.size() > 18 && Name[18] == 'i');
3064  bool IsVariable = Name[16] == 'v';
3065  char Size = Name[16] == '.' ? Name[17] :
3066  Name[17] == '.' ? Name[18] :
3067  Name[18] == '.' ? Name[19] :
3068  Name[20];
3069 
3070  Intrinsic::ID IID;
3071  if (IsVariable && Name[17] != '.') {
3072  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3073  IID = Intrinsic::x86_avx2_psrlv_q;
3074  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3075  IID = Intrinsic::x86_avx2_psrlv_q_256;
3076  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3077  IID = Intrinsic::x86_avx2_psrlv_d;
3078  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3079  IID = Intrinsic::x86_avx2_psrlv_d_256;
3080  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3081  IID = Intrinsic::x86_avx512_psrlv_w_128;
3082  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3083  IID = Intrinsic::x86_avx512_psrlv_w_256;
3084  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3085  IID = Intrinsic::x86_avx512_psrlv_w_512;
3086  else
3087  llvm_unreachable("Unexpected size");
3088  } else if (Name.endswith(".128")) {
3089  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3090  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3091  : Intrinsic::x86_sse2_psrl_d;
3092  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3093  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3094  : Intrinsic::x86_sse2_psrl_q;
3095  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3096  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3097  : Intrinsic::x86_sse2_psrl_w;
3098  else
3099  llvm_unreachable("Unexpected size");
3100  } else if (Name.endswith(".256")) {
3101  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3102  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3103  : Intrinsic::x86_avx2_psrl_d;
3104  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3105  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3106  : Intrinsic::x86_avx2_psrl_q;
3107  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3108  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3109  : Intrinsic::x86_avx2_psrl_w;
3110  else
3111  llvm_unreachable("Unexpected size");
3112  } else {
3113  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3114  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3115  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3116  Intrinsic::x86_avx512_psrl_d_512;
3117  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3118  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3119  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3120  Intrinsic::x86_avx512_psrl_q_512;
3121  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3122  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3123  : Intrinsic::x86_avx512_psrl_w_512;
3124  else
3125  llvm_unreachable("Unexpected size");
3126  }
3127 
3128  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3129  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3130  bool IsImmediate = Name[16] == 'i' ||
3131  (Name.size() > 18 && Name[18] == 'i');
3132  bool IsVariable = Name[16] == 'v';
3133  char Size = Name[16] == '.' ? Name[17] :
3134  Name[17] == '.' ? Name[18] :
3135  Name[18] == '.' ? Name[19] :
3136  Name[20];
3137 
3138  Intrinsic::ID IID;
3139  if (IsVariable && Name[17] != '.') {
3140  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3141  IID = Intrinsic::x86_avx2_psrav_d;
3142  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3143  IID = Intrinsic::x86_avx2_psrav_d_256;
3144  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3145  IID = Intrinsic::x86_avx512_psrav_w_128;
3146  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3147  IID = Intrinsic::x86_avx512_psrav_w_256;
3148  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3149  IID = Intrinsic::x86_avx512_psrav_w_512;
3150  else
3151  llvm_unreachable("Unexpected size");
3152  } else if (Name.endswith(".128")) {
3153  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3154  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3155  : Intrinsic::x86_sse2_psra_d;
3156  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3157  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3158  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3159  Intrinsic::x86_avx512_psra_q_128;
3160  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3161  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3162  : Intrinsic::x86_sse2_psra_w;
3163  else
3164  llvm_unreachable("Unexpected size");
3165  } else if (Name.endswith(".256")) {
3166  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3167  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3168  : Intrinsic::x86_avx2_psra_d;
3169  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3170  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3171  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3172  Intrinsic::x86_avx512_psra_q_256;
3173  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3174  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3175  : Intrinsic::x86_avx2_psra_w;
3176  else
3177  llvm_unreachable("Unexpected size");
3178  } else {
3179  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3180  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3181  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3182  Intrinsic::x86_avx512_psra_d_512;
3183  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3184  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3185  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3186  Intrinsic::x86_avx512_psra_q_512;
3187  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3188  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3189  : Intrinsic::x86_avx512_psra_w_512;
3190  else
3191  llvm_unreachable("Unexpected size");
3192  }
3193 
3194  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3195  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3196  Rep = upgradeMaskedMove(Builder, *CI);
3197  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3198  Rep = UpgradeMaskToInt(Builder, *CI);
3199  } else if (IsX86 && Name.endswith(".movntdqa")) {
3200  Module *M = F->getParent();
3201  MDNode *Node = MDNode::get(
3202  C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3203 
3204  Value *Ptr = CI->getArgOperand(0);
3205 
3206  // Convert the type of the pointer to a pointer to the stored type.
3207  Value *BC = Builder.CreateBitCast(
3208  Ptr, PointerType::getUnqual(CI->getType()), "cast");
3209  LoadInst *LI = Builder.CreateAlignedLoad(
3210  CI->getType(), BC,
3211  Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3212  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3213  Rep = LI;
3214  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3215  Name.startswith("fma.vfmsub.") ||
3216  Name.startswith("fma.vfnmadd.") ||
3217  Name.startswith("fma.vfnmsub."))) {
3218  bool NegMul = Name[6] == 'n';
3219  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3220  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3221 
3222  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3223  CI->getArgOperand(2) };
3224 
3225  if (IsScalar) {
3226  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3227  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3228  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3229  }
3230 
3231  if (NegMul && !IsScalar)
3232  Ops[0] = Builder.CreateFNeg(Ops[0]);
3233  if (NegMul && IsScalar)
3234  Ops[1] = Builder.CreateFNeg(Ops[1]);
3235  if (NegAcc)
3236  Ops[2] = Builder.CreateFNeg(Ops[2]);
3237 
3238  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3239  Intrinsic::fma,
3240  Ops[0]->getType()),
3241  Ops);
3242 
3243  if (IsScalar)
3244  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3245  (uint64_t)0);
3246  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3247  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3248  CI->getArgOperand(2) };
3249 
3250  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3251  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3252  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3253 
3254  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3255  Intrinsic::fma,
3256  Ops[0]->getType()),
3257  Ops);
3258 
3259  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3260  Rep, (uint64_t)0);
3261  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3262  Name.startswith("avx512.maskz.vfmadd.s") ||
3263  Name.startswith("avx512.mask3.vfmadd.s") ||
3264  Name.startswith("avx512.mask3.vfmsub.s") ||
3265  Name.startswith("avx512.mask3.vfnmsub.s"))) {
3266  bool IsMask3 = Name[11] == '3';
3267  bool IsMaskZ = Name[11] == 'z';
3268  // Drop the "avx512.mask." to make it easier.
3269  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3270  bool NegMul = Name[2] == 'n';
3271  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3272 
3273  Value *A = CI->getArgOperand(0);
3274  Value *B = CI->getArgOperand(1);
3275  Value *C = CI->getArgOperand(2);
3276 
3277  if (NegMul && (IsMask3 || IsMaskZ))
3278  A = Builder.CreateFNeg(A);
3279  if (NegMul && !(IsMask3 || IsMaskZ))
3280  B = Builder.CreateFNeg(B);
3281  if (NegAcc)
3282  C = Builder.CreateFNeg(C);
3283 
3284  A = Builder.CreateExtractElement(A, (uint64_t)0);
3285  B = Builder.CreateExtractElement(B, (uint64_t)0);
3286  C = Builder.CreateExtractElement(C, (uint64_t)0);
3287 
3288  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3289  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3290  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3291 
3292  Intrinsic::ID IID;
3293  if (Name.back() == 'd')
3294  IID = Intrinsic::x86_avx512_vfmadd_f64;
3295  else
3296  IID = Intrinsic::x86_avx512_vfmadd_f32;
3297  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3298  Rep = Builder.CreateCall(FMA, Ops);
3299  } else {
3300  Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3301  Intrinsic::fma,
3302  A->getType());
3303  Rep = Builder.CreateCall(FMA, { A, B, C });
3304  }
3305 
3306  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3307  IsMask3 ? C : A;
3308 
3309  // For Mask3 with NegAcc, we need to create a new extractelement that
3310  // avoids the negation above.
3311  if (NegAcc && IsMask3)
3312  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3313  (uint64_t)0);
3314 
3315  Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3316  Rep, PassThru);
3317  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3318  Rep, (uint64_t)0);
3319  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3320  Name.startswith("avx512.mask.vfnmadd.p") ||
3321  Name.startswith("avx512.mask.vfnmsub.p") ||
3322  Name.startswith("avx512.mask3.vfmadd.p") ||
3323  Name.startswith("avx512.mask3.vfmsub.p") ||
3324  Name.startswith("avx512.mask3.vfnmsub.p") ||
3325  Name.startswith("avx512.maskz.vfmadd.p"))) {
3326  bool IsMask3 = Name[11] == '3';
3327  bool IsMaskZ = Name[11] == 'z';
3328  // Drop the "avx512.mask." to make it easier.
3329  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3330  bool NegMul = Name[2] == 'n';
3331  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3332 
3333  Value *A = CI->getArgOperand(0);
3334  Value *B = CI->getArgOperand(1);
3335  Value *C = CI->getArgOperand(2);
3336 
3337  if (NegMul && (IsMask3 || IsMaskZ))
3338  A = Builder.CreateFNeg(A);
3339  if (NegMul && !(IsMask3 || IsMaskZ))
3340  B = Builder.CreateFNeg(B);
3341  if (NegAcc)
3342  C = Builder.CreateFNeg(C);
3343 
3344  if (CI->getNumArgOperands() == 5 &&
3345  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3346  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3347  Intrinsic::ID IID;
3348  // Check the character before ".512" in string.
3349  if (Name[Name.size()-5] == 's')
3350  IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3351  else
3352  IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3353 
3354  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3355  { A, B, C, CI->getArgOperand(4) });
3356  } else {
3357  Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3358  Intrinsic::fma,
3359  A->getType());
3360  Rep = Builder.CreateCall(FMA, { A, B, C });
3361  }
3362 
3363  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3364  IsMask3 ? CI->getArgOperand(2) :
3365  CI->getArgOperand(0);
3366 
3367  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3368  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3369  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3370  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3371  Intrinsic::ID IID;
3372  if (VecWidth == 128 && EltWidth == 32)
3373  IID = Intrinsic::x86_fma_vfmaddsub_ps;
3374  else if (VecWidth == 256 && EltWidth == 32)
3375  IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3376  else if (VecWidth == 128 && EltWidth == 64)
3377  IID = Intrinsic::x86_fma_vfmaddsub_pd;
3378  else if (VecWidth == 256 && EltWidth == 64)
3379  IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3380  else
3381  llvm_unreachable("Unexpected intrinsic");
3382 
3383  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3384  CI->getArgOperand(2) };
3385  Ops[2] = Builder.CreateFNeg(Ops[2]);
3386  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3387  Ops);
3388  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3389  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3390  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3391  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3392  bool IsMask3 = Name[11] == '3';
3393  bool IsMaskZ = Name[11] == 'z';
3394  // Drop the "avx512.mask." to make it easier.
3395  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3396  bool IsSubAdd = Name[3] == 's';
3397  if (CI->getNumArgOperands() == 5) {
3398  Intrinsic::ID IID;
3399  // Check the character before ".512" in string.
3400  if (Name[Name.size()-5] == 's')
3401  IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3402  else
3403  IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3404 
3405  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3406  CI->getArgOperand(2), CI->getArgOperand(4) };
3407  if (IsSubAdd)
3408  Ops[2] = Builder.CreateFNeg(Ops[2]);
3409 
3410  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3411  Ops);
3412  } else {
3413  int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3414 
3415  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3416  CI->getArgOperand(2) };
3417 
3418  Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3419  Ops[0]->getType());
3420  Value *Odd = Builder.CreateCall(FMA, Ops);
3421  Ops[2] = Builder.CreateFNeg(Ops[2]);
3422  Value *Even = Builder.CreateCall(FMA, Ops);
3423 
3424  if (IsSubAdd)
3425  std::swap(Even, Odd);
3426 
3427  SmallVector<int, 32> Idxs(NumElts);
3428  for (int i = 0; i != NumElts; ++i)
3429  Idxs[i] = i + (i % 2) * NumElts;
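      // Illustrative note (not from the original source): for NumElts = 4 the
      // mask is <0, 5, 2, 7>, taking even lanes from Even (a*b - c) and odd
      // lanes from Odd (a*b + c), which is vfmaddsub's
      // subtract-in-even/add-in-odd pattern.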
3430 
3431  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3432  }
3433 
3434  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3435  IsMask3 ? CI->getArgOperand(2) :
3436  CI->getArgOperand(0);
3437 
3438  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3439  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3440  Name.startswith("avx512.maskz.pternlog."))) {
3441  bool ZeroMask = Name[11] == 'z';
3442  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3443  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3444  Intrinsic::ID IID;
3445  if (VecWidth == 128 && EltWidth == 32)
3446  IID = Intrinsic::x86_avx512_pternlog_d_128;
3447  else if (VecWidth == 256 && EltWidth == 32)
3448  IID = Intrinsic::x86_avx512_pternlog_d_256;
3449  else if (VecWidth == 512 && EltWidth == 32)
3450  IID = Intrinsic::x86_avx512_pternlog_d_512;
3451  else if (VecWidth == 128 && EltWidth == 64)
3452  IID = Intrinsic::x86_avx512_pternlog_q_128;
3453  else if (VecWidth == 256 && EltWidth == 64)
3454  IID = Intrinsic::x86_avx512_pternlog_q_256;
3455  else if (VecWidth == 512 && EltWidth == 64)
3456  IID = Intrinsic::x86_avx512_pternlog_q_512;
3457  else
3458  llvm_unreachable("Unexpected intrinsic");
3459 
3460  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3461  CI->getArgOperand(2), CI->getArgOperand(3) };
3462  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3463  Args);
3464  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3465  : CI->getArgOperand(0);
3466  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3467  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3468  Name.startswith("avx512.maskz.vpmadd52"))) {
3469  bool ZeroMask = Name[11] == 'z';
3470  bool High = Name[20] == 'h' || Name[21] == 'h';
3471  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3472  Intrinsic::ID IID;
3473  if (VecWidth == 128 && !High)
3474  IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3475  else if (VecWidth == 256 && !High)
3476  IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3477  else if (VecWidth == 512 && !High)
3478  IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3479  else if (VecWidth == 128 && High)
3480  IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3481  else if (VecWidth == 256 && High)
3482  IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3483  else if (VecWidth == 512 && High)
3484  IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3485  else
3486  llvm_unreachable("Unexpected intrinsic");
3487 
3488  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3489  CI->getArgOperand(2) };
3490  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3491  Args);
3492  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3493  : CI->getArgOperand(0);
3494  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3495  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3496  Name.startswith("avx512.mask.vpermt2var.") ||
3497  Name.startswith("avx512.maskz.vpermt2var."))) {
3498  bool ZeroMask = Name[11] == 'z';
3499  bool IndexForm = Name[17] == 'i';
3500  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3501  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3502  Name.startswith("avx512.maskz.vpdpbusd.") ||
3503  Name.startswith("avx512.mask.vpdpbusds.") ||
3504  Name.startswith("avx512.maskz.vpdpbusds."))) {
3505  bool ZeroMask = Name[11] == 'z';
3506  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3507  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3508  Intrinsic::ID IID;
3509  if (VecWidth == 128 && !IsSaturating)
3510  IID = Intrinsic::x86_avx512_vpdpbusd_128;
3511  else if (VecWidth == 256 && !IsSaturating)
3512  IID = Intrinsic::x86_avx512_vpdpbusd_256;
3513  else if (VecWidth == 512 && !IsSaturating)
3514  IID = Intrinsic::x86_avx512_vpdpbusd_512;
3515  else if (VecWidth == 128 && IsSaturating)
3516  IID = Intrinsic::x86_avx512_vpdpbusds_128;
3517  else if (VecWidth == 256 && IsSaturating)
3518  IID = Intrinsic::x86_avx512_vpdpbusds_256;
3519  else if (VecWidth == 512 && IsSaturating)
3520  IID = Intrinsic::x86_avx512_vpdpbusds_512;
3521  else
3522  llvm_unreachable("Unexpected intrinsic");
3523 
3524  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3525  CI->getArgOperand(2) };
3526  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3527  Args);
3528  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3529  : CI->getArgOperand(0);
3530  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3531  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3532  Name.startswith("avx512.maskz.vpdpwssd.") ||
3533  Name.startswith("avx512.mask.vpdpwssds.") ||
3534  Name.startswith("avx512.maskz.vpdpwssds."))) {
3535  bool ZeroMask = Name[11] == 'z';
3536  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3537  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3538  Intrinsic::ID IID;
3539  if (VecWidth == 128 && !IsSaturating)
3540  IID = Intrinsic::x86_avx512_vpdpwssd_128;
3541  else if (VecWidth == 256 && !IsSaturating)
3542  IID = Intrinsic::x86_avx512_vpdpwssd_256;
3543  else if (VecWidth == 512 && !IsSaturating)
3544  IID = Intrinsic::x86_avx512_vpdpwssd_512;
3545  else if (VecWidth == 128 && IsSaturating)
3546  IID = Intrinsic::x86_avx512_vpdpwssds_128;
3547  else if (VecWidth == 256 && IsSaturating)
3548  IID = Intrinsic::x86_avx512_vpdpwssds_256;
3549  else if (VecWidth == 512 && IsSaturating)
3550  IID = Intrinsic::x86_avx512_vpdpwssds_512;
3551  else
3552  llvm_unreachable("Unexpected intrinsic");
3553 
3554  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3555  CI->getArgOperand(2) };
3556  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3557  Args);
3558  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3559  : CI->getArgOperand(0);
3560  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3561  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3562  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3563  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3564  Intrinsic::ID IID;
3565  if (Name[0] == 'a' && Name.back() == '2')
3566  IID = Intrinsic::x86_addcarry_32;
3567  else if (Name[0] == 'a' && Name.back() == '4')
3568  IID = Intrinsic::x86_addcarry_64;
3569  else if (Name[0] == 's' && Name.back() == '2')
3570  IID = Intrinsic::x86_subborrow_32;
3571  else if (Name[0] == 's' && Name.back() == '4')
3572  IID = Intrinsic::x86_subborrow_64;
3573  else
3574  llvm_unreachable("Unexpected intrinsic");
3575 
3576  // Make a call with 3 operands.
3577  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3578  CI->getArgOperand(2)};
3579  Value *NewCall = Builder.CreateCall(
3580  Intrinsic::getDeclaration(CI->getModule(), IID),
3581  Args);
3582 
3583  // Extract the second result and store it.
3584  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3585  // Cast the pointer to the right type.
3586  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3587  llvm::PointerType::getUnqual(Data->getType()));
3588  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3589  // Replace the original call result with the first result of the new call.
3590  Value *CF = Builder.CreateExtractValue(NewCall, 0);
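  // Illustrative IR sketch (not from the original source; %pcast stands for
  // the bitcast of operand 3 created above): a call such as
  //   %r = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, i8* %p)
  // becomes
  //   %t = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
  //   %d = extractvalue { i8, i32 } %t, 1
  //   store i32 %d, i32* %pcast, align 1
  //   %r = extractvalue { i8, i32 } %t, 0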
3591 
3592  CI->replaceAllUsesWith(CF);
3593  Rep = nullptr;
3594  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3595  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3596  // Rep will be updated by the call in the condition.
3597  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3598  Value *Arg = CI->getArgOperand(0);
3599  Value *Neg = Builder.CreateNeg(Arg, "neg");
3600  Value *Cmp = Builder.CreateICmpSGE(
3601  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3602  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3603  } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3604  Name.startswith("atomic.load.add.f64.p"))) {
3605  Value *Ptr = CI->getArgOperand(0);
3606  Value *Val = CI->getArgOperand(1);
3607  Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3608  AtomicOrdering::SequentiallyConsistent);
3609  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3610  Name == "max.ui" || Name == "max.ull")) {
3611  Value *Arg0 = CI->getArgOperand(0);
3612  Value *Arg1 = CI->getArgOperand(1);
3613  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3614  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3615  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3616  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3617  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3618  Name == "min.ui" || Name == "min.ull")) {
3619  Value *Arg0 = CI->getArgOperand(0);
3620  Value *Arg1 = CI->getArgOperand(1);
3621  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3622  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3623  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3624  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3625  } else if (IsNVVM && Name == "clz.ll") {
3626  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3627  Value *Arg = CI->getArgOperand(0);
3628  Value *Ctlz = Builder.CreateCall(
3629  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3630  {Arg->getType()}),
3631  {Arg, Builder.getFalse()}, "ctlz");
3632  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3633  } else if (IsNVVM && Name == "popc.ll") {
3634  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3635  // i64.
3636  Value *Arg = CI->getArgOperand(0);
3637  Value *Popc = Builder.CreateCall(
3638  Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3639  {Arg->getType()}),
3640  Arg, "ctpop");
3641  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3642  } else if (IsNVVM && Name == "h2f") {
3643  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3644  F->getParent(), Intrinsic::convert_from_fp16,
3645  {Builder.getFloatTy()}),
3646  CI->getArgOperand(0), "h2f");
3647  } else {
3648  llvm_unreachable("Unknown function for CallInst upgrade.");
3649  }
3650 
3651  if (Rep)
3652  CI->replaceAllUsesWith(Rep);
3653  CI->eraseFromParent();
3654  return;
3655  }
3656 
3657  const auto &DefaultCase = [&NewFn, &CI]() -> void {
3658  // Handle generic mangling change, but nothing else
3659  assert(
3660  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3661  "Unknown function for CallInst upgrade and isn't just a name change");
3662  CI->setCalledFunction(NewFn);
3663  };
3664  CallInst *NewCall = nullptr;
3665  switch (NewFn->getIntrinsicID()) {
3666  default: {
3667  DefaultCase();
3668  return;
3669  }
3670  case Intrinsic::arm_neon_vld1:
3671  case Intrinsic::arm_neon_vld2:
3672  case Intrinsic::arm_neon_vld3:
3673  case Intrinsic::arm_neon_vld4:
3674  case Intrinsic::arm_neon_vld2lane:
3675  case Intrinsic::arm_neon_vld3lane:
3676  case Intrinsic::arm_neon_vld4lane:
3677  case Intrinsic::arm_neon_vst1:
3678  case Intrinsic::arm_neon_vst2:
3679  case Intrinsic::arm_neon_vst3:
3680  case Intrinsic::arm_neon_vst4:
3681  case Intrinsic::arm_neon_vst2lane:
3682  case Intrinsic::arm_neon_vst3lane:
3683  case Intrinsic::arm_neon_vst4lane: {
3684  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3685  CI->arg_operands().end());
3686  NewCall = Builder.CreateCall(NewFn, Args);
3687  break;
3688  }
3689 
3690  case Intrinsic::arm_neon_bfdot:
3691  case Intrinsic::arm_neon_bfmmla:
3692  case Intrinsic::arm_neon_bfmlalb:
3693  case Intrinsic::arm_neon_bfmlalt:
3694  case Intrinsic::aarch64_neon_bfdot:
3695  case Intrinsic::aarch64_neon_bfmmla:
3696  case Intrinsic::aarch64_neon_bfmlalb:
3697  case Intrinsic::aarch64_neon_bfmlalt: {
3698  SmallVector<Value *, 3> Args;
3699  assert(CI->getNumArgOperands() == 3 &&
3700  "Mismatch between function args and call args");
3701  size_t OperandWidth =
3702  CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3703  assert((OperandWidth == 64 || OperandWidth == 128) &&
3704  "Unexpected operand width");
3705  Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3706  auto Iter = CI->arg_operands().begin();
3707  Args.push_back(*Iter++);
3708  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3709  Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3710  NewCall = Builder.CreateCall(NewFn, Args);
3711  break;
3712  }
3713 
3714  case Intrinsic::bitreverse:
3715  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3716  break;
3717 
3718  case Intrinsic::ctlz:
3719  case Intrinsic::cttz:
3720  assert(CI->getNumArgOperands() == 1 &&
3721  "Mismatch between function args and call args");
3722  NewCall =
3723  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3724  break;
3725 
3726  case Intrinsic::objectsize: {
3727  Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3728  ? Builder.getFalse()
3729  : CI->getArgOperand(2);
3730  Value *Dynamic =
3731  CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3732  NewCall = Builder.CreateCall(
3733  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3734  break;
3735  }
3736 
3737  case Intrinsic::ctpop:
3738  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3739  break;
3740 
3741  case Intrinsic::convert_from_fp16:
3742  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3743  break;
3744 
3745  case Intrinsic::dbg_value:
3746  // Upgrade from the old version that had an extra offset argument.
3747  assert(CI->getNumArgOperands() == 4);
3748  // Drop nonzero offsets instead of attempting to upgrade them.
3749  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3750  if (Offset->isZeroValue()) {
3751  NewCall = Builder.CreateCall(
3752  NewFn,
3753  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3754  break;
3755  }
3756  CI->eraseFromParent();
3757  return;
3758 
3759  case Intrinsic::ptr_annotation:
3760  // Upgrade from versions that lacked the annotation attribute argument.
3761  assert(CI->getNumArgOperands() == 4 &&
3762  "Before LLVM 12.0 this intrinsic took four arguments");
3763  // Create a new call with an added null annotation attribute argument.
3764  NewCall = Builder.CreateCall(
3765  NewFn,
3766  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3767  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3768  NewCall->takeName(CI);
3769  CI->replaceAllUsesWith(NewCall);
3770  CI->eraseFromParent();
3771  return;
3772 
3773  case Intrinsic::var_annotation:
3774  // Upgrade from versions that lacked the annotation attribute argument.
3775  assert(CI->getNumArgOperands() == 4 &&
3776  "Before LLVM 12.0 this intrinsic took four arguments");
3777  // Create a new call with an added null annotation attribute argument.
3778  NewCall = Builder.CreateCall(
3779  NewFn,
3780  {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3781  CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3782  CI->eraseFromParent();
3783  return;
3784 
3785  case Intrinsic::x86_xop_vfrcz_ss:
3786  case Intrinsic::x86_xop_vfrcz_sd:
3787  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3788  break;
3789 
3790  case Intrinsic::x86_xop_vpermil2pd:
3791  case Intrinsic::x86_xop_vpermil2ps:
3792  case Intrinsic::x86_xop_vpermil2pd_256:
3793  case Intrinsic::x86_xop_vpermil2ps_256: {
3794  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3795  CI->arg_operands().end());
3796  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3797  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3798  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3799  NewCall = Builder.CreateCall(NewFn, Args);
3800  break;
3801  }
3802 
3803  case Intrinsic::x86_sse41_ptestc:
3804  case Intrinsic::x86_sse41_ptestz:
3805  case Intrinsic::x86_sse41_ptestnzc: {
3806  // The arguments for these intrinsics used to be v4f32, and changed
3807  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3808  // So, the only thing required is a bitcast for both arguments.
3809  // First, check the arguments have the old type.
3810  Value *Arg0 = CI->getArgOperand(0);
3811  if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3812  return;
3813 
3814  // Old intrinsic, add bitcasts
3815  Value *Arg1 = CI->getArgOperand(1);
3816 
3817  auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3818 
3819  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3820  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3821 
3822  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3823  break;
3824  }
3825 
3826  case Intrinsic::x86_rdtscp: {
3827  // This used to take 1 argument. If we have no arguments, it is already
3828  // upgraded.
3829  if (CI->getNumOperands() == 0)
3830  return;
3831 
3832  NewCall = Builder.CreateCall(NewFn);
3833  // Extract the second result and store it.
3834  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3835  // Cast the pointer to the right type.
3836  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3837  llvm::PointerType::getUnqual(Data->getType()));
3838  Builder.CreateAlignedStore(Data, Ptr, Align(1));
3839  // Replace the original call result with the first result of the new call.
3840  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3841 
3842  NewCall->takeName(CI);
3843  CI->replaceAllUsesWith(TSC);
3844  CI->eraseFromParent();
3845  return;
3846  }
3847 
3848  case Intrinsic::x86_sse41_insertps:
3849  case Intrinsic::x86_sse41_dppd:
3850  case Intrinsic::x86_sse41_dpps:
3851  case Intrinsic::x86_sse41_mpsadbw:
3852  case Intrinsic::x86_avx_dp_ps_256:
3853  case Intrinsic::x86_avx2_mpsadbw: {
3854  // Need to truncate the last argument from i32 to i8 -- this argument models
3855  // an inherently 8-bit immediate operand to these x86 instructions.
3856  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3857  CI->arg_operands().end());
3858 
3859  // Replace the last argument with a trunc.
3860  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3861  NewCall = Builder.CreateCall(NewFn, Args);
3862  break;
3863  }
3864 
3865  case Intrinsic::x86_avx512_mask_cmp_pd_128:
3866  case Intrinsic::x86_avx512_mask_cmp_pd_256:
3867  case Intrinsic::x86_avx512_mask_cmp_pd_512:
3868  case Intrinsic::x86_avx512_mask_cmp_ps_128:
3869  case Intrinsic::x86_avx512_mask_cmp_ps_256:
3870  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
3871  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3872  CI->arg_operands().end());
3873  unsigned NumElts =
3874  cast<FixedVectorType>(Args[0]->getType())->getNumElements();
3875  Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
3876 
3877  NewCall = Builder.CreateCall(NewFn, Args);
3878  Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
3879 
3880  NewCall->takeName(CI);
3881  CI->replaceAllUsesWith(Res);
3882  CI->eraseFromParent();
3883  return;
3884  }
3885 
3886  case Intrinsic::thread_pointer: {
3887  NewCall = Builder.CreateCall(NewFn, {});
3888  break;
3889  }
3890 
3891  case Intrinsic::invariant_start:
3892  case Intrinsic::invariant_end: {
3893  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3894  CI->arg_operands().end());
3895  NewCall = Builder.CreateCall(NewFn, Args);
3896  break;
3897  }
3898  case Intrinsic::masked_load:
3899  case Intrinsic::masked_store:
3900  case Intrinsic::masked_gather:
3901  case Intrinsic::masked_scatter: {
3902  SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3903  CI->arg_operands().end());
3904  NewCall = Builder.CreateCall(NewFn, Args);
3905  NewCall->copyMetadata(*CI);
3906  break;
3907  }
3908 
3909  case Intrinsic::memcpy:
3910  case Intrinsic::memmove:
3911  case Intrinsic::memset: {
3912  // We have to make sure that the call signature is what we're expecting.
3913  // We only want to change the old signatures by removing the alignment arg:
3914  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3915  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3916  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3917  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3918  // Note: i8*'s in the above can be any pointer type
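    // Illustrative sketch (not from the original source): a pre-7.0 call
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 8, i1 false)
    // is rewritten to drop the alignment operand and carry it as attributes:
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %d, i8* align 8 %s, i64 %n, i1 false)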
3919  if (CI->getNumArgOperands() != 5) {
3920  DefaultCase();
3921  return;
3922  }
3923  // Remove alignment argument (3), and add alignment attributes to the
3924  // dest/src pointers.
3925  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3926  CI->getArgOperand(2), CI->getArgOperand(4)};
3927  NewCall = Builder.CreateCall(NewFn, Args);
3928  auto *MemCI = cast<MemIntrinsic>(NewCall);
3929  // All mem intrinsics support dest alignment.
3930  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3931  MemCI->setDestAlignment(Align->getMaybeAlignValue());
3932  // Memcpy/Memmove also support source alignment.
3933  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3934  MTI->setSourceAlignment(Align->getMaybeAlignValue());
3935  break;
3936  }
3937  }
3938  assert(NewCall && "Should have either set this variable or returned through "
3939  "the default case");
3940  NewCall->takeName(CI);
3941  CI->replaceAllUsesWith(NewCall);
3942  CI->eraseFromParent();
3943 }
3944 
3945 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3946  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3947 
3948  // Check if this function should be upgraded and get the replacement function
3949  // if there is one.
3950  Function *NewFn;
3951  if (UpgradeIntrinsicFunction(F, NewFn)) {
3952  // Replace all users of the old function with the new function or new
3953  // instructions. This is not a range loop because the call is deleted.
3954  for (User *U : make_early_inc_range(F->users()))
3955  if (CallInst *CI = dyn_cast<CallInst>(U))
3956  UpgradeIntrinsicCall(CI, NewFn);
3957 
3958  // Remove old function, no longer used, from the module.
3959  F->eraseFromParent();
3960  }
3961 }
3962 
3963 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3964  // Check if the tag uses struct-path aware TBAA format.
3965  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3966  return &MD;
3967 
3968  auto &Context = MD.getContext();
3969  if (MD.getNumOperands() == 3) {
3970  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3971  MDNode *ScalarType = MDNode::get(Context, Elts);
3972  // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3973  Metadata *Elts2[] = {ScalarType, ScalarType,
3974  ConstantAsMetadata::get(
3975  Constant::getNullValue(Type::getInt64Ty(Context))),
3976  MD.getOperand(2)};
3977  return MDNode::get(Context, Elts2);
3978  }
3979  // Create a MDNode <MD, MD, offset 0>
3980  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3981  Type::getInt64Ty(Context)))};
3982  return MDNode::get(Context, Elts);
3983 }
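// Illustrative sketch (not from the original source): a two-operand scalar
// tag !0 = !{!"int", !1} is upgraded to the struct-path access tag
// !{!0, !0, i64 0}; a three-operand tag !{!"int", !1, i64 1} first rebuilds
// the scalar type node !s = !{!"int", !1} and becomes !{!s, !s, i64 0, i64 1}.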
3984 
3985 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3986  Instruction *&Temp) {
3987  if (Opc != Instruction::BitCast)
3988  return nullptr;
3989 
3990  Temp = nullptr;
3991  Type *SrcTy = V->getType();
3992  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3993  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3994  LLVMContext &Context = V->getContext();
3995 
3996  // We have no information about target data layout, so we assume that
3997  // the maximum pointer size is 64bit.
3998  Type *MidTy = Type::getInt64Ty(Context);
3999  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4000 
4001  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4002  }
4003 
4004  return nullptr;
4005 }
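// Illustrative sketch (not from the original source): a legacy
//   bitcast i8 addrspace(1)* %p to i8*
// is rewritten as the pair
//   %t = ptrtoint i8 addrspace(1)* %p to i64
//   %q = inttoptr i64 %t to i8*
// since bitcast may no longer change the address space.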
4006 
4007 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4008  if (Opc != Instruction::BitCast)
4009  return nullptr;
4010 
4011  Type *SrcTy = C->getType();
4012  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4013  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4014  LLVMContext &Context = C->getContext();
4015 
4016  // We have no information about target data layout, so we assume that
4017  // the maximum pointer size is 64 bits.
4018  Type *MidTy = Type::getInt64Ty(Context);
4019 
4020  return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4021  DestTy);
4022  }
4023 
4024  return nullptr;
4025 }
4026 
4027 /// Check the debug info version number; if it is out-dated, drop the debug
4028 /// info. Return true if the module is modified.
4029 bool llvm::UpgradeDebugInfo(Module &M) {
4030  unsigned Version = getDebugMetadataVersionFromModule(M);
4031  if (Version == DEBUG_METADATA_VERSION) {
4032  bool BrokenDebugInfo = false;
4033  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4034  report_fatal_error("Broken module found, compilation aborted!");
4035  if (!BrokenDebugInfo)
4036  // Everything is ok.
4037  return false;
4038  else {
4039  // Diagnose malformed debug info.
4040  DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4041  M.getContext().diagnose(Diag);
4042  }
4043  }
4044  bool Modified = StripDebugInfo(M);
4045  if (Modified && Version != DEBUG_METADATA_VERSION) {
4046  // Diagnose a version mismatch.
4047  DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4048  M.getContext().diagnose(DiagVersion);
4049  }
4050  return Modified;
4051 }
4052 
4053 /// This checks for the objc retain/release marker, which should be upgraded.
4054 /// It returns true if the module is modified.
4055 static bool UpgradeRetainReleaseMarker(Module &M) {
4056  bool Changed = false;
4057  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4058  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4059  if (ModRetainReleaseMarker) {
4060  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4061  if (Op) {
4062  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4063  if (ID) {
4064  SmallVector<StringRef, 4> ValueComp;
4065  ID->getString().split(ValueComp, "#");
4066  if (ValueComp.size() == 2) {
4067  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4068  ID = MDString::get(M.getContext(), NewValue);
4069  }
4070  M.addModuleFlag(Module::Error, MarkerKey, ID);
4071  M.eraseNamedMetadata(ModRetainReleaseMarker);
4072  Changed = true;
4073  }
4074  }
4075  }
4076  return Changed;
4077 }
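// Illustrative sketch (not from the original source; the marker string is
// hypothetical): a marker recorded as "mov fp, fp#marker" is split on '#'
// and re-joined as "mov fp, fp;marker" before being stored as the module
// flag "clang.arc.retainAutoreleasedReturnValueMarker".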
4078 
4079 void llvm::UpgradeARCRuntime(Module &M) {
4080  // This lambda converts normal function calls to ARC runtime functions into
4081  // the corresponding intrinsic calls.
4082  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4083  llvm::Intrinsic::ID IntrinsicFunc) {
4084  Function *Fn = M.getFunction(OldFunc);
4085 
4086  if (!Fn)
4087  return;
4088 
4089  Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4090 
4091  for (User *U : make_early_inc_range(Fn->users())) {
4092  CallInst *CI = dyn_cast<CallInst>(U);
4093  if (!CI || CI->getCalledFunction() != Fn)
4094  continue;
4095 
4096  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4097  FunctionType *NewFuncTy = NewFn->getFunctionType();
4098  SmallVector<Value *, 4> Args;
4099 
4100  // Don't upgrade the intrinsic if it's not valid to bitcast the return
4101  // value to the return type of the old function.
4102  if (NewFuncTy->getReturnType() != CI->getType() &&
4103  !CastInst::castIsValid(Instruction::BitCast, CI,
4104  NewFuncTy->getReturnType()))
4105  continue;
4106 
4107  bool InvalidCast = false;
4108 
4109  for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
4110  Value *Arg = CI->getArgOperand(I);
4111 
4112  // Bitcast argument to the parameter type of the new function if it's
4113  // not a variadic argument.
4114  if (I < NewFuncTy->getNumParams()) {
4115  // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4116  // to the parameter type of the new function.
4117  if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4118  NewFuncTy->getParamType(I))) {
4119  InvalidCast = true;
4120  break;
4121  }
4122  Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4123  }
4124  Args.push_back(Arg);
4125  }
4126 
4127  if (InvalidCast)
4128  continue;
4129 
4130  // Create a call instruction that calls the new function.
4131  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4132  NewCall->setTailCallKind(CI->getTailCallKind());
4133  NewCall->takeName(CI);
4134 
4135  // Bitcast the return value back to the type of the old call.
4136  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4137 
4138  if (!CI->use_empty())
4139  CI->replaceAllUsesWith(NewRetVal);
4140  CI->eraseFromParent();
4141  }
4142 
4143  if (Fn->use_empty())
4144  Fn->eraseFromParent();
4145  };
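 // Illustrative sketch of the rewrite the lambda performs (types are
 // hypothetical): a plain runtime call such as
 //   %r = tail call i8* @objc_retain(i8* %p)
 // becomes a call to the equivalent intrinsic,
 //   %r = tail call i8* @llvm.objc.retain(i8* %p)
 // with bitcasts wrapped around arguments and the result wherever the old
 // declaration's types disagree with the intrinsic's signature.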
4146 
4147  // Unconditionally convert a call to "clang.arc.use" to a call to
4148  // "llvm.objc.clang.arc.use".
4149  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4150 
4151  // Upgrade the retain release marker. If there is no need to upgrade
4152  // the marker, that means either the module is already new enough to contain
4153  // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
4154  if (!UpgradeRetainReleaseMarker(M))
4155  return;
4156 
4157  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4158  {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4159  {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4160  {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4161  {"objc_autoreleaseReturnValue",
4162  llvm::Intrinsic::objc_autoreleaseReturnValue},
4163  {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4164  {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4165  {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4166  {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4167  {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4168  {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4169  {"objc_release", llvm::Intrinsic::objc_release},
4170  {"objc_retain", llvm::Intrinsic::objc_retain},
4171  {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4172  {"objc_retainAutoreleaseReturnValue",
4173  llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4174  {"objc_retainAutoreleasedReturnValue",
4175  llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4176  {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4177  {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4178  {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4179  {"objc_unsafeClaimAutoreleasedReturnValue",
4180  llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4181  {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4182  {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4183  {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4184  {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4185  {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4186  {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4187  {"objc_arc_annotation_topdown_bbstart",
4188  llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4189  {"objc_arc_annotation_topdown_bbend",
4190  llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4191  {"objc_arc_annotation_bottomup_bbstart",
4192  llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4193  {"objc_arc_annotation_bottomup_bbend",
4194  llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4195 
4196  for (auto &I : RuntimeFuncs)
4197  UpgradeToIntrinsic(I.first, I.second);
4198 }
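// A minimal usage sketch (hypothetical caller, shown only for context): a
// reader of old bitcode would run this upgrade once per materialized module:
//   llvm::UpgradeARCRuntime(*M);
// Since UpgradeToIntrinsic bails out when the named function is absent, this
// is a cheap no-op for modules that do not use the ARC runtime.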
4199 
4200 bool llvm::UpgradeModuleFlags(Module &M) {
4201  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4202  if (!ModFlags)
4203  return false;
4204 
4205  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4206  bool HasSwiftVersionFlag = false;
4207  uint8_t SwiftMajorVersion, SwiftMinorVersion;
4208  uint32_t SwiftABIVersion;
4209  auto Int8Ty = Type::getInt8Ty(M.getContext());
4210  auto Int32Ty = Type::getInt32Ty(M.getContext());
4211 
4212  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4213  MDNode *Op = ModFlags->getOperand(I);
4214  if (Op->getNumOperands() != 3)
4215  continue;
4216  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4217  if (!ID)
4218  continue;
4219  if (ID->getString() == "Objective-C Image Info Version")
4220  HasObjCFlag = true;
4221  if (ID->getString() == "Objective-C Class Properties")
4222  HasClassProperties = true;
4223  // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4224  // fields was Error and is now Max.
4225  if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4226  if (auto *Behavior =
4227  mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4228  if (Behavior->getLimitedValue() == Module::Error) {
4229  Type *Int32Ty = Type::getInt32Ty(M.getContext());
4230  Metadata *Ops[3] = {
4231  ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4232  MDString::get(M.getContext(), ID->getString()),
4233  Op->getOperand(2)};
4234  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4235  Changed = true;
4236  }
4237  }
4238  }
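 // Illustrative sketch: an old flag
 //   !{i32 1, !"PIC Level", i32 2}   ; behavior 1 == Module::Error
 // is rewritten in place to
 //   !{i32 7, !"PIC Level", i32 2}   ; behavior 7 == Module::Max
 // so that linking modules with differing PIC/PIE levels takes the maximum
 // instead of raising an error.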
4239  // Upgrade Objective-C Image Info Section. Remove the whitespace in the
4240  // section name so that llvm-lto will not complain about mismatching
4241  // module flags that are functionally the same.
4242  if (ID->getString() == "Objective-C Image Info Section") {
4243  if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4244  SmallVector<StringRef, 4> ValueComp;
4245  Value->getString().split(ValueComp, " ");
4246  if (ValueComp.size() != 1) {
4247  std::string NewValue;
4248  for (auto &S : ValueComp)
4249  NewValue += S.str();
4250  Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4251  MDString::get(M.getContext(), NewValue)};
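 // Illustrative sketch: an old section value such as
 //   !"__DATA, __objc_imageinfo, regular, no_dead_strip"
 // is canonicalized by the loop above to
 //   !"__DATA,__objc_imageinfo,regular,no_dead_strip"
 // so that otherwise-identical modules agree on the flag value.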