Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/IR/AutoUpgrade.cpp
Warning: line 1529, column 32
Division by zero
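Here "Division by zero" comes from the analyzer's core.DivideZero checker, which reports execution paths on which the divisor of a division or remainder operation can be zero. A minimal sketch of the flagged pattern (hypothetical names, not the actual code at line 1529):

    unsigned elementCount(unsigned VecWidthInBits, unsigned EltWidthInBits) {
      // If the analyzer can find a path where EltWidthInBits == 0, it
      // reports "Division by zero" at this expression.
      return VecWidthInBits / EltWidthInBits;
    }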

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AutoUpgrade.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/IR -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/IR -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/IR/AutoUpgrade.cpp
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37
38static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39
40// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
41// changed their type from v4f32 to v2i64.
42 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
43 Function *&NewFn) {
44 // Check whether this is an old version of the function, which received
45 // v4f32 arguments.
46 Type *Arg0Type = F->getFunctionType()->getParamType(0);
47 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48 return false;
49
50 // Yes, it's old, replace it with new version.
51 rename(F);
52 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53 return true;
54 }
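// For example: a module declaring the legacy signature
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// has that declaration renamed to "llvm.x86.sse41.ptestc.old", and NewFn is
// set to the current declaration, which takes two <2 x i64> operands.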
55
56// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57// arguments have changed their type from i32 to i8.
58 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
59 Function *&NewFn) {
60 // Check that the last argument is an i32.
61 Type *LastArgType = F->getFunctionType()->getParamType(
62 F->getFunctionType()->getNumParams() - 1);
63 if (!LastArgType->isIntegerTy(32))
64 return false;
65
66 // Move this function aside and map down.
67 rename(F);
68 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69 return true;
70 }
71
72// Upgrade the declaration of fp compare intrinsics that change return type
73// from scalar to vXi1 mask.
74 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
75 Function *&NewFn) {
76 // Check if the return type is a vector.
77 if (F->getReturnType()->isVectorTy())
78 return false;
79
80 rename(F);
81 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82 return true;
83 }
84
85 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
86 // All of the intrinsics matches below should be marked with which llvm
87 // version started autoupgrading them. At some point in the future we would
88 // like to use this information to remove upgrade code for some older
89 // intrinsics. It is currently undecided how we will determine that future
90 // point.
91 if (Name == "addcarryx.u32" || // Added in 8.0
92 Name == "addcarryx.u64" || // Added in 8.0
93 Name == "addcarry.u32" || // Added in 8.0
94 Name == "addcarry.u64" || // Added in 8.0
95 Name == "subborrow.u32" || // Added in 8.0
96 Name == "subborrow.u64" || // Added in 8.0
97 Name.startswith("sse2.padds.") || // Added in 8.0
98 Name.startswith("sse2.psubs.") || // Added in 8.0
99 Name.startswith("sse2.paddus.") || // Added in 8.0
100 Name.startswith("sse2.psubus.") || // Added in 8.0
101 Name.startswith("avx2.padds.") || // Added in 8.0
102 Name.startswith("avx2.psubs.") || // Added in 8.0
103 Name.startswith("avx2.paddus.") || // Added in 8.0
104 Name.startswith("avx2.psubus.") || // Added in 8.0
105 Name.startswith("avx512.padds.") || // Added in 8.0
106 Name.startswith("avx512.psubs.") || // Added in 8.0
107 Name.startswith("avx512.mask.padds.") || // Added in 8.0
108 Name.startswith("avx512.mask.psubs.") || // Added in 8.0
109 Name.startswith("avx512.mask.paddus.") || // Added in 8.0
110 Name.startswith("avx512.mask.psubus.") || // Added in 8.0
111 Name == "ssse3.pabs.b.128" || // Added in 6.0
112 Name == "ssse3.pabs.w.128" || // Added in 6.0
113 Name == "ssse3.pabs.d.128" || // Added in 6.0
114 Name.startswith("fma4.vfmadd.s") || // Added in 7.0
115 Name.startswith("fma.vfmadd.") || // Added in 7.0
116 Name.startswith("fma.vfmsub.") || // Added in 7.0
117 Name.startswith("fma.vfmsubadd.") || // Added in 7.0
118 Name.startswith("fma.vfnmadd.") || // Added in 7.0
119 Name.startswith("fma.vfnmsub.") || // Added in 7.0
120 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
121 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
122 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
123 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
124 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
125 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
126 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
127 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
128 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
129 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
130 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
131 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
132 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
133 Name.startswith("avx512.kunpck") || //added in 6.0
134 Name.startswith("avx2.pabs.") || // Added in 6.0
135 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
136 Name.startswith("avx512.broadcastm") || // Added in 6.0
137 Name == "sse.sqrt.ss" || // Added in 7.0
138 Name == "sse2.sqrt.sd" || // Added in 7.0
139 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
140 Name.startswith("avx.sqrt.p") || // Added in 7.0
141 Name.startswith("sse2.sqrt.p") || // Added in 7.0
142 Name.startswith("sse.sqrt.p") || // Added in 7.0
143 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
144 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
145 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
146 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
147 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
148 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
149 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
150 Name.startswith("avx.vperm2f128.") || // Added in 6.0
151 Name == "avx2.vperm2i128" || // Added in 6.0
152 Name == "sse.add.ss" || // Added in 4.0
153 Name == "sse2.add.sd" || // Added in 4.0
154 Name == "sse.sub.ss" || // Added in 4.0
155 Name == "sse2.sub.sd" || // Added in 4.0
156 Name == "sse.mul.ss" || // Added in 4.0
157 Name == "sse2.mul.sd" || // Added in 4.0
158 Name == "sse.div.ss" || // Added in 4.0
159 Name == "sse2.div.sd" || // Added in 4.0
160 Name == "sse41.pmaxsb" || // Added in 3.9
161 Name == "sse2.pmaxs.w" || // Added in 3.9
162 Name == "sse41.pmaxsd" || // Added in 3.9
163 Name == "sse2.pmaxu.b" || // Added in 3.9
164 Name == "sse41.pmaxuw" || // Added in 3.9
165 Name == "sse41.pmaxud" || // Added in 3.9
166 Name == "sse41.pminsb" || // Added in 3.9
167 Name == "sse2.pmins.w" || // Added in 3.9
168 Name == "sse41.pminsd" || // Added in 3.9
169 Name == "sse2.pminu.b" || // Added in 3.9
170 Name == "sse41.pminuw" || // Added in 3.9
171 Name == "sse41.pminud" || // Added in 3.9
172 Name == "avx512.kand.w" || // Added in 7.0
173 Name == "avx512.kandn.w" || // Added in 7.0
174 Name == "avx512.knot.w" || // Added in 7.0
175 Name == "avx512.kor.w" || // Added in 7.0
176 Name == "avx512.kxor.w" || // Added in 7.0
177 Name == "avx512.kxnor.w" || // Added in 7.0
178 Name == "avx512.kortestc.w" || // Added in 7.0
179 Name == "avx512.kortestz.w" || // Added in 7.0
180 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
181 Name.startswith("avx2.pmax") || // Added in 3.9
182 Name.startswith("avx2.pmin") || // Added in 3.9
183 Name.startswith("avx512.mask.pmax") || // Added in 4.0
184 Name.startswith("avx512.mask.pmin") || // Added in 4.0
185 Name.startswith("avx2.vbroadcast") || // Added in 3.8
186 Name.startswith("avx2.pbroadcast") || // Added in 3.8
187 Name.startswith("avx.vpermil.") || // Added in 3.1
188 Name.startswith("sse2.pshuf") || // Added in 3.9
189 Name.startswith("avx512.pbroadcast") || // Added in 3.9
190 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
191 Name.startswith("avx512.mask.movddup") || // Added in 3.9
192 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
193 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
194 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
195 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
196 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
197 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
198 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
199 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
200 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
201 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
202 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
203 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
204 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
205 Name.startswith("avx512.mask.pand.") || // Added in 3.9
206 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
207 Name.startswith("avx512.mask.por.") || // Added in 3.9
208 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
209 Name.startswith("avx512.mask.and.") || // Added in 3.9
210 Name.startswith("avx512.mask.andn.") || // Added in 3.9
211 Name.startswith("avx512.mask.or.") || // Added in 3.9
212 Name.startswith("avx512.mask.xor.") || // Added in 3.9
213 Name.startswith("avx512.mask.padd.") || // Added in 4.0
214 Name.startswith("avx512.mask.psub.") || // Added in 4.0
215 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
216 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
217 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
218 Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
219 Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
220 Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
221 Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
222 Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
223 Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
224 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
225 Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
226 Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
227 Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
228 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
229 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
230 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
231 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
232 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
233 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
234 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
235 Name == "avx512.cvtusi2sd" || // Added in 7.0
236 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
237 Name == "sse2.pmulu.dq" || // Added in 7.0
238 Name == "sse41.pmuldq" || // Added in 7.0
239 Name == "avx2.pmulu.dq" || // Added in 7.0
240 Name == "avx2.pmul.dq" || // Added in 7.0
241 Name == "avx512.pmulu.dq.512" || // Added in 7.0
242 Name == "avx512.pmul.dq.512" || // Added in 7.0
243 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
244 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
245 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
246 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
247 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
248 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
249 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
250 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
251 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
252 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
253 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
254 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
255 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
256 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
257 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
258 Name.startswith("avx512.cmp.p") || // Added in 12.0
259 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
260 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
261 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
262 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
263 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
264 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
265 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
266 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
267 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
268 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
269 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
270 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
271 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
272 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
273 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
274 Name.startswith("avx512.mask.pslli") || // Added in 4.0
275 Name.startswith("avx512.mask.psrai") || // Added in 4.0
276 Name.startswith("avx512.mask.psrli") || // Added in 4.0
277 Name.startswith("avx512.mask.psllv") || // Added in 4.0
278 Name.startswith("avx512.mask.psrav") || // Added in 4.0
279 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
280 Name.startswith("sse41.pmovsx") || // Added in 3.8
281 Name.startswith("sse41.pmovzx") || // Added in 3.9
282 Name.startswith("avx2.pmovsx") || // Added in 3.9
283 Name.startswith("avx2.pmovzx") || // Added in 3.9
284 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
285 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
286 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
287 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
288 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
289 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
290 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
291 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
292 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
293 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
294 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
295 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
296 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
297 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
298 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
299 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
300 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
301 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
302 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
303 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
304 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
305 Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
306 Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
307 Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
308 Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
309 Name.startswith("avx512.vpshld.") || // Added in 8.0
310 Name.startswith("avx512.vpshrd.") || // Added in 8.0
311 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
312 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
313 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
314 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
315 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
316 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
317 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
318 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
319 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
320 Name.startswith("avx512.mask.conflict.") || // Added in 9.0
321 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
322 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
323 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
324 Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
325 Name == "sse.cvtsi2ss" || // Added in 7.0
326 Name == "sse.cvtsi642ss" || // Added in 7.0
327 Name == "sse2.cvtsi2sd" || // Added in 7.0
328 Name == "sse2.cvtsi642sd" || // Added in 7.0
329 Name == "sse2.cvtss2sd" || // Added in 7.0
330 Name == "sse2.cvtdq2pd" || // Added in 3.9
331 Name == "sse2.cvtdq2ps" || // Added in 7.0
332 Name == "sse2.cvtps2pd" || // Added in 3.9
333 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
334 Name == "avx.cvtdq2.ps.256" || // Added in 7.0
335 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
336 Name.startswith("vcvtph2ps.") || // Added in 11.0
337 Name.startswith("avx.vinsertf128.") || // Added in 3.7
338 Name == "avx2.vinserti128" || // Added in 3.7
339 Name.startswith("avx512.mask.insert") || // Added in 4.0
340 Name.startswith("avx.vextractf128.") || // Added in 3.7
341 Name == "avx2.vextracti128" || // Added in 3.7
342 Name.startswith("avx512.mask.vextract") || // Added in 4.0
343 Name.startswith("sse4a.movnt.") || // Added in 3.9
344 Name.startswith("avx.movnt.") || // Added in 3.2
345 Name.startswith("avx512.storent.") || // Added in 3.9
346 Name == "sse41.movntdqa" || // Added in 5.0
347 Name == "avx2.movntdqa" || // Added in 5.0
348 Name == "avx512.movntdqa" || // Added in 5.0
349 Name == "sse2.storel.dq" || // Added in 3.9
350 Name.startswith("sse.storeu.") || // Added in 3.9
351 Name.startswith("sse2.storeu.") || // Added in 3.9
352 Name.startswith("avx.storeu.") || // Added in 3.9
353 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
354 Name.startswith("avx512.mask.store.p") || // Added in 3.9
355 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
356 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
357 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
358 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
359 Name == "avx512.mask.store.ss" || // Added in 7.0
360 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
361 Name.startswith("avx512.mask.load.") || // Added in 3.9
362 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
363 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
364 Name.startswith("avx512.mask.expand.b") || // Added in 9.0
365 Name.startswith("avx512.mask.expand.w") || // Added in 9.0
366 Name.startswith("avx512.mask.expand.d") || // Added in 9.0
367 Name.startswith("avx512.mask.expand.q") || // Added in 9.0
368 Name.startswith("avx512.mask.expand.p") || // Added in 9.0
369 Name.startswith("avx512.mask.compress.b") || // Added in 9.0
370 Name.startswith("avx512.mask.compress.w") || // Added in 9.0
371 Name.startswith("avx512.mask.compress.d") || // Added in 9.0
372 Name.startswith("avx512.mask.compress.q") || // Added in 9.0
373 Name.startswith("avx512.mask.compress.p") || // Added in 9.0
374 Name == "sse42.crc32.64.8" || // Added in 3.4
375 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
376 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
377 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
378 Name.startswith("avx512.mask.valign.") || // Added in 4.0
379 Name.startswith("sse2.psll.dq") || // Added in 3.7
380 Name.startswith("sse2.psrl.dq") || // Added in 3.7
381 Name.startswith("avx2.psll.dq") || // Added in 3.7
382 Name.startswith("avx2.psrl.dq") || // Added in 3.7
383 Name.startswith("avx512.psll.dq") || // Added in 3.9
384 Name.startswith("avx512.psrl.dq") || // Added in 3.9
385 Name == "sse41.pblendw" || // Added in 3.7
386 Name.startswith("sse41.blendp") || // Added in 3.7
387 Name.startswith("avx.blend.p") || // Added in 3.7
388 Name == "avx2.pblendw" || // Added in 3.7
389 Name.startswith("avx2.pblendd.") || // Added in 3.7
390 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
391 Name == "avx2.vbroadcasti128" || // Added in 3.7
392 Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
393 Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
394 Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
395 Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
396 Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
397 Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
398 Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
399 Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
400 Name == "xop.vpcmov" || // Added in 3.8
401 Name == "xop.vpcmov.256" || // Added in 5.0
402 Name.startswith("avx512.mask.move.s") || // Added in 4.0
403 Name.startswith("avx512.cvtmask2") || // Added in 5.0
404 Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
405 Name.startswith("xop.vprot") || // Added in 8.0
406 Name.startswith("avx512.prol") || // Added in 8.0
407 Name.startswith("avx512.pror") || // Added in 8.0
408 Name.startswith("avx512.mask.prorv.") || // Added in 8.0
409 Name.startswith("avx512.mask.pror.") || // Added in 8.0
410 Name.startswith("avx512.mask.prolv.") || // Added in 8.0
411 Name.startswith("avx512.mask.prol.") || // Added in 8.0
412 Name.startswith("avx512.ptestm") || //Added in 6.0
413 Name.startswith("avx512.ptestnm") || //Added in 6.0
414 Name.startswith("avx512.mask.pavg")) // Added in 6.0
415 return true;
416
417 return false;
418 }
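// Note: every name matched by ShouldUpgradeX86Intrinsic is upgraded without
// a replacement declaration; the caller sets NewFn to nullptr and the call
// itself is rewritten later in UpgradeIntrinsicCall.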
419
420 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
421 Function *&NewFn) {
422 // Only handle intrinsics that start with "x86.".
423 if (!Name.startswith("x86."))
424 return false;
425 // Remove "x86." prefix.
426 Name = Name.substr(4);
427
428 if (ShouldUpgradeX86Intrinsic(F, Name)) {
429 NewFn = nullptr;
430 return true;
431 }
432
433 if (Name == "rdtscp") { // Added in 8.0
434 // If this intrinsic has 0 operands, it's the new version.
435 if (F->getFunctionType()->getNumParams() == 0)
436 return false;
437
438 rename(F);
439 NewFn = Intrinsic::getDeclaration(F->getParent(),
440 Intrinsic::x86_rdtscp);
441 return true;
442 }
443
444 // SSE4.1 ptest functions may have an old signature.
445 if (Name.startswith("sse41.ptest")) { // Added in 3.2
446 if (Name.substr(11) == "c")
447 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
448 if (Name.substr(11) == "z")
449 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
450 if (Name.substr(11) == "nzc")
451 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
452 }
453 // Several blend and other instructions with masks used the wrong number of
454 // bits.
455 if (Name == "sse41.insertps") // Added in 3.6
456 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
457 NewFn);
458 if (Name == "sse41.dppd") // Added in 3.6
459 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
460 NewFn);
461 if (Name == "sse41.dpps") // Added in 3.6
462 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
463 NewFn);
464 if (Name == "sse41.mpsadbw") // Added in 3.6
465 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
466 NewFn);
467 if (Name == "avx.dp.ps.256") // Added in 3.6
468 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
469 NewFn);
470 if (Name == "avx2.mpsadbw") // Added in 3.6
471 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
472 NewFn);
473 if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
474 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
475 NewFn);
476 if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
477 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
478 NewFn);
479 if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
480 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
481 NewFn);
482 if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
483 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
484 NewFn);
485 if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
486 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
487 NewFn);
488 if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
489 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
490 NewFn);
491
492 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
493 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
494 rename(F);
495 NewFn = Intrinsic::getDeclaration(F->getParent(),
496 Intrinsic::x86_xop_vfrcz_ss);
497 return true;
498 }
499 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
500 rename(F);
501 NewFn = Intrinsic::getDeclaration(F->getParent(),
502 Intrinsic::x86_xop_vfrcz_sd);
503 return true;
504 }
505 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
506 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
507 auto Idx = F->getFunctionType()->getParamType(2);
508 if (Idx->isFPOrFPVectorTy()) {
509 rename(F);
510 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
511 unsigned EltSize = Idx->getScalarSizeInBits();
512 Intrinsic::ID Permil2ID;
513 if (EltSize == 64 && IdxSize == 128)
514 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
515 else if (EltSize == 32 && IdxSize == 128)
516 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
517 else if (EltSize == 64 && IdxSize == 256)
518 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
519 else
520 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
521 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
522 return true;
523 }
524 }
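// For example: a vpermil2ps call whose index operand is still <4 x float>
// (EltSize == 32, IdxSize == 128) is re-declared as x86_xop_vpermil2ps,
// whose current signature takes an integer index vector.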
525
526 if (Name == "seh.recoverfp") {
527 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
528 return true;
529 }
530
531 return false;
532 }
533
534 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
535 assert(F && "Illegal to upgrade a non-existent Function.");
536
537 // Quickly eliminate it, if it's not a candidate.
538 StringRef Name = F->getName();
539 if (Name.size() <= 8 || !Name.startswith("llvm."))
540 return false;
541 Name = Name.substr(5); // Strip off "llvm."
542
543 switch (Name[0]) {
544 default: break;
545 case 'a': {
546 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
547 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
548 F->arg_begin()->getType());
549 return true;
550 }
551 if (Name.startswith("aarch64.neon.frintn")) {
552 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
553 F->arg_begin()->getType());
554 return true;
555 }
556 if (Name.startswith("aarch64.neon.rbit")) {
557 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
558 F->arg_begin()->getType());
559 return true;
560 }
561 if (Name.startswith("arm.neon.vclz")) {
562 Type* args[2] = {
563 F->arg_begin()->getType(),
564 Type::getInt1Ty(F->getContext())
565 };
566 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
567 // the end of the name. Change name from llvm.arm.neon.vclz.* to
568 // llvm.ctlz.*
569 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
570 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
571 "llvm.ctlz." + Name.substr(14), F->getParent());
572 return true;
573 }
574 if (Name.startswith("arm.neon.vcnt")) {
575 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
576 F->arg_begin()->getType());
577 return true;
578 }
579 static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
580 if (vstRegex.match(Name)) {
581 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
582 Intrinsic::arm_neon_vst2,
583 Intrinsic::arm_neon_vst3,
584 Intrinsic::arm_neon_vst4};
585
586 static const Intrinsic::ID StoreLaneInts[] = {
587 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
588 Intrinsic::arm_neon_vst4lane
589 };
590
591 auto fArgs = F->getFunctionType()->params();
592 Type *Tys[] = {fArgs[0], fArgs[1]};
593 if (!Name.contains("lane"))
594 NewFn = Intrinsic::getDeclaration(F->getParent(),
595 StoreInts[fArgs.size() - 3], Tys);
596 else
597 NewFn = Intrinsic::getDeclaration(F->getParent(),
598 StoreLaneInts[fArgs.size() - 5], Tys);
599 return true;
600 }
601 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
602 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
603 return true;
604 }
605 if (Name.startswith("arm.neon.vqadds.")) {
606 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
607 F->arg_begin()->getType());
608 return true;
609 }
610 if (Name.startswith("arm.neon.vqaddu.")) {
611 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
612 F->arg_begin()->getType());
613 return true;
614 }
615 if (Name.startswith("arm.neon.vqsubs.")) {
616 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
617 F->arg_begin()->getType());
618 return true;
619 }
620 if (Name.startswith("arm.neon.vqsubu.")) {
621 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
622 F->arg_begin()->getType());
623 return true;
624 }
625 if (Name.startswith("aarch64.neon.addp")) {
626 if (F->arg_size() != 2)
627 break; // Invalid IR.
628 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
629 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
630 NewFn = Intrinsic::getDeclaration(F->getParent(),
631 Intrinsic::aarch64_neon_faddp, Ty);
632 return true;
633 }
634 }
635
636 // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and v16i8
637 // respectively.
638 if ((Name.startswith("arm.neon.bfdot.") ||
639 Name.startswith("aarch64.neon.bfdot.")) &&
640 Name.endswith("i8")) {
641 Intrinsic::ID IID =
642 StringSwitch<Intrinsic::ID>(Name)
643 .Cases("arm.neon.bfdot.v2f32.v8i8",
644 "arm.neon.bfdot.v4f32.v16i8",
645 Intrinsic::arm_neon_bfdot)
646 .Cases("aarch64.neon.bfdot.v2f32.v8i8",
647 "aarch64.neon.bfdot.v4f32.v16i8",
648 Intrinsic::aarch64_neon_bfdot)
649 .Default(Intrinsic::not_intrinsic);
650 if (IID == Intrinsic::not_intrinsic)
651 break;
652
653 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
654 assert((OperandWidth == 64 || OperandWidth == 128) &&
655 "Unexpected operand width");
656 LLVMContext &Ctx = F->getParent()->getContext();
657 std::array<Type *, 2> Tys {{
658 F->getReturnType(),
659 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
660 }};
661 NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
662 return true;
663 }
664
665 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
666 // and accept v8bf16 instead of v16i8
667 if ((Name.startswith("arm.neon.bfm") ||
668 Name.startswith("aarch64.neon.bfm")) &&
669 Name.endswith(".v4f32.v16i8")) {
670 Intrinsic::ID IID =
671 StringSwitch<Intrinsic::ID>(Name)
672 .Case("arm.neon.bfmmla.v4f32.v16i8",
673 Intrinsic::arm_neon_bfmmla)
674 .Case("arm.neon.bfmlalb.v4f32.v16i8",
675 Intrinsic::arm_neon_bfmlalb)
676 .Case("arm.neon.bfmlalt.v4f32.v16i8",
677 Intrinsic::arm_neon_bfmlalt)
678 .Case("aarch64.neon.bfmmla.v4f32.v16i8",
679 Intrinsic::aarch64_neon_bfmmla)
680 .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
681 Intrinsic::aarch64_neon_bfmlalb)
682 .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
683 Intrinsic::aarch64_neon_bfmlalt)
684 .Default(Intrinsic::not_intrinsic);
685 if (IID == Intrinsic::not_intrinsic)
686 break;
687
688 std::array<Type *, 0> Tys;
689 NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
690 return true;
691 }
692
693 if (Name == "arm.mve.vctp64" &&
694 cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
695 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
696 // function and deal with it below in UpgradeIntrinsicCall.
697 rename(F);
698 return true;
699 }
700 // These too are changed to accept a v2i1 instead of the old v4i1.
701 if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
702 Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
703 Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
704 Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
705 Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
706 Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
707 Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
708 Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
709 Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
710 Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
711 Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
712 Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
713 Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
714 Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
715 return true;
716
717 if (Name == "amdgcn.alignbit") {
718 // Target specific intrinsic became redundant
719 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
720 {F->getReturnType()});
721 return true;
722 }
723
724 break;
725 }
726
727 case 'c': {
728 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
729 rename(F);
730 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
731 F->arg_begin()->getType());
732 return true;
733 }
734 if (Name.startswith("cttz.") && F->arg_size() == 1) {
735 rename(F);
736 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
737 F->arg_begin()->getType());
738 return true;
739 }
740 break;
741 }
742 case 'd': {
743 if (Name == "dbg.value" && F->arg_size() == 4) {
744 rename(F);
745 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
746 return true;
747 }
748 break;
749 }
750 case 'e': {
751 SmallVector<StringRef, 2> Groups;
752 static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
753 if (R.match(Name, &Groups)) {
754 Intrinsic::ID ID;
755 ID = StringSwitch<Intrinsic::ID>(Groups[1])
756 .Case("add", Intrinsic::vector_reduce_add)
757 .Case("mul", Intrinsic::vector_reduce_mul)
758 .Case("and", Intrinsic::vector_reduce_and)
759 .Case("or", Intrinsic::vector_reduce_or)
760 .Case("xor", Intrinsic::vector_reduce_xor)
761 .Case("smax", Intrinsic::vector_reduce_smax)
762 .Case("smin", Intrinsic::vector_reduce_smin)
763 .Case("umax", Intrinsic::vector_reduce_umax)
764 .Case("umin", Intrinsic::vector_reduce_umin)
765 .Case("fmax", Intrinsic::vector_reduce_fmax)
766 .Case("fmin", Intrinsic::vector_reduce_fmin)
767 .Default(Intrinsic::not_intrinsic);
768 if (ID != Intrinsic::not_intrinsic) {
769 rename(F);
770 auto Args = F->getFunctionType()->params();
771 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
772 return true;
773 }
774 }
775 static const Regex R2(
776 "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
777 Groups.clear();
778 if (R2.match(Name, &Groups)) {
779 Intrinsic::ID ID = Intrinsic::not_intrinsic;
780 if (Groups[1] == "fadd")
781 ID = Intrinsic::vector_reduce_fadd;
782 if (Groups[1] == "fmul")
783 ID = Intrinsic::vector_reduce_fmul;
784 if (ID != Intrinsic::not_intrinsic) {
785 rename(F);
786 auto Args = F->getFunctionType()->params();
787 Type *Tys[] = {Args[1]};
788 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
789 return true;
790 }
791 }
792 break;
793 }
794 case 'i':
795 case 'l': {
796 bool IsLifetimeStart = Name.startswith("lifetime.start");
797 if (IsLifetimeStart || Name.startswith("invariant.start")) {
798 Intrinsic::ID ID = IsLifetimeStart ?
799 Intrinsic::lifetime_start : Intrinsic::invariant_start;
800 auto Args = F->getFunctionType()->params();
801 Type* ObjectPtr[1] = {Args[1]};
802 if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
803 rename(F);
804 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
805 return true;
806 }
807 }
808
809 bool IsLifetimeEnd = Name.startswith("lifetime.end");
810 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
811 Intrinsic::ID ID = IsLifetimeEnd ?
812 Intrinsic::lifetime_end : Intrinsic::invariant_end;
813
814 auto Args = F->getFunctionType()->params();
815 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
816 if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
817 rename(F);
818 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
819 return true;
820 }
821 }
822 if (Name.startswith("invariant.group.barrier")) {
823 // Rename invariant.group.barrier to launder.invariant.group
824 auto Args = F->getFunctionType()->params();
825 Type* ObjectPtr[1] = {Args[0]};
826 rename(F);
827 NewFn = Intrinsic::getDeclaration(F->getParent(),
828 Intrinsic::launder_invariant_group, ObjectPtr);
829 return true;
830
831 }
832
833 break;
834 }
835 case 'm': {
836 if (Name.startswith("masked.load.")) {
837 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
838 if (F->getName() !=
839 Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
840 rename(F);
841 NewFn = Intrinsic::getDeclaration(F->getParent(),
842 Intrinsic::masked_load,
843 Tys);
844 return true;
845 }
846 }
847 if (Name.startswith("masked.store.")) {
848 auto Args = F->getFunctionType()->params();
849 Type *Tys[] = { Args[0], Args[1] };
850 if (F->getName() !=
851 Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
852 rename(F);
853 NewFn = Intrinsic::getDeclaration(F->getParent(),
854 Intrinsic::masked_store,
855 Tys);
856 return true;
857 }
858 }
859 // Renaming gather/scatter intrinsics with no address space overloading
860 // to the new overload which includes an address space
861 if (Name.startswith("masked.gather.")) {
862 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
863 if (F->getName() !=
864 Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
865 rename(F);
866 NewFn = Intrinsic::getDeclaration(F->getParent(),
867 Intrinsic::masked_gather, Tys);
868 return true;
869 }
870 }
871 if (Name.startswith("masked.scatter.")) {
872 auto Args = F->getFunctionType()->params();
873 Type *Tys[] = {Args[0], Args[1]};
874 if (F->getName() !=
875 Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
876 rename(F);
877 NewFn = Intrinsic::getDeclaration(F->getParent(),
878 Intrinsic::masked_scatter, Tys);
879 return true;
880 }
881 }
882 // Update the memory intrinsics (memcpy/memmove/memset) that have an
883 // alignment parameter, embedding the alignment as an attribute of
884 // the pointer args instead.
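// For example: the five-operand form
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4, i1 false)
// becomes the four-operand form without the i32 alignment operand; the
// alignment is carried instead as "align 4" attributes on the pointer
// arguments.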
885 if (Name.startswith("memcpy.") && F->arg_size() == 5) {
886 rename(F);
887 // Get the types of dest, src, and len
888 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
889 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
890 ParamTypes);
891 return true;
892 }
893 if (Name.startswith("memmove.") && F->arg_size() == 5) {
894 rename(F);
895 // Get the types of dest, src, and len
896 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
897 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
898 ParamTypes);
899 return true;
900 }
901 if (Name.startswith("memset.") && F->arg_size() == 5) {
902 rename(F);
903 // Get the types of dest, and len
904 const auto *FT = F->getFunctionType();
905 Type *ParamTypes[2] = {
906 FT->getParamType(0), // Dest
907 FT->getParamType(2) // len
908 };
909 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
910 ParamTypes);
911 return true;
912 }
913 break;
914 }
915 case 'n': {
916 if (Name.startswith("nvvm.")) {
917 Name = Name.substr(5);
918
919 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
920 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
921 .Cases("brev32", "brev64", Intrinsic::bitreverse)
922 .Case("clz.i", Intrinsic::ctlz)
923 .Case("popc.i", Intrinsic::ctpop)
924 .Default(Intrinsic::not_intrinsic);
925 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
926 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
927 {F->getReturnType()});
928 return true;
929 }
930
931 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
932 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
933 //
934 // TODO: We could add lohi.i2d.
935 bool Expand = StringSwitch<bool>(Name)
936 .Cases("abs.i", "abs.ll", true)
937 .Cases("clz.ll", "popc.ll", "h2f", true)
938 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
939 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
940 .StartsWith("atomic.load.add.f32.p", true)
941 .StartsWith("atomic.load.add.f64.p", true)
942 .Default(false);
943 if (Expand) {
944 NewFn = nullptr;
945 return true;
946 }
947 }
948 break;
949 }
950 case 'o':
951 // We only need to change the name to match the mangling including the
952 // address space.
953 if (Name.startswith("objectsize.")) {
954 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
955 if (F->arg_size() == 2 || F->arg_size() == 3 ||
956 F->getName() !=
957 Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
958 rename(F);
959 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
960 Tys);
961 return true;
962 }
963 }
964 break;
965
966 case 'p':
967 if (Name == "prefetch") {
968 // Handle address space overloading.
969 Type *Tys[] = {F->arg_begin()->getType()};
970 if (F->getName() !=
971 Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
972 rename(F);
973 NewFn =
974 Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
975 return true;
976 }
977 } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
978 rename(F);
979 NewFn = Intrinsic::getDeclaration(F->getParent(),
980 Intrinsic::ptr_annotation,
981 F->arg_begin()->getType());
982 return true;
983 }
984 break;
985
986 case 's':
987 if (Name == "stackprotectorcheck") {
988 NewFn = nullptr;
989 return true;
990 }
991 break;
992
993 case 'v': {
994 if (Name == "var.annotation" && F->arg_size() == 4) {
995 rename(F);
996 NewFn = Intrinsic::getDeclaration(F->getParent(),
997 Intrinsic::var_annotation);
998 return true;
999 }
1000 break;
1001 }
1002
1003 case 'x':
1004 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1005 return true;
1006 }
1007
1008 auto *ST = dyn_cast<StructType>(F->getReturnType());
1009 if (ST && (!ST->isLiteral() || ST->isPacked())) {
1010 // Replace return type with literal non-packed struct. Only do this for
1011 // intrinsics declared to return a struct, not for intrinsics with
1012 // overloaded return type, in which case the exact struct type will be
1013 // mangled into the name.
1014 SmallVector<Intrinsic::IITDescriptor> Desc;
1015 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1016 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1017 auto *FT = F->getFunctionType();
1018 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1019 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1020 std::string Name = F->getName().str();
1021 rename(F);
1022 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1023 Name, F->getParent());
1024
1025 // The new function may also need remangling.
1026 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
1027 NewFn = *Result;
1028 return true;
1029 }
1030 }
1031
1032 // Remangle our intrinsic since we upgrade the mangling
1033 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1034 if (Result != None) {
1035 NewFn = Result.getValue();
1036 return true;
1037 }
1038
1039 // This may not belong here. This function is effectively being overloaded
1040 // to both detect an intrinsic which needs upgrading, and to provide the
1041 // upgraded form of the intrinsic. We should perhaps have two separate
1042 // functions for this.
1043 return false;
1044 }
1045
1046 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1047 NewFn = nullptr;
1048 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1049 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1050
1051 // Upgrade intrinsic attributes. This does not change the function.
1052 if (NewFn)
1053 F = NewFn;
1054 if (Intrinsic::ID id = F->getIntrinsicID())
1055 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1056 return Upgraded;
1057 }
1058
1059 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1060 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1061 GV->getName() == "llvm.global_dtors")) ||
1062 !GV->hasInitializer())
1063 return nullptr;
1064 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1065 if (!ATy)
1066 return nullptr;
1067 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1068 if (!STy || STy->getNumElements() != 2)
1069 return nullptr;
1070
1071 LLVMContext &C = GV->getContext();
1072 IRBuilder<> IRB(C);
1073 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1074 IRB.getInt8PtrTy());
1075 Constant *Init = GV->getInitializer();
1076 unsigned N = Init->getNumOperands();
1077 std::vector<Constant *> NewCtors(N);
1078 for (unsigned i = 0; i != N; ++i) {
1079 auto Ctor = cast<Constant>(Init->getOperand(i));
1080 NewCtors[i] = ConstantStruct::get(
1081 EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1082 Constant::getNullValue(IRB.getInt8PtrTy()));
1083 }
1084 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1085
1086 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1087 NewInit, GV->getName());
1088 }
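// For example: an @llvm.global_ctors initializer of type
//   [N x { i32, void ()* }]
// is rebuilt as [N x { i32, void ()*, i8* }], with a null i8* appended to
// each entry as the associated-data field.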
1089
1090 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1091 // to byte shuffles.
1092 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1093 Value *Op, unsigned Shift) {
1094 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1095 unsigned NumElts = ResultTy->getNumElements() * 8;
1096
1097 // Bitcast from a 64-bit element type to a byte element type.
1098 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1099 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1100
1101 // We'll be shuffling in zeroes.
1102 Value *Res = Constant::getNullValue(VecTy);
1103
1104 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1105 // we'll just return the zero vector.
1106 if (Shift < 16) {
1107 int Idxs[64];
1108 // 256/512-bit version is split into 2/4 16-byte lanes.
1109 for (unsigned l = 0; l != NumElts; l += 16)
1110 for (unsigned i = 0; i != 16; ++i) {
1111 unsigned Idx = NumElts + i - Shift;
1112 if (Idx < NumElts)
1113 Idx -= NumElts - 16; // end of lane, switch operand.
1114 Idxs[l + i] = Idx + l;
1115 }
1116
1117 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1118 }
1119
1120 // Bitcast back to a 64-bit element type.
1121 return Builder.CreateBitCast(Res, ResultTy, "cast");
1122 }
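// Worked example: for a 128-bit vector (NumElts == 16) and Shift == 4, the
// indices are [12, 13, 14, 15, 16, 17, ..., 27]: four zero bytes from Res
// followed by bytes 0-11 of Op, i.e. the register shifted left by four
// bytes.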
1123
1124 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1125 // to byte shuffles.
1126 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1127 unsigned Shift) {
1128 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1129 unsigned NumElts = ResultTy->getNumElements() * 8;
1130
1131 // Bitcast from a 64-bit element type to a byte element type.
1132 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1133 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1134
1135 // We'll be shuffling in zeroes.
1136 Value *Res = Constant::getNullValue(VecTy);
1137
1138 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1139 // we'll just return the zero vector.
1140 if (Shift < 16) {
1141 int Idxs[64];
1142 // 256/512-bit version is split into 2/4 16-byte lanes.
1143 for (unsigned l = 0; l != NumElts; l += 16)
1144 for (unsigned i = 0; i != 16; ++i) {
1145 unsigned Idx = i + Shift;
1146 if (Idx >= 16)
1147 Idx += NumElts - 16; // end of lane, switch operand.
1148 Idxs[l + i] = Idx + l;
1149 }
1150
1151 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1152 }
1153
1154 // Bitcast back to a 64-bit element type.
1155 return Builder.CreateBitCast(Res, ResultTy, "cast");
1156 }
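// Worked example: with NumElts == 16 and Shift == 4 the indices are
// [4, 5, ..., 15, 16, 17, 18, 19]: bytes 4-15 of Op followed by four zero
// bytes from Res, i.e. the register shifted right by four bytes.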
1157
1158 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1159 unsigned NumElts) {
1160 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1161 llvm::VectorType *MaskTy = FixedVectorType::get(
1162 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1163 Mask = Builder.CreateBitCast(Mask, MaskTy);
1164
1165 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was
1166 // an i8 and we need to extract down to the right number of elements.
1167 if (NumElts <= 4) {
1168 int Indices[4];
1169 for (unsigned i = 0; i != NumElts; ++i)
1170 Indices[i] = i;
1171 Mask = Builder.CreateShuffleVector(
1172 Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1173 }
1174
1175 return Mask;
1176 }
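// For example: selecting between two <4 x i32> vectors under an i8 mask
// bitcasts the mask to <8 x i1> and then shuffles out elements 0-3,
// producing the <4 x i1> condition the select needs.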
1177
1178 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1179 Value *Op0, Value *Op1) {
1180 // If the mask is all ones just emit the first operation.
1181 if (const auto *C = dyn_cast<Constant>(Mask))
1182 if (C->isAllOnesValue())
1183 return Op0;
1184
1185 Mask = getX86MaskVec(Builder, Mask,
1186 cast<FixedVectorType>(Op0->getType())->getNumElements());
1187 return Builder.CreateSelect(Mask, Op0, Op1);
1188 }
1189
1190 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1191 Value *Op0, Value *Op1) {
1192 // If the mask is all ones just emit the first operation.
1193 if (const auto *C = dyn_cast<Constant>(Mask))
1194 if (C->isAllOnesValue())
1195 return Op0;
1196
1197 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1198 Mask->getType()->getIntegerBitWidth());
1199 Mask = Builder.CreateBitCast(Mask, MaskTy);
1200 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1201 return Builder.CreateSelect(Mask, Op0, Op1);
1202 }
1203
1204 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1205 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1206 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1207 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1208 Value *Op1, Value *Shift,
1209 Value *Passthru, Value *Mask,
1210 bool IsVALIGN) {
1211 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1212
1213 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1214 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1215 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1216 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1217
1218 // Mask the immediate for VALIGN.
1219 if (IsVALIGN)
1220 ShiftVal &= (NumElts - 1);
1221
1222 // If palignr is shifting the pair of vectors more than the size of two
1223 // lanes, emit zero.
1224 if (ShiftVal >= 32)
1225 return llvm::Constant::getNullValue(Op0->getType());
1226
1227 // If palignr is shifting the pair of input vectors more than one lane,
1228 // but less than two lanes, convert to shifting in zeroes.
1229 if (ShiftVal > 16) {
1230 ShiftVal -= 16;
1231 Op1 = Op0;
1232 Op0 = llvm::Constant::getNullValue(Op0->getType());
1233 }
1234
1235 int Indices[64];
1236 // 256-bit palignr operates on 128-bit lanes, so we need to handle that here.
1237 for (unsigned l = 0; l < NumElts; l += 16) {
1238 for (unsigned i = 0; i != 16; ++i) {
1239 unsigned Idx = ShiftVal + i;
1240 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1241 Idx += NumElts - 16; // End of lane, switch operand.
1242 Indices[l + i] = Idx + l;
1243 }
1244 }
1245
1246 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1247 makeArrayRef(Indices, NumElts),
1248 "palignr");
1249
1250 return EmitX86Select(Builder, Mask, Align, Passthru);
1251}
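The per-lane index arithmetic above reduces to indexing into a 32-byte concatenation. A byte-level sketch of one 128-bit PALIGNR lane, illustrative rather than the LLVM helper:

#include <cstdint>
#include <cstring>

// Hypothetical model of one 128-bit PALIGNR lane: bytes come from the
// 32-byte concatenation [Lo | Hi] starting at the shift amount; shifts of
// 32 or more produce zero, matching the early-outs in the function above.
static void palignrLane(const uint8_t Lo[16], const uint8_t Hi[16],
                        unsigned Shift, uint8_t Out[16]) {
  uint8_t Concat[32];
  std::memcpy(Concat, Lo, 16);
  std::memcpy(Concat + 16, Hi, 16);
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = Shift + i;
    Out[i] = Idx < 32 ? Concat[Idx] : 0;
  }
}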
1252
1253static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1254 bool ZeroMask, bool IndexForm) {
1255 Type *Ty = CI.getType();
1256 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1257 unsigned EltWidth = Ty->getScalarSizeInBits();
1258 bool IsFloat = Ty->isFPOrFPVectorTy();
1259 Intrinsic::ID IID;
1260 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1261 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1262 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1263 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1264 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1265 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1266 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1267 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1268 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1269 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1270 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1271 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1272 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1273 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1274 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1275 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1276 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1277 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1278 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1279 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1280 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1281 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1282 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1283 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1284 else if (VecWidth == 128 && EltWidth == 16)
1285 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1286 else if (VecWidth == 256 && EltWidth == 16)
1287 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1288 else if (VecWidth == 512 && EltWidth == 16)
1289 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1290 else if (VecWidth == 128 && EltWidth == 8)
1291 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1292 else if (VecWidth == 256 && EltWidth == 8)
1293 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1294 else if (VecWidth == 512 && EltWidth == 8)
1295 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1296 else
1297 llvm_unreachable("Unexpected intrinsic");
1298
1299 Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
1300 CI.getArgOperand(2) };
1301
1302 // If this isn't index form, we need to swap operands 0 and 1.
1303 if (!IndexForm)
1304 std::swap(Args[0], Args[1]);
1305
1306 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1307 Args);
1308 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1309 : Builder.CreateBitCast(CI.getArgOperand(1),
1310 Ty);
1311 return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1312}
1313
1314static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1315 Intrinsic::ID IID) {
1316 Type *Ty = CI.getType();
1317 Value *Op0 = CI.getOperand(0);
1318 Value *Op1 = CI.getOperand(1);
1319 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1320 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1321
1322 if (CI.arg_size() == 4) { // For masked intrinsics.
1323 Value *VecSrc = CI.getOperand(2);
1324 Value *Mask = CI.getOperand(3);
1325 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1326 }
1327 return Res;
1328}
1329
1330static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1331 bool IsRotateRight) {
1332 Type *Ty = CI.getType();
1333 Value *Src = CI.getArgOperand(0);
1334 Value *Amt = CI.getArgOperand(1);
1335
1336 // The amount may be a scalar immediate, in which case create a splat vector.
1337 // Funnel shift amounts are treated modulo the bit width and the types are all
1338 // power-of-2 sized, so we only care about the lowest log2 bits anyway.
1339 if (Amt->getType() != Ty) {
1340 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1341 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1342 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1343 }
1344
1345 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1346 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1347 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1348
1349 if (CI.arg_size() == 4) { // For masked intrinsics.
1350 Value *VecSrc = CI.getOperand(2);
1351 Value *Mask = CI.getOperand(3);
1352 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1353 }
1354 return Res;
1355}
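A rotate is exactly a funnel shift with both inputs equal, which is what the call above emits. A scalar sketch for i32, illustrative only:

#include <cstdint>

// Hypothetical scalar model: fshl(X, X, Amt) on i32 is a left rotate, with
// the amount taken modulo the bit width as the comment above describes.
static uint32_t rotl32(uint32_t X, uint32_t Amt) {
  Amt &= 31;
  return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
}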
1356
1357static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1358 bool IsSigned) {
1359 Type *Ty = CI.getType();
1360 Value *LHS = CI.getArgOperand(0);
1361 Value *RHS = CI.getArgOperand(1);
1362
1363 CmpInst::Predicate Pred;
1364 switch (Imm) {
1365 case 0x0:
1366 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1367 break;
1368 case 0x1:
1369 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1370 break;
1371 case 0x2:
1372 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1373 break;
1374 case 0x3:
1375 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1376 break;
1377 case 0x4:
1378 Pred = ICmpInst::ICMP_EQ;
1379 break;
1380 case 0x5:
1381 Pred = ICmpInst::ICMP_NE;
1382 break;
1383 case 0x6:
1384 return Constant::getNullValue(Ty); // FALSE
1385 case 0x7:
1386 return Constant::getAllOnesValue(Ty); // TRUE
1387 default:
1388 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1389 }
1390
1391 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1392 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1393 return Ext;
1394}
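The immediate-to-predicate mapping is easiest to read as a table; a scalar sketch of one signed lane, with illustrative names:

#include <cstdint>

// Hypothetical scalar model of one signed vpcom lane: the 3-bit immediate
// selects the predicate, 6/7 are constant false/true, and the i1 result is
// sign-extended to 0 or all-ones, matching the CreateSExt above.
static int32_t vpcomLaneSigned(int32_t L, int32_t R, unsigned Imm) {
  bool Cmp;
  switch (Imm & 7) {
  case 0: Cmp = L < R;  break;
  case 1: Cmp = L <= R; break;
  case 2: Cmp = L > R;  break;
  case 3: Cmp = L >= R; break;
  case 4: Cmp = L == R; break;
  case 5: Cmp = L != R; break;
  case 6: Cmp = false;  break;
  default: Cmp = true;  break;
  }
  return Cmp ? -1 : 0;
}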
1395
1396static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1397 bool IsShiftRight, bool ZeroMask) {
1398 Type *Ty = CI.getType();
1399 Value *Op0 = CI.getArgOperand(0);
1400 Value *Op1 = CI.getArgOperand(1);
1401 Value *Amt = CI.getArgOperand(2);
1402
1403 if (IsShiftRight)
1404 std::swap(Op0, Op1);
1405
1406 // The amount may be a scalar immediate, in which case create a splat vector.
1407 // Funnel shift amounts are treated modulo the bit width and the types are all
1408 // power-of-2 sized, so we only care about the lowest log2 bits anyway.
1409 if (Amt->getType() != Ty) {
1410 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1411 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1412 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1413 }
1414
1415 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1416 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1417 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1418
1419 unsigned NumArgs = CI.arg_size();
1420 if (NumArgs >= 4) { // For masked intrinsics.
1421 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1422 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1423 CI.getArgOperand(0);
1424 Value *Mask = CI.getOperand(NumArgs - 1);
1425 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1426 }
1427 return Res;
1428}
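Concat-shift (VSHLD/VSHRD) is a funnel shift once the operands are ordered correctly, which is why the right-shift case swaps them above. A scalar sketch of fshl on i32, illustrative only:

#include <cstdint>

// Hypothetical scalar model of llvm.fshl.i32: concatenate Hi:Lo, shift left
// by the amount modulo 32, keep the high half. fshr is the mirror image.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  uint64_t Concat = ((uint64_t)Hi << 32) | Lo;
  return (uint32_t)((Concat << (Amt & 31)) >> 32);
}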
1429
1430static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1431 Value *Ptr, Value *Data, Value *Mask,
1432 bool Aligned) {
1433 // Cast the pointer to the right type.
1434 Ptr = Builder.CreateBitCast(Ptr,
1435 llvm::PointerType::getUnqual(Data->getType()));
1436 const Align Alignment =
1437 Aligned
1438 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1439 : Align(1);
1440
1441 // If the mask is all ones, just emit a regular store.
1442 if (const auto *C = dyn_cast<Constant>(Mask))
1443 if (C->isAllOnesValue())
1444 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1445
1446 // Convert the mask from an integer type to a vector of i1.
1447 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1448 Mask = getX86MaskVec(Builder, Mask, NumElts);
1449 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1450}
1451
1452static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1453 Value *Ptr, Value *Passthru, Value *Mask,
1454 bool Aligned) {
1455 Type *ValTy = Passthru->getType();
1456 // Cast the pointer to the right type.
1457 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1458 const Align Alignment =
1459 Aligned
1460 ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1461 8)
1462 : Align(1);
1463
1464 // If the mask is all ones, just emit a regular load.
1465 if (const auto *C = dyn_cast<Constant>(Mask))
1466 if (C->isAllOnesValue())
1467 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1468
1469 // Convert the mask from an integer type to a vector of i1.
1470 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1471 Mask = getX86MaskVec(Builder, Mask, NumElts);
1472 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1473}
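Both masked helpers have simple per-lane semantics; a scalar sketch of the load side, with illustrative names and i32 lanes assumed:

#include <cstdint>

// Hypothetical scalar model of UpgradeMaskedLoad: enabled lanes read
// memory, disabled lanes keep the passthru value. An all-ones mask
// degenerates to a plain load, which is the fast path above.
static void maskedLoad(const int32_t *Ptr, const int32_t *Passthru,
                       uint8_t Mask, unsigned NumElts, int32_t *Out) {
  for (unsigned i = 0; i != NumElts; ++i)
    Out[i] = ((Mask >> i) & 1) ? Ptr[i] : Passthru[i];
}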
1474
1475static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1476 Type *Ty = CI.getType();
1477 Value *Op0 = CI.getArgOperand(0);
1478 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1479 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1480 if (CI.arg_size() == 3)
1481 Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1482 return Res;
1483}
1484
1485static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1486 Type *Ty = CI.getType();
1487
1488 // Arguments have a vXi32 type so cast to vXi64.
1489 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1490 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1491
1492 if (IsSigned) {
1493 // Shift left then arithmetic shift right.
1494 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1495 LHS = Builder.CreateShl(LHS, ShiftAmt);
1496 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1497 RHS = Builder.CreateShl(RHS, ShiftAmt);
1498 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1499 } else {
1500 // Clear the upper bits.
1501 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1502 LHS = Builder.CreateAnd(LHS, Mask);
1503 RHS = Builder.CreateAnd(RHS, Mask);
1504 }
1505
1506 Value *Res = Builder.CreateMul(LHS, RHS);
1507
1508 if (CI.arg_size() == 4)
1509 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1510
1511 return Res;
1512}
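The shl/ashr pair above is a 32-to-64 sign extension and the AND a zero extension, so one lane of the product reduces to this illustrative scalar model:

#include <cstdint>

// Hypothetical scalar model of one PMULDQ/PMULUDQ lane: only the low 32
// bits of each 64-bit input are significant; they are sign- or
// zero-extended and then multiplied in 64 bits.
static int64_t pmuldqLane(int64_t A, int64_t B, bool IsSigned) {
  if (IsSigned)
    return (int64_t)(int32_t)A * (int64_t)(int32_t)B;
  return (int64_t)((uint64_t)(uint32_t)A * (uint64_t)(uint32_t)B);
}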
1513
1514 // Apply a mask to a vector of i1's and make sure the result is at least 8 bits wide.
1515static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1516 Value *Mask) {
1517 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
Step 7: The object is a 'FixedVectorType'
Step 8: 'NumElts' initialized here
1518 if (Mask) {
Step 8.1: 'Mask' is null
Step 9: Taking false branch
1519 const auto *C = dyn_cast<Constant>(Mask);
1520 if (!C || !C->isAllOnesValue())
1521 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1522 }
1523
1524 if (NumElts < 8) {
Step 10: Assuming 'NumElts' is < 8
Step 11: Taking true branch
1525 int Indices[8];
1526 for (unsigned i = 0; i != NumElts; ++i)
Step 12: Assuming 'i' is equal to 'NumElts'
Step 13: Loop condition is false. Execution continues on line 1528
1527 Indices[i] = i;
1528 for (unsigned i = NumElts; i != 8; ++i)
Step 14: Loop condition is true. Entering loop body
1529 Indices[i] = NumElts + i % NumElts;
Step 15: Division by zero
1530 Vec = Builder.CreateShuffleVector(Vec,
1531 Constant::getNullValue(Vec->getType()),
1532 Indices);
1533 }
1534 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1535}
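This is the function the warning points at: on the path annotated above, the first loop exits with i == NumElts == 0, the padding loop on line 1528 then runs, and 'i % NumElts' on line 1529 divides by zero. A standalone reduction follows; the guard is an illustrative assumption about a fix, not the actual upstream patch:

#include <cassert>

// Standalone reduction of the flagged pattern. With NumElts == 0 the first
// loop is skipped, the second loop runs for i = 0..7, and 'i % NumElts'
// divides by zero on the first iteration.
static void padIndices(int Indices[8], unsigned NumElts) {
  assert(NumElts != 0 && "zero-element vectors would divide by zero below");
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = i;
  for (unsigned i = NumElts; i != 8; ++i)
    Indices[i] = NumElts + i % NumElts; // Safe only while NumElts > 0.
}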
1536
1537static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1538 unsigned CC, bool Signed) {
1539 Value *Op0 = CI.getArgOperand(0);
1540 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
Step 1: The object is a 'FixedVectorType'
1541
1542 Value *Cmp;
1543 if (CC == 3) {
Step 2: Assuming 'CC' is not equal to 3
Step 3: Taking false branch
1544 Cmp = Constant::getNullValue(
1545 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1546 } else if (CC == 7) {
Step 4: Assuming 'CC' is equal to 7
Step 5: Taking true branch
1547 Cmp = Constant::getAllOnesValue(
1548 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1549 } else {
1550 ICmpInst::Predicate Pred;
1551 switch (CC) {
1552 default: llvm_unreachable("Unknown condition code");
1553 case 0: Pred = ICmpInst::ICMP_EQ; break;
1554 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1555 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1556 case 4: Pred = ICmpInst::ICMP_NE; break;
1557 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1558 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1559 }
1560 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1561 }
1562
1563 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1564
1565 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
Step 6: Calling 'ApplyX86MaskOn1BitsVec'
1566}
1567
1568// Replace a masked intrinsic with an older unmasked intrinsic.
1569static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1570 Intrinsic::ID IID) {
1571 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1572 Value *Rep = Builder.CreateCall(Intrin,
1573 { CI.getArgOperand(0), CI.getArgOperand(1) });
1574 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1575}
1576
1577static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1578 Value* A = CI.getArgOperand(0);
1579 Value* B = CI.getArgOperand(1);
1580 Value* Src = CI.getArgOperand(2);
1581 Value* Mask = CI.getArgOperand(3);
1582
1583 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1584 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1585 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1586 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1587 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1588 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1589}
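Only bit 0 of the mask matters here; a scalar sketch with four float lanes, illustrative only:

#include <cstdint>

// Hypothetical scalar model of upgradeMaskedMove: the result is A with
// lane 0 replaced by B[0] when mask bit 0 is set, else by Src[0].
static void maskedMove(const float A[4], const float B[4], const float Src[4],
                       uint8_t Mask, float Out[4]) {
  for (unsigned i = 0; i != 4; ++i)
    Out[i] = A[i];
  Out[0] = (Mask & 1) ? B[0] : Src[0];
}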
1590
1591
1592static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1593 Value* Op = CI.getArgOperand(0);
1594 Type* ReturnOp = CI.getType();
1595 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1596 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1597 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1598}
1599
1600// Replace intrinsic with unmasked version and a select.
1601static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1602 CallBase &CI, Value *&Rep) {
1603 Name = Name.substr(12); // Remove avx512.mask.
1604
1605 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1606 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1607 Intrinsic::ID IID;
1608 if (Name.startswith("max.p")) {
1609 if (VecWidth == 128 && EltWidth == 32)
1610 IID = Intrinsic::x86_sse_max_ps;
1611 else if (VecWidth == 128 && EltWidth == 64)
1612 IID = Intrinsic::x86_sse2_max_pd;
1613 else if (VecWidth == 256 && EltWidth == 32)
1614 IID = Intrinsic::x86_avx_max_ps_256;
1615 else if (VecWidth == 256 && EltWidth == 64)
1616 IID = Intrinsic::x86_avx_max_pd_256;
1617 else
1618 llvm_unreachable("Unexpected intrinsic");
1619 } else if (Name.startswith("min.p")) {
1620 if (VecWidth == 128 && EltWidth == 32)
1621 IID = Intrinsic::x86_sse_min_ps;
1622 else if (VecWidth == 128 && EltWidth == 64)
1623 IID = Intrinsic::x86_sse2_min_pd;
1624 else if (VecWidth == 256 && EltWidth == 32)
1625 IID = Intrinsic::x86_avx_min_ps_256;
1626 else if (VecWidth == 256 && EltWidth == 64)
1627 IID = Intrinsic::x86_avx_min_pd_256;
1628 else
1629 llvm_unreachable("Unexpected intrinsic");
1630 } else if (Name.startswith("pshuf.b.")) {
1631 if (VecWidth == 128)
1632 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1633 else if (VecWidth == 256)
1634 IID = Intrinsic::x86_avx2_pshuf_b;
1635 else if (VecWidth == 512)
1636 IID = Intrinsic::x86_avx512_pshuf_b_512;
1637 else
1638 llvm_unreachable("Unexpected intrinsic");
1639 } else if (Name.startswith("pmul.hr.sw.")) {
1640 if (VecWidth == 128)
1641 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1642 else if (VecWidth == 256)
1643 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1644 else if (VecWidth == 512)
1645 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1646 else
1647 llvm_unreachable("Unexpected intrinsic");
1648 } else if (Name.startswith("pmulh.w.")) {
1649 if (VecWidth == 128)
1650 IID = Intrinsic::x86_sse2_pmulh_w;
1651 else if (VecWidth == 256)
1652 IID = Intrinsic::x86_avx2_pmulh_w;
1653 else if (VecWidth == 512)
1654 IID = Intrinsic::x86_avx512_pmulh_w_512;
1655 else
1656 llvm_unreachable("Unexpected intrinsic");
1657 } else if (Name.startswith("pmulhu.w.")) {
1658 if (VecWidth == 128)
1659 IID = Intrinsic::x86_sse2_pmulhu_w;
1660 else if (VecWidth == 256)
1661 IID = Intrinsic::x86_avx2_pmulhu_w;
1662 else if (VecWidth == 512)
1663 IID = Intrinsic::x86_avx512_pmulhu_w_512;
1664 else
1665 llvm_unreachable("Unexpected intrinsic");
1666 } else if (Name.startswith("pmaddw.d.")) {
1667 if (VecWidth == 128)
1668 IID = Intrinsic::x86_sse2_pmadd_wd;
1669 else if (VecWidth == 256)
1670 IID = Intrinsic::x86_avx2_pmadd_wd;
1671 else if (VecWidth == 512)
1672 IID = Intrinsic::x86_avx512_pmaddw_d_512;
1673 else
1674 llvm_unreachable("Unexpected intrinsic");
1675 } else if (Name.startswith("pmaddubs.w.")) {
1676 if (VecWidth == 128)
1677 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1678 else if (VecWidth == 256)
1679 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1680 else if (VecWidth == 512)
1681 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1682 else
1683 llvm_unreachable("Unexpected intrinsic");
1684 } else if (Name.startswith("packsswb.")) {
1685 if (VecWidth == 128)
1686 IID = Intrinsic::x86_sse2_packsswb_128;
1687 else if (VecWidth == 256)
1688 IID = Intrinsic::x86_avx2_packsswb;
1689 else if (VecWidth == 512)
1690 IID = Intrinsic::x86_avx512_packsswb_512;
1691 else
1692 llvm_unreachable("Unexpected intrinsic");
1693 } else if (Name.startswith("packssdw.")) {
1694 if (VecWidth == 128)
1695 IID = Intrinsic::x86_sse2_packssdw_128;
1696 else if (VecWidth == 256)
1697 IID = Intrinsic::x86_avx2_packssdw;
1698 else if (VecWidth == 512)
1699 IID = Intrinsic::x86_avx512_packssdw_512;
1700 else
1701 llvm_unreachable("Unexpected intrinsic");
1702 } else if (Name.startswith("packuswb.")) {
1703 if (VecWidth == 128)
1704 IID = Intrinsic::x86_sse2_packuswb_128;
1705 else if (VecWidth == 256)
1706 IID = Intrinsic::x86_avx2_packuswb;
1707 else if (VecWidth == 512)
1708 IID = Intrinsic::x86_avx512_packuswb_512;
1709 else
1710 llvm_unreachable("Unexpected intrinsic");
1711 } else if (Name.startswith("packusdw.")) {
1712 if (VecWidth == 128)
1713 IID = Intrinsic::x86_sse41_packusdw;
1714 else if (VecWidth == 256)
1715 IID = Intrinsic::x86_avx2_packusdw;
1716 else if (VecWidth == 512)
1717 IID = Intrinsic::x86_avx512_packusdw_512;
1718 else
1719 llvm_unreachable("Unexpected intrinsic");
1720 } else if (Name.startswith("vpermilvar.")) {
1721 if (VecWidth == 128 && EltWidth == 32)
1722 IID = Intrinsic::x86_avx_vpermilvar_ps;
1723 else if (VecWidth == 128 && EltWidth == 64)
1724 IID = Intrinsic::x86_avx_vpermilvar_pd;
1725 else if (VecWidth == 256 && EltWidth == 32)
1726 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1727 else if (VecWidth == 256 && EltWidth == 64)
1728 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1729 else if (VecWidth == 512 && EltWidth == 32)
1730 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1731 else if (VecWidth == 512 && EltWidth == 64)
1732 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1733 else
1734 llvm_unreachable("Unexpected intrinsic");
1735 } else if (Name == "cvtpd2dq.256") {
1736 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1737 } else if (Name == "cvtpd2ps.256") {
1738 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1739 } else if (Name == "cvttpd2dq.256") {
1740 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1741 } else if (Name == "cvttps2dq.128") {
1742 IID = Intrinsic::x86_sse2_cvttps2dq;
1743 } else if (Name == "cvttps2dq.256") {
1744 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1745 } else if (Name.startswith("permvar.")) {
1746 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1747 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1748 IID = Intrinsic::x86_avx2_permps;
1749 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1750 IID = Intrinsic::x86_avx2_permd;
1751 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1752 IID = Intrinsic::x86_avx512_permvar_df_256;
1753 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1754 IID = Intrinsic::x86_avx512_permvar_di_256;
1755 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1756 IID = Intrinsic::x86_avx512_permvar_sf_512;
1757 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1758 IID = Intrinsic::x86_avx512_permvar_si_512;
1759 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1760 IID = Intrinsic::x86_avx512_permvar_df_512;
1761 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1762 IID = Intrinsic::x86_avx512_permvar_di_512;
1763 else if (VecWidth == 128 && EltWidth == 16)
1764 IID = Intrinsic::x86_avx512_permvar_hi_128;
1765 else if (VecWidth == 256 && EltWidth == 16)
1766 IID = Intrinsic::x86_avx512_permvar_hi_256;
1767 else if (VecWidth == 512 && EltWidth == 16)
1768 IID = Intrinsic::x86_avx512_permvar_hi_512;
1769 else if (VecWidth == 128 && EltWidth == 8)
1770 IID = Intrinsic::x86_avx512_permvar_qi_128;
1771 else if (VecWidth == 256 && EltWidth == 8)
1772 IID = Intrinsic::x86_avx512_permvar_qi_256;
1773 else if (VecWidth == 512 && EltWidth == 8)
1774 IID = Intrinsic::x86_avx512_permvar_qi_512;
1775 else
1776 llvm_unreachable("Unexpected intrinsic");
1777 } else if (Name.startswith("dbpsadbw.")) {
1778 if (VecWidth == 128)
1779 IID = Intrinsic::x86_avx512_dbpsadbw_128;
1780 else if (VecWidth == 256)
1781 IID = Intrinsic::x86_avx512_dbpsadbw_256;
1782 else if (VecWidth == 512)
1783 IID = Intrinsic::x86_avx512_dbpsadbw_512;
1784 else
1785 llvm_unreachable("Unexpected intrinsic");
1786 } else if (Name.startswith("pmultishift.qb.")) {
1787 if (VecWidth == 128)
1788 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1789 else if (VecWidth == 256)
1790 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1791 else if (VecWidth == 512)
1792 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1793 else
1794 llvm_unreachable("Unexpected intrinsic");
1795 } else if (Name.startswith("conflict.")) {
1796 if (Name[9] == 'd' && VecWidth == 128)
1797 IID = Intrinsic::x86_avx512_conflict_d_128;
1798 else if (Name[9] == 'd' && VecWidth == 256)
1799 IID = Intrinsic::x86_avx512_conflict_d_256;
1800 else if (Name[9] == 'd' && VecWidth == 512)
1801 IID = Intrinsic::x86_avx512_conflict_d_512;
1802 else if (Name[9] == 'q' && VecWidth == 128)
1803 IID = Intrinsic::x86_avx512_conflict_q_128;
1804 else if (Name[9] == 'q' && VecWidth == 256)
1805 IID = Intrinsic::x86_avx512_conflict_q_256;
1806 else if (Name[9] == 'q' && VecWidth == 512)
1807 IID = Intrinsic::x86_avx512_conflict_q_512;
1808 else
1809 llvm_unreachable("Unexpected intrinsic");
1810 } else if (Name.startswith("pavg.")) {
1811 if (Name[5] == 'b' && VecWidth == 128)
1812 IID = Intrinsic::x86_sse2_pavg_b;
1813 else if (Name[5] == 'b' && VecWidth == 256)
1814 IID = Intrinsic::x86_avx2_pavg_b;
1815 else if (Name[5] == 'b' && VecWidth == 512)
1816 IID = Intrinsic::x86_avx512_pavg_b_512;
1817 else if (Name[5] == 'w' && VecWidth == 128)
1818 IID = Intrinsic::x86_sse2_pavg_w;
1819 else if (Name[5] == 'w' && VecWidth == 256)
1820 IID = Intrinsic::x86_avx2_pavg_w;
1821 else if (Name[5] == 'w' && VecWidth == 512)
1822 IID = Intrinsic::x86_avx512_pavg_w_512;
1823 else
1824 llvm_unreachable("Unexpected intrinsic");
1825 } else
1826 return false;
1827
1828 SmallVector<Value *, 4> Args(CI.args());
1829 Args.pop_back();
1830 Args.pop_back();
1831 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1832 Args);
1833 unsigned NumArgs = CI.arg_size();
1834 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1835 CI.getArgOperand(NumArgs - 2));
1836 return true;
1837}
1838
1839 /// Upgrade the comment in a call to inline asm that represents an objc
1840 /// retain/release marker.
1841void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1842 size_t Pos;
1843 if (AsmStr->find("mov\tfp") == 0 &&
1844 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1845 (Pos = AsmStr->find("# marker")) != std::string::npos) {
1846 AsmStr->replace(Pos, 1, ";");
1847 }
1848}
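A quick usage sketch; the exact marker text here is an assumption for illustration. The string must start with "mov\tfp", mention objc_retainAutoreleaseReturnValue, and contain "# marker"; the '#' is then rewritten to ';'.

#include <cassert>
#include <string>

// Illustrative before/after for UpgradeInlineAsmString.
static void inlineAsmMarkerExample() {
  std::string Asm =
      "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
  size_t Pos = Asm.find("# marker");
  assert(Asm.find("mov\tfp") == 0 && Pos != std::string::npos);
  Asm.replace(Pos, 1, ";"); // '#' comment marker becomes a ';' separator.
  // Asm: "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"
}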
1849
1850static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
1851 IRBuilder<> &Builder) {
1852 if (Name == "mve.vctp64.old") {
1853 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
1854 // correct type.
1855 Value *VCTP = Builder.CreateCall(
1856 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
1857 CI->getArgOperand(0), CI->getName());
1858 Value *C1 = Builder.CreateCall(
1859 Intrinsic::getDeclaration(
1860 F->getParent(), Intrinsic::arm_mve_pred_v2i,
1861 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
1862 VCTP);
1863 return Builder.CreateCall(
1864 Intrinsic::getDeclaration(
1865 F->getParent(), Intrinsic::arm_mve_pred_i2v,
1866 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1867 C1);
1868 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
1869 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
1870 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
1871 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
1872 Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
1873 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
1874 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
1875 Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
1876 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
1877 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
1878 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
1879 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
1880 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
1881 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
1882 std::vector<Type *> Tys;
1883 unsigned ID = CI->getIntrinsicID();
1884 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
1885 switch (ID) {
1886 case Intrinsic::arm_mve_mull_int_predicated:
1887 case Intrinsic::arm_mve_vqdmull_predicated:
1888 case Intrinsic::arm_mve_vldr_gather_base_predicated:
1889 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
1890 break;
1891 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
1892 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
1893 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
1894 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
1895 V2I1Ty};
1896 break;
1897 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
1898 Tys = {CI->getType(), CI->getOperand(0)->getType(),
1899 CI->getOperand(1)->getType(), V2I1Ty};
1900 break;
1901 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
1902 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
1903 CI->getOperand(2)->getType(), V2I1Ty};
1904 break;
1905 case Intrinsic::arm_cde_vcx1q_predicated:
1906 case Intrinsic::arm_cde_vcx1qa_predicated:
1907 case Intrinsic::arm_cde_vcx2q_predicated:
1908 case Intrinsic::arm_cde_vcx2qa_predicated:
1909 case Intrinsic::arm_cde_vcx3q_predicated:
1910 case Intrinsic::arm_cde_vcx3qa_predicated:
1911 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
1912 break;
1913 default:
1914 llvm_unreachable("Unhandled Intrinsic!");
1915 }
1916
1917 std::vector<Value *> Ops;
1918 for (Value *Op : CI->args()) {
1919 Type *Ty = Op->getType();
1920 if (Ty->getScalarSizeInBits() == 1) {
1921 Value *C1 = Builder.CreateCall(
1922 Intrinsic::getDeclaration(
1923 F->getParent(), Intrinsic::arm_mve_pred_v2i,
1924 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1925 Op);
1926 Op = Builder.CreateCall(
1927 Intrinsic::getDeclaration(F->getParent(),
1928 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
1929 C1);
1930 }
1931 Ops.push_back(Op);
1932 }
1933
1934 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1935 return Builder.CreateCall(Fn, Ops, CI->getName());
1936 }
1937 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
1938}
1939
1940 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1941 /// provided to integrate seamlessly with the existing context.
1942void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
1943 Function *F = CI->getCalledFunction();
1944 LLVMContext &C = CI->getContext();
1945 IRBuilder<> Builder(C);
1946 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1947
1948 assert(F && "Intrinsic call is not direct?");
1949
1950 if (!NewFn) {
1951 // Get the Function's name.
1952 StringRef Name = F->getName();
1953
1954 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1955 Name = Name.substr(5);
1956
1957 bool IsX86 = Name.startswith("x86.");
1958 if (IsX86)
1959 Name = Name.substr(4);
1960 bool IsNVVM = Name.startswith("nvvm.");
1961 if (IsNVVM)
1962 Name = Name.substr(5);
1963 bool IsARM = Name.startswith("arm.");
1964 if (IsARM)
1965 Name = Name.substr(4);
1966
1967 if (IsX86 && Name.startswith("sse4a.movnt.")) {
1968 Module *M = F->getParent();
1969 SmallVector<Metadata *, 1> Elts;
1970 Elts.push_back(
1971 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1972 MDNode *Node = MDNode::get(C, Elts);
1973
1974 Value *Arg0 = CI->getArgOperand(0);
1975 Value *Arg1 = CI->getArgOperand(1);
1976
1977 // Nontemporal (unaligned) store of the 0'th element of the float/double
1978 // vector.
1979 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1980 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1981 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1982 Value *Extract =
1983 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1984
1985 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1986 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1987
1988 // Remove intrinsic.
1989 CI->eraseFromParent();
1990 return;
1991 }
1992
1993 if (IsX86 && (Name.startswith("avx.movnt.") ||
1994 Name.startswith("avx512.storent."))) {
1995 Module *M = F->getParent();
1996 SmallVector<Metadata *, 1> Elts;
1997 Elts.push_back(
1998 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1999 MDNode *Node = MDNode::get(C, Elts);
2000
2001 Value *Arg0 = CI->getArgOperand(0);
2002 Value *Arg1 = CI->getArgOperand(1);
2003
2004 // Convert the type of the pointer to a pointer to the stored type.
2005 Value *BC = Builder.CreateBitCast(Arg0,
2006 PointerType::getUnqual(Arg1->getType()),
2007 "cast");
2008 StoreInst *SI = Builder.CreateAlignedStore(
2009 Arg1, BC,
2010 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
2011 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
2012
2013 // Remove intrinsic.
2014 CI->eraseFromParent();
2015 return;
2016 }
2017
2018 if (IsX86 && Name == "sse2.storel.dq") {
2019 Value *Arg0 = CI->getArgOperand(0);
2020 Value *Arg1 = CI->getArgOperand(1);
2021
2022 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2023 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2024 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2025 Value *BC = Builder.CreateBitCast(Arg0,
2026 PointerType::getUnqual(Elt->getType()),
2027 "cast");
2028 Builder.CreateAlignedStore(Elt, BC, Align(1));
2029
2030 // Remove intrinsic.
2031 CI->eraseFromParent();
2032 return;
2033 }
2034
2035 if (IsX86 && (Name.startswith("sse.storeu.") ||
2036 Name.startswith("sse2.storeu.") ||
2037 Name.startswith("avx.storeu."))) {
2038 Value *Arg0 = CI->getArgOperand(0);
2039 Value *Arg1 = CI->getArgOperand(1);
2040
2041 Arg0 = Builder.CreateBitCast(Arg0,
2042 PointerType::getUnqual(Arg1->getType()),
2043 "cast");
2044 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2045
2046 // Remove intrinsic.
2047 CI->eraseFromParent();
2048 return;
2049 }
2050
2051 if (IsX86 && Name == "avx512.mask.store.ss") {
2052 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2053 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2054 Mask, false);
2055
2056 // Remove intrinsic.
2057 CI->eraseFromParent();
2058 return;
2059 }
2060
2061 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2062 // "avx512.mask.storeu." or "avx512.mask.store."
2063 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2064 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2065 CI->getArgOperand(2), Aligned);
2066
2067 // Remove intrinsic.
2068 CI->eraseFromParent();
2069 return;
2070 }
2071
2072 Value *Rep;
2073 // Upgrade packed integer vector compare intrinsics to compare instructions.
2074 if (IsX86 && (Name.startswith("sse2.pcmp") ||
2075 Name.startswith("avx2.pcmp"))) {
2076 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2077 bool CmpEq = Name[9] == 'e';
2078 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2079 CI->getArgOperand(0), CI->getArgOperand(1));
2080 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2081 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2082 Type *ExtTy = Type::getInt32Ty(C);
2083 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2084 ExtTy = Type::getInt64Ty(C);
2085 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2086 ExtTy->getPrimitiveSizeInBits();
2087 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2088 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2089 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2090 Name == "sse2.sqrt.sd")) {
2091 Value *Vec = CI->getArgOperand(0);
2092 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2093 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2094 Intrinsic::sqrt, Elt0->getType());
2095 Elt0 = Builder.CreateCall(Intr, Elt0);
2096 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2097 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2098 Name.startswith("sse2.sqrt.p") ||
2099 Name.startswith("sse.sqrt.p"))) {
2100 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2101 Intrinsic::sqrt,
2102 CI->getType()),
2103 {CI->getArgOperand(0)});
2104 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2105 if (CI->arg_size() == 4 &&
2106 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2107 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2108 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2109 : Intrinsic::x86_avx512_sqrt_pd_512;
2110
2111 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2112 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2113 IID), Args);
2114 } else {
2115 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2116 Intrinsic::sqrt,
2117 CI->getType()),
2118 {CI->getArgOperand(0)});
2119 }
2120 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2121 CI->getArgOperand(1));
2122 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2123 Name.startswith("avx512.ptestnm"))) {
2124 Value *Op0 = CI->getArgOperand(0);
2125 Value *Op1 = CI->getArgOperand(1);
2126 Value *Mask = CI->getArgOperand(2);
2127 Rep = Builder.CreateAnd(Op0, Op1);
2128 llvm::Type *Ty = Op0->getType();
2129 Value *Zero = llvm::Constant::getNullValue(Ty);
2130 ICmpInst::Predicate Pred =
2131 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2132 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2133 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2134 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2135 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2136 ->getNumElements();
2137 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2138 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2139 CI->getArgOperand(1));
2140 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2141 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2142 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2143 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2144 int Indices[64];
2145 for (unsigned i = 0; i != NumElts; ++i)
2146 Indices[i] = i;
2147
2148 // First extract half of each vector. This gives better codegen than
2149 // doing it in a single shuffle.
2150 LHS = Builder.CreateShuffleVector(LHS, LHS,
2151 makeArrayRef(Indices, NumElts / 2));
2152 RHS = Builder.CreateShuffleVector(RHS, RHS,
2153 makeArrayRef(Indices, NumElts / 2));
2154 // Concat the vectors.
2155 // NOTE: Operands have to be swapped to match intrinsic definition.
2156 Rep = Builder.CreateShuffleVector(RHS, LHS,
2157 makeArrayRef(Indices, NumElts));
2158 Rep = Builder.CreateBitCast(Rep, CI->getType());
2159 } else if (IsX86 && Name == "avx512.kand.w") {
2160 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2161 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2162 Rep = Builder.CreateAnd(LHS, RHS);
2163 Rep = Builder.CreateBitCast(Rep, CI->getType());
2164 } else if (IsX86 && Name == "avx512.kandn.w") {
2165 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2166 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2167 LHS = Builder.CreateNot(LHS);
2168 Rep = Builder.CreateAnd(LHS, RHS);
2169 Rep = Builder.CreateBitCast(Rep, CI->getType());
2170 } else if (IsX86 && Name == "avx512.kor.w") {
2171 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2172 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2173 Rep = Builder.CreateOr(LHS, RHS);
2174 Rep = Builder.CreateBitCast(Rep, CI->getType());
2175 } else if (IsX86 && Name == "avx512.kxor.w") {
2176 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2177 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2178 Rep = Builder.CreateXor(LHS, RHS);
2179 Rep = Builder.CreateBitCast(Rep, CI->getType());
2180 } else if (IsX86 && Name == "avx512.kxnor.w") {
2181 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2182 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2183 LHS = Builder.CreateNot(LHS);
2184 Rep = Builder.CreateXor(LHS, RHS);
2185 Rep = Builder.CreateBitCast(Rep, CI->getType());
2186 } else if (IsX86 && Name == "avx512.knot.w") {
2187 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2188 Rep = Builder.CreateNot(Rep);
2189 Rep = Builder.CreateBitCast(Rep, CI->getType());
2190 } else if (IsX86 &&
2191 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2192 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2193 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2194 Rep = Builder.CreateOr(LHS, RHS);
2195 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2196 Value *C;
2197 if (Name[14] == 'c')
2198 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2199 else
2200 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2201 Rep = Builder.CreateICmpEQ(Rep, C);
2202 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2203 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2204 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2205 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2206 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2207 Type *I32Ty = Type::getInt32Ty(C);
2208 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2209 ConstantInt::get(I32Ty, 0));
2210 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2211 ConstantInt::get(I32Ty, 0));
2212 Value *EltOp;
2213 if (Name.contains(".add."))
2214 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2215 else if (Name.contains(".sub."))
2216 EltOp = Builder.CreateFSub(Elt0, Elt1);
2217 else if (Name.contains(".mul."))
2218 EltOp = Builder.CreateFMul(Elt0, Elt1);
2219 else
2220 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2221 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2222 ConstantInt::get(I32Ty, 0));
2223 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2224 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2225 bool CmpEq = Name[16] == 'e';
2226 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2227 } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2228 Type *OpTy = CI->getArgOperand(0)->getType();
2229 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2230 Intrinsic::ID IID;
2231 switch (VecWidth) {
2232 default: llvm_unreachable("Unexpected intrinsic");
2233 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2234 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2235 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2236 }
2237
2238 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2239 { CI->getOperand(0), CI->getArgOperand(1) });
2240 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2241 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2242 Type *OpTy = CI->getArgOperand(0)->getType();
2243 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2244 unsigned EltWidth = OpTy->getScalarSizeInBits();
2245 Intrinsic::ID IID;
2246 if (VecWidth == 128 && EltWidth == 32)
2247 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2248 else if (VecWidth == 256 && EltWidth == 32)
2249 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2250 else if (VecWidth == 512 && EltWidth == 32)
2251 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2252 else if (VecWidth == 128 && EltWidth == 64)
2253 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2254 else if (VecWidth == 256 && EltWidth == 64)
2255 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2256 else if (VecWidth == 512 && EltWidth == 64)
2257 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2258 else
2259 llvm_unreachable("Unexpected intrinsic");
2260
2261 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2262 { CI->getOperand(0), CI->getArgOperand(1) });
2263 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2264 } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2265 SmallVector<Value *, 4> Args(CI->args());
2266 Type *OpTy = Args[0]->getType();
2267 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2268 unsigned EltWidth = OpTy->getScalarSizeInBits();
2269 Intrinsic::ID IID;
2270 if (VecWidth == 128 && EltWidth == 32)
2271 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2272 else if (VecWidth == 256 && EltWidth == 32)
2273 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2274 else if (VecWidth == 512 && EltWidth == 32)
2275 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2276 else if (VecWidth == 128 && EltWidth == 64)
2277 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2278 else if (VecWidth == 256 && EltWidth == 64)
2279 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2280 else if (VecWidth == 512 && EltWidth == 64)
2281 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2282 else
2283 llvm_unreachable("Unexpected intrinsic");
2284
2285 Value *Mask = Constant::getAllOnesValue(CI->getType());
2286 if (VecWidth == 512)
2287 std::swap(Mask, Args.back());
2288 Args.push_back(Mask);
2289
2290 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2291 Args);
2292 } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2293 // Integer compare intrinsics.
2294 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2295 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2296 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2297 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2298 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2299 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2300 Name.startswith("avx512.cvtw2mask.") ||
2301 Name.startswith("avx512.cvtd2mask.") ||
2302 Name.startswith("avx512.cvtq2mask."))) {
2303 Value *Op = CI->getArgOperand(0);
2304 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2305 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2306 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2307 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2308 Name == "ssse3.pabs.w.128" ||
2309 Name == "ssse3.pabs.d.128" ||
2310 Name.startswith("avx2.pabs") ||
2311 Name.startswith("avx512.mask.pabs"))) {
2312 Rep = upgradeAbs(Builder, *CI);
2313 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2314 Name == "sse2.pmaxs.w" ||
2315 Name == "sse41.pmaxsd" ||
2316 Name.startswith("avx2.pmaxs") ||
2317 Name.startswith("avx512.mask.pmaxs"))) {
2318 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2319 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2320 Name == "sse41.pmaxuw" ||
2321 Name == "sse41.pmaxud" ||
2322 Name.startswith("avx2.pmaxu") ||
2323 Name.startswith("avx512.mask.pmaxu"))) {
2324 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2325 } else if (IsX86 && (Name == "sse41.pminsb" ||
2326 Name == "sse2.pmins.w" ||
2327 Name == "sse41.pminsd" ||
2328 Name.startswith("avx2.pmins") ||
2329 Name.startswith("avx512.mask.pmins"))) {
2330 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2331 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2332 Name == "sse41.pminuw" ||
2333 Name == "sse41.pminud" ||
2334 Name.startswith("avx2.pminu") ||
2335 Name.startswith("avx512.mask.pminu"))) {
2336 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2337 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2338 Name == "avx2.pmulu.dq" ||
2339 Name == "avx512.pmulu.dq.512" ||
2340 Name.startswith("avx512.mask.pmulu.dq."))) {
2341 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2342 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2343 Name == "avx2.pmul.dq" ||
2344 Name == "avx512.pmul.dq.512" ||
2345 Name.startswith("avx512.mask.pmul.dq."))) {
2346 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2347 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2348 Name == "sse2.cvtsi2sd" ||
2349 Name == "sse.cvtsi642ss" ||
2350 Name == "sse2.cvtsi642sd")) {
2351 Rep = Builder.CreateSIToFP(
2352 CI->getArgOperand(1),
2353 cast<VectorType>(CI->getType())->getElementType());
2354 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2355 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2356 Rep = Builder.CreateUIToFP(
2357 CI->getArgOperand(1),
2358 cast<VectorType>(CI->getType())->getElementType());
2359 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2360 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2361 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2362 Rep = Builder.CreateFPExt(
2363 Rep, cast<VectorType>(CI->getType())->getElementType());
2364 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2365 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2366 Name == "sse2.cvtdq2ps" ||
2367 Name == "avx.cvtdq2.pd.256" ||
2368 Name == "avx.cvtdq2.ps.256" ||
2369 Name.startswith("avx512.mask.cvtdq2pd.") ||
2370 Name.startswith("avx512.mask.cvtudq2pd.") ||
2371 Name.startswith("avx512.mask.cvtdq2ps.") ||
2372 Name.startswith("avx512.mask.cvtudq2ps.") ||
2373 Name.startswith("avx512.mask.cvtqq2pd.") ||
2374 Name.startswith("avx512.mask.cvtuqq2pd.") ||
2375 Name == "avx512.mask.cvtqq2ps.256" ||
2376 Name == "avx512.mask.cvtqq2ps.512" ||
2377 Name == "avx512.mask.cvtuqq2ps.256" ||
2378 Name == "avx512.mask.cvtuqq2ps.512" ||
2379 Name == "sse2.cvtps2pd" ||
2380 Name == "avx.cvt.ps2.pd.256" ||
2381 Name == "avx512.mask.cvtps2pd.128" ||
2382 Name == "avx512.mask.cvtps2pd.256")) {
2383 auto *DstTy = cast<FixedVectorType>(CI->getType());
2384 Rep = CI->getArgOperand(0);
2385 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2386
2387 unsigned NumDstElts = DstTy->getNumElements();
2388 if (NumDstElts < SrcTy->getNumElements()) {
2389 assert(NumDstElts == 2 && "Unexpected vector size");
2390 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2391 }
2392
2393 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2394 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2395 if (IsPS2PD)
2396 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2397 else if (CI->arg_size() == 4 &&
2398 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2399 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2400 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2401 : Intrinsic::x86_avx512_sitofp_round;
2402 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2403 { DstTy, SrcTy });
2404 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2405 } else {
2406 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2407 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2408 }
2409
2410 if (CI->arg_size() >= 3)
2411 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2412 CI->getArgOperand(1));
2413 } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2414 Name.startswith("vcvtph2ps."))) {
2415 auto *DstTy = cast<FixedVectorType>(CI->getType());
2416 Rep = CI->getArgOperand(0);
2417 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2418 unsigned NumDstElts = DstTy->getNumElements();
2419 if (NumDstElts != SrcTy->getNumElements()) {
2420 assert(NumDstElts == 4 && "Unexpected vector size");
2421 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2422 }
2423 Rep = Builder.CreateBitCast(
2424 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2425 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2426 if (CI->arg_size() >= 3)
2427 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2428 CI->getArgOperand(1));
2429 } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2430 // "avx512.mask.loadu." or "avx512.mask.load."
2431 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2432 Rep =
2433 UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2434 CI->getArgOperand(2), Aligned);
2435 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2436 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2437 Type *PtrTy = ResultTy->getElementType();
2438
2439 // Cast the pointer to element type.
2440 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2441 llvm::PointerType::getUnqual(PtrTy));
2442
2443 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2444 ResultTy->getNumElements());
2445
2446 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2447 Intrinsic::masked_expandload,
2448 ResultTy);
2449 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2450 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2451 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2452 Type *PtrTy = ResultTy->getElementType();
2453
2454 // Cast the pointer to element type.
2455 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2456 llvm::PointerType::getUnqual(PtrTy));
2457
2458 Value *MaskVec =
2459 getX86MaskVec(Builder, CI->getArgOperand(2),
2460 cast<FixedVectorType>(ResultTy)->getNumElements());
2461
2462 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2463 Intrinsic::masked_compressstore,
2464 ResultTy);
2465 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2466 } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2467 Name.startswith("avx512.mask.expand."))) {
2468 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2469
2470 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2471 ResultTy->getNumElements());
2472
2473 bool IsCompress = Name[12] == 'c';
2474 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2475 : Intrinsic::x86_avx512_mask_expand;
2476 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2477 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2478 MaskVec });
2479 } else if (IsX86 && Name.startswith("xop.vpcom")) {
2480 bool IsSigned;
2481 if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2482 Name.endswith("uq"))
2483 IsSigned = false;
2484 else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2485 Name.endswith("q"))
2486 IsSigned = true;
2487 else
2488 llvm_unreachable("Unknown suffix");
2489
2490 unsigned Imm;
2491 if (CI->arg_size() == 3) {
2492 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2493 } else {
2494 Name = Name.substr(9); // strip off "xop.vpcom"
2495 if (Name.startswith("lt"))
2496 Imm = 0;
2497 else if (Name.startswith("le"))
2498 Imm = 1;
2499 else if (Name.startswith("gt"))
2500 Imm = 2;
2501 else if (Name.startswith("ge"))
2502 Imm = 3;
2503 else if (Name.startswith("eq"))
2504 Imm = 4;
2505 else if (Name.startswith("ne"))
2506 Imm = 5;
2507 else if (Name.startswith("false"))
2508 Imm = 6;
2509 else if (Name.startswith("true"))
2510 Imm = 7;
2511 else
2512 llvm_unreachable("Unknown condition");
2513 }
2514
2515 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2516 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2517 Value *Sel = CI->getArgOperand(2);
2518 Value *NotSel = Builder.CreateNot(Sel);
2519 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2520 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2521 Rep = Builder.CreateOr(Sel0, Sel1);
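// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): xop.vpcmov is a
// plain bitwise select, so the expansion above computes
// (Op0 & Sel) | (Op1 & ~Sel) -- set selector bits take the first operand,
// clear bits take the second. The same identity on scalar bits:
auto VpcmovBitsSketch = [](uint64_t A, uint64_t B, uint64_t Sel) {
  return (A & Sel) | (B & ~Sel); // e.g. (0xFF00, 0x00FF, 0xF0F0) -> 0xF00F
};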
2522 } else if (IsX86 && (Name.startswith("xop.vprot") ||
2523 Name.startswith("avx512.prol") ||
2524 Name.startswith("avx512.mask.prol"))) {
2525 Rep = upgradeX86Rotate(Builder, *CI, false);
2526 } else if (IsX86 && (Name.startswith("avx512.pror") ||
2527 Name.startswith("avx512.mask.pror"))) {
2528 Rep = upgradeX86Rotate(Builder, *CI, true);
2529 } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2530 Name.startswith("avx512.mask.vpshld") ||
2531 Name.startswith("avx512.maskz.vpshld"))) {
2532 bool ZeroMask = Name[11] == 'z';
2533 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2534 } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2535 Name.startswith("avx512.mask.vpshrd") ||
2536 Name.startswith("avx512.maskz.vpshrd"))) {
2537 bool ZeroMask = Name[11] == 'z';
2538 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2539 } else if (IsX86 && Name == "sse42.crc32.64.8") {
2540 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2541 Intrinsic::x86_sse42_crc32_32_8);
2542 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2543 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2544 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2545 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2546 Name.startswith("avx512.vbroadcast.s"))) {
2547 // Replace broadcasts with a series of insertelements.
2548 auto *VecTy = cast<FixedVectorType>(CI->getType());
2549 Type *EltTy = VecTy->getElementType();
2550 unsigned EltNum = VecTy->getNumElements();
2551 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2552 EltTy->getPointerTo());
2553 Value *Load = Builder.CreateLoad(EltTy, Cast);
2554 Type *I32Ty = Type::getInt32Ty(C);
2555 Rep = PoisonValue::get(VecTy);
2556 for (unsigned I = 0; I < EltNum; ++I)
2557 Rep = Builder.CreateInsertElement(Rep, Load,
2558 ConstantInt::get(I32Ty, I));
2559 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2560 Name.startswith("sse41.pmovzx") ||
2561 Name.startswith("avx2.pmovsx") ||
2562 Name.startswith("avx2.pmovzx") ||
2563 Name.startswith("avx512.mask.pmovsx") ||
2564 Name.startswith("avx512.mask.pmovzx"))) {
2565 auto *DstTy = cast<FixedVectorType>(CI->getType());
2566 unsigned NumDstElts = DstTy->getNumElements();
2567
2568 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2569 SmallVector<int, 8> ShuffleMask(NumDstElts);
2570 for (unsigned i = 0; i != NumDstElts; ++i)
2571 ShuffleMask[i] = i;
2572
2573 Value *SV =
2574 Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2575
2576 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2577 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2578 : Builder.CreateZExt(SV, DstTy);
2579 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2580 if (CI->arg_size() == 3)
2581 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2582 CI->getArgOperand(1));
2583 } else if (Name == "avx512.mask.pmov.qd.256" ||
2584 Name == "avx512.mask.pmov.qd.512" ||
2585 Name == "avx512.mask.pmov.wb.256" ||
2586 Name == "avx512.mask.pmov.wb.512") {
2587 Type *Ty = CI->getArgOperand(1)->getType();
2588 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2589 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2590 CI->getArgOperand(1));
2591 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2592 Name == "avx2.vbroadcasti128")) {
2593 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2594 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2595 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2596 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2597 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2598 PointerType::getUnqual(VT));
2599 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2600 if (NumSrcElts == 2)
2601 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2602 else
2603 Rep = Builder.CreateShuffleVector(
2604 Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2605 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2606 Name.startswith("avx512.mask.shuf.f"))) {
2607 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2608 Type *VT = CI->getType();
2609 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2610 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2611 unsigned ControlBitsMask = NumLanes - 1;
2612 unsigned NumControlBits = NumLanes / 2;
2613 SmallVector<int, 8> ShuffleMask(0);
2614
2615 for (unsigned l = 0; l != NumLanes; ++l) {
2616 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2617 // Destination lanes in the upper half read from the second source.
2618 if (l >= NumLanes / 2)
2619 LaneMask += NumLanes;
2620 for (unsigned i = 0; i != NumElementsInLane; ++i)
2621 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2622 }
2623 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2624 CI->getArgOperand(1), ShuffleMask);
2625 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2626 CI->getArgOperand(3));
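// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): how the
// immediate expands into the lane-shuffle mask built above. Each destination
// lane takes NumLanes/2 control bits from Imm; lanes in the upper half of the
// destination read from the second source, hence the += NumLanes.
auto ShufLaneMaskSketch = [](unsigned Imm, unsigned NumLanes,
                             unsigned EltsPerLane) {
  SmallVector<int, 16> Mask;
  unsigned ControlBits = NumLanes / 2;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned Lane = (Imm >> (l * ControlBits)) & (NumLanes - 1);
    if (l >= NumLanes / 2)
      Lane += NumLanes; // switch to the second source operand
    for (unsigned i = 0; i != EltsPerLane; ++i)
      Mask.push_back(Lane * EltsPerLane + i);
  }
  return Mask; // e.g. Imm=0x4E, 4 lanes of 4 elts -> lanes {2, 3, 4+0, 4+1}
};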
2627 } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2628 Name.startswith("avx512.mask.broadcasti"))) {
2629 unsigned NumSrcElts =
2630 cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2631 ->getNumElements();
2632 unsigned NumDstElts =
2633 cast<FixedVectorType>(CI->getType())->getNumElements();
2634
2635 SmallVector<int, 8> ShuffleMask(NumDstElts);
2636 for (unsigned i = 0; i != NumDstElts; ++i)
2637 ShuffleMask[i] = i % NumSrcElts;
2638
2639 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2640 CI->getArgOperand(0),
2641 ShuffleMask);
2642 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2643 CI->getArgOperand(1));
2644 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2645 Name.startswith("avx2.vbroadcast") ||
2646 Name.startswith("avx512.pbroadcast") ||
2647 Name.startswith("avx512.mask.broadcast.s"))) {
2648 // Replace vp?broadcasts with a vector shuffle.
2649 Value *Op = CI->getArgOperand(0);
2650 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2651 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2652 SmallVector<int, 8> M;
2653 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2654 Rep = Builder.CreateShuffleVector(Op, M);
2655
2656 if (CI->arg_size() == 3)
2657 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2658 CI->getArgOperand(1));
2659 } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2660 Name.startswith("avx2.padds.") ||
2661 Name.startswith("avx512.padds.") ||
2662 Name.startswith("avx512.mask.padds."))) {
2663 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2664 } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2665 Name.startswith("avx2.psubs.") ||
2666 Name.startswith("avx512.psubs.") ||
2667 Name.startswith("avx512.mask.psubs."))) {
2668 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2669 } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2670 Name.startswith("avx2.paddus.") ||
2671 Name.startswith("avx512.mask.paddus."))) {
2672 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2673 } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2674 Name.startswith("avx2.psubus.") ||
2675 Name.startswith("avx512.mask.psubus."))) {
2676 Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2677 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2678 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2679 CI->getArgOperand(1),
2680 CI->getArgOperand(2),
2681 CI->getArgOperand(3),
2682 CI->getArgOperand(4),
2683 false);
2684 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2685 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2686 CI->getArgOperand(1),
2687 CI->getArgOperand(2),
2688 CI->getArgOperand(3),
2689 CI->getArgOperand(4),
2690 true);
2691 } else if (IsX86 && (Name == "sse2.psll.dq" ||
2692 Name == "avx2.psll.dq")) {
2693 // 128/256-bit shift left specified in bits.
2694 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2695 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2696 Shift / 8); // Shift is in bits.
2697 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2698 Name == "avx2.psrl.dq")) {
2699 // 128/256-bit shift right specified in bits.
2700 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2701 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2702 Shift / 8); // Shift is in bits.
2703 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2704 Name == "avx2.psll.dq.bs" ||
2705 Name == "avx512.psll.dq.512")) {
2706 // 128/256/512-bit shift left specified in bytes.
2707 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2708 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2709 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2710 Name == "avx2.psrl.dq.bs" ||
2711 Name == "avx512.psrl.dq.512")) {
2712 // 128/256/512-bit shift right specified in bytes.
2713 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2714 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2715 } else if (IsX86 && (Name == "sse41.pblendw" ||
2716 Name.startswith("sse41.blendp") ||
2717 Name.startswith("avx.blend.p") ||
2718 Name == "avx2.pblendw" ||
2719 Name.startswith("avx2.pblendd."))) {
2720 Value *Op0 = CI->getArgOperand(0);
2721 Value *Op1 = CI->getArgOperand(1);
2722 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2723 auto *VecTy = cast<FixedVectorType>(CI->getType());
2724 unsigned NumElts = VecTy->getNumElements();
2725
2726 SmallVector<int, 16> Idxs(NumElts);
2727 for (unsigned i = 0; i != NumElts; ++i)
2728 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2729
2730 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
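// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): the blend
// immediate-to-mask expansion above, in isolation. Bit (i % 8) of Imm picks
// element i from the second source (concatenated index i + NumElts).
auto BlendMaskSketch = [](unsigned Imm, unsigned NumElts) {
  SmallVector<int, 16> Idxs(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    Idxs[i] = ((Imm >> (i % 8)) & 1) ? (int)(i + NumElts) : (int)i;
  return Idxs; // e.g. Imm=0x55, NumElts=8 -> {8,1,10,3,12,5,14,7}
};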
2731 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2732 Name == "avx2.vinserti128" ||
2733 Name.startswith("avx512.mask.insert"))) {
2734 Value *Op0 = CI->getArgOperand(0);
2735 Value *Op1 = CI->getArgOperand(1);
2736 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2737 unsigned DstNumElts =
2738 cast<FixedVectorType>(CI->getType())->getNumElements();
2739 unsigned SrcNumElts =
2740 cast<FixedVectorType>(Op1->getType())->getNumElements();
2741 unsigned Scale = DstNumElts / SrcNumElts;
2742
2743 // Mask off the high bits of the immediate value; hardware ignores those.
2744 Imm = Imm % Scale;
2745
2746 // Extend the second operand into a vector the size of the destination.
2747 SmallVector<int, 8> Idxs(DstNumElts);
2748 for (unsigned i = 0; i != SrcNumElts; ++i)
2749 Idxs[i] = i;
2750 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2751 Idxs[i] = SrcNumElts;
2752 Rep = Builder.CreateShuffleVector(Op1, Idxs);
2753
2754 // Insert the second operand into the first operand.
2755
2756 // Note that there is no guarantee that instruction lowering will actually
2757 // produce a vinsertf128 instruction for the created shuffles. In
2758 // particular, the 0 immediate case involves no lane changes, so it can
2759 // be handled as a blend.
2760
2761 // Example of shuffle mask for 32-bit elements:
2762 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2763 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2764
2765 // First fill with an identity mask.
2766 for (unsigned i = 0; i != DstNumElts; ++i)
2767 Idxs[i] = i;
2768 // Then replace the elements where we need to insert.
2769 for (unsigned i = 0; i != SrcNumElts; ++i)
2770 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2771 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2772
2773 // If the intrinsic has a mask operand, handle that.
2774 if (CI->arg_size() == 5)
2775 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2776 CI->getArgOperand(3));
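// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): the two
// shuffles above are equivalent to one mask over the concatenation of Op0 and
// the widened Op1, matching the Imm=0/Imm=1 mask examples in the comments:
auto InsertSubvecMaskSketch = [](unsigned Imm, unsigned DstNumElts,
                                 unsigned SrcNumElts) {
  SmallVector<int, 8> Idxs(DstNumElts);
  Imm %= DstNumElts / SrcNumElts; // hardware ignores the high immediate bits
  for (unsigned i = 0; i != DstNumElts; ++i)
    Idxs[i] = i; // start from the identity (keep Op0)...
  for (unsigned i = 0; i != SrcNumElts; ++i)
    Idxs[i + Imm * SrcNumElts] = i + DstNumElts; // ...then splice in Op1
  return Idxs; // Imm=1, insert 4 elts into 8: {0,1,2,3,8,9,10,11}
};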
2777 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2778 Name == "avx2.vextracti128" ||
2779 Name.startswith("avx512.mask.vextract"))) {
2780 Value *Op0 = CI->getArgOperand(0);
2781 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2782 unsigned DstNumElts =
2783 cast<FixedVectorType>(CI->getType())->getNumElements();
2784 unsigned SrcNumElts =
2785 cast<FixedVectorType>(Op0->getType())->getNumElements();
2786 unsigned Scale = SrcNumElts / DstNumElts;
2787
2788 // Mask off the high bits of the immediate value; hardware ignores those.
2789 Imm = Imm % Scale;
2790
2791 // Get indexes for the subvector of the input vector.
2792 SmallVector<int, 8> Idxs(DstNumElts);
2793 for (unsigned i = 0; i != DstNumElts; ++i) {
2794 Idxs[i] = i + (Imm * DstNumElts);
2795 }
2796 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2797
2798 // If the intrinsic has a mask operand, handle that.
2799 if (CI->arg_size() == 4)
2800 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2801 CI->getArgOperand(2));
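// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): the extract
// mask above simply selects one aligned window of DstNumElts elements.
auto ExtractSubvecMaskSketch = [](unsigned Imm, unsigned DstNumElts,
                                  unsigned SrcNumElts) {
  SmallVector<int, 8> Idxs(DstNumElts);
  Imm %= SrcNumElts / DstNumElts; // hardware ignores the high immediate bits
  for (unsigned i = 0; i != DstNumElts; ++i)
    Idxs[i] = i + Imm * DstNumElts;
  return Idxs; // e.g. Imm=1, extract 4 elts from 8 -> {4,5,6,7}
};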
2802 } else if (!IsX86 && Name == "stackprotectorcheck") {
2803 Rep = nullptr;
2804 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2805 Name.startswith("avx512.mask.perm.di."))) {
2806 Value *Op0 = CI->getArgOperand(0);
2807 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2808 auto *VecTy = cast<FixedVectorType>(CI->getType());
2809 unsigned NumElts = VecTy->getNumElements();
2810
2811 SmallVector<int, 8> Idxs(NumElts);
2812 for (unsigned i = 0; i != NumElts; ++i)
2813 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2814
2815 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2816
2817 if (CI->arg_size() == 4)
2818 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2819 CI->getArgOperand(2));
2820 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2821 Name == "avx2.vperm2i128")) {
2822 // The immediate permute control byte looks like this:
2823 // [1:0] - select 128 bits from sources for low half of destination
2824 // [2] - ignore
2825 // [3] - zero low half of destination
2826 // [5:4] - select 128 bits from sources for high half of destination
2827 // [6] - ignore
2828 // [7] - zero high half of destination
2829
2830 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2831
2832 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2833 unsigned HalfSize = NumElts / 2;
2834 SmallVector<int, 8> ShuffleMask(NumElts);
2835
2836 // Determine which operand(s) are actually in use for this instruction.
2837 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2838 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2839
2840 // If needed, replace operands based on zero mask.
2841 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2842 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2843
2844 // Permute low half of result.
2845 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2846 for (unsigned i = 0; i < HalfSize; ++i)
2847 ShuffleMask[i] = StartIndex + i;
2848
2849 // Permute high half of result.
2850 StartIndex = (Imm & 0x10) ? HalfSize : 0;
2851 for (unsigned i = 0; i < HalfSize; ++i)
2852 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2853
2854 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2855
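// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): decoding the
// control byte into the final two-source mask, ignoring the zeroing bits
// (bits 3 and 7), which the code above handles by substituting a zero vector.
auto Vperm2MaskSketch = [](uint8_t Imm, unsigned NumElts) {
  unsigned Half = NumElts / 2;
  SmallVector<int, 8> M(NumElts);
  unsigned Lo = (Imm & 0x01) ? Half : 0; // source half feeding the low half
  unsigned Hi = (Imm & 0x10) ? Half : 0; // source half feeding the high half
  for (unsigned i = 0; i != Half; ++i) {
    M[i] = Lo + i;                  // low half reads V0
    M[i + Half] = NumElts + Hi + i; // high half reads V1
  }
  return M; // e.g. Imm=0x21, NumElts=8 -> {4,5,6,7,8,9,10,11}
};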
2856 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2857 Name == "sse2.pshuf.d" ||
2858 Name.startswith("avx512.mask.vpermil.p") ||
2859 Name.startswith("avx512.mask.pshuf.d."))) {
2860 Value *Op0 = CI->getArgOperand(0);
2861 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2862 auto *VecTy = cast<FixedVectorType>(CI->getType());
2863 unsigned NumElts = VecTy->getNumElements();
2864 // Calculate the size of each index in the immediate.
2865 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2866 unsigned IdxMask = ((1 << IdxSize) - 1);
2867
2868 SmallVector<int, 8> Idxs(NumElts);
2869 // Look up the bits for this element, wrapping around the immediate every
2870 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
2871 // to offset by the first index of each group.
2872 for (unsigned i = 0; i != NumElts; ++i)
2873 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2874
2875 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2876
2877 if (CI->arg_size() == 4)
2878 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2879 CI->getArgOperand(2));
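// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): the per-element
// index math above, in isolation. Each element takes IdxSize bits from Imm
// (wrapping every 8 bits), and (i & ~IdxMask) re-adds the lane base index.
auto VpermilMaskSketch = [](unsigned Imm, unsigned NumElts, unsigned EltBits) {
  unsigned IdxSize = 64 / EltBits;       // 2 bits per 32-bit elt, 1 per 64-bit
  unsigned IdxMask = (1 << IdxSize) - 1;
  SmallVector<int, 8> Idxs(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
  return Idxs; // e.g. pshuf.d Imm=0x1B on 4 x i32 -> {3,2,1,0}
};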
2880 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2881 Name.startswith("avx512.mask.pshufl.w."))) {
2882 Value *Op0 = CI->getArgOperand(0);
2883 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2884 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2885
2886 SmallVector<int, 16> Idxs(NumElts);
2887 for (unsigned l = 0; l != NumElts; l += 8) {
2888 for (unsigned i = 0; i != 4; ++i)
2889 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2890 for (unsigned i = 4; i != 8; ++i)
2891 Idxs[i + l] = i + l;
2892 }
2893
2894 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2895
2896 if (CI->arg_size() == 4)
2897 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2898 CI->getArgOperand(2));
2899 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2900 Name.startswith("avx512.mask.pshufh.w."))) {
2901 Value *Op0 = CI->getArgOperand(0);
2902 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2903 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2904
2905 SmallVector<int, 16> Idxs(NumElts);
2906 for (unsigned l = 0; l != NumElts; l += 8) {
2907 for (unsigned i = 0; i != 4; ++i)
2908 Idxs[i + l] = i + l;
2909 for (unsigned i = 0; i != 4; ++i)
2910 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2911 }
2912
2913 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2914
2915 if (CI->arg_size() == 4)
2916 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2917 CI->getArgOperand(2));
2918 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2919 Value *Op0 = CI->getArgOperand(0);
2920 Value *Op1 = CI->getArgOperand(1);
2921 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2922 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2923
2924 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2925 unsigned HalfLaneElts = NumLaneElts / 2;
2926
2927 SmallVector<int, 16> Idxs(NumElts);
2928 for (unsigned i = 0; i != NumElts; ++i) {
2929 // Base index is the starting element of the lane.
2930 Idxs[i] = i - (i % NumLaneElts);
2931 // If we are halfway through the lane, switch to the other source.
2932 if ((i % NumLaneElts) >= HalfLaneElts)
2933 Idxs[i] += NumElts;
2934 // Now select the specific element by adding HalfLaneElts bits from
2935 // the immediate, wrapping around the immediate every 8 bits.
2936 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2937 }
2938
2939 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940
2941 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2942 CI->getArgOperand(3));
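// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): the shufp
// index math above, in isolation. The low half of each 128-bit lane reads
// Op0, the high half reads Op1, and HalfLaneElts immediate bits select the
// element within the lane.
auto ShufpMaskSketch = [](unsigned Imm, unsigned NumElts, unsigned EltBits) {
  unsigned NumLaneElts = 128 / EltBits;
  unsigned Half = NumLaneElts / 2;
  SmallVector<int, 16> Idxs(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned Base = i - (i % NumLaneElts);
    if ((i % NumLaneElts) >= Half)
      Base += NumElts; // second half of the lane comes from Op1
    Idxs[i] = Base + ((Imm >> ((i * Half) % 8)) & ((1 << Half) - 1));
  }
  return Idxs; // e.g. shuf.ps Imm=0x4E on 4 x f32 -> {2,3,4,5}
};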
2943 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2944 Name.startswith("avx512.mask.movshdup") ||
2945 Name.startswith("avx512.mask.movsldup"))) {
2946 Value *Op0 = CI->getArgOperand(0);
2947 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2948 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2949
2950 unsigned Offset = 0;
2951 if (Name.startswith("avx512.mask.movshdup."))
2952 Offset = 1;
2953
2954 SmallVector<int, 16> Idxs(NumElts);
2955 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2956 for (unsigned i = 0; i != NumLaneElts; i += 2) {
2957 Idxs[i + l + 0] = i + l + Offset;
2958 Idxs[i + l + 1] = i + l + Offset;
2959 }
2960
2961 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2962
2963 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2964 CI->getArgOperand(1));
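// Editor's sketch (hypothetical; not part of AutoUpgrade.cpp): the dup mask
// above repeats every even element of each 128-bit lane for movsldup/movddup
// (Offset = 0) and every odd element for movshdup (Offset = 1):
auto MovdupMaskSketch = [](unsigned NumElts, unsigned LaneElts,
                           unsigned Offset) {
  SmallVector<int, 16> Idxs(NumElts);
  for (unsigned l = 0; l != NumElts; l += LaneElts)
    for (unsigned i = 0; i != LaneElts; i += 2)
      Idxs[l + i] = Idxs[l + i + 1] = l + i + Offset;
  return Idxs; // e.g. movshdup on 4 x f32 -> {1,1,3,3}
};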
2965 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2966 Name.startswith("avx512.mask.unpckl."))) {
2967 Value *Op0 = CI->getArgOperand(0);
2968 Value *Op1 = CI->getArgOperand(1);
2969 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2970 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2971
2972 SmallVector<int, 64> Idxs(NumElts);
2973 for (int l = 0; l != NumElts; l += NumLaneElts)
2974 for (int i = 0; i != NumLaneElts; ++i)
2975 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2976
2977 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2978
2979 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2980 CI->getArgOperand(2));
2981 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2982 Name.startswith("avx512.mask.unpckh."))) {
2983 Value *Op0 = CI->getArgOperand(0);
2984 Value *Op1 = CI->getArgOperand(1);
2985 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2986 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2987
2988 SmallVector<int, 64> Idxs(NumElts);
2989 for (int l = 0; l != NumElts; l += NumLaneElts)
2990 for (int i = 0; i != NumLaneElts; ++i)
2991 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2992
2993 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2994
2995 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2996 CI->getArgOperand(2));
2997 } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2998 Name.startswith("avx512.mask.pand."))) {
2999 VectorType *FTy = cast<VectorType>(CI->getType());
3000 VectorType *ITy = VectorType::getInteger(FTy);
3001 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3002 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3003 Rep = Builder.CreateBitCast(Rep, FTy);
3004 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3005 CI->getArgOperand(2));
3006 } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
3007 Name.startswith("avx512.mask.pandn."))) {
3008 VectorType *FTy = cast<VectorType>(CI->getType());
3009 VectorType *ITy = VectorType::getInteger(FTy);
3010 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3011 Rep = Builder.CreateAnd(Rep,
3012 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3013 Rep = Builder.CreateBitCast(Rep, FTy);
3014 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3015 CI->getArgOperand(2));
3016 } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
3017 Name.startswith("avx512.mask.por."))) {
3018 VectorType *FTy = cast<VectorType>(CI->getType());
3019 VectorType *ITy = VectorType::getInteger(FTy);
3020 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3021 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3022 Rep = Builder.CreateBitCast(Rep, FTy);
3023 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3024 CI->getArgOperand(2));
3025 } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3026 Name.startswith("avx512.mask.pxor."))) {
3027 VectorType *FTy = cast<VectorType>(CI->getType());
3028 VectorType *ITy = VectorType::getInteger(FTy);
3029 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3030 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3031 Rep = Builder.CreateBitCast(Rep, FTy);
3032 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3033 CI->getArgOperand(2));
3034 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3035 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3036 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3037 CI->getArgOperand(2));
3038 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3039 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3040 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3041 CI->getArgOperand(2));
3042 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3043 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3044 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3045 CI->getArgOperand(2));
3046 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3047 if (Name.endswith(".512")) {
3048 Intrinsic::ID IID;
3049 if (Name[17] == 's')
3050 IID = Intrinsic::x86_avx512_add_ps_512;
3051 else
3052 IID = Intrinsic::x86_avx512_add_pd_512;
3053
3054 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3055 { CI->getArgOperand(0), CI->getArgOperand(1),
3056 CI->getArgOperand(4) });
3057 } else {
3058 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3059 }
3060 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3061 CI->getArgOperand(2));
3062 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3063 if (Name.endswith(".512")) {
3064 Intrinsic::ID IID;
3065 if (Name[17] == 's')
3066 IID = Intrinsic::x86_avx512_div_ps_512;
3067 else
3068 IID = Intrinsic::x86_avx512_div_pd_512;
3069
3070 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3071 { CI->getArgOperand(0), CI->getArgOperand(1),
3072 CI->getArgOperand(4) });
3073 } else {
3074 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3075 }
3076 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3077 CI->getArgOperand(2));
3078 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3079 if (Name.endswith(".512")) {
3080 Intrinsic::ID IID;
3081 if (Name[17] == 's')
3082 IID = Intrinsic::x86_avx512_mul_ps_512;
3083 else
3084 IID = Intrinsic::x86_avx512_mul_pd_512;
3085
3086 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3087 { CI->getArgOperand(0), CI->getArgOperand(1),
3088 CI->getArgOperand(4) });
3089 } else {
3090 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3091 }
3092 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3093 CI->getArgOperand(2));
3094 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3095 if (Name.endswith(".512")) {
3096 Intrinsic::ID IID;
3097 if (Name[17] == 's')
3098 IID = Intrinsic::x86_avx512_sub_ps_512;
3099 else
3100 IID = Intrinsic::x86_avx512_sub_pd_512;
3101
3102 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3103 { CI->getArgOperand(0), CI->getArgOperand(1),
3104 CI->getArgOperand(4) });
3105 } else {
3106 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3107 }
3108 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3109 CI->getArgOperand(2));
3110 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3111 Name.startswith("avx512.mask.min.p")) &&
3112 Name.drop_front(18) == ".512") {
3113 bool IsDouble = Name[17] == 'd';
3114 bool IsMin = Name[13] == 'i';
3115 static const Intrinsic::ID MinMaxTbl[2][2] = {
3116 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3117 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3118 };
3119 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3120
3121 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3122 { CI->getArgOperand(0), CI->getArgOperand(1),
3123 CI->getArgOperand(4) });
3124 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3125 CI->getArgOperand(2));
3126 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3127 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3128 Intrinsic::ctlz,
3129 CI->getType()),
3130 { CI->getArgOperand(0), Builder.getInt1(false) });
3131 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3132 CI->getArgOperand(1));
3133 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3134 bool IsImmediate = Name[16] == 'i' ||
3135 (Name.size() > 18 && Name[18] == 'i');
3136 bool IsVariable = Name[16] == 'v';
3137 char Size = Name[16] == '.' ? Name[17] :
3138 Name[17] == '.' ? Name[18] :
3139 Name[18] == '.' ? Name[19] :
3140 Name[20];
3141
3142 Intrinsic::ID IID;
3143 if (IsVariable && Name[17] != '.') {
3144 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3145 IID = Intrinsic::x86_avx2_psllv_q;
3146 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3147 IID = Intrinsic::x86_avx2_psllv_q_256;
3148 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3149 IID = Intrinsic::x86_avx2_psllv_d;
3150 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3151 IID = Intrinsic::x86_avx2_psllv_d_256;
3152 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3153 IID = Intrinsic::x86_avx512_psllv_w_128;
3154 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3155 IID = Intrinsic::x86_avx512_psllv_w_256;
3156 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3157 IID = Intrinsic::x86_avx512_psllv_w_512;
3158 else
3159 llvm_unreachable("Unexpected size");
3160 } else if (Name.endswith(".128")) {
3161 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3162 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3163 : Intrinsic::x86_sse2_psll_d;
3164 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3165 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3166 : Intrinsic::x86_sse2_psll_q;
3167 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3168 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3169 : Intrinsic::x86_sse2_psll_w;
3170 else
3171 llvm_unreachable("Unexpected size");
3172 } else if (Name.endswith(".256")) {
3173 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3174 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3175 : Intrinsic::x86_avx2_psll_d;
3176 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3177 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3178 : Intrinsic::x86_avx2_psll_q;
3179 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3180 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3181 : Intrinsic::x86_avx2_psll_w;
3182 else
3183 llvm_unreachable("Unexpected size");
3184 } else {
3185 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3186 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3187 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3188 Intrinsic::x86_avx512_psll_d_512;
3189 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3190 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3191 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3192 Intrinsic::x86_avx512_psll_q_512;
3193 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3194 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3195 : Intrinsic::x86_avx512_psll_w_512;
3196 else
3197 llvm_unreachable("Unexpected size");
3198 }
3199
3200 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
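// Editor's note (illustrative examples, not an exhaustive list): the raw
// Name[16..20] probing above works because the matched prefix
// "avx512.mask.psll" is exactly 16 characters, so Name[16] is the first
// character after it. For instance:
//   "avx512.mask.psll.d.128" -> Name[16] == '.', Size = Name[17] = 'd'
//   "avx512.mask.pslli.d"    -> Name[16] == 'i' (immediate), Size = Name[18] = 'd'
//   "avx512.mask.psllv8.si"  -> Name[16] == 'v' (variable), Size = Name[19] = 's'
// The psrl and psra cases below parse their names the same way.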
3201 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3202 bool IsImmediate = Name[16] == 'i' ||
3203 (Name.size() > 18 && Name[18] == 'i');
3204 bool IsVariable = Name[16] == 'v';
3205 char Size = Name[16] == '.' ? Name[17] :
3206 Name[17] == '.' ? Name[18] :
3207 Name[18] == '.' ? Name[19] :
3208 Name[20];
3209
3210 Intrinsic::ID IID;
3211 if (IsVariable && Name[17] != '.') {
3212 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3213 IID = Intrinsic::x86_avx2_psrlv_q;
3214 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3215 IID = Intrinsic::x86_avx2_psrlv_q_256;
3216 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3217 IID = Intrinsic::x86_avx2_psrlv_d;
3218 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3219 IID = Intrinsic::x86_avx2_psrlv_d_256;
3220 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3221 IID = Intrinsic::x86_avx512_psrlv_w_128;
3222 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3223 IID = Intrinsic::x86_avx512_psrlv_w_256;
3224 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3225 IID = Intrinsic::x86_avx512_psrlv_w_512;
3226 else
3227 llvm_unreachable("Unexpected size");
3228 } else if (Name.endswith(".128")) {
3229 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3230 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3231 : Intrinsic::x86_sse2_psrl_d;
3232 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3233 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3234 : Intrinsic::x86_sse2_psrl_q;
3235 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3236 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3237 : Intrinsic::x86_sse2_psrl_w;
3238 else
3239 llvm_unreachable("Unexpected size");
3240 } else if (Name.endswith(".256")) {
3241 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3242 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3243 : Intrinsic::x86_avx2_psrl_d;
3244 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3245 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3246 : Intrinsic::x86_avx2_psrl_q;
3247 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3248 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3249 : Intrinsic::x86_avx2_psrl_w;
3250 else
3251 llvm_unreachable("Unexpected size");
3252 } else {
3253 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3254 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3255 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3256 Intrinsic::x86_avx512_psrl_d_512;
3257 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3258 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3259 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3260 Intrinsic::x86_avx512_psrl_q_512;
3261 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3262 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3263 : Intrinsic::x86_avx512_psrl_w_512;
3264 else
3265 llvm_unreachable("Unexpected size");
3266 }
3267
3268 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3269 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3270 bool IsImmediate = Name[16] == 'i' ||
3271 (Name.size() > 18 && Name[18] == 'i');
3272 bool IsVariable = Name[16] == 'v';
3273 char Size = Name[16] == '.' ? Name[17] :
3274 Name[17] == '.' ? Name[18] :
3275 Name[18] == '.' ? Name[19] :
3276 Name[20];
3277
3278 Intrinsic::ID IID;
3279 if (IsVariable && Name[17] != '.') {
3280 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3281 IID = Intrinsic::x86_avx2_psrav_d;
3282 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3283 IID = Intrinsic::x86_avx2_psrav_d_256;
3284 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3285 IID = Intrinsic::x86_avx512_psrav_w_128;
3286 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3287 IID = Intrinsic::x86_avx512_psrav_w_256;
3288 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3289 IID = Intrinsic::x86_avx512_psrav_w_512;
3290 else
3291 llvm_unreachable("Unexpected size");
3292 } else if (Name.endswith(".128")) {
3293 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3294 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3295 : Intrinsic::x86_sse2_psra_d;
3296 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3297 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3298 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3299 Intrinsic::x86_avx512_psra_q_128;
3300 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3301 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3302 : Intrinsic::x86_sse2_psra_w;
3303 else
3304 llvm_unreachable("Unexpected size");
3305 } else if (Name.endswith(".256")) {
3306 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3307 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3308 : Intrinsic::x86_avx2_psra_d;
3309 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3310 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3311 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3312 Intrinsic::x86_avx512_psra_q_256;
3313 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3314 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3315 : Intrinsic::x86_avx2_psra_w;
3316 else
3317 llvm_unreachable("Unexpected size");
3318 } else {
3319 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3320 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3321 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3322 Intrinsic::x86_avx512_psra_d_512;
3323 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3324 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3325 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3326 Intrinsic::x86_avx512_psra_q_512;
3327 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3328 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3329 : Intrinsic::x86_avx512_psra_w_512;
3330 else
3331 llvm_unreachable("Unexpected size");
3332 }
3333
3334 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3335 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3336 Rep = upgradeMaskedMove(Builder, *CI);
3337 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3338 Rep = UpgradeMaskToInt(Builder, *CI);
3339 } else if (IsX86 && Name.endswith(".movntdqa")) {
3340 Module *M = F->getParent();
3341 MDNode *Node = MDNode::get(
3342 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3343
3344 Value *Ptr = CI->getArgOperand(0);
3345
3346 // Convert the type of the pointer to a pointer to the stored type.
3347 Value *BC = Builder.CreateBitCast(
3348 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3349 LoadInst *LI = Builder.CreateAlignedLoad(
3350 CI->getType(), BC,
3351 Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3352 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3353 Rep = LI;
3354 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3355 Name.startswith("fma.vfmsub.") ||
3356 Name.startswith("fma.vfnmadd.") ||
3357 Name.startswith("fma.vfnmsub."))) {
3358 bool NegMul = Name[6] == 'n';
3359 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3360 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3361
3362 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3363 CI->getArgOperand(2) };
3364
3365 if (IsScalar) {
3366 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3367 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3368 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3369 }
3370
3371 if (NegMul && !IsScalar)
3372 Ops[0] = Builder.CreateFNeg(Ops[0]);
3373 if (NegMul && IsScalar)
3374 Ops[1] = Builder.CreateFNeg(Ops[1]);
3375 if (NegAcc)
3376 Ops[2] = Builder.CreateFNeg(Ops[2]);
3377
3378 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3379 Intrinsic::fma,
3380 Ops[0]->getType()),
3381 Ops);
3382
3383 if (IsScalar)
3384 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3385 (uint64_t)0);
3386 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3387 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3388 CI->getArgOperand(2) };
3389
3390 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3391 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3392 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3393
3394 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3395 Intrinsic::fma,
3396 Ops[0]->getType()),
3397 Ops);
3398
3399 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3400 Rep, (uint64_t)0);
3401 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3402 Name.startswith("avx512.maskz.vfmadd.s") ||
3403 Name.startswith("avx512.mask3.vfmadd.s") ||
3404 Name.startswith("avx512.mask3.vfmsub.s") ||
3405 Name.startswith("avx512.mask3.vfnmsub.s"))) {
3406 bool IsMask3 = Name[11] == '3';
3407 bool IsMaskZ = Name[11] == 'z';
3408 // Drop the "avx512.mask." to make it easier.
3409 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3410 bool NegMul = Name[2] == 'n';
3411 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3412
3413 Value *A = CI->getArgOperand(0);
3414 Value *B = CI->getArgOperand(1);
3415 Value *C = CI->getArgOperand(2);
3416
3417 if (NegMul && (IsMask3 || IsMaskZ))
3418 A = Builder.CreateFNeg(A);
3419 if (NegMul && !(IsMask3 || IsMaskZ))
3420 B = Builder.CreateFNeg(B);
3421 if (NegAcc)
3422 C = Builder.CreateFNeg(C);
3423
3424 A = Builder.CreateExtractElement(A, (uint64_t)0);
3425 B = Builder.CreateExtractElement(B, (uint64_t)0);
3426 C = Builder.CreateExtractElement(C, (uint64_t)0);
3427
3428 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3429 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3430 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3431
3432 Intrinsic::ID IID;
3433 if (Name.back() == 'd')
3434 IID = Intrinsic::x86_avx512_vfmadd_f64;
3435 else
3436 IID = Intrinsic::x86_avx512_vfmadd_f32;
3437 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3438 Rep = Builder.CreateCall(FMA, Ops);
3439 } else {
3440 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3441 Intrinsic::fma,
3442 A->getType());
3443 Rep = Builder.CreateCall(FMA, { A, B, C });
3444 }
3445
3446 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3447 IsMask3 ? C : A;
3448
3449 // For Mask3 with NegAcc, we need to create a new extractelement that
3450 // avoids the negation above.
3451 if (NegAcc && IsMask3)
3452 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3453 (uint64_t)0);
3454
3455 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3456 Rep, PassThru);
3457 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3458 Rep, (uint64_t)0);
3459 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3460 Name.startswith("avx512.mask.vfnmadd.p") ||
3461 Name.startswith("avx512.mask.vfnmsub.p") ||
3462 Name.startswith("avx512.mask3.vfmadd.p") ||
3463 Name.startswith("avx512.mask3.vfmsub.p") ||
3464 Name.startswith("avx512.mask3.vfnmsub.p") ||
3465 Name.startswith("avx512.maskz.vfmadd.p"))) {
3466 bool IsMask3 = Name[11] == '3';
3467 bool IsMaskZ = Name[11] == 'z';
3468 // Drop the "avx512.mask." to make it easier.
3469 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3470 bool NegMul = Name[2] == 'n';
3471 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3472
3473 Value *A = CI->getArgOperand(0);
3474 Value *B = CI->getArgOperand(1);
3475 Value *C = CI->getArgOperand(2);
3476
3477 if (NegMul && (IsMask3 || IsMaskZ))
3478 A = Builder.CreateFNeg(A);
3479 if (NegMul && !(IsMask3 || IsMaskZ))
3480 B = Builder.CreateFNeg(B);
3481 if (NegAcc)
3482 C = Builder.CreateFNeg(C);
3483
3484 if (CI->arg_size() == 5 &&
3485 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3486 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3487 Intrinsic::ID IID;
3488 // Check the character before ".512" in the string.
3489 if (Name[Name.size()-5] == 's')
3490 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3491 else
3492 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3493
3494 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3495 { A, B, C, CI->getArgOperand(4) });
3496 } else {
3497 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3498 Intrinsic::fma,
3499 A->getType());
3500 Rep = Builder.CreateCall(FMA, { A, B, C });
3501 }
3502
3503 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3504 IsMask3 ? CI->getArgOperand(2) :
3505 CI->getArgOperand(0);
3506
3507 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3508 } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
3509 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3510 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3511 Intrinsic::ID IID;
3512 if (VecWidth == 128 && EltWidth == 32)
3513 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3514 else if (VecWidth == 256 && EltWidth == 32)
3515 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3516 else if (VecWidth == 128 && EltWidth == 64)
3517 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3518 else if (VecWidth == 256 && EltWidth == 64)
3519 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3520 else
3521 llvm_unreachable("Unexpected intrinsic");
3522
3523 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3524 CI->getArgOperand(2) };
3525 Ops[2] = Builder.CreateFNeg(Ops[2]);
3526 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3527 Ops);
3528 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3529 Name.startswith("avx512.mask3.vfmaddsub.p") ||
3530 Name.startswith("avx512.maskz.vfmaddsub.p") ||
3531 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3532 bool IsMask3 = Name[11] == '3';
3533 bool IsMaskZ = Name[11] == 'z';
3534 // Drop the "avx512.mask." to make it easier.
3535 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3536 bool IsSubAdd = Name[3] == 's';
3537 if (CI->arg_size() == 5) {
3538 Intrinsic::ID IID;
3539 // Check the character before ".512" in the string.
3540 if (Name[Name.size()-5] == 's')
3541 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3542 else
3543 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3544
3545 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3546 CI->getArgOperand(2), CI->getArgOperand(4) };
3547 if (IsSubAdd)
3548 Ops[2] = Builder.CreateFNeg(Ops[2]);
3549
3550 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3551 Ops);
3552 } else {
3553 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3554
3555 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3556 CI->getArgOperand(2) };
3557
3558 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3559 Ops[0]->getType());
3560 Value *Odd = Builder.CreateCall(FMA, Ops);
3561 Ops[2] = Builder.CreateFNeg(Ops[2]);
3562 Value *Even = Builder.CreateCall(FMA, Ops);
3563
3564 if (IsSubAdd)
3565 std::swap(Even, Odd);
3566
3567 SmallVector<int, 32> Idxs(NumElts);
3568 for (int i = 0; i != NumElts; ++i)
3569 Idxs[i] = i + (i % 2) * NumElts;
3570
3571 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3572 }
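// Editor's note (sketch): the interleave mask above is i + (i % 2) * NumElts
// over the pair (Even, Odd), so even destination lanes take Even (a*b - c)
// and odd lanes take Odd (a*b + c), which is exactly vfmaddsub. For
// NumElts = 4 the mask is {0, 5, 2, 7}; IsSubAdd swaps the two results first.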
3573
3574 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3575 IsMask3 ? CI->getArgOperand(2) :
3576 CI->getArgOperand(0);
3577
3578 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3579 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3580 Name.startswith("avx512.maskz.pternlog."))) {
3581 bool ZeroMask = Name[11] == 'z';
3582 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3583 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3584 Intrinsic::ID IID;
3585 if (VecWidth == 128 && EltWidth == 32)
3586 IID = Intrinsic::x86_avx512_pternlog_d_128;
3587 else if (VecWidth == 256 && EltWidth == 32)
3588 IID = Intrinsic::x86_avx512_pternlog_d_256;
3589 else if (VecWidth == 512 && EltWidth == 32)
3590 IID = Intrinsic::x86_avx512_pternlog_d_512;
3591 else if (VecWidth == 128 && EltWidth == 64)
3592 IID = Intrinsic::x86_avx512_pternlog_q_128;
3593 else if (VecWidth == 256 && EltWidth == 64)
3594 IID = Intrinsic::x86_avx512_pternlog_q_256;
3595 else if (VecWidth == 512 && EltWidth == 64)
3596 IID = Intrinsic::x86_avx512_pternlog_q_512;
3597 else
3598 llvm_unreachable("Unexpected intrinsic");
3599
3600 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3601 CI->getArgOperand(2), CI->getArgOperand(3) };
3602 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3603 Args);
3604 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3605 : CI->getArgOperand(0);
3606 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3607 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3608 Name.startswith("avx512.maskz.vpmadd52"))) {
3609 bool ZeroMask = Name[11] == 'z';
3610 bool High = Name[20] == 'h' || Name[21] == 'h';
3611 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3612 Intrinsic::ID IID;
3613 if (VecWidth == 128 && !High)
3614 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3615 else if (VecWidth == 256 && !High)
3616 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3617 else if (VecWidth == 512 && !High)
3618 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3619 else if (VecWidth == 128 && High)
3620 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3621 else if (VecWidth == 256 && High)
3622 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3623 else if (VecWidth == 512 && High)
3624 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3625 else
3626 llvm_unreachable("Unexpected intrinsic");
3627
3628 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3629 CI->getArgOperand(2) };
3630 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3631 Args);
3632 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3633 : CI->getArgOperand(0);
3634 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3635 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3636 Name.startswith("avx512.mask.vpermt2var.") ||
3637 Name.startswith("avx512.maskz.vpermt2var."))) {
3638 bool ZeroMask = Name[11] == 'z';
3639 bool IndexForm = Name[17] == 'i';
3640 Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3641 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3642 Name.startswith("avx512.maskz.vpdpbusd.") ||
3643 Name.startswith("avx512.mask.vpdpbusds.") ||
3644 Name.startswith("avx512.maskz.vpdpbusds."))) {
3645 bool ZeroMask = Name[11] == 'z';
3646 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3647 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3648 Intrinsic::ID IID;
3649 if (VecWidth == 128 && !IsSaturating)
3650 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3651 else if (VecWidth == 256 && !IsSaturating)
3652 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3653 else if (VecWidth == 512 && !IsSaturating)
3654 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3655 else if (VecWidth == 128 && IsSaturating)
3656 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3657 else if (VecWidth == 256 && IsSaturating)
3658 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3659 else if (VecWidth == 512 && IsSaturating)
3660 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3661 else
3662 llvm_unreachable("Unexpected intrinsic");
3663
3664 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3665 CI->getArgOperand(2) };
3666 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3667 Args);
3668 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3669 : CI->getArgOperand(0);
3670 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3671 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3672 Name.startswith("avx512.maskz.vpdpwssd.") ||
3673 Name.startswith("avx512.mask.vpdpwssds.") ||
3674 Name.startswith("avx512.maskz.vpdpwssds."))) {
3675 bool ZeroMask = Name[11] == 'z';
3676 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3677 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3678 Intrinsic::ID IID;
3679 if (VecWidth == 128 && !IsSaturating)
3680 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3681 else if (VecWidth == 256 && !IsSaturating)
3682 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3683 else if (VecWidth == 512 && !IsSaturating)
3684 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3685 else if (VecWidth == 128 && IsSaturating)
3686 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3687 else if (VecWidth == 256 && IsSaturating)
3688 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3689 else if (VecWidth == 512 && IsSaturating)
3690 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3691 else
3692 llvm_unreachable("Unexpected intrinsic");
3693
3694 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3695 CI->getArgOperand(2) };
3696 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3697 Args);
3698 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3699 : CI->getArgOperand(0);
3700 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3701 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3702 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3703 Name == "subborrow.u32" || Name == "subborrow.u64")) {
3704 Intrinsic::ID IID;
3705 if (Name[0] == 'a' && Name.back() == '2')
3706 IID = Intrinsic::x86_addcarry_32;
3707 else if (Name[0] == 'a' && Name.back() == '4')
3708 IID = Intrinsic::x86_addcarry_64;
3709 else if (Name[0] == 's' && Name.back() == '2')
3710 IID = Intrinsic::x86_subborrow_32;
3711 else if (Name[0] == 's' && Name.back() == '4')
3712 IID = Intrinsic::x86_subborrow_64;
3713 else
3714 llvm_unreachable("Unexpected intrinsic");
3715
3716 // Make a call with 3 operands.
3717 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3718 CI->getArgOperand(2)};
3719 Value *NewCall = Builder.CreateCall(
3720 Intrinsic::getDeclaration(CI->getModule(), IID),
3721 Args);
3722
3723 // Extract the second result and store it.
3724 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3725 // Cast the pointer to the right type.
3726 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3727 llvm::PointerType::getUnqual(Data->getType()));
3728 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3729 // Replace the original call result with the first result of the new call.
3730 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3731
3732 CI->replaceAllUsesWith(CF);
3733 Rep = nullptr;
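// Editor's note (sketch): the replacement llvm.x86.addcarry.* /
// llvm.x86.subborrow.* intrinsics return an {i8 carry-out, iN result} pair,
// so the code above stores element 1 through the old pointer operand and
// redirects users of the old call to element 0. The suffix test keys off the
// final character of the old name: "...u32" ends in '2', "...u64" in '4'.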
3734 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3735 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3736 // Rep will be updated by the call in the condition.
3737 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3738 Value *Arg = CI->getArgOperand(0);
3739 Value *Neg = Builder.CreateNeg(Arg, "neg");
3740 Value *Cmp = Builder.CreateICmpSGE(
3741 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3742 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3743 } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3744 Name.startswith("atomic.load.add.f64.p"))) {
3745 Value *Ptr = CI->getArgOperand(0);
3746 Value *Val = CI->getArgOperand(1);
3747 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3748 AtomicOrdering::SequentiallyConsistent);
3749 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3750 Name == "max.ui" || Name == "max.ull")) {
3751 Value *Arg0 = CI->getArgOperand(0);
3752 Value *Arg1 = CI->getArgOperand(1);
3753 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3754 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3755 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3756 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3757 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3758 Name == "min.ui" || Name == "min.ull")) {
3759 Value *Arg0 = CI->getArgOperand(0);
3760 Value *Arg1 = CI->getArgOperand(1);
3761 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3762 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3763 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3764 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3765 } else if (IsNVVM && Name == "clz.ll") {
3766 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3767 Value *Arg = CI->getArgOperand(0);
3768 Value *Ctlz = Builder.CreateCall(
3769 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3770 {Arg->getType()}),
3771 {Arg, Builder.getFalse()}, "ctlz");
3772 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3773 } else if (IsNVVM && Name == "popc.ll") {
3774 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3775 // i64.
3776 Value *Arg = CI->getArgOperand(0);
3777 Value *Popc = Builder.CreateCall(
3778 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3779 {Arg->getType()}),
3780 Arg, "ctpop");
3781 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3782 } else if (IsNVVM && Name == "h2f") {
3783 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3784 F->getParent(), Intrinsic::convert_from_fp16,
3785 {Builder.getFloatTy()}),
3786 CI->getArgOperand(0), "h2f");
3787 } else if (IsARM) {
3788 Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3789 } else {
3790 llvm_unreachable("Unknown function for CallBase upgrade.");
3791 }
3792
3793 if (Rep)
3794 CI->replaceAllUsesWith(Rep);
3795 CI->eraseFromParent();
3796 return;
3797 }
3798
3799 const auto &DefaultCase = [&]() -> void {
3800 if (CI->getFunctionType() == NewFn->getFunctionType()) {
3801 // Handle generic mangling change.
3802 assert(
3803 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3804 "Unknown function for CallBase upgrade and isn't just a name change");
3805 CI->setCalledFunction(NewFn);
3806 return;
3807 }
3808
3809 // This must be an upgrade from a named to a literal struct.
3810 auto *OldST = cast<StructType>(CI->getType());
3811 assert(OldST != NewFn->getReturnType() && "Return type must have changed");
3812 assert(OldST->getNumElements() ==
3813 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
3814 "Must have same number of elements");
3815
3816 SmallVector<Value *> Args(CI->args());
3817 Value *NewCI = Builder.CreateCall(NewFn, Args);
3818 Value *Res = PoisonValue::get(OldST);
3819 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
3820 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
3821 Res = Builder.CreateInsertValue(Res, Elem, Idx);
3822 }
3823 CI->replaceAllUsesWith(Res);
3824 CI->eraseFromParent();
3825 return;
3826 };
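// Hedged sketch of the named-to-literal-struct path above (assumed types):
// if the old call returned a named struct %pair = type { i32, i32 } and NewFn
// returns the literal { i32, i32 }, the new call's elements are copied back
// one by one via extractvalue/insertvalue so existing users keep seeing the
// old named type.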
3827 CallInst *NewCall = nullptr;
3828 switch (NewFn->getIntrinsicID()) {
3829 default: {
3830 DefaultCase();
3831 return;
3832 }
3833 case Intrinsic::arm_neon_vst1:
3834 case Intrinsic::arm_neon_vst2:
3835 case Intrinsic::arm_neon_vst3:
3836 case Intrinsic::arm_neon_vst4:
3837 case Intrinsic::arm_neon_vst2lane:
3838 case Intrinsic::arm_neon_vst3lane:
3839 case Intrinsic::arm_neon_vst4lane: {
3840 SmallVector<Value *, 4> Args(CI->args());
3841 NewCall = Builder.CreateCall(NewFn, Args);
3842 break;
3843 }
3844
3845 case Intrinsic::arm_neon_bfdot:
3846 case Intrinsic::arm_neon_bfmmla:
3847 case Intrinsic::arm_neon_bfmlalb:
3848 case Intrinsic::arm_neon_bfmlalt:
3849 case Intrinsic::aarch64_neon_bfdot:
3850 case Intrinsic::aarch64_neon_bfmmla:
3851 case Intrinsic::aarch64_neon_bfmlalb:
3852 case Intrinsic::aarch64_neon_bfmlalt: {
3853 SmallVector<Value *, 3> Args;
3854 assert(CI->arg_size() == 3 &&
3855 "Mismatch between function args and call args");
3856 size_t OperandWidth =
3857 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3858 assert((OperandWidth == 64 || OperandWidth == 128) &&
3859 "Unexpected operand width");
3860 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3861 auto Iter = CI->args().begin();
3862 Args.push_back(*Iter++);
3863 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3864 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3865 NewCall = Builder.CreateCall(NewFn, Args);
3866 break;
3867 }
3868
3869 case Intrinsic::bitreverse:
3870 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3871 break;
3872
3873 case Intrinsic::ctlz:
3874 case Intrinsic::cttz:
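// Hedged sketch (assumed IR): the old single-argument form
//   %r = call i32 @llvm.ctlz.i32(i32 %x)
// gains an explicit flag; passing false preserves the old behavior of the
// result being defined for a zero input:
//   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)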
3875 assert(CI->arg_size() == 1 &&
3876 "Mismatch between function args and call args");
3877 NewCall =
3878 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3879 break;
3880
3881 case Intrinsic::objectsize: {
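// Hedged sketch (assumed IR): older two- or three-argument forms are padded
// out to the four-argument form with false defaults, e.g.
//   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false)
// becomes
//   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false, i1 false, i1 false)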
3882 Value *NullIsUnknownSize =
3883 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
3884 Value *Dynamic =
3885 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3886 NewCall = Builder.CreateCall(
3887 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3888 break;
3889 }
3890
3891 case Intrinsic::ctpop:
3892 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3893 break;
3894
3895 case Intrinsic::convert_from_fp16:
3896 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3897 break;
3898
3899 case Intrinsic::dbg_value:
3900 // Upgrade from the old version that had an extra offset argument.
3901 assert(CI->arg_size() == 4);
3902 // Drop nonzero offsets instead of attempting to upgrade them.
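// Hedged sketch (assumed metadata operands): the old form
//   call void @llvm.dbg.value(metadata %v, i64 0, metadata !var, metadata !expr)
// becomes
//   call void @llvm.dbg.value(metadata %v, metadata !var, metadata !expr)
// while a call carrying a nonzero offset is simply erased.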
3903 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3904 if (Offset->isZeroValue()) {
3905 NewCall = Builder.CreateCall(
3906 NewFn,
3907 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3908 break;
3909 }
3910 CI->eraseFromParent();
3911 return;
3912
3913 case Intrinsic::ptr_annotation:
3914 // Upgrade from versions that lacked the annotation attribute argument.
3915 if (CI->arg_size() != 4) {
3916 DefaultCase();
3917 return;
3918 }
3919
3920 // Create a new call with an added null annotation attribute argument.
3921 NewCall = Builder.CreateCall(
3922 NewFn,
3923 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3924 CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3925 NewCall->takeName(CI);
3926 CI->replaceAllUsesWith(NewCall);
3927 CI->eraseFromParent();
3928 return;
3929
3930 case Intrinsic::var_annotation:
3931 // Upgrade from versions that lacked the annotation attribute argument.
3932 assert(CI->arg_size() == 4 &&
3933 "Before LLVM 12.0 this intrinsic took four arguments");
3934 // Create a new call with an added null annotation attribute argument.
3935 NewCall = Builder.CreateCall(
3936 NewFn,
3937 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3938 CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3939 CI->eraseFromParent();
3940 return;
3941
3942 case Intrinsic::x86_xop_vfrcz_ss:
3943 case Intrinsic::x86_xop_vfrcz_sd:
3944 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3945 break;
3946
3947 case Intrinsic::x86_xop_vpermil2pd:
3948 case Intrinsic::x86_xop_vpermil2ps:
3949 case Intrinsic::x86_xop_vpermil2pd_256:
3950 case Intrinsic::x86_xop_vpermil2ps_256: {
3951 SmallVector<Value *, 4> Args(CI->args());
3952 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3953 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3954 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3955 NewCall = Builder.CreateCall(NewFn, Args);
3956 break;
3957 }
3958
3959 case Intrinsic::x86_sse41_ptestc:
3960 case Intrinsic::x86_sse41_ptestz:
3961 case Intrinsic::x86_sse41_ptestnzc: {
3962 // The arguments for these intrinsics used to be v4f32 and were changed
3963 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3964 // So, the only thing required is a bitcast for both arguments.
3965 // First, check the arguments have the old type.
3966 Value *Arg0 = CI->getArgOperand(0);
3967 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3968 return;
3969
3970 // Old intrinsic, add bitcasts
3971 Value *Arg1 = CI->getArgOperand(1);
3972
3973 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3974
3975 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3976 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3977
3978 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3979 break;
3980 }
3981
3982 case Intrinsic::x86_rdtscp: {
3983 // This used to take 1 argument. If we have no arguments, it is already
3984 // upgraded.
3985 if (CI->getNumOperands() == 0)
3986 return;
3987
3988 NewCall = Builder.CreateCall(NewFn);
3989 // Extract the second result and store it.
3990 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3991 // Cast the pointer to the right type.
3992 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3993 llvm::PointerType::getUnqual(Data->getType()));
3994 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3995 // Replace the original call result with the first result of the new call.
3996 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3997
3998 NewCall->takeName(CI);
3999 CI->replaceAllUsesWith(TSC);
4000 CI->eraseFromParent();
4001 return;
4002 }
4003
4004 case Intrinsic::x86_sse41_insertps:
4005 case Intrinsic::x86_sse41_dppd:
4006 case Intrinsic::x86_sse41_dpps:
4007 case Intrinsic::x86_sse41_mpsadbw:
4008 case Intrinsic::x86_avx_dp_ps_256:
4009 case Intrinsic::x86_avx2_mpsadbw: {
4010 // Need to truncate the last argument from i32 to i8 -- this argument models
4011 // an inherently 8-bit immediate operand to these x86 instructions.
4012 SmallVector<Value *, 4> Args(CI->args());
4013
4014 // Replace the last argument with a trunc.
4015 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4016 NewCall = Builder.CreateCall(NewFn, Args);
4017 break;
4018 }
4019
4020 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4021 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4022 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4023 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4024 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4025 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4026 SmallVector<Value *, 4> Args(CI->args());
4027 unsigned NumElts =
4028 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4029 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4030
4031 NewCall = Builder.CreateCall(NewFn, Args);
4032 Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4033
4034 NewCall->takeName(CI);
4035 CI->replaceAllUsesWith(Res);
4036 CI->eraseFromParent();
4037 return;
4038 }
4039
4040 case Intrinsic::thread_pointer: {
4041 NewCall = Builder.CreateCall(NewFn, {});
4042 break;
4043 }
4044
4045 case Intrinsic::invariant_start:
4046 case Intrinsic::invariant_end: {
4047 SmallVector<Value *, 4> Args(CI->args());
4048 NewCall = Builder.CreateCall(NewFn, Args);
4049 break;
4050 }
4051 case Intrinsic::masked_load:
4052 case Intrinsic::masked_store:
4053 case Intrinsic::masked_gather:
4054 case Intrinsic::masked_scatter: {
4055 SmallVector<Value *, 4> Args(CI->args());
4056 NewCall = Builder.CreateCall(NewFn, Args);
4057 NewCall->copyMetadata(*CI);
4058 break;
4059 }
4060
4061 case Intrinsic::memcpy:
4062 case Intrinsic::memmove:
4063 case Intrinsic::memset: {
4064 // We have to make sure that the call signature is what we're expecting.
4065 // We only want to change the old signatures by removing the alignment arg:
4066 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4067 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4068 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4069 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4070 // Note: i8*'s in the above can be any pointer type
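// Hedged example (assumed IR): a five-argument call such as
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 8, i1 false)
// becomes a four-argument call with the alignment moved to attributes:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %d, i8* align 8 %s, i64 %n, i1 false)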
4071 if (CI->arg_size() != 5) {
4072 DefaultCase();
4073 return;
4074 }
4075 // Remove alignment argument (3), and add alignment attributes to the
4076 // dest/src pointers.
4077 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4078 CI->getArgOperand(2), CI->getArgOperand(4)};
4079 NewCall = Builder.CreateCall(NewFn, Args);
4080 AttributeList OldAttrs = CI->getAttributes();
4081 AttributeList NewAttrs = AttributeList::get(
4082 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4083 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4084 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4085 NewCall->setAttributes(NewAttrs);
4086 auto *MemCI = cast<MemIntrinsic>(NewCall);
4087 // All mem intrinsics support dest alignment.
4088 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4089 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4090 // Memcpy/Memmove also support source alignment.
4091 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4092 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4093 break;
4094 }
4095 }
4096 assert(NewCall && "Should have either set this variable or returned through "
4097 "the default case");
4098 NewCall->takeName(CI);
4099 CI->replaceAllUsesWith(NewCall);
4100 CI->eraseFromParent();
4101}
4102
4103void llvm::UpgradeCallsToIntrinsic(Function *F) {
4104 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4105
4106 // Check if this function should be upgraded and get the replacement function
4107 // if there is one.
4108 Function *NewFn;
4109 if (UpgradeIntrinsicFunction(F, NewFn)) {
4110 // Replace all users of the old function with the new function or new
4111 // instructions. This is not a range loop because the call is deleted.
4112 for (User *U : make_early_inc_range(F->users()))
4113 if (CallBase *CB = dyn_cast<CallBase>(U))
4114 UpgradeIntrinsicCall(CB, NewFn);
4115
4116 // Remove old function, no longer used, from the module.
4117 F->eraseFromParent();
4118 }
4119}
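// A minimal usage sketch (assumed driver code, not part of this file): a
// bitcode reader would typically run this over every function in the module,
// using an early-increment range because upgraded functions are erased:
//   for (Function &Fn : llvm::make_early_inc_range(M.functions()))
//     UpgradeCallsToIntrinsic(&Fn);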
4120
4121MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4122 // Check if the tag uses struct-path aware TBAA format.
4123 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4124 return &MD;
4125
4126 auto &Context = MD.getContext();
4127 if (MD.getNumOperands() == 3) {
4128 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4129 MDNode *ScalarType = MDNode::get(Context, Elts);
4130 // Create an MDNode <ScalarType, ScalarType, offset 0, const>
4131 Metadata *Elts2[] = {ScalarType, ScalarType,
4132 ConstantAsMetadata::get(
4133 Constant::getNullValue(Type::getInt64Ty(Context))),
4134 MD.getOperand(2)};
4135 return MDNode::get(Context, Elts2);
4136 }
4137 // Create an MDNode <MD, MD, offset 0>
4138 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4139 Type::getInt64Ty(Context)))};
4140 return MDNode::get(Context, Elts);
4141}
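// Hedged sketch (assumed metadata): an old two-operand scalar tag
//   !1 = !{!"int", !0}
// is upgraded to the struct-path access-tag form <node, node, offset 0>:
//   !2 = !{!1, !1, i64 0}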
4142
4143Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4144 Instruction *&Temp) {
4145 if (Opc != Instruction::BitCast)
4146 return nullptr;
4147
4148 Temp = nullptr;
4149 Type *SrcTy = V->getType();
4150 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4151 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4152 LLVMContext &Context = V->getContext();
4153
4154 // We have no information about target data layout, so we assume that
4155 // the maximum pointer size is 64 bits.
4156 Type *MidTy = Type::getInt64Ty(Context);
4157 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4158
4159 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4160 }
4161
4162 return nullptr;
4163}
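// Hedged sketch (assumed IR): a cross-address-space bitcast such as
//   %q = bitcast i8 addrspace(1)* %p to i8 addrspace(2)*
// is rebuilt as a ptrtoint/inttoptr pair through i64:
//   %t = ptrtoint i8 addrspace(1)* %p to i64
//   %q = inttoptr i64 %t to i8 addrspace(2)*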
4164
4165Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4166 if (Opc != Instruction::BitCast)
4167 return nullptr;
4168
4169 Type *SrcTy = C->getType();
4170 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4171 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4172 LLVMContext &Context = C->getContext();
4173
4174 // We have no information about target data layout, so we assume that
4175 // the maximum pointer size is 64 bits.
4176 Type *MidTy = Type::getInt64Ty(Context);
4177
4178 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4179 DestTy);
4180 }
4181
4182 return nullptr;
4183}
4184
4185/// Check the debug info version number; if it is out-of-date, drop the
4186/// debug info. Return true if the module is modified.
4187bool llvm::UpgradeDebugInfo(Module &M) {
4188 unsigned Version = getDebugMetadataVersionFromModule(M);
4189 if (Version == DEBUG_METADATA_VERSION) {
4190 bool BrokenDebugInfo = false;
4191 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4192 report_fatal_error("Broken module found, compilation aborted!");
4193 if (!BrokenDebugInfo)
4194 // Everything is ok.
4195 return false;
4196 else {
4197 // Diagnose malformed debug info.
4198 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4199 M.getContext().diagnose(Diag);
4200 }
4201 }
4202 bool Modified = StripDebugInfo(M);
4203 if (Modified && Version != DEBUG_METADATA_VERSION) {
4204 // Diagnose a version mismatch.
4205 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4206 M.getContext().diagnose(DiagVersion);
4207 }
4208 return Modified;
4209}
4210
4211/// This checks for the ObjC retain/release marker, which should be upgraded.
4212/// It returns true if the module is modified.
4213static bool UpgradeRetainReleaseMarker(Module &M) {
4214 bool Changed = false;
4215 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4216 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4217 if (ModRetainReleaseMarker) {
4218 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4219 if (Op) {
4220 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4221 if (ID) {
4222 SmallVector<StringRef, 4> ValueComp;
4223 ID->getString().split(ValueComp, "#");
4224 if (ValueComp.size() == 2) {
4225 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4226 ID = MDString::get(M.getContext(), NewValue);
4227 }
4228 M.addModuleFlag(Module::Error, MarkerKey, ID);
4229 M.eraseNamedMetadata(ModRetainReleaseMarker);
4230 Changed = true;
4231 }
4232 }
4233 }
4234 return Changed;
4235}
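// Hedged example (assumed marker value): a marker string of the form "A#B"
// in the named metadata is rewritten as "A;B" and reinstalled as an
// Error-level module flag under the same key, after which the named metadata
// node is erased.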
4236
4237void llvm::UpgradeARCRuntime(Module &M) {
4238 // This lambda converts normal function calls to ARC runtime functions
4239 // into intrinsic calls.
4240 auto UpgradeToIntrinsic = [&](const char *OldFunc,
4241 llvm::Intrinsic::ID IntrinsicFunc) {
4242 Function *Fn = M.getFunction(OldFunc);
4243
4244 if (!Fn)
4245 return;
4246
4247 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4248
4249 for (User *U : make_early_inc_range(Fn->users())) {
4250 CallInst *CI = dyn_cast<CallInst>(U);
4251 if (!CI || CI->getCalledFunction() != Fn)
4252 continue;
4253
4254 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4255 FunctionType *NewFuncTy = NewFn->getFunctionType();
4256 SmallVector<Value *, 2> Args;
4257
4258 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4259 // value to the return type of the old function.
4260 if (NewFuncTy->getReturnType() != CI->getType() &&
4261 !CastInst::castIsValid(Instruction::BitCast, CI,
4262 NewFuncTy->getReturnType()))
4263 continue;
4264
4265 bool InvalidCast = false;
4266
4267 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4268 Value *Arg = CI->getArgOperand(I);
4269
4270 // Bitcast argument to the parameter type of the new function if it's
4271 // not a variadic argument.
4272 if (I < NewFuncTy->getNumParams()) {
4273 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4274 // to the parameter type of the new function.
4275 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4276 NewFuncTy->getParamType(I))) {
4277 InvalidCast = true;
4278 break;
4279 }
4280 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4281 }
4282 Args.push_back(Arg);
4283 }
4284
4285 if (InvalidCast)
4286 continue;
4287
4288 // Create a call instruction that calls the new function.
4289 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4290 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4291 NewCall->takeName(CI);
4292
4293 // Bitcast the return value back to the type of the old call.
4294 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4295
4296 if (!CI->use_empty())
4297 CI->replaceAllUsesWith(NewRetVal);
4298 CI->eraseFromParent();
4299 }
4300
4301 if (Fn->use_empty())
4302 Fn->eraseFromParent();
4303 };
4304
4305 // Unconditionally convert a call to "clang.arc.use" to a call to
4306 // "llvm.objc.clang.arc.use".
4307 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4308
4309 // Upgrade the retain/release marker. If there is no need to upgrade the
4310 // marker, either the module is already new enough to contain the new
4311 // intrinsics or it is not ARC; there is no need to upgrade the runtime calls.
4312 if (!UpgradeRetainReleaseMarker(M))
4313 return;
4314
4315 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4316 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4317 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4318 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4319 {"objc_autoreleaseReturnValue",
4320 llvm::Intrinsic::objc_autoreleaseReturnValue},
4321 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4322 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4323 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4324 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4325 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4326 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4327 {"objc_release", llvm::Intrinsic::objc_release},
4328 {"objc_retain", llvm::Intrinsic::objc_retain},
4329 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4330 {"objc_retainAutoreleaseReturnValue",
4331 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4332 {"objc_retainAutoreleasedReturnValue",
4333 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4334 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4335 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4336 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4337 {"objc_unsafeClaimAutoreleasedReturnValue",
4338 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4339 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4340 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4341 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4342 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4343 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4344 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4345 {"objc_arc_annotation_topdown_bbstart",
4346 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4347 {"objc_arc_annotation_topdown_bbend",
4348 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4349 {"objc_arc_annotation_bottomup_bbstart",
4350 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4351 {"objc_arc_annotation_bottomup_bbend",
4352 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4353
4354 for (auto &I : RuntimeFuncs)
4355 UpgradeToIntrinsic(I.first, I.second);
4356}
4357
4358bool llvm::UpgradeModuleFlags(Module &M) {
4359 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4360 if (!ModFlags)
4361 return false;
4362
4363 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4364 bool HasSwiftVersionFlag = false;
4365 uint8_t SwiftMajorVersion, SwiftMinorVersion;
4366 uint32_t SwiftABIVersion;
4367 auto Int8Ty = Type::getInt8Ty(M.getContext());
4368 auto Int32Ty = Type::getInt32Ty(M.getContext());
4369
4370 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4371 MDNode *Op = ModFlags->getOperand(I);
4372 if (Op->getNumOperands() != 3)
4373 continue;
4374 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4375 if (!ID)
4376 continue;
4377 if (ID->getString() == "Objective-C Image Info Version")
4378 HasObjCFlag = true;
4379 if (ID->getString() == "Objective-C Class Properties")
4380 HasClassProperties = true;
4381 // Upgrade PIC/PIE module flags. The module flag behavior for these two
4382 // fields was Error and is now Max.
4383 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4384 if (auto *Behavior =
4385 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4386 if (Behavior->getLimitedValue() == Module::Error) {
4387 Type *Int32Ty = Type::getInt32Ty(M.getContext());
4388 Metadata *Ops[3] = {
4389 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4390 MDString::get(M.getContext(), ID->getString()),
4391 Op->getOperand(2)};
4392 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4393 Changed = true;
4394 }
4395 }
4396 }
4397
4398 // Upgrade branch protection and return address signing module flags. The
4399 // module flag behavior for these fields was Error and is now Min.
4400 if (ID->getString() == "branch-target-enforcement" ||
4401 ID->getString().startswith("sign-return-address")) {
4402 if (auto *Behavior =
4403 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4404 if (Behavior->getLimitedValue() == Module::Error) {
4405 Type *Int32Ty = Type::getInt32Ty(M.getContext());
4406 Metadata *Ops[3] = {
4407 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
4408 Op->getOperand(1), Op->getOperand(2)};
4409 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4410 Changed = true;
4411 }
4412 }
4413 }
4414
4415 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
4416 // section name so that llvm-lto will not complain about mismatching
4417 // module flags that are functionally the same.
4418 if (ID->getString() == "Objective-C Image Info Section") {
4419 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4420 SmallVector<StringRef, 4> ValueComp;
4421 Value->getString().split(ValueComp, " ");
4422 if (ValueComp.size() != 1) {
4423 std::string NewValue;
4424 for (auto &S : ValueComp)
4425 NewValue += S.str();
4426 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4427 MDString::get(M.getContext(), NewValue)};
4428 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4429 Changed = true;
4430 }
4431 }
4432 }
4433
4434 // IRUpgrader turns the i32-typed "Objective-C Garbage Collection" flag into
4435 // an i8 value. If the higher bits are set, it adds a module flag for Swift info.
4436 if (ID->getString() == "Objective-C Garbage Collection") {
4437 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4438 if (Md) {
4439 assert(Md->getValue() && "Expected non-empty metadata");
4440 auto Type = Md->getValue()->getType();
4441 if (Type == Int8Ty)
4442 continue;
4443 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
4444 if ((Val & 0xff) != Val) {
4445 HasSwiftVersionFlag = true;
4446 SwiftABIVersion = (Val & 0xff00) >> 8;
4447 SwiftMajorVersion = (Val & 0xff000000) >> 24;
4448 SwiftMinorVersion = (Val & 0xff0000) >> 16;
4449 }
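// Illustrative packed value (assumed for this example): Val = 0x05060700
// would yield SwiftMajorVersion = 5, SwiftMinorVersion = 6,
// SwiftABIVersion = 7, and a low GC byte of 0x00.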
4450 Metadata *Ops[3] = {
4451 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
4452 Op->getOperand(1),
4453 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
4454 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4455 Changed = true;
4456 }
4457 }
4458 }
4459
4460 // "Objective-C Class Properties" is recently added for Objective-C. We
4461 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4462 // flag of value 0, so we can correclty downgrade this flag when trying to
4463 // link an ObjC bitcode without this module flag with an ObjC bitcode with
4464 // this module flag.
4465 if (HasObjCFlag && !HasClassProperties) {
4466 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4467 (uint32_t)0);
4468 Changed = true;
4469 }
4470
4471 if (HasSwiftVersionFlag) {
4472 M.addModuleFlag(Module::Error, "Swift ABI Version",
4473 SwiftABIVersion);
4474 M.addModuleFlag(Module::Error, "Swift Major Version",
4475 ConstantInt::get(Int8Ty, SwiftMajorVersion));
4476 M.addModuleFlag(Module::Error, "Swift Minor Version",
4477 ConstantInt::get(Int8Ty, SwiftMinorVersion));
4478 Changed = true;
4479 }
4480
4481 return Changed;
4482}
4483
4484void llvm::UpgradeSectionAttributes(Module &M) {
4485 auto TrimSpaces = [](StringRef Section) -> std::string {
4486 SmallVector<StringRef, 5> Components;
4487 Section.split(Components, ',');
4488
4489 SmallString<32> Buffer;
4490 raw_svector_ostream OS(Buffer);
4491
4492 for (auto Component : Components)
4493 OS << ',' << Component.trim();
4494
4495 return std::string(OS.str().substr(1));
4496 };
4497
4498 for (auto &GV : M.globals()) {
4499 if (!GV.hasSection())
4500 continue;
4501
4502 StringRef Section = GV.getSection();
4503
4504 if (!Section.startswith("__DATA, __objc_catlist"))
4505 continue;
4506
4507 // __DATA, __objc_catlist, regular, no_dead_strip
4508 // __DATA,__objc_catlist,regular,no_dead_strip
4509 GV.setSection(TrimSpaces(Section));
4510 }
4511}
4512
4513namespace {
4514// Prior to LLVM 10.0, the strictfp attribute could be used on individual
4515// callsites within a function that did not also have the strictfp attribute.
4516// Since 10.0, if strict FP semantics are needed within a function, the
4517// function must have the strictfp attribute and all calls within the function
4518// must also have the strictfp attribute. This latter restriction is
4519// necessary to prevent unwanted libcall simplification when a function is
4520// being cloned (such as for inlining).
4521//
4522// The "dangling" strictfp attribute usage was only used to prevent constant
4523// folding and other libcall simplification. The nobuiltin attribute on the
4524// callsite has the same effect.
4525struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
4526 StrictFPUpgradeVisitor() = default;
4527
4528 void visitCallBase(CallBase &Call) {
4529 if (!Call.isStrictFP())
4530 return;
4531 if (isa<ConstrainedFPIntrinsic>(&Call))
4532 return;
4533 // If we get here, the caller doesn't have the strictfp attribute
4534 // but this callsite does. Replace the strictfp attribute with nobuiltin.
4535 Call.removeFnAttr(Attribute::StrictFP);
4536 Call.addFnAttr(Attribute::NoBuiltin);
4537 }
4538};
4539} // namespace
4540
4541void llvm::UpgradeFunctionAttributes(Function &F) {
4542 // If a function definition doesn't have the strictfp attribute,
4543 // convert any callsite strictfp attributes to nobuiltin.
4544 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
4545 StrictFPUpgradeVisitor SFPV;
4546 SFPV.visit(F);
4547 }
4548
4549 // Remove all incompatible attributes from the function.
4550 F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
4551 for (auto &Arg : F.args())
4552 Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
4553}
4554
4555static bool isOldLoopArgument(Metadata *MD) {
4556 auto *T = dyn_cast_or_null<MDTuple>(MD);
4557 if (!T)
4558 return false;
4559 if (T->getNumOperands() < 1)
4560 return false;
4561 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4562 if (!S)
4563 return false;
4564 return S->getString().startswith("llvm.vectorizer.");
4565}
4566
4567static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4568 StringRef OldPrefix = "llvm.vectorizer.";
4569 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4570
4571 if (OldTag == "llvm.vectorizer.unroll")
4572 return MDString::get(C, "llvm.loop.interleave.count");
4573
4574 return MDString::get(
4575 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4576 .str());
4577}
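// For example, "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count",
// while "llvm.vectorizer.width" maps to "llvm.loop.vectorize.width".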
4578
4579static Metadata *upgradeLoopArgument(Metadata *MD) {
4580 auto *T = dyn_cast_or_null<MDTuple>(MD);
4581 if (!T)
4582 return MD;
4583 if (T->getNumOperands() < 1)
4584 return MD;
4585 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4586 if (!OldTag)
4587 return MD;
4588 if (!OldTag->getString().startswith("llvm.vectorizer."))
4589 return MD;
4590
4591 // This has an old tag. Upgrade it.
4592 SmallVector<Metadata *, 8> Ops;
4593 Ops.reserve(T->getNumOperands());
4594 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4595 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4596 Ops.push_back(T->getOperand(I));
4597
4598 return MDTuple::get(T->getContext(), Ops);
4599}
4600
4601MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4602 auto *T = dyn_cast<MDTuple>(&N);
4603 if (!T)
4604 return &N;
4605
4606 if (none_of(T->operands(), isOldLoopArgument))
4607 return &N;
4608
4609 SmallVector<Metadata *, 8> Ops;
4610 Ops.reserve(T->getNumOperands());
4611 for (Metadata *MD : T->operands())
4612 Ops.push_back(upgradeLoopArgument(MD));
4613
4614 return MDTuple::get(T->getContext(), Ops);
4615}
4616
4617std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4618 Triple T(TT);
4619 // For AMDGPU we upgrade older DataLayouts to include the default globals
4620 // address space of 1.
4621 if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
4622 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
4623 }
4624
4625 std::string Res = DL.str();
4626 if (!T.isX86())
4627 return Res;
4628
4629 // If the datalayout matches the expected format, add pointer size address
4630 // spaces to the datalayout.
4631 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4632 if (!DL.contains(AddrSpaces)) {
4633 SmallVector<StringRef, 4> Groups;
4634 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4635 if (R.match(DL, &Groups))
4636 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
4637 }
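// Illustrative input (assumed datalayout string): "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// matches the regex above and becomes
// "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".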
4638
4639 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
4640 // Raising the alignment is safe because Clang did not produce f80 values in
4641 // the MSVC environment before this upgrade was added.
4642 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
4643 StringRef Ref = Res;
4644 auto I = Ref.find("-f80:32-");
4645 if (I != StringRef::npos)
4646 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
4647 }
4648
4649 return Res;
4650}
4651
4652void llvm::UpgradeAttributes(AttrBuilder &B) {
4653 StringRef FramePointer;
4654 Attribute A = B.getAttribute("no-frame-pointer-elim");
4655 if (A.isValid()) {
4656 // The value can be "true" or "false".
4657 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
4658 B.removeAttribute("no-frame-pointer-elim");
4659 }
4660 if (B.contains("no-frame-pointer-elim-non-leaf")) {
4661 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4662 if (FramePointer != "all")
4663 FramePointer = "non-leaf";
4664 B.removeAttribute("no-frame-pointer-elim-non-leaf");
4665 }
4666 if (!FramePointer.empty())
4667 B.addAttribute("frame-pointer", FramePointer);
4668
4669 A = B.getAttribute("null-pointer-is-valid");
4670 if (A.isValid()) {
4671 // The value can be "true" or "false".
4672 bool NullPointerIsValid = A.getValueAsString() == "true";
4673 B.removeAttribute("null-pointer-is-valid");
4674 if (NullPointerIsValid)
4675 B.addAttribute(Attribute::NullPointerIsValid);
4676 }
4677}