LCOV - code coverage report
Current view: top level - lib/IR - AutoUpgrade.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1584 1663 95.2 %
Date: 2018-07-13 00:08:38 Functions: 39 39 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the auto-upgrade helper functions.
      11             : // This is where deprecated IR intrinsics and other IR features are updated to
      12             : // current specifications.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "llvm/IR/AutoUpgrade.h"
      17             : #include "llvm/ADT/StringSwitch.h"
      18             : #include "llvm/IR/Constants.h"
      19             : #include "llvm/IR/DIBuilder.h"
      20             : #include "llvm/IR/DebugInfo.h"
      21             : #include "llvm/IR/DiagnosticInfo.h"
      22             : #include "llvm/IR/Function.h"
      23             : #include "llvm/IR/IRBuilder.h"
      24             : #include "llvm/IR/Instruction.h"
      25             : #include "llvm/IR/IntrinsicInst.h"
      26             : #include "llvm/IR/LLVMContext.h"
      27             : #include "llvm/IR/Module.h"
      28             : #include "llvm/IR/Verifier.h"
      29             : #include "llvm/Support/ErrorHandling.h"
      30             : #include "llvm/Support/Regex.h"
      31             : #include <cstring>
      32             : using namespace llvm;
      33             : 
      34        1122 : static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
      35             : 
      36             : // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
      37             : // changed their type from v4f32 to v2i64.
      38         211 : static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
      39             :                                   Function *&NewFn) {
      40             :   // Check whether this is an old version of the function, which received
      41             :   // v4f32 arguments.
      42         211 :   Type *Arg0Type = F->getFunctionType()->getParamType(0);
      43         211 :   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
      44             :     return false;
      45             : 
      46             :   // Yes, it's old, replace it with new version.
      47           8 :   rename(F);
      48           8 :   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
      49           8 :   return true;
      50             : }
      51             : 
      52             : // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
      53             : // arguments have changed their type from i32 to i8.
      54         245 : static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
      55             :                                              Function *&NewFn) {
      56             :   // Check that the last argument is an i32.
      57             :   Type *LastArgType = F->getFunctionType()->getParamType(
      58         490 :      F->getFunctionType()->getNumParams() - 1);
      59         245 :   if (!LastArgType->isIntegerTy(32))
      60             :     return false;
      61             : 
      62             :   // Move this function aside and map down.
      63          41 :   rename(F);
      64             :   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
      65          41 :   return true;
      66             : }
      67             : 
      68       16175 : static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
      69             :   // All of the intrinsics matches below should be marked with which llvm
      70             :   // version started autoupgrading them. At some point in the future we would
      71             :   // like to use this information to remove upgrade code for some older
      72             :   // intrinsics. It is currently undecided how we will determine that future
      73             :   // point.
      74             :   if (Name=="ssse3.pabs.b.128" || // Added in 6.0
      75             :       Name=="ssse3.pabs.w.128" || // Added in 6.0
      76             :       Name=="ssse3.pabs.d.128" || // Added in 6.0
      77             :       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      78             :       Name.startswith("fma.vfmadd.") || // Added in 7.0
      79             :       Name.startswith("fma.vfmsub.") || // Added in 7.0
      80             :       Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      81             :       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      82             :       Name.startswith("fma.vfnmadd.") || // Added in 7.0
      83             :       Name.startswith("fma.vfnmsub.") || // Added in 7.0
      84             :       Name.startswith("avx512.mask.vfmadd.p") || // Added in 7.0
      85             :       Name.startswith("avx512.mask.vfnmadd.p") || // Added in 7.0
      86             :       Name.startswith("avx512.mask.vfnmsub.p") || // Added in 7.0
      87             :       Name.startswith("avx512.mask3.vfmadd.p") || // Added in 7.0
      88             :       Name.startswith("avx512.maskz.vfmadd.p") || // Added in 7.0
      89             :       Name.startswith("avx512.mask3.vfmsub.p") || // Added in 7.0
      90             :       Name.startswith("avx512.mask3.vfnmsub.p") || // Added in 7.0
      91             :       Name.startswith("avx512.mask.vfmaddsub.p") || // Added in 7.0
      92             :       Name.startswith("avx512.maskz.vfmaddsub.p") || // Added in 7.0
      93             :       Name.startswith("avx512.mask3.vfmaddsub.p") || // Added in 7.0
      94             :       Name.startswith("avx512.mask3.vfmsubadd.p") || // Added in 7.0
      95             :       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      96             :       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      97             :       Name.startswith("avx512.kunpck") || //added in 6.0 
      98             :       Name.startswith("avx2.pabs.") || // Added in 6.0
      99             :       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
     100             :       Name.startswith("avx512.broadcastm") || // Added in 6.0
     101             :       Name == "sse.sqrt.ss" || // Added in 7.0
     102             :       Name == "sse2.sqrt.sd" || // Added in 7.0
     103             :       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
     104             :       Name.startswith("avx.sqrt.p") || // Added in 7.0
     105             :       Name.startswith("sse2.sqrt.p") || // Added in 7.0
     106             :       Name.startswith("sse.sqrt.p") || // Added in 7.0
     107             :       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
     108             :       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
     109             :       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
     110             :       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
     111             :       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
     112             :       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
     113             :       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
     114             :       Name.startswith("avx.vperm2f128.") || // Added in 6.0
     115             :       Name == "avx2.vperm2i128" || // Added in 6.0
     116             :       Name == "sse.add.ss" || // Added in 4.0
     117             :       Name == "sse2.add.sd" || // Added in 4.0
     118             :       Name == "sse.sub.ss" || // Added in 4.0
     119             :       Name == "sse2.sub.sd" || // Added in 4.0
     120             :       Name == "sse.mul.ss" || // Added in 4.0
     121             :       Name == "sse2.mul.sd" || // Added in 4.0
     122             :       Name == "sse.div.ss" || // Added in 4.0
     123             :       Name == "sse2.div.sd" || // Added in 4.0
     124             :       Name == "sse41.pmaxsb" || // Added in 3.9
     125             :       Name == "sse2.pmaxs.w" || // Added in 3.9
     126             :       Name == "sse41.pmaxsd" || // Added in 3.9
     127             :       Name == "sse2.pmaxu.b" || // Added in 3.9
     128             :       Name == "sse41.pmaxuw" || // Added in 3.9
     129             :       Name == "sse41.pmaxud" || // Added in 3.9
     130             :       Name == "sse41.pminsb" || // Added in 3.9
     131             :       Name == "sse2.pmins.w" || // Added in 3.9
     132             :       Name == "sse41.pminsd" || // Added in 3.9
     133             :       Name == "sse2.pminu.b" || // Added in 3.9
     134             :       Name == "sse41.pminuw" || // Added in 3.9
     135             :       Name == "sse41.pminud" || // Added in 3.9
     136             :       Name == "avx512.kand.w" || // Added in 7.0
     137             :       Name == "avx512.kandn.w" || // Added in 7.0
     138             :       Name == "avx512.knot.w" || // Added in 7.0
     139             :       Name == "avx512.kor.w" || // Added in 7.0
     140             :       Name == "avx512.kxor.w" || // Added in 7.0
     141             :       Name == "avx512.kxnor.w" || // Added in 7.0
     142             :       Name == "avx512.kortestc.w" || // Added in 7.0
     143             :       Name == "avx512.kortestz.w" || // Added in 7.0
     144             :       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
     145             :       Name.startswith("avx2.pmax") || // Added in 3.9
     146             :       Name.startswith("avx2.pmin") || // Added in 3.9
     147             :       Name.startswith("avx512.mask.pmax") || // Added in 4.0
     148             :       Name.startswith("avx512.mask.pmin") || // Added in 4.0
     149             :       Name.startswith("avx2.vbroadcast") || // Added in 3.8
     150             :       Name.startswith("avx2.pbroadcast") || // Added in 3.8
     151             :       Name.startswith("avx.vpermil.") || // Added in 3.1
     152             :       Name.startswith("sse2.pshuf") || // Added in 3.9
     153             :       Name.startswith("avx512.pbroadcast") || // Added in 3.9
     154             :       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
     155             :       Name.startswith("avx512.mask.movddup") || // Added in 3.9
     156             :       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
     157             :       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
     158             :       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
     159             :       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
     160             :       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
     161             :       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
     162             :       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
     163             :       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
     164             :       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
     165             :       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
     166             :       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
     167             :       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
     168             :       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
     169             :       Name.startswith("avx512.mask.pand.") || // Added in 3.9
     170             :       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
     171             :       Name.startswith("avx512.mask.por.") || // Added in 3.9
     172             :       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
     173             :       Name.startswith("avx512.mask.and.") || // Added in 3.9
     174             :       Name.startswith("avx512.mask.andn.") || // Added in 3.9
     175             :       Name.startswith("avx512.mask.or.") || // Added in 3.9
     176             :       Name.startswith("avx512.mask.xor.") || // Added in 3.9
     177             :       Name.startswith("avx512.mask.padd.") || // Added in 4.0
     178             :       Name.startswith("avx512.mask.psub.") || // Added in 4.0
     179             :       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
     180             :       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
     181             :       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
     182             :       Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
     183             :       Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
     184             :       Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
     185             :       Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
     186             :       Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
     187             :       Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
     188             :       Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
     189             :       Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
     190             :       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
     191             :       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
     192             :       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
     193             :       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
     194             :       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
     195             :       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
     196             :       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
     197             :       Name == "avx512.cvtusi2sd" || // Added in 7.0
     198             :       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
     199             :       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
     200             :       Name == "sse2.pmulu.dq" || // Added in 7.0
     201             :       Name == "sse41.pmuldq" || // Added in 7.0
     202             :       Name == "avx2.pmulu.dq" || // Added in 7.0
     203             :       Name == "avx2.pmul.dq" || // Added in 7.0
     204             :       Name == "avx512.pmulu.dq.512" || // Added in 7.0
     205             :       Name == "avx512.pmul.dq.512" || // Added in 7.0
     206             :       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
     207             :       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
     208             :       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
     209             :       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
     210             :       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
     211             :       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
     212             :       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
     213             :       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
     214             :       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
     215             :       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
     216             :       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
     217             :       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
     218             :       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
     219             :       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
     220             :       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
     221             :       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
     222             :       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
     223             :       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
     224             :       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
     225             :       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
     226             :       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
     227             :       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
     228             :       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
     229             :       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
     230             :       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
     231             :       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
     232             :       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
     233             :       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
     234             :       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
     235             :       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
     236             :       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
     237             :       Name.startswith("avx512.mask.pslli") || // Added in 4.0
     238             :       Name.startswith("avx512.mask.psrai") || // Added in 4.0
     239             :       Name.startswith("avx512.mask.psrli") || // Added in 4.0
     240             :       Name.startswith("avx512.mask.psllv") || // Added in 4.0
     241             :       Name.startswith("avx512.mask.psrav") || // Added in 4.0
     242             :       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
     243             :       Name.startswith("sse41.pmovsx") || // Added in 3.8
     244             :       Name.startswith("sse41.pmovzx") || // Added in 3.9
     245             :       Name.startswith("avx2.pmovsx") || // Added in 3.9
     246             :       Name.startswith("avx2.pmovzx") || // Added in 3.9
     247             :       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
     248             :       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
     249             :       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
     250             :       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
     251             :       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
     252             :       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
     253             :       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
     254             :       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
     255             :       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
     256             :       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
     257             :       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
     258             :       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
     259             :       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
     260             :       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
     261             :       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
     262             :       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
     263             :       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
     264             :       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
     265             :       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
     266             :       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
     267             :       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
     268             :       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
     269             :       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
     270             :       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
     271             :       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
     272             :       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
     273             :       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
     274             :       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
     275             :       Name.startswith("avx512.mask.prorv.") || // Added in 7.0
     276             :       Name.startswith("avx512.mask.pror.") || // Added in 7.0
     277             :       Name.startswith("avx512.mask.prolv.") || // Added in 7.0
     278             :       Name.startswith("avx512.mask.prol.") || // Added in 7.0
     279             :       Name == "sse.cvtsi2ss" || // Added in 7.0
     280             :       Name == "sse.cvtsi642ss" || // Added in 7.0
     281             :       Name == "sse2.cvtsi2sd" || // Added in 7.0
     282             :       Name == "sse2.cvtsi642sd" || // Added in 7.0
     283             :       Name == "sse2.cvtss2sd" || // Added in 7.0
     284             :       Name == "sse2.cvtdq2pd" || // Added in 3.9
     285             :       Name == "sse2.cvtdq2ps" || // Added in 7.0
     286             :       Name == "sse2.cvtps2pd" || // Added in 3.9
     287             :       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
     288             :       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
     289             :       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
     290             :       Name.startswith("avx.vinsertf128.") || // Added in 3.7
     291             :       Name == "avx2.vinserti128" || // Added in 3.7
     292             :       Name.startswith("avx512.mask.insert") || // Added in 4.0
     293             :       Name.startswith("avx.vextractf128.") || // Added in 3.7
     294             :       Name == "avx2.vextracti128" || // Added in 3.7
     295             :       Name.startswith("avx512.mask.vextract") || // Added in 4.0
     296             :       Name.startswith("sse4a.movnt.") || // Added in 3.9
     297             :       Name.startswith("avx.movnt.") || // Added in 3.2
     298             :       Name.startswith("avx512.storent.") || // Added in 3.9
     299             :       Name == "sse41.movntdqa" || // Added in 5.0
     300             :       Name == "avx2.movntdqa" || // Added in 5.0
     301             :       Name == "avx512.movntdqa" || // Added in 5.0
     302             :       Name == "sse2.storel.dq" || // Added in 3.9
     303             :       Name.startswith("sse.storeu.") || // Added in 3.9
     304             :       Name.startswith("sse2.storeu.") || // Added in 3.9
     305             :       Name.startswith("avx.storeu.") || // Added in 3.9
     306             :       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
     307             :       Name.startswith("avx512.mask.store.p") || // Added in 3.9
     308             :       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
     309             :       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
     310             :       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
     311             :       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
     312             :       Name == "avx512.mask.store.ss" || // Added in 7.0
     313             :       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
     314             :       Name.startswith("avx512.mask.load.") || // Added in 3.9
     315             :       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
     316             :       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
     317             :       Name == "sse42.crc32.64.8" || // Added in 3.4
     318             :       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
     319             :       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
     320             :       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
     321             :       Name.startswith("avx512.mask.valign.") || // Added in 4.0
     322             :       Name.startswith("sse2.psll.dq") || // Added in 3.7
     323             :       Name.startswith("sse2.psrl.dq") || // Added in 3.7
     324             :       Name.startswith("avx2.psll.dq") || // Added in 3.7
     325             :       Name.startswith("avx2.psrl.dq") || // Added in 3.7
     326             :       Name.startswith("avx512.psll.dq") || // Added in 3.9
     327             :       Name.startswith("avx512.psrl.dq") || // Added in 3.9
     328             :       Name == "sse41.pblendw" || // Added in 3.7
     329             :       Name.startswith("sse41.blendp") || // Added in 3.7
     330             :       Name.startswith("avx.blend.p") || // Added in 3.7
     331             :       Name == "avx2.pblendw" || // Added in 3.7
     332             :       Name.startswith("avx2.pblendd.") || // Added in 3.7
     333             :       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
     334             :       Name == "avx2.vbroadcasti128" || // Added in 3.7
     335             :       Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
     336             :       Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
     337             :       Name == "xop.vpcmov" || // Added in 3.8
     338             :       Name == "xop.vpcmov.256" || // Added in 5.0
     339             :       Name.startswith("avx512.mask.move.s") || // Added in 4.0
     340             :       Name.startswith("avx512.cvtmask2") || // Added in 5.0
     341         192 :       (Name.startswith("xop.vpcom") && // Added in 3.2
     342         192 :        F->arg_size() == 2) ||
     343             :       Name.startswith("avx512.ptestm") || //Added in 6.0
     344             :       Name.startswith("avx512.ptestnm") || //Added in 6.0
     345             :       Name.startswith("sse2.pavg") || // Added in 6.0
     346             :       Name.startswith("avx2.pavg") || // Added in 6.0
     347             :       Name.startswith("avx512.mask.pavg")) // Added in 6.0
     348             :     return true;
     349             : 
     350             :   return false;
     351             : }
     352             : 
     353       16251 : static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
     354             :                                         Function *&NewFn) {
     355             :   // Only handle intrinsics that start with "x86.".
     356             :   if (!Name.startswith("x86."))
     357             :     return false;
     358             :   // Remove "x86." prefix.
     359       16175 :   Name = Name.substr(4);
     360             : 
     361       16175 :   if (ShouldUpgradeX86Intrinsic(F, Name)) {
     362        4195 :     NewFn = nullptr;
     363        4195 :     return true;
     364             :   }
     365             : 
     366             :   // SSE4.1 ptest functions may have an old signature.
     367             :   if (Name.startswith("sse41.ptest")) { // Added in 3.2
     368             :     if (Name.substr(11) == "c")
     369          84 :       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
     370             :     if (Name.substr(11) == "z")
     371          65 :       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
     372             :     if (Name.substr(11) == "nzc")
     373          62 :       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
     374             :   }
     375             :   // Several blend and other instructions with masks used the wrong number of
     376             :   // bits.
     377             :   if (Name == "sse41.insertps") // Added in 3.6
     378             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
     379          68 :                                             NewFn);
     380             :   if (Name == "sse41.dppd") // Added in 3.6
     381             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
     382          44 :                                             NewFn);
     383             :   if (Name == "sse41.dpps") // Added in 3.6
     384             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
     385          44 :                                             NewFn);
     386             :   if (Name == "sse41.mpsadbw") // Added in 3.6
     387             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
     388          44 :                                             NewFn);
     389             :   if (Name == "avx.dp.ps.256") // Added in 3.6
     390             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
     391          24 :                                             NewFn);
     392             :   if (Name == "avx2.mpsadbw") // Added in 3.6
     393             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
     394          21 :                                             NewFn);
     395             : 
     396             :   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
     397           5 :   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
     398           0 :     rename(F);
     399           0 :     NewFn = Intrinsic::getDeclaration(F->getParent(),
     400             :                                       Intrinsic::x86_xop_vfrcz_ss);
     401           0 :     return true;
     402             :   }
     403           5 :   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
     404           0 :     rename(F);
     405           0 :     NewFn = Intrinsic::getDeclaration(F->getParent(),
     406             :                                       Intrinsic::x86_xop_vfrcz_sd);
     407           0 :     return true;
     408             :   }
     409             :   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
     410             :   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
     411          48 :     auto Idx = F->getFunctionType()->getParamType(2);
     412             :     if (Idx->isFPOrFPVectorTy()) {
     413           4 :       rename(F);
     414           4 :       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
     415           4 :       unsigned EltSize = Idx->getScalarSizeInBits();
     416             :       Intrinsic::ID Permil2ID;
     417           4 :       if (EltSize == 64 && IdxSize == 128)
     418             :         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
     419           3 :       else if (EltSize == 32 && IdxSize == 128)
     420             :         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
     421           2 :       else if (EltSize == 64 && IdxSize == 256)
     422             :         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
     423             :       else
     424             :         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
     425           4 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
     426           4 :       return true;
     427             :     }
     428             :   }
     429             : 
     430             :   return false;
     431             : }
     432             : // Name-based upgrade check for F: returns true when F is outdated, with NewFn set to its replacement declaration or to nullptr.
     433      361399 : static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     434             :   assert(F && "Illegal to upgrade a non-existent Function.");
     435             : 
     436             :   // Quickly eliminate it, if it's not a candidate.
     437      361399 :   StringRef Name = F->getName();
     438      361399 :   if (Name.size() <= 8 || !Name.startswith("llvm."))
     439             :     return false;
     440       47755 :   Name = Name.substr(5); // Strip off "llvm."
     441             : 
     442       47755 :   switch (Name[0]) { // Dispatch on the first character after "llvm."
     443             :   default: break;
     444             :   case 'a': {
     445             :     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
     446           3 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
     447           6 :                                         F->arg_begin()->getType());
     448          26 :       return true;
     449             :     }
     450             :     if (Name.startswith("arm.neon.vclz")) {
     451             :       Type* args[2] = {
     452           2 :         F->arg_begin()->getType(),
     453           2 :         Type::getInt1Ty(F->getContext()) // second parameter of the new declaration is an i1
     454           4 :       };
     455             :       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
     456             :       // the end of the name. Change name from llvm.arm.neon.vclz.* to
     457             :       //  llvm.ctlz.*
     458           2 :       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
     459           4 :       NewFn = Function::Create(fType, F->getLinkage(),
     460           4 :                                "llvm.ctlz." + Name.substr(14), F->getParent()); // 14 == strlen("arm.neon.vclz.")
     461             :       return true;
     462             :     }
     463             :     if (Name.startswith("arm.neon.vcnt")) {
     464           2 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
     465           4 :                                         F->arg_begin()->getType());
     466           2 :       return true;
     467             :     }
     468        7365 :     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
     469        7365 :     if (vldRegex.match(Name)) {
     470             :       auto fArgs = F->getFunctionType()->params();
     471             :       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
     472             :       // Can't use Intrinsic::getDeclaration here as the return types might
     473             :       // then only be structurally equal.
     474           7 :       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
     475          14 :       NewFn = Function::Create(fType, F->getLinkage(),
     476           7 :                                "llvm." + Name + ".p0i8", F->getParent()); // old name plus a ".p0i8" suffix
     477             :       return true;
     478             :     }
     479        7358 :     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
     480        7358 :     if (vstRegex.match(Name)) {
     481             :       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
     482             :                                                 Intrinsic::arm_neon_vst2,
     483             :                                                 Intrinsic::arm_neon_vst3,
     484             :                                                 Intrinsic::arm_neon_vst4};
     485             : 
     486             :       static const Intrinsic::ID StoreLaneInts[] = {
     487             :         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
     488             :         Intrinsic::arm_neon_vst4lane
     489             :       };
     490             : 
     491             :       auto fArgs = F->getFunctionType()->params();
     492           7 :       Type *Tys[] = {fArgs[0], fArgs[1]};
     493           7 :       if (Name.find("lane") == StringRef::npos)
     494           4 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     495           4 :                                           StoreInts[fArgs.size() - 3], Tys); // table index = parameter count - 3
     496             :       else
     497           3 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     498           3 :                                           StoreLaneInts[fArgs.size() - 5], Tys); // table index = parameter count - 5
     499             :       return true;
     500             :     }
     501             :     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
     502           2 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
     503           2 :       return true;
     504             :     }
     505        7349 :     break;
     506             :   }
     507             : 
     508             :   case 'c': {
     509         766 :     if (Name.startswith("ctlz.") && F->arg_size() == 1) { // only the one-parameter form is upgraded
     510          39 :       rename(F);
     511          39 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
     512          78 :                                         F->arg_begin()->getType());
     513          39 :       return true;
     514             :     }
     515         577 :     if (Name.startswith("cttz.") && F->arg_size() == 1) { // only the one-parameter form is upgraded
     516          25 :       rename(F);
     517          25 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
     518          50 :                                         F->arg_begin()->getType());
     519          25 :       return true;
     520             :     }
     521             :     break;
     522             :   }
     523             :   case 'd': {
     524         513 :     if (Name == "dbg.value" && F->arg_size() == 4) { // only the four-parameter form is upgraded
     525         129 :       rename(F);
     526         129 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
     527         129 :       return true;
     528             :     }
     529             :     break;
     530             :   }
     531             :   case 'i': // "invariant.*" shares the handler below with "lifetime.*"
     532             :   case 'l': {
     533             :     bool IsLifetimeStart = Name.startswith("lifetime.start");
     534             :     if (IsLifetimeStart || Name.startswith("invariant.start")) {
     535         417 :       Intrinsic::ID ID = IsLifetimeStart ?
     536             :         Intrinsic::lifetime_start : Intrinsic::invariant_start;
     537             :       auto Args = F->getFunctionType()->params();
     538         417 :       Type* ObjectPtr[1] = {Args[1]}; // overload type is taken from parameter 1
     539        1251 :       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { // upgrade only if the name is not already the current mangling
     540          74 :         rename(F);
     541          74 :         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
     542          74 :         return true;
     543             :       }
     544             :     }
     545             : 
     546             :     bool IsLifetimeEnd = Name.startswith("lifetime.end");
     547             :     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
     548         381 :       Intrinsic::ID ID = IsLifetimeEnd ?
     549             :         Intrinsic::lifetime_end : Intrinsic::invariant_end;
     550             : 
     551             :       auto Args = F->getFunctionType()->params();
     552         762 :       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]}; // parameter 1 for lifetime.end, parameter 2 for invariant.end
     553        1143 :       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
     554          55 :         rename(F);
     555          55 :         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
     556          55 :         return true;
     557             :       }
     558             :     }
     559             :     if (Name.startswith("invariant.group.barrier")) {
     560             :       // Rename invariant.group.barrier to launder.invariant.group
     561             :       auto Args = F->getFunctionType()->params();
     562           3 :       Type* ObjectPtr[1] = {Args[0]};
     563           3 :       rename(F);
     564           3 :       NewFn = Intrinsic::getDeclaration(F->getParent(),
     565             :           Intrinsic::launder_invariant_group, ObjectPtr);
     566             :       return true;
     567             : 
     568             :     }
     569             : 
     570             :     break;
     571             :   }
     572             :   case 'm': {
     573             :     if (Name.startswith("masked.load.")) {
     574         452 :       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
     575         678 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
     576          10 :         rename(F);
     577          10 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     578             :                                           Intrinsic::masked_load,
     579             :                                           Tys);
     580          10 :         return true;
     581             :       }
     582             :     }
     583             :     if (Name.startswith("masked.store.")) {
     584             :       auto Args = F->getFunctionType()->params();
     585         214 :       Type *Tys[] = { Args[0], Args[1] };
     586         642 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
     587          10 :         rename(F);
     588          10 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     589             :                                           Intrinsic::masked_store,
     590             :                                           Tys);
     591          10 :         return true;
     592             :       }
     593             :     }
     594             :     // Renaming gather/scatter intrinsics with no address space overloading
     595             :     // to the new overload which includes an address space
     596             :     if (Name.startswith("masked.gather.")) {
     597         498 :       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
     598         747 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
     599          44 :         rename(F);
     600          44 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     601             :                                           Intrinsic::masked_gather, Tys);
     602          44 :         return true;
     603             :       }
     604             :     }
     605             :     if (Name.startswith("masked.scatter.")) {
     606             :       auto Args = F->getFunctionType()->params();
     607         137 :       Type *Tys[] = {Args[0], Args[1]};
     608         411 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
     609           4 :         rename(F);
     610           4 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     611             :                                           Intrinsic::masked_scatter, Tys);
     612           4 :         return true;
     613             :       }
     614             :     }
     615             :     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
     616             :     // alignment parameter to embedding the alignment as an attribute of
     617             :     // the pointer args.
     618         782 :     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
     619          28 :       rename(F);
     620             :       // Get the types of dest, src, and len
     621          28 :       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
     622          28 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
     623             :                                         ParamTypes);
     624             :       return true;
     625             :     }
     626         135 :     if (Name.startswith("memmove.") && F->arg_size() == 5) {
     627           5 :       rename(F);
     628             :       // Get the types of dest, src, and len
     629           5 :       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
     630           5 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
     631             :                                         ParamTypes);
     632             :       return true;
     633             :     }
     634         408 :     if (Name.startswith("memset.") && F->arg_size() == 5) {
     635          46 :       rename(F);
     636             :       // Get the types of dest, and len
     637             :       const auto *FT = F->getFunctionType();
     638             :       Type *ParamTypes[2] = {
     639          46 :           FT->getParamType(0), // Dest
     640             :           FT->getParamType(2)  // len
     641          92 :       };
     642          46 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
     643             :                                         ParamTypes);
     644             :       return true;
     645             :     }
     646             :     break;
     647             :   }
     648             :   case 'n': {
     649             :     if (Name.startswith("nvvm.")) {
     650         547 :       Name = Name.substr(5); // Strip off "nvvm."
     651             : 
     652             :       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
     653         547 :       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
     654             :                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
     655             :                               .Case("clz.i", Intrinsic::ctlz)
     656             :                               .Case("popc.i", Intrinsic::ctpop)
     657             :                               .Default(Intrinsic::not_intrinsic);
     658           8 :       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
     659          16 :         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
     660             :                                           {F->getReturnType()});
     661           8 :         return true;
     662             :       }
     663             : 
     664             :       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
     665             :       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
     666             :       //
     667             :       // TODO: We could add lohi.i2d.
     668         539 :       bool Expand = StringSwitch<bool>(Name)
     669             :                         .Cases("abs.i", "abs.ll", true)
     670             :                         .Cases("clz.ll", "popc.ll", "h2f", true)
     671             :                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
     672             :                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
     673             :                         .Default(false);
     674          26 :       if (Expand) {
     675          26 :         NewFn = nullptr; // no replacement declaration for these
     676          26 :         return true;
     677             :       }
     678             :     }
     679             :     break;
     680             :   }
     681             :   case 'o':
     682             :     // We only need to change the name to match the mangling including the
     683             :     // address space.
     684             :     if (Name.startswith("objectsize.")) {
     685         246 :       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
     686         246 :       if (F->arg_size() == 2 || // the two-parameter form is always upgraded
     687         335 :           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
     688          36 :         rename(F);
     689          36 :         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
     690             :                                           Tys);
     691          36 :         return true;
     692             :       }
     693             :     }
     694             :     break;
     695             : 
     696             :   case 's':
     697             :     if (Name == "stackprotectorcheck") {
     698           6 :       NewFn = nullptr; // reported as upgraded, with no replacement declaration
     699           6 :       return true;
     700             :     }
     701             :     break;
     702             : 
     703       16251 :   case 'x': // last case: when the X86 helper returns false, control leaves the switch
     704       16251 :     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
     705             :       return true;
     706             :   }
     707             :   // Remangle our intrinsic since we upgrade the mangling
     708       42936 :   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
     709       42936 :   if (Result != None) {
     710         103 :     NewFn = Result.getValue();
     711         103 :     return true;
     712             :   }
     713             : 
     714             :   //  This may not belong here. This function is effectively being overloaded
     715             :   //  to both detect an intrinsic which needs upgrading, and to provide the
     716             :   //  upgraded form of the intrinsic. We should perhaps have two separate
     717             :   //  functions for this.
     718             :   return false;
     719             : }
     720             : // Entry point: resets NewFn, runs UpgradeIntrinsicFunction1, then refreshes the intrinsic attributes of NewFn (or of F when NewFn is null).
     721      361399 : bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
     722      361399 :   NewFn = nullptr;
     723      361399 :   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
     724             :   assert(F != NewFn && "Intrinsic function upgraded to the same function");
     725             : 
     726             :   // Upgrade intrinsic attributes.  This does not change the function.
     727      361399 :   if (NewFn)
     728             :     F = NewFn; // from here on F is only the local pointer whose attributes get set
     729      361399 :   if (Intrinsic::ID id = F->getIntrinsicID()) // skipped when the ID is zero
     730       43220 :     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
     731      361399 :   return Upgraded;
     732             : }
     733             : // Global-variable upgrade hook; GV is not inspected and the function always returns false.
     734       37289 : bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
     735             :   // Nothing to do yet.
     736       37289 :   return false;
     737             : }
     738             : 
     739             : // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
     740             : // to byte shuffles. Op is bitcast to a byte vector, shuffled, and cast back.
     741          30 : static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
     742             :                                          Value *Op, unsigned Shift) {
     743          30 :   Type *ResultTy = Op->getType();
     744          30 :   unsigned NumElts = ResultTy->getVectorNumElements() * 8; // byte count (8 bytes per 64-bit element)
     745             : 
     746             :   // Bitcast from a 64-bit element type to a byte element type.
     747          60 :   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
     748          30 :   Op = Builder.CreateBitCast(Op, VecTy, "cast");
     749             : 
     750             :   // We'll be shuffling in zeroes.
     751          30 :   Value *Res = Constant::getNullValue(VecTy);
     752             : 
     753             :   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
     754             :   // we'll just return the zero vector.
     755          30 :   if (Shift < 16) {
     756             :     uint32_t Idxs[64]; // only the first NumElts entries are filled and used
     757             :     // 256/512-bit version is split into 2/4 16-byte lanes.
     758         142 :     for (unsigned l = 0; l != NumElts; l += 16)
     759        1848 :       for (unsigned i = 0; i != 16; ++i) {
     760         896 :         unsigned Idx = NumElts + i - Shift; // >= NumElts picks a byte of Op (2nd shuffle operand)
     761         896 :         if (Idx < NumElts)
     762         244 :           Idx -= NumElts - 16; // end of lane, switch operand (now indexes the zero vector Res).
     763         896 :         Idxs[l + i] = Idx + l;
     764             :       }
     765             : 
     766          30 :     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
     767             :   }
     768             : 
     769             :   // Bitcast back to a 64-bit element type.
     770          30 :   return Builder.CreateBitCast(Res, ResultTy, "cast");
     771             : }
     772             : 
     773             : // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
     774             : // to byte shuffles. Op is bitcast to a byte vector, shuffled, and cast back.
     775         124 : static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
     776             :                                          unsigned Shift) {
     777         124 :   Type *ResultTy = Op->getType();
     778         124 :   unsigned NumElts = ResultTy->getVectorNumElements() * 8; // byte count (8 bytes per 64-bit element)
     779             : 
     780             :   // Bitcast from a 64-bit element type to a byte element type.
     781         248 :   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
     782         124 :   Op = Builder.CreateBitCast(Op, VecTy, "cast");
     783             : 
     784             :   // We'll be shuffling in zeroes.
     785         124 :   Value *Res = Constant::getNullValue(VecTy);
     786             : 
     787             :   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
     788             :   // we'll just return the zero vector.
     789         124 :   if (Shift < 16) {
     790             :     uint32_t Idxs[64]; // only the first NumElts entries are filled and used
     791             :     // 256/512-bit version is split into 2/4 16-byte lanes.
     792         424 :     for (unsigned l = 0; l != NumElts; l += 16)
     793        4950 :       for (unsigned i = 0; i != 16; ++i) {
     794        2400 :         unsigned Idx = i + Shift; // < 16 picks a byte of Op (1st shuffle operand) within the lane
     795        2400 :         if (Idx >= 16)
     796         660 :           Idx += NumElts - 16; // end of lane, switch operand (now indexes the zero vector Res).
     797        2400 :         Idxs[l + i] = Idx + l;
     798             :       }
     799             : 
     800         124 :     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
     801             :   }
     802             : 
     803             :   // Bitcast back to a 64-bit element type.
     804         124 :   return Builder.CreateBitCast(Res, ResultTy, "cast");
     805             : }
     806             : // Bitcasts the integer Mask to a vector of i1 (one lane per mask bit) and, when NumElts < 8, keeps only lanes 0..NumElts-1.
     807        3773 : static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
     808             :                             unsigned NumElts) {
     809        7546 :   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
     810        3773 :                              cast<IntegerType>(Mask->getType())->getBitWidth());
     811        3773 :   Mask = Builder.CreateBitCast(Mask, MaskTy);
     812             : 
     813             :   // If we have less than 8 elements, then the starting mask was an i8 and
     814             :   // we need to extract down to the right number of elements.
     815        3773 :   if (NumElts < 8) {
     816             :     uint32_t Indices[4]; // NOTE(review): assumes NumElts <= 4 on this path - confirm against callers
     817        9256 :     for (unsigned i = 0; i != NumElts; ++i)
     818        4040 :       Indices[i] = i;
     819        1176 :     Mask = Builder.CreateShuffleVector(Mask, Mask,
     820             :                                        makeArrayRef(Indices, NumElts),
     821             :                                        "extract");
     822             :   }
     823             : 
     824        3773 :   return Mask;
     825             : }
     826             : // Returns Op0 when Mask is a constant all-ones value; otherwise emits a select between Op0 and Op1 driven by the i1-vector form of Mask.
     827        4563 : static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
     828             :                             Value *Op0, Value *Op1) {
     829             :   // If the mask is all ones just return Op0; no select is needed.
     830             :   if (const auto *C = dyn_cast<Constant>(Mask))
     831        1733 :     if (C->isAllOnesValue())
     832             :       return Op0;
     833             : 
     834        5660 :   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
     835        2830 :   return Builder.CreateSelect(Mask, Op0, Op1);
     836             : }
     837             : 
     838             : // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
     839             : // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
     840             : // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
     841          42 : static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
     842             :                                         Value *Op1, Value *Shift,
     843             :                                         Value *Passthru, Value *Mask,
     844             :                                         bool IsVALIGN) {
     845          42 :   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); // Shift is cast to ConstantInt, so it must be one
     846             : 
     847          42 :   unsigned NumElts = Op0->getType()->getVectorNumElements();
     848             :   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
     849             :   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
     850             :   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
     851             : 
     852             :   // Mask the immediate for VALIGN.
     853          42 :   if (IsVALIGN)
     854          24 :     ShiftVal &= (NumElts - 1); // NumElts is a power of 2 (asserted above), so this is ShiftVal % NumElts
     855             : 
     856             :   // If palignr is shifting the pair of vectors more than the size of two
     857             :   // lanes, emit zero.
     858          42 :   if (ShiftVal >= 32)
     859           0 :     return llvm::Constant::getNullValue(Op0->getType());
     860             : 
     861             :   // If palignr is shifting the pair of input vectors more than one lane,
     862             :   // but less than two lanes, convert to shifting in zeroes.
     863          42 :   if (ShiftVal > 16) {
     864           0 :     ShiftVal -= 16;
     865             :     Op1 = Op0;
     866           0 :     Op0 = llvm::Constant::getNullValue(Op0->getType());
     867             :   }
     868             : 
     869             :   uint32_t Indices[64]; // only the first NumElts entries are passed to the shuffle
     870             :   // 256-bit palignr operates on 128-bit lanes so we need to handle that
     871         174 :   for (unsigned l = 0; l < NumElts; l += 16) {
     872        2178 :     for (unsigned i = 0; i != 16; ++i) { // always fills 16 entries per pass, even when NumElts < 16
     873        1056 :       unsigned Idx = ShiftVal + i;
     874        1056 :       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
     875          84 :         Idx += NumElts - 16; // End of lane, switch operand.
     876        1056 :       Indices[l + i] = Idx + l;
     877             :     }
     878             :   }
     879             : 
     880          42 :   Value *Align = Builder.CreateShuffleVector(Op1, Op0, // Op1 is the first shuffle operand, Op0 the second
     881             :                                              makeArrayRef(Indices, NumElts),
     882          42 :                                              "palignr");
     883             : 
     884          42 :   return EmitX86Select(Builder, Mask, Align, Passthru);
     885             : }
     886             : // Stores Data through Ptr: a plain aligned store when Mask is a constant all-ones value, otherwise a masked store.
     887         120 : static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
     888             :                                  Value *Ptr, Value *Data, Value *Mask,
     889             :                                  bool Aligned) {
     890             :   // Cast the pointer to the right type.
     891         120 :   Ptr = Builder.CreateBitCast(Ptr,
     892         120 :                               llvm::PointerType::getUnqual(Data->getType()));
     893             :   unsigned Align =
     894         168 :     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1; // vector size in bytes, or 1 when not Aligned
     895             : 
     896             :   // If the mask is all ones just emit a regular store.
     897             :   if (const auto *C = dyn_cast<Constant>(Mask))
     898          60 :     if (C->isAllOnesValue())
     899          60 :       return Builder.CreateAlignedStore(Data, Ptr, Align);
     900             : 
     901             :   // Convert the mask from an integer type to a vector of i1.
     902          60 :   unsigned NumElts = Data->getType()->getVectorNumElements();
     903          60 :   Mask = getX86MaskVec(Builder, Mask, NumElts);
     904          60 :   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
     905             : }
     906             : // Loads a value of Passthru's type from Ptr: a plain aligned load when Mask is a constant all-ones value, otherwise a masked load with Passthru.
     907         180 : static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
     908             :                                 Value *Ptr, Value *Passthru, Value *Mask,
     909             :                                 bool Aligned) {
     910             :   // Cast the pointer to the right type.
     911         180 :   Ptr = Builder.CreateBitCast(Ptr,
     912         180 :                              llvm::PointerType::getUnqual(Passthru->getType()));
     913             :   unsigned Align =
     914         252 :     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; // vector size in bytes, or 1 when not Aligned
     915             : 
     916             :   // If the mask is all ones just emit a regular load.
     917             :   if (const auto *C = dyn_cast<Constant>(Mask))
     918          60 :     if (C->isAllOnesValue())
     919          60 :       return Builder.CreateAlignedLoad(Ptr, Align);
     920             : 
     921             :   // Convert the mask from an integer type to a vector of i1.
     922         120 :   unsigned NumElts = Passthru->getType()->getVectorNumElements();
     923         120 :   Mask = getX86MaskVec(Builder, Mask, NumElts);
     924         240 :   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
     925             : }
     926             : // Emits select(Op0 > 0, Op0, -Op0) for call operand 0; a three-operand call is additionally masked by operand 2 with operand 1 as passthru.
     927         246 : static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
     928         246 :   Value *Op0 = CI.getArgOperand(0);
     929         246 :   llvm::Type *Ty = Op0->getType();
     930         246 :   Value *Zero = llvm::Constant::getNullValue(Ty);
     931         246 :   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); // signed compare against zero
     932         246 :   Value *Neg = Builder.CreateNeg(Op0);
     933         246 :   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
     934             : 
     935         246 :   if (CI.getNumArgOperands() == 3) // masked form: (value, passthru, mask)
     936          48 :     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
     937             : 
     938         246 :   return Res;
     939             : }
     940             : // Emits select(icmp Pred Op0, Op1, Op0, Op1) for call operands 0 and 1; a four-operand call is additionally masked by operand 3 with operand 2 as passthru.
     941        1074 : static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
     942             :                                ICmpInst::Predicate Pred) {
     943        1074 :   Value *Op0 = CI.getArgOperand(0);
     944             :   Value *Op1 = CI.getArgOperand(1);
     945        1074 :   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1); // Pred decides whether this is a min or a max, signed or unsigned
     946        1074 :   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
     947             : 
     948        1074 :   if (CI.getNumArgOperands() == 4) // masked form: (a, b, passthru, mask)
     949         192 :     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
     950             : 
     951        1074 :   return Res;
     952             : }
     953             : 
                       /// Expand a pmuldq/pmuludq-style intrinsic call into generic IR.
                       ///
                       /// Both operands are bitcast to the call's return type, reduced to their low
                       /// 32 bits per element (sign-extended when \p IsSigned, zero-extended
                       /// otherwise) and multiplied. When the call carries exactly four arguments,
                       /// the product is additionally passed to EmitX86Select together with
                       /// argument 3 and argument 2 (presumably mask and passthru; confirm against
                       /// EmitX86Select).
                       ///
                       /// \param Builder   IR builder used to emit the replacement instructions.
                       /// \param CI        The intrinsic call being upgraded.
                       /// \param IsSigned  Selects the shl/ashr (signed) or the and-mask (unsigned)
                       ///                  extension of the low halves.
                       /// \returns the value that replaces the call's result.
     954         322 : static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
     955         322 :   Type *Ty = CI.getType();
     956             : 
     957             :   // Arguments have a vXi32 type so cast to vXi64.
     958         644 :   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
     959         322 :   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
     960             : 
     961         322 :   if (IsSigned) {
     962             :     // Shift left then arithmetic shift right.
                           // This sign-extends the low 32 bits of every element in place.
     963         146 :     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
     964         146 :     LHS = Builder.CreateShl(LHS, ShiftAmt);
     965         146 :     LHS = Builder.CreateAShr(LHS, ShiftAmt);
     966         146 :     RHS = Builder.CreateShl(RHS, ShiftAmt);
     967         146 :     RHS = Builder.CreateAShr(RHS, ShiftAmt);
     968             :   } else {
     969             :     // Clear the upper bits.
     970         176 :     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
     971         176 :     LHS = Builder.CreateAnd(LHS, Mask);
     972         176 :     RHS = Builder.CreateAnd(RHS, Mask);
     973             :   }
     974             : 
     975         322 :   Value *Res = Builder.CreateMul(LHS, RHS);
     976             : 
                         // Four-argument form: hand the product to EmitX86Select along with the
                         // two trailing arguments.
     977         322 :   if (CI.getNumArgOperands() == 4)
     978         108 :     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
     979             : 
     980         322 :   return Res;
     981             : }
     982             : 
     983             : // Apply a mask to a vector of i1's and make sure the result is at least 8
                       // bits wide.
                       //
                       // If Mask is non-null and is not a constant all-ones value, Vec is ANDed with
                       // the vector produced by getX86MaskVec(Builder, Mask, NumElts). Vectors with
                       // fewer than 8 elements are then widened to 8 elements with a shuffle whose
                       // extra lanes come from a null vector, and the final vector is bitcast to an
                       // integer of max(NumElts, 8) bits.
     984        1026 : static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
     985             :                                      Value *Mask) {
     986        2052 :   unsigned NumElts = Vec->getType()->getVectorNumElements();
     987        1026 :   if (Mask) {
                           // A constant all-ones mask keeps every lane, so the AND is skipped.
     988             :     const auto *C = dyn_cast<Constant>(Mask);
     989         505 :     if (!C || !C->isAllOnesValue())
     990         495 :       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
     991             :   }
     992             : 
     993        1026 :   if (NumElts < 8) {
     994             :     uint32_t Indices[8];
                           // Lanes [0, NumElts) keep the elements of Vec.
     995        2033 :     for (unsigned i = 0; i != NumElts; ++i)
     996         884 :       Indices[i] = i;
                           // Lanes [NumElts, 8) use indices in [NumElts, 2 * NumElts), which
                           // address the second shuffle operand, i.e. the null vector below.
     997        2737 :     for (unsigned i = NumElts; i != 8; ++i)
     998        1236 :       Indices[i] = NumElts + i % NumElts;
     999         265 :     Vec = Builder.CreateShuffleVector(Vec,
    1000         265 :                                       Constant::getNullValue(Vec->getType()),
    1001             :                                       Indices);
    1002             :   }
    1003        3078 :   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
    1004             : }
    1005             : 
                       /// Expand a masked integer vector compare intrinsic call.
                       ///
                       /// Condition codes 3 and 7 produce a constant all-false and all-true i1
                       /// vector respectively; the remaining codes are mapped to an icmp predicate
                       /// (0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = GE, 6 = GT) applied to arguments 0
                       /// and 1. The call's last argument is used as the mask, and the i1 vector is
                       /// handed to ApplyX86MaskOn1BitsVec to produce the final value.
                       ///
                       /// \param Builder  IR builder used to emit the replacement instructions.
                       /// \param CI       The intrinsic call being upgraded.
                       /// \param CC       Condition code in [0, 7]; other values are unreachable.
                       /// \param Signed   Picks the signed or unsigned variant of the ordered
                       ///                 predicates (LT, LE, GE, GT).
                       /// \returns the value that replaces the call's result.
    1006         864 : static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
    1007             :                                    unsigned CC, bool Signed) {
    1008         864 :   Value *Op0 = CI.getArgOperand(0);
    1009         864 :   unsigned NumElts = Op0->getType()->getVectorNumElements();
    1010             : 
    1011             :   Value *Cmp;
    1012         864 :   if (CC == 3) {
                           // Always-false comparison: all-zero i1 vector.
    1013         192 :     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
    1014         768 :   } else if (CC == 7) {
                           // Always-true comparison: all-ones i1 vector.
    1015         192 :     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
    1016             :   } else {
    1017             :     ICmpInst::Predicate Pred;
    1018         672 :     switch (CC) {
    1019           0 :     default: llvm_unreachable("Unknown condition code");
    1020             :     case 0: Pred = ICmpInst::ICMP_EQ;  break;
    1021          96 :     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    1022          96 :     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    1023          96 :     case 4: Pred = ICmpInst::ICMP_NE;  break;
    1024          96 :     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    1025         144 :     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    1026             :     }
    1027         672 :     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
    1028             :   }
    1029             : 
                         // The mask is always the call's last argument.
    1030         864 :   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
    1031             : 
    1032         864 :   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
    1033             : }
    1034             : 
    1035             : // Replace a masked intrinsic with an older unmasked intrinsic.
                       //
                       // Calls the intrinsic identified by IID on arguments 0 and 1 of CI, then
                       // passes the call result to EmitX86Select together with argument 3 and
                       // argument 2 (presumably the mask and the passthru value; confirm against
                       // EmitX86Select, which is defined elsewhere in this file).
    1036         500 : static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
    1037             :                                     Intrinsic::ID IID) {
    1038        1000 :   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
    1039        1500 :   Value *Rep = Builder.CreateCall(Intrin,
    1040         500 :                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
    1041         500 :   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
    1042             : }
    1043             : 
                       /// Expand a masked scalar move intrinsic call into extract/select/insert IR.
                       ///
                       /// Bit 0 of argument 3 (the mask) chooses between element 0 of argument 1
                       /// (bit set) and element 0 of argument 2 (bit clear); the chosen scalar is
                       /// inserted into lane 0 of argument 0, whose other lanes are left untouched.
                       /// The mask is ANDed with an 8-bit constant, so it is presumably an i8 --
                       /// confirm against the intrinsic's signature.
                       ///
                       /// \param Builder  IR builder used to emit the replacement instructions.
                       /// \param CI       The intrinsic call being upgraded.
                       /// \returns the value that replaces the call's result.
    1044           8 : static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
    1045           8 :   Value* A = CI.getArgOperand(0);
    1046             :   Value* B = CI.getArgOperand(1);
    1047             :   Value* Src = CI.getArgOperand(2);
    1048             :   Value* Mask = CI.getArgOperand(3);
    1049             : 
                         // Only the lowest mask bit is tested.
    1050          16 :   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
    1051           8 :   Value* Cmp = Builder.CreateIsNotNull(AndNode);
    1052           8 :   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
    1053           8 :   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
    1054           8 :   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
    1055           8 :   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
    1056             : }
    1057             : 
    1058             : 
                       /// Expand a mask-to-vector intrinsic call: argument 0 is converted with
                       /// getX86MaskVec into a vector with as many elements as the call's return
                       /// type, and that vector is sign-extended to the return type. The emitted
                       /// sext instruction is named "vpmovm2".
                       ///
                       /// \param Builder  IR builder used to emit the replacement instructions.
                       /// \param CI       The intrinsic call being upgraded.
                       /// \returns the value that replaces the call's result.
    1059          24 : static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
    1060          24 :   Value* Op = CI.getArgOperand(0);
    1061          24 :   Type* ReturnOp = CI.getType();
    1062             :   unsigned NumElts = CI.getType()->getVectorNumElements();
    1063          24 :   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
    1064          24 :   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
    1065             : }
    1066             : 
    1067             : // Replace intrinsic with unmasked version and a select.
                       //
                       // Name is expected to begin with the 12-character prefix "avx512.mask.",
                       // which is stripped before matching. The remaining name, together with the
                       // bit width of the call's return type and of its scalar element type, picks
                       // the ID of the unmasked intrinsic. If no name pattern matches, the function
                       // returns false and leaves Rep untouched. Otherwise it emits a call to the
                       // unmasked intrinsic with every argument of CI except the last two, passes
                       // the result to EmitX86Select together with the last argument and the
                       // second-to-last argument (presumably mask and passthru; confirm against
                       // EmitX86Select), stores the outcome in Rep and returns true. A matching
                       // name with an unexpected width combination is treated as unreachable.
    1068         736 : static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
    1069             :                                       CallInst &CI, Value *&Rep) {
    1070         736 :   Name = Name.substr(12); // Remove avx512.mask.
    1071             : 
    1072         736 :   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
    1073         736 :   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
    1074             :   Intrinsic::ID IID;
    1075             :   if (Name.startswith("max.p")) {
    1076          12 :     if (VecWidth == 128 && EltWidth == 32)
    1077             :       IID = Intrinsic::x86_sse_max_ps;
    1078           6 :     else if (VecWidth == 128 && EltWidth == 64)
    1079             :       IID = Intrinsic::x86_sse2_max_pd;
    1080           6 :     else if (VecWidth == 256 && EltWidth == 32)
    1081             :       IID = Intrinsic::x86_avx_max_ps_256;
    1082           0 :     else if (VecWidth == 256 && EltWidth == 64)
    1083             :       IID = Intrinsic::x86_avx_max_pd_256;
    1084             :     else
    1085           0 :       llvm_unreachable("Unexpected intrinsic");
    1086             :   } else if (Name.startswith("min.p")) {
    1087          12 :     if (VecWidth == 128 && EltWidth == 32)
    1088             :       IID = Intrinsic::x86_sse_min_ps;
    1089           6 :     else if (VecWidth == 128 && EltWidth == 64)
    1090             :       IID = Intrinsic::x86_sse2_min_pd;
    1091           6 :     else if (VecWidth == 256 && EltWidth == 32)
    1092             :       IID = Intrinsic::x86_avx_min_ps_256;
    1093           0 :     else if (VecWidth == 256 && EltWidth == 64)
    1094             :       IID = Intrinsic::x86_avx_min_pd_256;
    1095             :     else
    1096           0 :       llvm_unreachable("Unexpected intrinsic");
    1097             :   } else if (Name.startswith("pshuf.b.")) {
    1098          28 :     if (VecWidth == 128)
    1099             :       IID = Intrinsic::x86_ssse3_pshuf_b_128;
    1100          24 :     else if (VecWidth == 256)
    1101             :       IID = Intrinsic::x86_avx2_pshuf_b;
    1102          20 :     else if (VecWidth == 512)
    1103             :       IID = Intrinsic::x86_avx512_pshuf_b_512;
    1104             :     else
    1105           0 :       llvm_unreachable("Unexpected intrinsic");
    1106             :   } else if (Name.startswith("pmul.hr.sw.")) {
    1107          12 :     if (VecWidth == 128)
    1108             :       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    1109           8 :     else if (VecWidth == 256)
    1110             :       IID = Intrinsic::x86_avx2_pmul_hr_sw;
    1111           4 :     else if (VecWidth == 512)
    1112             :       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    1113             :     else
    1114           0 :       llvm_unreachable("Unexpected intrinsic");
    1115             :   } else if (Name.startswith("pmulh.w.")) {
    1116          12 :     if (VecWidth == 128)
    1117             :       IID = Intrinsic::x86_sse2_pmulh_w;
    1118           8 :     else if (VecWidth == 256)
    1119             :       IID = Intrinsic::x86_avx2_pmulh_w;
    1120           4 :     else if (VecWidth == 512)
    1121             :       IID = Intrinsic::x86_avx512_pmulh_w_512;
    1122             :     else
    1123           0 :       llvm_unreachable("Unexpected intrinsic");
    1124             :   } else if (Name.startswith("pmulhu.w.")) {
    1125          12 :     if (VecWidth == 128)
    1126             :       IID = Intrinsic::x86_sse2_pmulhu_w;
    1127           8 :     else if (VecWidth == 256)
    1128             :       IID = Intrinsic::x86_avx2_pmulhu_w;
    1129           4 :     else if (VecWidth == 512)
    1130             :       IID = Intrinsic::x86_avx512_pmulhu_w_512;
    1131             :     else
    1132           0 :       llvm_unreachable("Unexpected intrinsic");
    1133             :   } else if (Name.startswith("pmaddw.d.")) {
    1134          12 :     if (VecWidth == 128)
    1135             :       IID = Intrinsic::x86_sse2_pmadd_wd;
    1136           8 :     else if (VecWidth == 256)
    1137             :       IID = Intrinsic::x86_avx2_pmadd_wd;
    1138           4 :     else if (VecWidth == 512)
    1139             :       IID = Intrinsic::x86_avx512_pmaddw_d_512;
    1140             :     else
    1141           0 :       llvm_unreachable("Unexpected intrinsic");
    1142             :   } else if (Name.startswith("pmaddubs.w.")) {
    1143          12 :     if (VecWidth == 128)
    1144             :       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    1145           8 :     else if (VecWidth == 256)
    1146             :       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    1147           4 :     else if (VecWidth == 512)
    1148             :       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    1149             :     else
    1150           0 :       llvm_unreachable("Unexpected intrinsic");
    1151             :   } else if (Name.startswith("packsswb.")) {
    1152          36 :     if (VecWidth == 128)
    1153             :       IID = Intrinsic::x86_sse2_packsswb_128;
    1154          24 :     else if (VecWidth == 256)
    1155             :       IID = Intrinsic::x86_avx2_packsswb;
    1156          12 :     else if (VecWidth == 512)
    1157             :       IID = Intrinsic::x86_avx512_packsswb_512;
    1158             :     else
    1159           0 :       llvm_unreachable("Unexpected intrinsic");
    1160             :   } else if (Name.startswith("packssdw.")) {
    1161          54 :     if (VecWidth == 128)
    1162             :       IID = Intrinsic::x86_sse2_packssdw_128;
    1163          36 :     else if (VecWidth == 256)
    1164             :       IID = Intrinsic::x86_avx2_packssdw;
    1165          18 :     else if (VecWidth == 512)
    1166             :       IID = Intrinsic::x86_avx512_packssdw_512;
    1167             :     else
    1168           0 :       llvm_unreachable("Unexpected intrinsic");
    1169             :   } else if (Name.startswith("packuswb.")) {
    1170          36 :     if (VecWidth == 128)
    1171             :       IID = Intrinsic::x86_sse2_packuswb_128;
    1172          24 :     else if (VecWidth == 256)
    1173             :       IID = Intrinsic::x86_avx2_packuswb;
    1174          12 :     else if (VecWidth == 512)
    1175             :       IID = Intrinsic::x86_avx512_packuswb_512;
    1176             :     else
    1177           0 :       llvm_unreachable("Unexpected intrinsic");
    1178             :   } else if (Name.startswith("packusdw.")) {
    1179          54 :     if (VecWidth == 128)
    1180             :       IID = Intrinsic::x86_sse41_packusdw;
    1181          36 :     else if (VecWidth == 256)
    1182             :       IID = Intrinsic::x86_avx2_packusdw;
    1183          18 :     else if (VecWidth == 512)
    1184             :       IID = Intrinsic::x86_avx512_packusdw_512;
    1185             :     else
    1186           0 :       llvm_unreachable("Unexpected intrinsic");
    1187             :   } else if (Name.startswith("vpermilvar.")) {
    1188          46 :     if (VecWidth == 128 && EltWidth == 32)
    1189             :       IID = Intrinsic::x86_avx_vpermilvar_ps;
    1190          40 :     else if (VecWidth == 128 && EltWidth == 64)
    1191             :       IID = Intrinsic::x86_avx_vpermilvar_pd;
    1192          34 :     else if (VecWidth == 256 && EltWidth == 32)
    1193             :       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    1194          28 :     else if (VecWidth == 256 && EltWidth == 64)
    1195             :       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    1196          22 :     else if (VecWidth == 512 && EltWidth == 32)
    1197             :       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    1198           6 :     else if (VecWidth == 512 && EltWidth == 64)
    1199             :       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    1200             :     else
    1201           0 :       llvm_unreachable("Unexpected intrinsic");
    1202             :   } else if (Name == "cvtpd2dq.256") {
    1203             :     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
    1204             :   } else if (Name == "cvtpd2ps.256") {
    1205             :     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
    1206             :   } else if (Name == "cvttpd2dq.256") {
    1207             :     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
    1208             :   } else if (Name == "cvttps2dq.128") {
    1209             :     IID = Intrinsic::x86_sse2_cvttps2dq;
    1210             :   } else if (Name == "cvttps2dq.256") {
    1211             :     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
    1212             :   } else if (Name.startswith("permvar.")) {
                           // The 32- and 64-bit element variants are further split by whether
                           // the return type is floating point.
    1213             :     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    1214          84 :     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    1215             :       IID = Intrinsic::x86_avx2_permps;
    1216          78 :     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    1217             :       IID = Intrinsic::x86_avx2_permd;
    1218          72 :     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    1219             :       IID = Intrinsic::x86_avx512_permvar_df_256;
    1220          66 :     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    1221             :       IID = Intrinsic::x86_avx512_permvar_di_256;
    1222          60 :     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    1223             :       IID = Intrinsic::x86_avx512_permvar_sf_512;
    1224          54 :     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    1225             :       IID = Intrinsic::x86_avx512_permvar_si_512;
    1226          48 :     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    1227             :       IID = Intrinsic::x86_avx512_permvar_df_512;
    1228          42 :     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    1229             :       IID = Intrinsic::x86_avx512_permvar_di_512;
    1230          36 :     else if (VecWidth == 128 && EltWidth == 16)
    1231             :       IID = Intrinsic::x86_avx512_permvar_hi_128;
    1232          30 :     else if (VecWidth == 256 && EltWidth == 16)
    1233             :       IID = Intrinsic::x86_avx512_permvar_hi_256;
    1234          24 :     else if (VecWidth == 512 && EltWidth == 16)
    1235             :       IID = Intrinsic::x86_avx512_permvar_hi_512;
    1236          18 :     else if (VecWidth == 128 && EltWidth == 8)
    1237             :       IID = Intrinsic::x86_avx512_permvar_qi_128;
    1238          12 :     else if (VecWidth == 256 && EltWidth == 8)
    1239             :       IID = Intrinsic::x86_avx512_permvar_qi_256;
    1240           6 :     else if (VecWidth == 512 && EltWidth == 8)
    1241             :       IID = Intrinsic::x86_avx512_permvar_qi_512;
    1242             :     else
    1243           0 :       llvm_unreachable("Unexpected intrinsic");
    1244             :   } else if (Name.startswith("dbpsadbw.")) {
    1245          18 :     if (VecWidth == 128)
    1246             :       IID = Intrinsic::x86_avx512_dbpsadbw_128;
    1247          12 :     else if (VecWidth == 256)
    1248             :       IID = Intrinsic::x86_avx512_dbpsadbw_256;
    1249           6 :     else if (VecWidth == 512)
    1250             :       IID = Intrinsic::x86_avx512_dbpsadbw_512;
    1251             :     else
    1252           0 :       llvm_unreachable("Unexpected intrinsic");
    1253             :   } else if (Name.startswith("vpshld.")) {
                           // Name[7] is the character right after the 7-character "vpshld."
                           // prefix; it selects the q/d/w variant of the intrinsic.
    1254          52 :     if (VecWidth == 128 && Name[7] == 'q')
    1255             :       IID = Intrinsic::x86_avx512_vpshld_q_128;
    1256          44 :     else if (VecWidth == 128 && Name[7] == 'd')
    1257             :       IID = Intrinsic::x86_avx512_vpshld_d_128;
    1258          32 :     else if (VecWidth == 128 && Name[7] == 'w')
    1259             :       IID = Intrinsic::x86_avx512_vpshld_w_128;
    1260          36 :     else if (VecWidth == 256 && Name[7] == 'q')
    1261             :       IID = Intrinsic::x86_avx512_vpshld_q_256;
    1262          28 :     else if (VecWidth == 256 && Name[7] == 'd')
    1263             :       IID = Intrinsic::x86_avx512_vpshld_d_256;
    1264          20 :     else if (VecWidth == 256 && Name[7] == 'w')
    1265             :       IID = Intrinsic::x86_avx512_vpshld_w_256;
    1266          24 :     else if (VecWidth == 512 && Name[7] == 'q')
    1267             :       IID = Intrinsic::x86_avx512_vpshld_q_512;
    1268          16 :     else if (VecWidth == 512 && Name[7] == 'd')
    1269             :       IID = Intrinsic::x86_avx512_vpshld_d_512;
    1270           8 :     else if (VecWidth == 512 && Name[7] == 'w')
    1271             :       IID = Intrinsic::x86_avx512_vpshld_w_512;
    1272             :     else
    1273           0 :       llvm_unreachable("Unexpected intrinsic");
    1274             :   } else if (Name.startswith("vpshrd.")) {
                           // Name[7] is the character right after the 7-character "vpshrd."
                           // prefix; it selects the q/d/w variant of the intrinsic.
    1275          52 :     if (VecWidth == 128 && Name[7] == 'q')
    1276             :       IID = Intrinsic::x86_avx512_vpshrd_q_128;
    1277          44 :     else if (VecWidth == 128 && Name[7] == 'd')
    1278             :       IID = Intrinsic::x86_avx512_vpshrd_d_128;
    1279          32 :     else if (VecWidth == 128 && Name[7] == 'w')
    1280             :       IID = Intrinsic::x86_avx512_vpshrd_w_128;
    1281          36 :     else if (VecWidth == 256 && Name[7] == 'q')
    1282             :       IID = Intrinsic::x86_avx512_vpshrd_q_256;
    1283          28 :     else if (VecWidth == 256 && Name[7] == 'd')
    1284             :       IID = Intrinsic::x86_avx512_vpshrd_d_256;
    1285          20 :     else if (VecWidth == 256 && Name[7] == 'w')
    1286             :       IID = Intrinsic::x86_avx512_vpshrd_w_256;
    1287          24 :     else if (VecWidth == 512 && Name[7] == 'q')
    1288             :       IID = Intrinsic::x86_avx512_vpshrd_q_512;
    1289          16 :     else if (VecWidth == 512 && Name[7] == 'd')
    1290             :       IID = Intrinsic::x86_avx512_vpshrd_d_512;
    1291           8 :     else if (VecWidth == 512 && Name[7] == 'w')
    1292             :       IID = Intrinsic::x86_avx512_vpshrd_w_512;
    1293             :     else
    1294           0 :       llvm_unreachable("Unexpected intrinsic");
    1295             :   } else if (Name.startswith("prorv.")) {
    1296          64 :     if (VecWidth == 128 && EltWidth == 32)
    1297             :       IID = Intrinsic::x86_avx512_prorv_d_128;
    1298          58 :     else if (VecWidth == 256 && EltWidth == 32)
    1299             :       IID = Intrinsic::x86_avx512_prorv_d_256;
    1300          52 :     else if (VecWidth == 512 && EltWidth == 32)
    1301             :       IID = Intrinsic::x86_avx512_prorv_d_512;
    1302          32 :     else if (VecWidth == 128 && EltWidth == 64)
    1303             :       IID = Intrinsic::x86_avx512_prorv_q_128;
    1304          26 :     else if (VecWidth == 256 && EltWidth == 64)
    1305             :       IID = Intrinsic::x86_avx512_prorv_q_256;
    1306          20 :     else if (VecWidth == 512 && EltWidth == 64)
    1307             :       IID = Intrinsic::x86_avx512_prorv_q_512;
    1308             :     else
    1309           0 :       llvm_unreachable("Unexpected intrinsic");
    1310             :   } else if (Name.startswith("prolv.")) {
    1311          64 :     if (VecWidth == 128 && EltWidth == 32)
    1312             :       IID = Intrinsic::x86_avx512_prolv_d_128;
    1313          58 :     else if (VecWidth == 256 && EltWidth == 32)
    1314             :       IID = Intrinsic::x86_avx512_prolv_d_256;
    1315          52 :     else if (VecWidth == 512 && EltWidth == 32)
    1316             :       IID = Intrinsic::x86_avx512_prolv_d_512;
    1317          32 :     else if (VecWidth == 128 && EltWidth == 64)
    1318             :       IID = Intrinsic::x86_avx512_prolv_q_128;
    1319          26 :     else if (VecWidth == 256 && EltWidth == 64)
    1320             :       IID = Intrinsic::x86_avx512_prolv_q_256;
    1321          20 :     else if (VecWidth == 512 && EltWidth == 64)
    1322             :       IID = Intrinsic::x86_avx512_prolv_q_512;
    1323             :     else
    1324           0 :       llvm_unreachable("Unexpected intrinsic");
    1325             :   } else if (Name.startswith("pror.")) {
    1326          36 :     if (VecWidth == 128 && EltWidth == 32)
    1327             :       IID = Intrinsic::x86_avx512_pror_d_128;
    1328          30 :     else if (VecWidth == 256 && EltWidth == 32)
    1329             :       IID = Intrinsic::x86_avx512_pror_d_256;
    1330          24 :     else if (VecWidth == 512 && EltWidth == 32)
    1331             :       IID = Intrinsic::x86_avx512_pror_d_512;
    1332          18 :     else if (VecWidth == 128 && EltWidth == 64)
    1333             :       IID = Intrinsic::x86_avx512_pror_q_128;
    1334          12 :     else if (VecWidth == 256 && EltWidth == 64)
    1335             :       IID = Intrinsic::x86_avx512_pror_q_256;
    1336           6 :     else if (VecWidth == 512 && EltWidth == 64)
    1337             :       IID = Intrinsic::x86_avx512_pror_q_512;
    1338             :     else
    1339           0 :       llvm_unreachable("Unexpected intrinsic");
    1340             :   } else if (Name.startswith("prol.")) {
    1341          36 :     if (VecWidth == 128 && EltWidth == 32)
    1342             :       IID = Intrinsic::x86_avx512_prol_d_128;
    1343          30 :     else if (VecWidth == 256 && EltWidth == 32)
    1344             :       IID = Intrinsic::x86_avx512_prol_d_256;
    1345          24 :     else if (VecWidth == 512 && EltWidth == 32)
    1346             :       IID = Intrinsic::x86_avx512_prol_d_512;
    1347          18 :     else if (VecWidth == 128 && EltWidth == 64)
    1348             :       IID = Intrinsic::x86_avx512_prol_q_128;
    1349          12 :     else if (VecWidth == 256 && EltWidth == 64)
    1350             :       IID = Intrinsic::x86_avx512_prol_q_256;
    1351           6 :     else if (VecWidth == 512 && EltWidth == 64)
    1352             :       IID = Intrinsic::x86_avx512_prol_q_512;
    1353             :     else
    1354           0 :       llvm_unreachable("Unexpected intrinsic");
    1355             :   } else
                           // Name is not handled by this helper; Rep is left untouched.
    1356             :     return false;
    1357             : 
                         // Forward every argument except the last two to the unmasked intrinsic.
    1358        1472 :   SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
    1359        2208 :                                CI.arg_operands().end());
    1360             :   Args.pop_back();
    1361             :   Args.pop_back();
    1362        2208 :   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
    1363             :                            Args);
                         // The two dropped arguments are handed to EmitX86Select: the last one
                         // first, the second-to-last one after the unmasked result.
    1364             :   unsigned NumArgs = CI.getNumArgOperands();
    1365        2208 :   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
    1366             :                       CI.getArgOperand(NumArgs - 2));
    1367             :   return true;
    1368             : }
    1369             : 
    1370             : /// Upgrade comment in call to inline asm that represents an objc retain release
    1371             : /// marker.
                       ///
                       /// The string is rewritten in place only when all three conditions hold: it
                       /// begins with "mov\tfp", it contains "objc_retainAutoreleaseReturnValue",
                       /// and it contains "# marker". In that case the single character at the start
                       /// of the first "# marker" occurrence (the '#') is replaced with ';'.
                       /// Otherwise the string is left unchanged.
                       ///
                       /// \param AsmStr  Inline asm string to inspect and possibly modify; it is
                       ///                dereferenced unconditionally, so it must not be null.
    1372          70 : void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
    1373             :   size_t Pos;
    1374           1 :   if (AsmStr->find("mov\tfp") == 0 &&
    1375          71 :       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
    1376             :       (Pos = AsmStr->find("# marker")) != std::string::npos) {
                           // Swap the leading '#' of the marker for ';', keeping the rest as is.
    1377           1 :     AsmStr->replace(Pos, 1, ";");
    1378             :   }
    1379          70 :   return;
    1380             : }
    1381             : 
    1382             : /// Upgrade a call to an old intrinsic. All argument and return casting must be
    1383             : /// provided to seamlessly integrate with existing context.
    1384       10875 : void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
    1385       10875 :   Function *F = CI->getCalledFunction();
    1386       10875 :   LLVMContext &C = CI->getContext();
    1387             :   IRBuilder<> Builder(C);
    1388       10875 :   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
    1389             : 
    1390             :   assert(F && "Intrinsic call is not direct?");
    1391             : 
    1392       10875 :   if (!NewFn) {
    1393             :     // Get the Function's name.
    1394        9594 :     StringRef Name = F->getName();
    1395             : 
    1396             :     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    1397        9594 :     Name = Name.substr(5);
    1398             : 
    1399             :     bool IsX86 = Name.startswith("x86.");
    1400             :     if (IsX86)
    1401        9562 :       Name = Name.substr(4);
    1402             :     bool IsNVVM = Name.startswith("nvvm.");
    1403             :     if (IsNVVM)
    1404          26 :       Name = Name.substr(5);
    1405             : 
    1406        9594 :     if (IsX86 && Name.startswith("sse4a.movnt.")) {
    1407          14 :       Module *M = F->getParent();
    1408             :       SmallVector<Metadata *, 1> Elts;
    1409          14 :       Elts.push_back(
    1410          42 :           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    1411             :       MDNode *Node = MDNode::get(C, Elts);
    1412             : 
    1413          14 :       Value *Arg0 = CI->getArgOperand(0);
    1414             :       Value *Arg1 = CI->getArgOperand(1);
    1415             : 
    1416             :       // Nontemporal (unaligned) store of the 0'th element of the float/double
    1417             :       // vector.
    1418          14 :       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
    1419             :       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
    1420          14 :       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
    1421             :       Value *Extract =
    1422          14 :           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
    1423             : 
    1424             :       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
    1425          28 :       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
    1426             : 
    1427             :       // Remove intrinsic.
    1428          14 :       CI->eraseFromParent();
    1429             :       return;
    1430             :     }
    1431             : 
    1432        9580 :     if (IsX86 && (Name.startswith("avx.movnt.") ||
    1433             :                   Name.startswith("avx512.storent."))) {
    1434          18 :       Module *M = F->getParent();
    1435             :       SmallVector<Metadata *, 1> Elts;
    1436          18 :       Elts.push_back(
    1437          54 :           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    1438             :       MDNode *Node = MDNode::get(C, Elts);
    1439             : 
    1440          18 :       Value *Arg0 = CI->getArgOperand(0);
    1441             :       Value *Arg1 = CI->getArgOperand(1);
    1442             : 
    1443             :       // Convert the type of the pointer to a pointer to the stored type.
    1444          18 :       Value *BC = Builder.CreateBitCast(Arg0,
    1445          18 :                                         PointerType::getUnqual(Arg1->getType()),
    1446             :                                         "cast");
    1447          18 :       VectorType *VTy = cast<VectorType>(Arg1->getType());
    1448          18 :       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
    1449             :                                                  VTy->getBitWidth() / 8);
    1450          36 :       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
    1451             : 
    1452             :       // Remove intrinsic.
    1453          18 :       CI->eraseFromParent();
    1454             :       return;
    1455             :     }
    1456             : 
    1457        9562 :     if (IsX86 && Name == "sse2.storel.dq") {
    1458           6 :       Value *Arg0 = CI->getArgOperand(0);
    1459             :       Value *Arg1 = CI->getArgOperand(1);
    1460             : 
    1461           6 :       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
    1462           6 :       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    1463           6 :       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    1464           6 :       Value *BC = Builder.CreateBitCast(Arg0,
    1465           6 :                                         PointerType::getUnqual(Elt->getType()),
    1466             :                                         "cast");
    1467             :       Builder.CreateAlignedStore(Elt, BC, 1);
    1468             : 
    1469             :       // Remove intrinsic.
    1470           6 :       CI->eraseFromParent();
    1471           6 :       return;
    1472             :     }
    1473             : 
    1474        9556 :     if (IsX86 && (Name.startswith("sse.storeu.") ||
    1475             :                   Name.startswith("sse2.storeu.") ||
    1476             :                   Name.startswith("avx.storeu."))) {
    1477          85 :       Value *Arg0 = CI->getArgOperand(0);
    1478             :       Value *Arg1 = CI->getArgOperand(1);
    1479             : 
    1480          85 :       Arg0 = Builder.CreateBitCast(Arg0,
    1481          85 :                                    PointerType::getUnqual(Arg1->getType()),
    1482             :                                    "cast");
    1483             :       Builder.CreateAlignedStore(Arg1, Arg0, 1);
    1484             : 
    1485             :       // Remove intrinsic.
    1486          85 :       CI->eraseFromParent();
    1487          85 :       return;
    1488             :     }
    1489             : 
    1490        9471 :     if (IsX86 && Name == "avx512.mask.store.ss") {
    1491           0 :       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
    1492           0 :       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
    1493             :                          Mask, false);
    1494             : 
    1495             :       // Remove intrinsic.
    1496           0 :       CI->eraseFromParent();
    1497           0 :       return;
    1498             :     }
    1499             : 
    1500        9471 :     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
    1501             :       // "avx512.mask.storeu." or "avx512.mask.store."
    1502         120 :       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
    1503         240 :       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
    1504             :                          CI->getArgOperand(2), Aligned);
    1505             : 
    1506             :       // Remove intrinsic.
    1507         120 :       CI->eraseFromParent();
    1508         120 :       return;
    1509             :     }
    1510             : 
    1511             :     Value *Rep;
    1512             :     // Upgrade packed integer vector compare intrinsics to compare instructions.
    1513        9351 :     if (IsX86 && (Name.startswith("sse2.pcmp") ||
    1514             :                   Name.startswith("avx2.pcmp"))) {
    1515             :       // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
    1516           1 :       bool CmpEq = Name[9] == 'e';
    1517           2 :       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
    1518             :                                CI->getArgOperand(0), CI->getArgOperand(1));
    1519           2 :       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    1520        9350 :     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
    1521          12 :       Type *ExtTy = Type::getInt32Ty(C);
    1522          24 :       if (CI->getOperand(0)->getType()->isIntegerTy(8))
    1523           6 :         ExtTy = Type::getInt64Ty(C);
    1524          12 :       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
    1525          12 :                          ExtTy->getPrimitiveSizeInBits();
    1526          12 :       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
    1527          12 :       Rep = Builder.CreateVectorSplat(NumElts, Rep);
    1528        9338 :     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
    1529             :                          Name == "sse2.sqrt.sd")) {
    1530         138 :       Value *Vec = CI->getArgOperand(0);
    1531         138 :       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    1532         276 :       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
    1533         414 :                                                  Intrinsic::sqrt, Elt0->getType());
    1534         138 :       Elt0 = Builder.CreateCall(Intr, Elt0);
    1535         138 :       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
    1536        9200 :     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
    1537             :                          Name.startswith("sse2.sqrt.p") ||
    1538             :                          Name.startswith("sse.sqrt.p"))) {
    1539         544 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
    1540             :                                                          Intrinsic::sqrt,
    1541         272 :                                                          CI->getType()),
    1542         136 :                                {CI->getArgOperand(0)});
    1543        9064 :     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
    1544          80 :       if (CI->getNumArgOperands() == 4 &&
    1545          24 :           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
    1546             :            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
    1547          24 :         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
    1548             :                                             : Intrinsic::x86_avx512_sqrt_pd_512;
    1549             : 
    1550          12 :         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
    1551          36 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
    1552             :                                                            IID), Args);
    1553             :       } else {
    1554          64 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
    1555             :                                                            Intrinsic::sqrt,
    1556          32 :                                                            CI->getType()),
    1557             :                                  {CI->getArgOperand(0)});
    1558             :       }
    1559          84 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1560             :                           CI->getArgOperand(1));
    1561        9036 :     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
    1562             :                          Name.startswith("avx512.ptestnm"))) {
    1563         100 :       Value *Op0 = CI->getArgOperand(0);
    1564             :       Value *Op1 = CI->getArgOperand(1);
    1565             :       Value *Mask = CI->getArgOperand(2);
    1566         100 :       Rep = Builder.CreateAnd(Op0, Op1);
    1567         100 :       llvm::Type *Ty = Op0->getType();
    1568         100 :       Value *Zero = llvm::Constant::getNullValue(Ty);
    1569             :       ICmpInst::Predicate Pred =
    1570             :         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
    1571         100 :       Rep = Builder.CreateICmp(Pred, Rep, Zero);
    1572         100 :       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
    1573        8936 :     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
    1574             :       unsigned NumElts =
    1575         144 :           CI->getArgOperand(1)->getType()->getVectorNumElements();
    1576          72 :       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
    1577          72 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1578             :                           CI->getArgOperand(1));
    1579        8864 :     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
    1580           6 :       unsigned NumElts = CI->getType()->getScalarSizeInBits();
    1581          12 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
    1582           6 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
    1583             :       uint32_t Indices[64];
    1584         454 :       for (unsigned i = 0; i != NumElts; ++i)
    1585         224 :         Indices[i] = i;
    1586             : 
    1587             :       // First extract half of each vector. This gives better codegen than
    1588             :       // doing it in a single shuffle.
    1589           6 :       LHS = Builder.CreateShuffleVector(LHS, LHS,
    1590           6 :                                         makeArrayRef(Indices, NumElts / 2));
    1591           6 :       RHS = Builder.CreateShuffleVector(RHS, RHS,
    1592             :                                         makeArrayRef(Indices, NumElts / 2));
    1593             :       // Concat the vectors.
    1594             :       // NOTE: Operands have to be swapped to match intrinsic definition.
    1595           6 :       Rep = Builder.CreateShuffleVector(RHS, LHS,
    1596             :                                         makeArrayRef(Indices, NumElts));
    1597          12 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1598        8858 :     } else if (IsX86 && Name == "avx512.kand.w") {
    1599           8 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1600           4 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    1601           4 :       Rep = Builder.CreateAnd(LHS, RHS);
    1602           8 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1603        8854 :     } else if (IsX86 && Name == "avx512.kandn.w") {
    1604           8 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1605           4 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    1606           4 :       LHS = Builder.CreateNot(LHS);
    1607           4 :       Rep = Builder.CreateAnd(LHS, RHS);
    1608           8 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1609        8850 :     } else if (IsX86 && Name == "avx512.kor.w") {
    1610           8 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1611           4 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    1612           4 :       Rep = Builder.CreateOr(LHS, RHS);
    1613           8 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1614        8846 :     } else if (IsX86 && Name == "avx512.kxor.w") {
    1615           8 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1616           4 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    1617           4 :       Rep = Builder.CreateXor(LHS, RHS);
    1618           8 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1619        8842 :     } else if (IsX86 && Name == "avx512.kxnor.w") {
    1620           8 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1621           4 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    1622           4 :       LHS = Builder.CreateNot(LHS);
    1623           4 :       Rep = Builder.CreateXor(LHS, RHS);
    1624           8 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1625        8838 :     } else if (IsX86 && Name == "avx512.knot.w") {
    1626           4 :       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1627           2 :       Rep = Builder.CreateNot(Rep);
    1628           4 :       Rep = Builder.CreateBitCast(Rep, CI->getType());
    1629        8836 :     } else if (IsX86 &&
    1630             :                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
    1631           8 :       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    1632           4 :       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    1633           4 :       Rep = Builder.CreateOr(LHS, RHS);
    1634          12 :       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    1635             :       Value *C;
    1636           8 :       if (Name[14] == 'c')
    1637           0 :         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    1638             :       else
    1639           8 :         C = ConstantInt::getNullValue(Builder.getInt16Ty());
    1640           8 :       Rep = Builder.CreateICmpEQ(Rep, C);
    1641          12 :       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
    1642        8832 :     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
    1643          17 :       Type *I32Ty = Type::getInt32Ty(C);
    1644          34 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1645          34 :                                                  ConstantInt::get(I32Ty, 0));
    1646          17 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1647          34 :                                                  ConstantInt::get(I32Ty, 0));
    1648          34 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1649             :                                         Builder.CreateFAdd(Elt0, Elt1),
    1650          17 :                                         ConstantInt::get(I32Ty, 0));
    1651        8815 :     } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
    1652          23 :       Type *I32Ty = Type::getInt32Ty(C);
    1653          46 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1654          46 :                                                  ConstantInt::get(I32Ty, 0));
    1655          23 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1656          46 :                                                  ConstantInt::get(I32Ty, 0));
    1657          46 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1658             :                                         Builder.CreateFSub(Elt0, Elt1),
    1659          23 :                                         ConstantInt::get(I32Ty, 0));
    1660        8792 :     } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
    1661          23 :       Type *I32Ty = Type::getInt32Ty(C);
    1662          46 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1663          46 :                                                  ConstantInt::get(I32Ty, 0));
    1664          23 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1665          46 :                                                  ConstantInt::get(I32Ty, 0));
    1666          46 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1667             :                                         Builder.CreateFMul(Elt0, Elt1),
    1668          23 :                                         ConstantInt::get(I32Ty, 0));
    1669        8769 :     } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
    1670          16 :       Type *I32Ty = Type::getInt32Ty(C);
    1671          32 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1672          32 :                                                  ConstantInt::get(I32Ty, 0));
    1673          16 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1674          32 :                                                  ConstantInt::get(I32Ty, 0));
    1675          32 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1676             :                                         Builder.CreateFDiv(Elt0, Elt1),
    1677          16 :                                         ConstantInt::get(I32Ty, 0));
    1678        8753 :     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
    1679             :       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
    1680             :       bool CmpEq = Name[16] == 'e';
    1681          96 :       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    1682        8657 :     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
    1683          48 :       Type *OpTy = CI->getArgOperand(0)->getType();
    1684          24 :       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    1685          24 :       unsigned EltWidth = OpTy->getScalarSizeInBits();
    1686             :       Intrinsic::ID IID;
    1687          24 :       if (VecWidth == 128 && EltWidth == 32)
    1688             :         IID = Intrinsic::x86_avx512_fpclass_ps_128;
    1689          20 :       else if (VecWidth == 256 && EltWidth == 32)
    1690             :         IID = Intrinsic::x86_avx512_fpclass_ps_256;
    1691          16 :       else if (VecWidth == 512 && EltWidth == 32)
    1692             :         IID = Intrinsic::x86_avx512_fpclass_ps_512;
    1693          12 :       else if (VecWidth == 128 && EltWidth == 64)
    1694             :         IID = Intrinsic::x86_avx512_fpclass_pd_128;
    1695           8 :       else if (VecWidth == 256 && EltWidth == 64)
    1696             :         IID = Intrinsic::x86_avx512_fpclass_pd_256;
    1697           4 :       else if (VecWidth == 512 && EltWidth == 64)
    1698             :         IID = Intrinsic::x86_avx512_fpclass_pd_512;
    1699             :       else
    1700           0 :         llvm_unreachable("Unexpected intrinsic");
    1701             : 
    1702          96 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    1703             :                                { CI->getOperand(0), CI->getArgOperand(1) });
    1704          24 :       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    1705        8633 :     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
    1706          24 :       Type *OpTy = CI->getArgOperand(0)->getType();
    1707          12 :       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    1708          12 :       unsigned EltWidth = OpTy->getScalarSizeInBits();
    1709             :       Intrinsic::ID IID;
    1710          12 :       if (VecWidth == 128 && EltWidth == 32)
    1711             :         IID = Intrinsic::x86_avx512_cmp_ps_128;
    1712          10 :       else if (VecWidth == 256 && EltWidth == 32)
    1713             :         IID = Intrinsic::x86_avx512_cmp_ps_256;
    1714           8 :       else if (VecWidth == 512 && EltWidth == 32)
    1715             :         IID = Intrinsic::x86_avx512_cmp_ps_512;
    1716           6 :       else if (VecWidth == 128 && EltWidth == 64)
    1717             :         IID = Intrinsic::x86_avx512_cmp_pd_128;
    1718           4 :       else if (VecWidth == 256 && EltWidth == 64)
    1719             :         IID = Intrinsic::x86_avx512_cmp_pd_256;
    1720           2 :       else if (VecWidth == 512 && EltWidth == 64)
    1721             :         IID = Intrinsic::x86_avx512_cmp_pd_512;
    1722             :       else
    1723           0 :         llvm_unreachable("Unexpected intrinsic");
    1724             : 
    1725             :       SmallVector<Value *, 4> Args;
    1726          12 :       Args.push_back(CI->getArgOperand(0));
    1727          12 :       Args.push_back(CI->getArgOperand(1));
    1728          12 :       Args.push_back(CI->getArgOperand(2));
    1729          12 :       if (CI->getNumArgOperands() == 5)
    1730           4 :         Args.push_back(CI->getArgOperand(4));
    1731             : 
    1732          24 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    1733             :                                Args);
    1734          12 :       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
    1735        9005 :     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
    1736             :                Name[16] != 'p') {
    1737             :       // Integer compare intrinsics.
    1738         768 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1739         384 :       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    1740        8237 :     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
    1741         768 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1742         384 :       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    1743        7853 :     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
    1744             :                          Name.startswith("avx512.cvtw2mask.") ||
    1745             :                          Name.startswith("avx512.cvtd2mask.") ||
    1746             :                          Name.startswith("avx512.cvtq2mask."))) {
    1747          26 :       Value *Op = CI->getArgOperand(0);
    1748          26 :       Value *Zero = llvm::Constant::getNullValue(Op->getType());
    1749          26 :       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
    1750          26 :       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    1751        7827 :     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
    1752             :                         Name == "ssse3.pabs.w.128" ||
    1753             :                         Name == "ssse3.pabs.d.128" ||
    1754             :                         Name.startswith("avx2.pabs") ||
    1755             :                         Name.startswith("avx512.mask.pabs"))) {
    1756         246 :       Rep = upgradeAbs(Builder, *CI);
    1757        7581 :     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
    1758             :                          Name == "sse2.pmaxs.w" ||
    1759             :                          Name == "sse41.pmaxsd" ||
    1760             :                          Name.startswith("avx2.pmaxs") ||
    1761             :                          Name.startswith("avx512.mask.pmaxs"))) {
    1762         261 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    1763        7320 :     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
    1764             :                          Name == "sse41.pmaxuw" ||
    1765             :                          Name == "sse41.pmaxud" ||
    1766             :                          Name.startswith("avx2.pmaxu") ||
    1767             :                          Name.startswith("avx512.mask.pmaxu"))) {
    1768         298 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    1769        7022 :     } else if (IsX86 && (Name == "sse41.pminsb" ||
    1770             :                          Name == "sse2.pmins.w" ||
    1771             :                          Name == "sse41.pminsd" ||
    1772             :                          Name.startswith("avx2.pmins") ||
    1773             :                          Name.startswith("avx512.mask.pmins"))) {
    1774         257 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    1775        6765 :     } else if (IsX86 && (Name == "sse2.pminu.b" ||
    1776             :                          Name == "sse41.pminuw" ||
    1777             :                          Name == "sse41.pminud" ||
    1778             :                          Name.startswith("avx2.pminu") ||
    1779             :                          Name.startswith("avx512.mask.pminu"))) {
    1780         258 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    1781        6507 :     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
    1782             :                          Name == "avx2.pmulu.dq" ||
    1783             :                          Name == "avx512.pmulu.dq.512" ||
    1784             :                          Name.startswith("avx512.mask.pmulu.dq."))) {
    1785         176 :       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    1786        6331 :     } else if (IsX86 && (Name == "sse41.pmuldq" ||
    1787             :                          Name == "avx2.pmul.dq" ||
    1788             :                          Name == "avx512.pmul.dq.512" ||
    1789             :                          Name.startswith("avx512.mask.pmul.dq."))) {
    1790         146 :       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
    1791        6185 :     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
    1792             :                          Name == "sse2.cvtsi2sd" ||
    1793             :                          Name == "sse.cvtsi642ss" ||
    1794             :                          Name == "sse2.cvtsi642sd")) {
    1795         114 :       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
    1796             :                                  CI->getType()->getVectorElementType());
    1797          38 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    1798        6147 :     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
    1799           6 :       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
    1800             :                                  CI->getType()->getVectorElementType());
    1801           2 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    1802        6145 :     } else if (IsX86 && Name == "sse2.cvtss2sd") {
    1803          42 :       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
    1804          42 :       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
    1805          21 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    1806        6124 :     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
    1807             :                          Name == "sse2.cvtdq2ps" ||
    1808             :                          Name == "avx.cvtdq2.pd.256" ||
    1809             :                          Name == "avx.cvtdq2.ps.256" ||
    1810             :                          Name.startswith("avx512.mask.cvtdq2pd.") ||
    1811             :                          Name.startswith("avx512.mask.cvtudq2pd.") ||
    1812             :                          Name == "avx512.mask.cvtdq2ps.128" ||
    1813             :                          Name == "avx512.mask.cvtdq2ps.256" ||
    1814             :                          Name == "avx512.mask.cvtudq2ps.128" ||
    1815             :                          Name == "avx512.mask.cvtudq2ps.256" ||
    1816             :                          Name == "avx512.mask.cvtqq2pd.128" ||
    1817             :                          Name == "avx512.mask.cvtqq2pd.256" ||
    1818             :                          Name == "avx512.mask.cvtuqq2pd.128" ||
    1819             :                          Name == "avx512.mask.cvtuqq2pd.256" ||
    1820             :                          Name == "sse2.cvtps2pd" ||
    1821             :                          Name == "avx.cvt.ps2.pd.256" ||
    1822             :                          Name == "avx512.mask.cvtps2pd.128" ||
    1823             :                          Name == "avx512.mask.cvtps2pd.256")) {
    1824         115 :       Type *DstTy = CI->getType();
    1825         230 :       Rep = CI->getArgOperand(0);
    1826             : 
    1827             :       unsigned NumDstElts = DstTy->getVectorNumElements();
    1828         230 :       if (NumDstElts < Rep->getType()->getVectorNumElements()) {
    1829             :         assert(NumDstElts == 2 && "Unexpected vector size");
    1830          32 :         uint32_t ShuffleMask[2] = { 0, 1 };
    1831          32 :         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
    1832             :       }
    1833             : 
    1834         115 :       bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
    1835         115 :       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
    1836         115 :       if (IsPS2PD)
    1837          44 :         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    1838          93 :       else if (IsUnsigned)
    1839          56 :         Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
    1840             :       else
    1841         130 :         Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
    1842             : 
    1843         230 :       if (CI->getNumArgOperands() == 3)
    1844         128 :         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1845             :                             CI->getArgOperand(1));
    1846        6009 :     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
    1847         216 :       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
    1848             :                               CI->getArgOperand(1), CI->getArgOperand(2),
    1849             :                               /*Aligned*/false);
    1850        5901 :     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
    1851         144 :       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
    1852             :                               CI->getArgOperand(1),CI->getArgOperand(2),
    1853             :                               /*Aligned*/true);
    1854        5829 :     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
    1855         110 :       Type *ResultTy = CI->getType();
    1856         110 :       Type *PtrTy = ResultTy->getVectorElementType();
    1857             : 
    1858             :       // Cast the pointer to element type.
    1859         220 :       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
    1860             :                                          llvm::PointerType::getUnqual(PtrTy));
    1861             : 
    1862         110 :       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
    1863         110 :                                      ResultTy->getVectorNumElements());
    1864             : 
    1865         220 :       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
    1866             :                                                 Intrinsic::masked_expandload,
    1867         110 :                                                 ResultTy);
    1868         330 :       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    1869        5719 :     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
    1870         144 :       Type *ResultTy = CI->getArgOperand(1)->getType();
    1871          72 :       Type *PtrTy = ResultTy->getVectorElementType();
    1872             : 
    1873             :       // Cast the pointer to element type.
    1874          72 :       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
    1875             :                                          llvm::PointerType::getUnqual(PtrTy));
    1876             : 
    1877          72 :       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
    1878          72 :                                      ResultTy->getVectorNumElements());
    1879             : 
    1880         144 :       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
    1881             :                                                 Intrinsic::masked_compressstore,
    1882          72 :                                                 ResultTy);
    1883         144 :       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    1884        5647 :     } else if (IsX86 && Name.startswith("xop.vpcom")) {
    1885             :       Intrinsic::ID intID;
    1886             :       if (Name.endswith("ub"))
    1887             :         intID = Intrinsic::x86_xop_vpcomub;
    1888             :       else if (Name.endswith("uw"))
    1889             :         intID = Intrinsic::x86_xop_vpcomuw;
    1890             :       else if (Name.endswith("ud"))
    1891             :         intID = Intrinsic::x86_xop_vpcomud;
    1892             :       else if (Name.endswith("uq"))
    1893             :         intID = Intrinsic::x86_xop_vpcomuq;
    1894             :       else if (Name.endswith("b"))
    1895             :         intID = Intrinsic::x86_xop_vpcomb;
    1896             :       else if (Name.endswith("w"))
    1897             :         intID = Intrinsic::x86_xop_vpcomw;
    1898             :       else if (Name.endswith("d"))
    1899             :         intID = Intrinsic::x86_xop_vpcomd;
    1900             :       else if (Name.endswith("q"))
    1901             :         intID = Intrinsic::x86_xop_vpcomq;
    1902             :       else
    1903           0 :         llvm_unreachable("Unknown suffix");
    1904             : 
    1905          81 :       Name = Name.substr(9); // strip off "xop.vpcom"
    1906             :       unsigned Imm;
    1907             :       if (Name.startswith("lt"))
    1908             :         Imm = 0;
    1909             :       else if (Name.startswith("le"))
    1910             :         Imm = 1;
    1911             :       else if (Name.startswith("gt"))
    1912             :         Imm = 2;
    1913             :       else if (Name.startswith("ge"))
    1914             :         Imm = 3;
    1915             :       else if (Name.startswith("eq"))
    1916             :         Imm = 4;
    1917             :       else if (Name.startswith("ne"))
    1918             :         Imm = 5;
    1919             :       else if (Name.startswith("false"))
    1920             :         Imm = 6;
    1921             :       else if (Name.startswith("true"))
    1922             :         Imm = 7;
    1923             :       else
    1924           0 :         llvm_unreachable("Unknown condition");
    1925             : 
    1926          81 :       Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
    1927          81 :       Rep =
    1928         405 :           Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
    1929          81 :                                      Builder.getInt8(Imm)});
    1930        5566 :     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
    1931          12 :       Value *Sel = CI->getArgOperand(2);
    1932          12 :       Value *NotSel = Builder.CreateNot(Sel);
    1933          12 :       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
    1934          12 :       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
    1935          12 :       Rep = Builder.CreateOr(Sel0, Sel1);
    1936        5554 :     } else if (IsX86 && Name == "sse42.crc32.64.8") {
    1937          84 :       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
    1938          42 :                                                Intrinsic::x86_sse42_crc32_32_8);
    1939          84 :       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    1940         126 :       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
    1941          84 :       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    1942        5512 :     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
    1943             :                          Name.startswith("avx512.vbroadcast.s"))) {
    1944             :       // Replace broadcasts with a series of insertelements.
    1945           7 :       Type *VecTy = CI->getType();
    1946           7 :       Type *EltTy = VecTy->getVectorElementType();
    1947             :       unsigned EltNum = VecTy->getVectorNumElements();
    1948          14 :       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
    1949           7 :                                           EltTy->getPointerTo());
    1950           7 :       Value *Load = Builder.CreateLoad(EltTy, Cast);
    1951           7 :       Type *I32Ty = Type::getInt32Ty(C);
    1952           7 :       Rep = UndefValue::get(VecTy);
    1953         135 :       for (unsigned I = 0; I < EltNum; ++I)
    1954          64 :         Rep = Builder.CreateInsertElement(Rep, Load,
    1955          64 :                                           ConstantInt::get(I32Ty, I));
    1956        5505 :     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
    1957             :                          Name.startswith("sse41.pmovzx") ||
    1958             :                          Name.startswith("avx2.pmovsx") ||
    1959             :                          Name.startswith("avx2.pmovzx") ||
    1960             :                          Name.startswith("avx512.mask.pmovsx") ||
    1961             :                          Name.startswith("avx512.mask.pmovzx"))) {
    1962         788 :       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
    1963         394 :       VectorType *DstTy = cast<VectorType>(CI->getType());
    1964         394 :       unsigned NumDstElts = DstTy->getNumElements();
    1965             : 
    1966             :       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    1967         788 :       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
    1968        5746 :       for (unsigned i = 0; i != NumDstElts; ++i)
    1969        5352 :         ShuffleMask[i] = i;
    1970             : 
    1971         394 :       Value *SV = Builder.CreateShuffleVector(
    1972         788 :           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
    1973             : 
    1974         394 :       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
    1975        1182 :       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
    1976         394 :                    : Builder.CreateZExt(SV, DstTy);
    1977             :       // If there are 3 arguments, it's a masked intrinsic so we need a select.
    1978         788 :       if (CI->getNumArgOperands() == 3)
    1979         432 :         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1980             :                             CI->getArgOperand(1));
    1981        5111 :     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
    1982             :                          Name == "avx2.vbroadcasti128")) {
    1983             :       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
    1984           9 :       Type *EltTy = CI->getType()->getVectorElementType();
    1985           9 :       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    1986           9 :       Type *VT = VectorType::get(EltTy, NumSrcElts);
    1987          18 :       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
    1988           9 :                                             PointerType::getUnqual(VT));
    1989           9 :       Value *Load = Builder.CreateAlignedLoad(Op, 1);
    1990           9 :       if (NumSrcElts == 2)
    1991           5 :         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
    1992             :                                           { 0, 1, 0, 1 });
    1993             :       else
    1994           4 :         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
    1995             :                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
    1996        5102 :     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
    1997             :                          Name.startswith("avx512.mask.shuf.f"))) {
    1998          76 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1999          38 :       Type *VT = CI->getType();
    2000          38 :       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
    2001          38 :       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
    2002          38 :       unsigned ControlBitsMask = NumLanes - 1;
    2003          38 :       unsigned NumControlBits = NumLanes / 2;
    2004          76 :       SmallVector<uint32_t, 8> ShuffleMask(0);
    2005             : 
    2006         262 :       for (unsigned l = 0; l != NumLanes; ++l) {
    2007         112 :         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
    2008             :         // We actually need the other source.
    2009         112 :         if (l >= NumLanes / 2)
    2010          56 :           LaneMask += NumLanes;
    2011         768 :         for (unsigned i = 0; i != NumElementsInLane; ++i)
    2012         328 :           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    2013             :       }
    2014          38 :       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
    2015             :                                         CI->getArgOperand(1), ShuffleMask);
    2016          38 :       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
    2017             :                           CI->getArgOperand(3));
    2018        5064 :     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
    2019             :                          Name.startswith("avx512.mask.broadcasti"))) {
    2020             :       unsigned NumSrcElts =
    2021         252 :                         CI->getArgOperand(0)->getType()->getVectorNumElements();
    2022         126 :       unsigned NumDstElts = CI->getType()->getVectorNumElements();
    2023             : 
    2024         252 :       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
    2025        2670 :       for (unsigned i = 0; i != NumDstElts; ++i)
    2026        2544 :         ShuffleMask[i] = i % NumSrcElts;
    2027             : 
    2028         126 :       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
    2029             :                                         CI->getArgOperand(0),
    2030             :                                         ShuffleMask);
    2031         126 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    2032             :                           CI->getArgOperand(1));
    2033        4938 :     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
    2034             :                          Name.startswith("avx2.vbroadcast") ||
    2035             :                          Name.startswith("avx512.pbroadcast") ||
    2036             :                          Name.startswith("avx512.mask.broadcast.s"))) {
    2037             :       // Replace vp?broadcasts with a vector shuffle.
    2038         146 :       Value *Op = CI->getArgOperand(0);
    2039         146 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    2040         146 :       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
    2041         146 :       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
    2042         146 :                                         Constant::getNullValue(MaskTy));
    2043             : 
    2044         146 :       if (CI->getNumArgOperands() == 3)
    2045         204 :         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    2046             :                             CI->getArgOperand(1));
    2047        4792 :     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
    2048          36 :       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
    2049             :                                       CI->getArgOperand(1),
    2050             :                                       CI->getArgOperand(2),
    2051             :                                       CI->getArgOperand(3),
    2052             :                                       CI->getArgOperand(4),
    2053             :                                       false);
    2054        4774 :     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
    2055          48 :       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
    2056             :                                       CI->getArgOperand(1),
    2057             :                                       CI->getArgOperand(2),
    2058             :                                       CI->getArgOperand(3),
    2059             :                                       CI->getArgOperand(4),
    2060             :                                       true);
    2061        4750 :     } else if (IsX86 && (Name == "sse2.psll.dq" ||
    2062             :                          Name == "avx2.psll.dq")) {
    2063             :       // 128/256-bit shift left specified in bits.
    2064          28 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2065          28 :       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
    2066             :                                        Shift / 8); // Shift is in bits.
    2067        4736 :     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
    2068             :                          Name == "avx2.psrl.dq")) {
    2069             :       // 128/256-bit shift right specified in bits.
    2070         216 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2071         216 :       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
    2072             :                                        Shift / 8); // Shift is in bits.
    2073        4628 :     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
    2074             :                          Name == "avx2.psll.dq.bs" ||
    2075             :                          Name == "avx512.psll.dq.512")) {
    2076             :       // 128/256/512-bit shift left specified in bytes.
    2077          32 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2078          16 :       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    2079        4612 :     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
    2080             :                          Name == "avx2.psrl.dq.bs" ||
    2081             :                          Name == "avx512.psrl.dq.512")) {
    2082             :       // 128/256/512-bit shift right specified in bytes.
    2083          32 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2084          16 :       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    2085        4596 :     } else if (IsX86 && (Name == "sse41.pblendw" ||
    2086             :                          Name.startswith("sse41.blendp") ||
    2087             :                          Name.startswith("avx.blend.p") ||
    2088             :                          Name == "avx2.pblendw" ||
    2089             :                          Name.startswith("avx2.pblendd."))) {
    2090          98 :       Value *Op0 = CI->getArgOperand(0);
    2091             :       Value *Op1 = CI->getArgOperand(1);
    2092          98 :       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    2093          98 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    2094          98 :       unsigned NumElts = VecTy->getNumElements();
    2095             : 
    2096         196 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    2097        1398 :       for (unsigned i = 0; i != NumElts; ++i)
    2098        1300 :         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
    2099             : 
    2100          98 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    2101        4498 :     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
    2102             :                          Name == "avx2.vinserti128" ||
    2103             :                          Name.startswith("avx512.mask.insert"))) {
    2104         102 :       Value *Op0 = CI->getArgOperand(0);
    2105             :       Value *Op1 = CI->getArgOperand(1);
    2106         102 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    2107         102 :       unsigned DstNumElts = CI->getType()->getVectorNumElements();
    2108         102 :       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
    2109         102 :       unsigned Scale = DstNumElts / SrcNumElts;
    2110             : 
    2111             :       // Mask off the high bits of the immediate value; hardware ignores those.
    2112         102 :       Imm = Imm % Scale;
    2113             : 
    2114             :       // Extend the second operand into a vector the size of the destination.
    2115         102 :       Value *UndefV = UndefValue::get(Op1->getType());
    2116         204 :       SmallVector<uint32_t, 8> Idxs(DstNumElts);
    2117         882 :       for (unsigned i = 0; i != SrcNumElts; ++i)
    2118         780 :         Idxs[i] = i;
    2119        1170 :       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
    2120        1068 :         Idxs[i] = SrcNumElts;
    2121         102 :       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
    2122             : 
    2123             :       // Insert the second operand into the first operand.
    2124             : 
    2125             :       // Note that there is no guarantee that instruction lowering will actually
    2126             :       // produce a vinsertf128 instruction for the created shuffles. In
    2127             :       // particular, the 0 immediate case involves no lane changes, so it can
    2128             :       // be handled as a blend.
    2129             : 
    2130             :       // Example of shuffle mask for 32-bit elements:
    2131             :       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
    2132             :       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
    2133             : 
    2134             :       // First fill with identity mask.
    2135        1950 :       for (unsigned i = 0; i != DstNumElts; ++i)
    2136        1848 :         Idxs[i] = i;
    2137             :       // Then replace the elements where we need to insert.
    2138         882 :       for (unsigned i = 0; i != SrcNumElts; ++i)
    2139         780 :         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    2140         102 :       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    2141             : 
    2142             :       // If the intrinsic has a mask operand, handle that.
    2143         102 :       if (CI->getNumArgOperands() == 5)
    2144         144 :         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
    2145             :                             CI->getArgOperand(3));
    2146        4396 :     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
    2147             :                          Name == "avx2.vextracti128" ||
    2148             :                          Name.startswith("avx512.mask.vextract"))) {
    2149          67 :       Value *Op0 = CI->getArgOperand(0);
    2150          67 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2151          67 :       unsigned DstNumElts = CI->getType()->getVectorNumElements();
    2152          67 :       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
    2153          67 :       unsigned Scale = SrcNumElts / DstNumElts;
    2154             : 
    2155             :       // Mask off the high bits of the immediate value; hardware ignores those.
    2156          67 :       Imm = Imm % Scale;
    2157             : 
    2158             :       // Get indexes for the subvector of the input vector.
    2159         134 :       SmallVector<uint32_t, 8> Idxs(DstNumElts);
    2160         547 :       for (unsigned i = 0; i != DstNumElts; ++i) {
    2161         480 :         Idxs[i] = i + (Imm * DstNumElts);
    2162             :       }
    2163          67 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    2164             : 
    2165             :       // If the intrinsic has a mask operand, handle that.
    2166          67 :       if (CI->getNumArgOperands() == 4)
    2167          64 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2168             :                             CI->getArgOperand(2));
    2169        4329 :     } else if (!IsX86 && Name == "stackprotectorcheck") {
    2170           6 :       Rep = nullptr;
    2171        4323 :     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
    2172             :                          Name.startswith("avx512.mask.perm.di."))) {
    2173          24 :       Value *Op0 = CI->getArgOperand(0);
    2174          24 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2175          24 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    2176          24 :       unsigned NumElts = VecTy->getNumElements();
    2177             : 
    2178          48 :       SmallVector<uint32_t, 8> Idxs(NumElts);
    2179         312 :       for (unsigned i = 0; i != NumElts; ++i)
    2180         288 :         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
    2181             : 
    2182          24 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    2183             : 
    2184          24 :       if (CI->getNumArgOperands() == 4)
    2185          48 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2186             :                             CI->getArgOperand(2));
    2187        4299 :     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
    2188             :                          Name == "avx2.vperm2i128")) {
    2189             :       // The immediate permute control byte looks like this:
    2190             :       //    [1:0] - select 128 bits from sources for low half of destination
    2191             :       //    [2]   - ignore
    2192             :       //    [3]   - zero low half of destination
    2193             :       //    [5:4] - select 128 bits from sources for high half of destination
    2194             :       //    [6]   - ignore
    2195             :       //    [7]   - zero high half of destination
    2196             : 
    2197          32 :       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    2198             : 
    2199          16 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    2200          16 :       unsigned HalfSize = NumElts / 2;
    2201          32 :       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
    2202             : 
    2203             :       // Determine which operand(s) are actually in use for this instruction.
    2204          16 :       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    2205          16 :       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    2206             : 
    2207             :       // If needed, replace operands based on zero mask.
    2208          16 :       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
    2209          16 :       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
    2210             : 
    2211             :       // Permute low half of result.
    2212          16 :       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    2213         112 :       for (unsigned i = 0; i < HalfSize; ++i)
    2214          96 :         ShuffleMask[i] = StartIndex + i;
    2215             : 
    2216             :       // Permute high half of result.
    2217          16 :       StartIndex = (Imm & 0x10) ? HalfSize : 0;
    2218         112 :       for (unsigned i = 0; i < HalfSize; ++i)
    2219          96 :         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
    2220             : 
    2221          16 :       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
    2222             : 
    2223        4283 :     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
    2224             :                          Name == "sse2.pshuf.d" ||
    2225             :                          Name.startswith("avx512.mask.vpermil.p") ||
    2226             :                          Name.startswith("avx512.mask.pshuf.d."))) {
    2227         148 :       Value *Op0 = CI->getArgOperand(0);
    2228         148 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2229         148 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    2230         148 :       unsigned NumElts = VecTy->getNumElements();
    2231             :       // Calculate the size of each index in the immediate.
    2232         148 :       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    2233         148 :       unsigned IdxMask = ((1 << IdxSize) - 1);
    2234             : 
    2235         296 :       SmallVector<uint32_t, 8> Idxs(NumElts);
    2236             :       // Lookup the bits for this element, wrapping around the immediate every
    2237             :       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
    2238             :       // to offset by the first index of each group.
    2239        1756 :       for (unsigned i = 0; i != NumElts; ++i)
    2240        1608 :         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
    2241             : 
    2242         148 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    2243             : 
    2244         148 :       if (CI->getNumArgOperands() == 4)
    2245         108 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2246             :                             CI->getArgOperand(2));
    2247        4135 :     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
    2248             :                          Name.startswith("avx512.mask.pshufl.w."))) {
    2249          78 :       Value *Op0 = CI->getArgOperand(0);
    2250          78 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2251          78 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    2252             : 
    2253         156 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    2254         282 :       for (unsigned l = 0; l != NumElts; l += 8) {
    2255         918 :         for (unsigned i = 0; i != 4; ++i)
    2256         816 :           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
    2257         918 :         for (unsigned i = 4; i != 8; ++i)
    2258         816 :           Idxs[i + l] = i + l;
    2259             :       }
    2260             : 
    2261          78 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    2262             : 
    2263          78 :       if (CI->getNumArgOperands() == 4)
    2264          36 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2265             :                             CI->getArgOperand(2));
    2266        4057 :     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
    2267             :                          Name.startswith("avx512.mask.pshufh.w."))) {
    2268          60 :       Value *Op0 = CI->getArgOperand(0);
    2269          60 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    2270          60 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    2271             : 
    2272         120 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    2273         228 :       for (unsigned l = 0; l != NumElts; l += 8) {
    2274         756 :         for (unsigned i = 0; i != 4; ++i)
    2275         672 :           Idxs[i + l] = i + l;
    2276         756 :         for (unsigned i = 0; i != 4; ++i)
    2277         672 :           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    2278             :       }
    2279             : 
    2280          60 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    2281             : 
    2282          60 :       if (CI->getNumArgOperands() == 4)
    2283          36 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2284             :                             CI->getArgOperand(2));
    2285        3997 :     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
    2286          28 :       Value *Op0 = CI->getArgOperand(0);
    2287             :       Value *Op1 = CI->getArgOperand(1);
    2288          28 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    2289          28 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    2290             : 
    2291          28 :       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    2292          28 :       unsigned HalfLaneElts = NumLaneElts / 2;
    2293             : 
    2294          56 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    2295         404 :       for (unsigned i = 0; i != NumElts; ++i) {
    2296             :         // Base index is the starting element of the lane.
    2297         376 :         Idxs[i] = i - (i % NumLaneElts);
    2298             :         // If we are half way through the lane switch to the other source.
    2299         188 :         if ((i % NumLaneElts) >= HalfLaneElts)
    2300          94 :           Idxs[i] += NumElts;
    2301             :         // Now select the specific element by adding HalfLaneElts bits from
    2302             :         // the immediate, wrapping around the immediate every 8 bits.
    2303         188 :         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    2304             :       }
    2305             : 
    2306          28 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    2307             : 
    2308          28 :       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
    2309             :                           CI->getArgOperand(3));
    2310        3969 :     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
    2311             :                          Name.startswith("avx512.mask.movshdup") ||
    2312             :                          Name.startswith("avx512.mask.movsldup"))) {
    2313          54 :       Value *Op0 = CI->getArgOperand(0);
    2314          54 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    2315          54 :       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    2316             : 
    2317             :       unsigned Offset = 0;
    2318             :       if (Name.startswith("avx512.mask.movshdup."))
    2319             :         Offset = 1;
    2320             : 
    2321         108 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    2322         306 :       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    2323         546 :         for (unsigned i = 0; i != NumLaneElts; i += 2) {
    2324         420 :           Idxs[i + l + 0] = i + l + Offset;
    2325         420 :           Idxs[i + l + 1] = i + l + Offset;
    2326             :         }
    2327             : 
    2328          54 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    2329             : 
    2330          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    2331             :                           CI->getArgOperand(1));
    2332        3915 :     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
    2333             :                          Name.startswith("avx512.mask.unpckl."))) {
    2334          74 :       Value *Op0 = CI->getArgOperand(0);
    2335             :       Value *Op1 = CI->getArgOperand(1);
    2336         148 :       int NumElts = CI->getType()->getVectorNumElements();
    2337          74 :       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    2338             : 
    2339         148 :       SmallVector<uint32_t, 64> Idxs(NumElts);
    2340         426 :       for (int l = 0; l != NumElts; l += NumLaneElts)
    2341        2224 :         for (int i = 0; i != NumLaneElts; ++i)
    2342        2048 :           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
    2343             : 
    2344          74 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    2345             : 
    2346          74 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2347             :                           CI->getArgOperand(2));
    2348        3841 :     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
    2349             :                          Name.startswith("avx512.mask.unpckh."))) {
    2350          72 :       Value *Op0 = CI->getArgOperand(0);
    2351             :       Value *Op1 = CI->getArgOperand(1);
    2352         144 :       int NumElts = CI->getType()->getVectorNumElements();
    2353          72 :       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    2354             : 
    2355         144 :       SmallVector<uint32_t, 64> Idxs(NumElts);
    2356         408 :       for (int l = 0; l != NumElts; l += NumLaneElts)
    2357        2184 :         for (int i = 0; i != NumLaneElts; ++i)
    2358        2016 :           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
    2359             : 
    2360          72 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    2361             : 
    2362          72 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2363             :                           CI->getArgOperand(2));
    2364        3769 :     } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
    2365          88 :       Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
    2366          44 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2367             :                           CI->getArgOperand(2));
    2368        3725 :     } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
    2369         144 :       Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
    2370             :                               CI->getArgOperand(1));
    2371          72 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2372             :                           CI->getArgOperand(2));
    2373        3653 :     } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
    2374          88 :       Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
    2375          44 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2376             :                           CI->getArgOperand(2));
    2377        3609 :     } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
    2378          88 :       Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
    2379          44 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2380             :                           CI->getArgOperand(2));
    2381        3565 :     } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
    2382          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    2383          54 :       VectorType *ITy = VectorType::getInteger(FTy);
    2384         108 :       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
    2385             :                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    2386          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    2387          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2388             :                           CI->getArgOperand(2));
    2389        3511 :     } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
    2390          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    2391          54 :       VectorType *ITy = VectorType::getInteger(FTy);
    2392         108 :       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    2393          54 :       Rep = Builder.CreateAnd(Rep,
    2394             :                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    2395          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    2396          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2397             :                           CI->getArgOperand(2));
    2398        3457 :     } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
    2399          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    2400          54 :       VectorType *ITy = VectorType::getInteger(FTy);
    2401         108 :       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
    2402             :                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    2403          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    2404          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2405             :                           CI->getArgOperand(2));
    2406        3403 :     } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
    2407          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    2408          54 :       VectorType *ITy = VectorType::getInteger(FTy);
    2409         108 :       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
    2410             :                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    2411          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    2412          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2413             :                           CI->getArgOperand(2));
    2414        3349 :     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
    2415         216 :       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    2416         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2417             :                           CI->getArgOperand(2));
    2418        3241 :     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
    2419         216 :       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    2420         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2421             :                           CI->getArgOperand(2));
    2422        3133 :     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
    2423         216 :       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    2424         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2425             :                           CI->getArgOperand(2));
    2426        3025 :     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
    2427             :       if (Name.endswith(".512")) {
    2428             :         Intrinsic::ID IID;
    2429          30 :         if (Name[17] == 's')
    2430             :           IID = Intrinsic::x86_avx512_add_ps_512;
    2431             :         else
    2432             :           IID = Intrinsic::x86_avx512_add_pd_512;
    2433             : 
    2434         150 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2435          30 :                                  { CI->getArgOperand(0), CI->getArgOperand(1),
    2436             :                                    CI->getArgOperand(4) });
    2437             :       } else {
    2438          24 :         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    2439             :       }
    2440         126 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2441             :                           CI->getArgOperand(2));
    2442        2983 :     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
    2443             :       if (Name.endswith(".512")) {
    2444             :         Intrinsic::ID IID;
    2445          30 :         if (Name[17] == 's')
    2446             :           IID = Intrinsic::x86_avx512_div_ps_512;
    2447             :         else
    2448             :           IID = Intrinsic::x86_avx512_div_pd_512;
    2449             : 
    2450         150 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2451          30 :                                  { CI->getArgOperand(0), CI->getArgOperand(1),
    2452             :                                    CI->getArgOperand(4) });
    2453             :       } else {
    2454          24 :         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    2455             :       }
    2456         126 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2457             :                           CI->getArgOperand(2));
    2458        2941 :     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
    2459             :       if (Name.endswith(".512")) {
    2460             :         Intrinsic::ID IID;
    2461          32 :         if (Name[17] == 's')
    2462             :           IID = Intrinsic::x86_avx512_mul_ps_512;
    2463             :         else
    2464             :           IID = Intrinsic::x86_avx512_mul_pd_512;
    2465             : 
    2466         160 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2467          32 :                                  { CI->getArgOperand(0), CI->getArgOperand(1),
    2468             :                                    CI->getArgOperand(4) });
    2469             :       } else {
    2470          24 :         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    2471             :       }
    2472         132 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2473             :                           CI->getArgOperand(2));
    2474        2897 :     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
    2475             :       if (Name.endswith(".512")) {
    2476             :         Intrinsic::ID IID;
    2477          28 :         if (Name[17] == 's')
    2478             :           IID = Intrinsic::x86_avx512_sub_ps_512;
    2479             :         else
    2480             :           IID = Intrinsic::x86_avx512_sub_pd_512;
    2481             : 
    2482         140 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2483          28 :                                  { CI->getArgOperand(0), CI->getArgOperand(1),
    2484             :                                    CI->getArgOperand(4) });
    2485             :       } else {
    2486          24 :         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    2487             :       }
    2488         120 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2489             :                           CI->getArgOperand(2));
    2490        2857 :     } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
    2491             :                Name.drop_front(18) == ".512") {
    2492             :       Intrinsic::ID IID;
    2493          12 :       if (Name[17] == 's')
    2494             :         IID = Intrinsic::x86_avx512_max_ps_512;
    2495             :       else
    2496             :         IID = Intrinsic::x86_avx512_max_pd_512;
    2497             : 
    2498          60 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2499          12 :                                { CI->getArgOperand(0), CI->getArgOperand(1),
    2500             :                                  CI->getArgOperand(4) });
    2501          12 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2502             :                           CI->getArgOperand(2));
    2503        2845 :     } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
    2504             :                Name.drop_front(18) == ".512") {
    2505             :       Intrinsic::ID IID;
    2506          12 :       if (Name[17] == 's')
    2507             :         IID = Intrinsic::x86_avx512_min_ps_512;
    2508             :       else
    2509             :         IID = Intrinsic::x86_avx512_min_pd_512;
    2510             : 
    2511          60 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2512          12 :                                { CI->getArgOperand(0), CI->getArgOperand(1),
    2513             :                                  CI->getArgOperand(4) });
    2514          12 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2515             :                           CI->getArgOperand(2));
    2516        2833 :     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
    2517         130 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
    2518             :                                                          Intrinsic::ctlz,
    2519          52 :                                                          CI->getType()),
    2520          52 :                                { CI->getArgOperand(0), Builder.getInt1(false) });
    2521          26 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    2522             :                           CI->getArgOperand(1));
    2523        2807 :     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
    2524         156 :       bool IsImmediate = Name[16] == 'i' ||
    2525         132 :                          (Name.size() > 18 && Name[18] == 'i');
    2526             :       bool IsVariable = Name[16] == 'v';
    2527         156 :       char Size = Name[16] == '.' ? Name[17] :
    2528          66 :                   Name[17] == '.' ? Name[18] :
    2529          42 :                   Name[18] == '.' ? Name[19] :
    2530             :                                     Name[20];
    2531             : 
    2532             :       Intrinsic::ID IID;
    2533         210 :       if (IsVariable && Name[17] != '.') {
    2534          42 :         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
    2535             :           IID = Intrinsic::x86_avx2_psllv_q;
    2536          36 :         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
    2537             :           IID = Intrinsic::x86_avx2_psllv_q_256;
    2538          30 :         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
    2539             :           IID = Intrinsic::x86_avx2_psllv_d;
    2540          24 :         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
    2541             :           IID = Intrinsic::x86_avx2_psllv_d_256;
    2542          18 :         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
    2543             :           IID = Intrinsic::x86_avx512_psllv_w_128;
    2544          12 :         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
    2545             :           IID = Intrinsic::x86_avx512_psllv_w_256;
    2546          12 :         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
    2547             :           IID = Intrinsic::x86_avx512_psllv_w_512;
    2548             :         else
    2549           0 :           llvm_unreachable("Unexpected size");
    2550             :       } else if (Name.endswith(".128")) {
    2551          24 :         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
    2552          12 :           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
    2553             :                             : Intrinsic::x86_sse2_psll_d;
    2554          12 :         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
    2555           0 :           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
    2556             :                             : Intrinsic::x86_sse2_psll_q;
    2557          12 :         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
    2558          12 :           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
    2559             :                             : Intrinsic::x86_sse2_psll_w;
    2560             :         else
    2561           0 :           llvm_unreachable("Unexpected size");
    2562             :       } else if (Name.endswith(".256")) {
    2563          30 :         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
    2564          12 :           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
    2565             :                             : Intrinsic::x86_avx2_psll_d;
    2566          18 :         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
    2567           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
    2568             :                             : Intrinsic::x86_avx2_psll_q;
    2569          12 :         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
    2570          12 :           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
    2571             :                             : Intrinsic::x86_avx2_psll_w;
    2572             :         else
    2573           0 :           llvm_unreachable("Unexpected size");
    2574             :       } else {
    2575          60 :         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
    2576          24 :           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
    2577          12 :                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
    2578             :                               Intrinsic::x86_avx512_psll_d_512;
    2579          36 :         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
    2580          24 :           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
    2581          12 :                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
    2582             :                               Intrinsic::x86_avx512_psll_q_512;
    2583          12 :         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
    2584          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
    2585             :                             : Intrinsic::x86_avx512_psll_w_512;
    2586             :         else
    2587           0 :           llvm_unreachable("Unexpected size");
    2588             :       }
    2589             : 
    2590         156 :       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    2591        2651 :     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
    2592         176 :       bool IsImmediate = Name[16] == 'i' ||
    2593         152 :                          (Name.size() > 18 && Name[18] == 'i');
    2594             :       bool IsVariable = Name[16] == 'v';
    2595         176 :       char Size = Name[16] == '.' ? Name[17] :
    2596          68 :                   Name[17] == '.' ? Name[18] :
    2597          42 :                   Name[18] == '.' ? Name[19] :
    2598             :                                     Name[20];
    2599             : 
    2600             :       Intrinsic::ID IID;
    2601         232 :       if (IsVariable && Name[17] != '.') {
    2602          42 :         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
    2603             :           IID = Intrinsic::x86_avx2_psrlv_q;
    2604          36 :         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
    2605             :           IID = Intrinsic::x86_avx2_psrlv_q_256;
    2606          30 :         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
    2607             :           IID = Intrinsic::x86_avx2_psrlv_d;
    2608          24 :         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
    2609             :           IID = Intrinsic::x86_avx2_psrlv_d_256;
    2610          18 :         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
    2611             :           IID = Intrinsic::x86_avx512_psrlv_w_128;
    2612          12 :         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
    2613             :           IID = Intrinsic::x86_avx512_psrlv_w_256;
    2614          12 :         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
    2615             :           IID = Intrinsic::x86_avx512_psrlv_w_512;
    2616             :         else
    2617           0 :           llvm_unreachable("Unexpected size");
    2618             :       } else if (Name.endswith(".128")) {
    2619          36 :         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
    2620          12 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
    2621             :                             : Intrinsic::x86_sse2_psrl_d;
    2622          24 :         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
    2623          12 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
    2624             :                             : Intrinsic::x86_sse2_psrl_q;
    2625          12 :         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
    2626          12 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
    2627             :                             : Intrinsic::x86_sse2_psrl_w;
    2628             :         else
    2629           0 :           llvm_unreachable("Unexpected size");
    2630             :       } else if (Name.endswith(".256")) {
    2631          36 :         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
    2632          12 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
    2633             :                             : Intrinsic::x86_avx2_psrl_d;
    2634          24 :         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
    2635          12 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
    2636             :                             : Intrinsic::x86_avx2_psrl_q;
    2637          12 :         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
    2638          12 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
    2639             :                             : Intrinsic::x86_avx2_psrl_w;
    2640             :         else
    2641           0 :           llvm_unreachable("Unexpected size");
    2642             :       } else {
    2643          62 :         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
    2644          24 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
    2645          12 :                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
    2646             :                               Intrinsic::x86_avx512_psrl_d_512;
    2647          38 :         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
    2648          26 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
    2649          14 :                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
    2650             :                               Intrinsic::x86_avx512_psrl_q_512;
    2651          12 :         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
    2652          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
    2653             :                             : Intrinsic::x86_avx512_psrl_w_512;
    2654             :         else
    2655           0 :           llvm_unreachable("Unexpected size");
    2656             :       }
    2657             : 
    2658         176 :       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    2659        2475 :     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
    2660         168 :       bool IsImmediate = Name[16] == 'i' ||
    2661         144 :                          (Name.size() > 18 && Name[18] == 'i');
    2662             :       bool IsVariable = Name[16] == 'v';
    2663         168 :       char Size = Name[16] == '.' ? Name[17] :
    2664          72 :                   Name[17] == '.' ? Name[18] :
    2665          34 :                   Name[18] == '.' ? Name[19] :
    2666             :                                     Name[20];
    2667             : 
    2668             :       Intrinsic::ID IID;
    2669         228 :       if (IsVariable && Name[17] != '.') {
    2670          34 :         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
    2671             :           IID = Intrinsic::x86_avx2_psrav_d;
    2672          28 :         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
    2673             :           IID = Intrinsic::x86_avx2_psrav_d_256;
    2674          20 :         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
    2675             :           IID = Intrinsic::x86_avx512_psrav_w_128;
    2676          14 :         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
    2677             :           IID = Intrinsic::x86_avx512_psrav_w_256;
    2678          16 :         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
    2679             :           IID = Intrinsic::x86_avx512_psrav_w_512;
    2680             :         else
    2681           0 :           llvm_unreachable("Unexpected size");
    2682             :       } else if (Name.endswith(".128")) {
    2683          38 :         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
    2684           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
    2685             :                             : Intrinsic::x86_sse2_psra_d;
    2686          32 :         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
    2687          20 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
    2688          14 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
    2689             :                               Intrinsic::x86_avx512_psra_q_128;
    2690          12 :         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
    2691          12 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
    2692             :                             : Intrinsic::x86_sse2_psra_w;
    2693             :         else
    2694           0 :           llvm_unreachable("Unexpected size");
    2695             :       } else if (Name.endswith(".256")) {
    2696          36 :         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
    2697           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
    2698             :                             : Intrinsic::x86_avx2_psra_d;
    2699          30 :         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
    2700          18 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
    2701          12 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
    2702             :                               Intrinsic::x86_avx512_psra_q_256;
    2703          12 :         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
    2704          12 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
    2705             :                             : Intrinsic::x86_avx2_psra_w;
    2706             :         else
    2707           0 :           llvm_unreachable("Unexpected size");
    2708             :       } else {
    2709          60 :         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
    2710          24 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
    2711          12 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
    2712             :                               Intrinsic::x86_avx512_psra_d_512;
    2713          36 :         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
    2714          24 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
    2715          12 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
    2716             :                               Intrinsic::x86_avx512_psra_q_512;
    2717          12 :         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
    2718          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
    2719             :                             : Intrinsic::x86_avx512_psra_w_512;
    2720             :         else
    2721           0 :           llvm_unreachable("Unexpected size");
    2722             :       }
    2723             : 
    2724         168 :       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    2725        2307 :     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
    2726           8 :       Rep = upgradeMaskedMove(Builder, *CI);
    2727        2299 :     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
    2728          24 :       Rep = UpgradeMaskToInt(Builder, *CI);
    2729        2275 :     } else if (IsX86 && Name.endswith(".movntdqa")) {
    2730          44 :       Module *M = F->getParent();
    2731             :       MDNode *Node = MDNode::get(
    2732         132 :           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    2733             : 
    2734          44 :       Value *Ptr = CI->getArgOperand(0);
    2735          44 :       VectorType *VTy = cast<VectorType>(CI->getType());
    2736             : 
    2737             :       // Convert the type of the pointer to a pointer to the loaded type.
    2738             :       Value *BC =
    2739          44 :           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
    2740          88 :       LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
    2741          88 :       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
    2742          44 :       Rep = LI;
    2743        2231 :     } else if (IsX86 &&
    2744             :                (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
    2745             :                 Name.startswith("avx512.mask.pavg"))) {
    2746             :       // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
    2747             :       // llvm.x86.avx512.mask.pavg.b/w
    2748          46 :       Value *A = CI->getArgOperand(0);
    2749             :       Value *B = CI->getArgOperand(1);
    2750          46 :       VectorType *ZextType = VectorType::getExtendedElementVectorType(
    2751          46 :           cast<VectorType>(A->getType()));
    2752          46 :       Value *ExtendedA = Builder.CreateZExt(A, ZextType);
    2753          46 :       Value *ExtendedB = Builder.CreateZExt(B, ZextType);
    2754          46 :       Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
    2755          46 :       Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
    2756          46 :       Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
    2757          92 :       Rep = Builder.CreateTrunc(ShiftR, A->getType());
    2758          46 :       if (CI->getNumArgOperands() > 2) {
    2759          48 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2760             :                             CI->getArgOperand(2));
    2761             :       }
    2762        2185 :     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
    2763             :                          Name.startswith("fma.vfmsub.") ||
    2764             :                          Name.startswith("fma.vfnmadd.") ||
    2765             :                          Name.startswith("fma.vfnmsub."))) {
    2766         940 :       bool NegMul = Name[6] == 'n';
    2767         940 :       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    2768         940 :       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
    2769             : 
    2770         470 :       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2771        1410 :                        CI->getArgOperand(2) };
    2772             : 
    2773         470 :       if (IsScalar) {
    2774         154 :         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    2775         308 :         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    2776         308 :         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    2777             :       }
    2778             : 
    2779         470 :       if (NegMul && !IsScalar)
    2780         144 :         Ops[0] = Builder.CreateFNeg(Ops[0]);
    2781         470 :       if (NegMul && IsScalar)
    2782          78 :         Ops[1] = Builder.CreateFNeg(Ops[1]);
    2783         470 :       if (NegAcc)
    2784         228 :         Ops[2] = Builder.CreateFNeg(Ops[2]);
    2785             : 
    2786        1880 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
    2787             :                                                          Intrinsic::fma,
    2788         940 :                                                          Ops[0]->getType()),
    2789             :                                Ops);
    2790             : 
    2791         470 :       if (IsScalar)
    2792         154 :         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
    2793             :                                           (uint64_t)0);
    2794        1715 :     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
    2795          46 :       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2796         138 :                        CI->getArgOperand(2) };
    2797             : 
    2798          46 :       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    2799          92 :       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    2800          92 :       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    2801             : 
    2802         184 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
    2803             :                                                          Intrinsic::fma,
    2804          92 :                                                          Ops[0]->getType()),
    2805             :                                Ops);
    2806             : 
    2807          92 :       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
    2808             :                                         Rep, (uint64_t)0);
    2809        1669 :     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
    2810             :                          Name.startswith("avx512.mask.vfnmadd.p") ||
    2811             :                          Name.startswith("avx512.mask.vfnmsub.p") ||
    2812             :                          Name.startswith("avx512.mask3.vfmadd.p") ||
    2813             :                          Name.startswith("avx512.mask3.vfmsub.p") ||
    2814             :                          Name.startswith("avx512.mask3.vfnmsub.p") ||
    2815             :                          Name.startswith("avx512.maskz.vfmadd.p"))) {
    2816         328 :       bool IsMask3 = Name[11] == '3';
    2817         164 :       bool IsMaskZ = Name[11] == 'z';
    2818             :       // Drop the "avx512.mask." to make it easier.
    2819         328 :       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    2820             :       bool NegMul = Name[2] == 'n';
    2821         328 :       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    2822             : 
    2823         388 :       if (CI->getNumArgOperands() == 5 &&
    2824          60 :           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
    2825             :            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
    2826             :         Intrinsic::ID IID;
    2827             :         // Check the character before ".512" in string.
    2828          56 :         if (Name[Name.size()-5] == 's')
    2829             :           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
    2830             :         else
    2831             :           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
    2832             : 
    2833             :         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2834          84 :                          CI->getArgOperand(2), CI->getArgOperand(4) };
    2835             : 
    2836          28 :         if (NegMul) {
    2837          12 :           if (IsMaskZ || IsMask3)
    2838           0 :             Ops[0] = Builder.CreateFNeg(Ops[0]);
    2839             :           else
    2840          12 :             Ops[1] = Builder.CreateFNeg(Ops[1]);
    2841             :         }
    2842          28 :         if (NegAcc)
    2843          12 :           Ops[2] = Builder.CreateFNeg(Ops[2]);
    2844             : 
    2845          56 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2846             :                                  Ops);
    2847             :       } else {
    2848             : 
    2849             :         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2850         408 :                          CI->getArgOperand(2) };
    2851             : 
    2852         136 :         if (NegMul) {
    2853          60 :           if (IsMaskZ || IsMask3)
    2854          10 :             Ops[0] = Builder.CreateFNeg(Ops[0]);
    2855             :           else
    2856          50 :             Ops[1] = Builder.CreateFNeg(Ops[1]);
    2857             :         }
    2858         136 :         if (NegAcc)
    2859          46 :           Ops[2] = Builder.CreateFNeg(Ops[2]);
    2860             : 
    2861         272 :         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
    2862             :                                                   Intrinsic::fma,
    2863         408 :                                                   Ops[0]->getType());
    2864         136 :         Rep = Builder.CreateCall(FMA, Ops);
    2865             :       }
    2866             : 
    2867         164 :       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
    2868         154 :                         IsMask3 ? CI->getArgOperand(2) :
    2869         124 :                                   CI->getArgOperand(0);
    2870             : 
    2871         328 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    2872        1505 :     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
    2873             :                          Name.startswith("fma.vfmsubadd.p"))) {
    2874          48 :       bool IsSubAdd = Name[7] == 's';
    2875          96 :       int NumElts = CI->getType()->getVectorNumElements();
    2876             : 
    2877          48 :       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2878         144 :                        CI->getArgOperand(2) };
    2879             : 
    2880          96 :       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
    2881         144 :                                                 Ops[0]->getType());
    2882          48 :       Value *Odd = Builder.CreateCall(FMA, Ops);
    2883          48 :       Ops[2] = Builder.CreateFNeg(Ops[2]);
    2884          48 :       Value *Even = Builder.CreateCall(FMA, Ops);
    2885             : 
    2886          48 :       if (IsSubAdd)
    2887             :         std::swap(Even, Odd);
    2888             : 
    2889          96 :       SmallVector<uint32_t, 32> Idxs(NumElts);
    2890         480 :       for (int i = 0; i != NumElts; ++i)
    2891         432 :         Idxs[i] = i + (i % 2) * NumElts;
    2892             : 
    2893          48 :       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    2894        1457 :     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
    2895             :                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
    2896             :                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
    2897             :                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
    2898         104 :       bool IsMask3 = Name[11] == '3';
    2899          52 :       bool IsMaskZ = Name[11] == 'z';
    2900             :       // Drop the "avx512.mask." to make it easier.
    2901         104 :       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    2902             :       bool IsSubAdd = Name[3] == 's';
    2903         116 :       if (CI->getNumArgOperands() == 5 &&
    2904          12 :           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
    2905             :            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
    2906             :         Intrinsic::ID IID;
    2907             :         // Check the character before ".512" in string.
    2908           0 :         if (Name[Name.size()-5] == 's')
    2909             :           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
    2910             :         else
    2911             :           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
    2912             : 
    2913             :         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2914             :                          CI->getArgOperand(2), CI->getArgOperand(4) };
    2915           0 :         if (IsSubAdd)
    2916           0 :           Ops[2] = Builder.CreateFNeg(Ops[2]);
    2917             : 
    2918           0 :         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    2919             :                                  {CI->getArgOperand(0), CI->getArgOperand(1),
    2920             :                                   CI->getArgOperand(2), CI->getArgOperand(4)});
    2921             :       } else {
    2922         104 :         int NumElts = CI->getType()->getVectorNumElements();
    2923             : 
    2924             :         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    2925         156 :                          CI->getArgOperand(2) };
    2926             : 
    2927         104 :         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
    2928         156 :                                                   Ops[0]->getType());
    2929          52 :         Value *Odd = Builder.CreateCall(FMA, Ops);
    2930          52 :         Ops[2] = Builder.CreateFNeg(Ops[2]);
    2931          52 :         Value *Even = Builder.CreateCall(FMA, Ops);
    2932             : 
    2933          52 :         if (IsSubAdd)
    2934             :           std::swap(Even, Odd);
    2935             : 
    2936         104 :         SmallVector<uint32_t, 32> Idxs(NumElts);
    2937         700 :         for (int i = 0; i != NumElts; ++i)
    2938         648 :           Idxs[i] = i + (i % 2) * NumElts;
    2939             : 
    2940          52 :         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    2941             :       }
    2942             : 
    2943          52 :       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
    2944          42 :                         IsMask3 ? CI->getArgOperand(2) :
    2945          22 :                                   CI->getArgOperand(0);
    2946             : 
    2947         104 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    2948        1405 :     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
    2949             :                          Name.startswith("avx512.maskz.pternlog."))) {
    2950          48 :       bool ZeroMask = Name[11] == 'z';
    2951          48 :       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    2952          48 :       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    2953             :       Intrinsic::ID IID;
    2954          48 :       if (VecWidth == 128 && EltWidth == 32)
    2955             :         IID = Intrinsic::x86_avx512_pternlog_d_128;
    2956          40 :       else if (VecWidth == 256 && EltWidth == 32)
    2957             :         IID = Intrinsic::x86_avx512_pternlog_d_256;
    2958          32 :       else if (VecWidth == 512 && EltWidth == 32)
    2959             :         IID = Intrinsic::x86_avx512_pternlog_d_512;
    2960          24 :       else if (VecWidth == 128 && EltWidth == 64)
    2961             :         IID = Intrinsic::x86_avx512_pternlog_q_128;
    2962          16 :       else if (VecWidth == 256 && EltWidth == 64)
    2963             :         IID = Intrinsic::x86_avx512_pternlog_q_256;
    2964           8 :       else if (VecWidth == 512 && EltWidth == 64)
    2965             :         IID = Intrinsic::x86_avx512_pternlog_q_512;
    2966             :       else
    2967           0 :         llvm_unreachable("Unexpected intrinsic");
    2968             : 
    2969          48 :       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
    2970         192 :                         CI->getArgOperand(2), CI->getArgOperand(3) };
    2971         144 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
    2972             :                                Args);
    2973          48 :       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
    2974             :                                  : CI->getArgOperand(0);
    2975          96 :       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
    2976        1357 :     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
    2977             :                          Name.startswith("avx512.maskz.vpmadd52"))) {
    2978         120 :       bool ZeroMask = Name[11] == 'z';
    2979         200 :       bool High = Name[20] == 'h' || Name[21] == 'h';
    2980         120 :       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    2981             :       Intrinsic::ID IID;
    2982         120 :       if (VecWidth == 128 && !High)
    2983             :         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    2984         104 :       else if (VecWidth == 256 && !High)
    2985             :         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    2986          88 :       else if (VecWidth == 512 && !High)
    2987             :         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    2988          72 :       else if (VecWidth == 128 && High)
    2989             :         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    2990          56 :       else if (VecWidth == 256 && High)
    2991             :         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    2992          40 :       else if (VecWidth == 512 && High)
    2993             :         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    2994             :       else
    2995           0 :         llvm_unreachable("Unexpected intrinsic");
    2996             : 
    2997         120 :       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
    2998         360 :                         CI->getArgOperand(2) };
    2999         360 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
    3000             :                                Args);
    3001         120 :       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
    3002             :                                  : CI->getArgOperand(0);
    3003         240 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    3004        1237 :     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
    3005             :                          Name.startswith("avx512.mask.vpermt2var.") ||
    3006             :                          Name.startswith("avx512.maskz.vpermt2var."))) {
    3007         403 :       bool ZeroMask = Name[11] == 'z';
    3008             :       bool IndexForm = Name[17] == 'i';
    3009         403 :       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    3010         403 :       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    3011             :       bool IsFloat = CI->getType()->isFPOrFPVectorTy();
    3012             :       Intrinsic::ID IID;
    3013         403 :       if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    3014             :         IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
    3015         394 :       else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    3016             :         IID = Intrinsic::x86_avx512_vpermi2var_d_128;
    3017         379 :       else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    3018             :         IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
    3019         372 :       else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    3020             :         IID = Intrinsic::x86_avx512_vpermi2var_q_128;
    3021         360 :       else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    3022             :         IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
    3023         351 :       else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    3024             :         IID = Intrinsic::x86_avx512_vpermi2var_d_256;
    3025         337 :       else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    3026             :         IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
    3027         330 :       else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    3028             :         IID = Intrinsic::x86_avx512_vpermi2var_q_256;
    3029         318 :       else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    3030             :         IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
    3031         258 :       else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    3032             :         IID = Intrinsic::x86_avx512_vpermi2var_d_512;
    3033         217 :       else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    3034             :         IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
    3035         179 :       else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    3036             :         IID = Intrinsic::x86_avx512_vpermi2var_q_512;
    3037         151 :       else if (VecWidth == 128 && EltWidth == 16)
    3038             :         IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
    3039         139 :       else if (VecWidth == 256 && EltWidth == 16)
    3040             :         IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
    3041         107 :       else if (VecWidth == 512 && EltWidth == 16)
    3042             :         IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
    3043          75 :       else if (VecWidth == 128 && EltWidth == 8)
    3044             :         IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
    3045          40 :       else if (VecWidth == 256 && EltWidth == 8)
    3046             :         IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
    3047          18 :       else if (VecWidth == 512 && EltWidth == 8)
    3048             :         IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
    3049             :       else
    3050           0 :         llvm_unreachable("Unexpected intrinsic");
    3051             : 
    3052         403 :       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
    3053        1209 :                         CI->getArgOperand(2) };
    3054             : 
    3055             :       // If this isn't index form we need to swap operand 0 and 1.
    3056         403 :       if (!IndexForm)
    3057             :         std::swap(Args[0], Args[1]);
    3058             : 
    3059        1209 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
    3060             :                                Args);
    3061         403 :       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
    3062         627 :                                  : Builder.CreateBitCast(CI->getArgOperand(1),
    3063             :                                                          CI->getType());
    3064         806 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    3065         834 :     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
    3066             :                          Name.startswith("avx512.maskz.vpdpbusd.") ||
    3067             :                          Name.startswith("avx512.mask.vpdpbusds.") ||
    3068             :                          Name.startswith("avx512.maskz.vpdpbusds."))) {
    3069          36 :       bool ZeroMask = Name[11] == 'z';
    3070          72 :       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    3071          36 :       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    3072             :       Intrinsic::ID IID;
    3073          36 :       if (VecWidth == 128 && !IsSaturating)
    3074             :         IID = Intrinsic::x86_avx512_vpdpbusd_128;
    3075          30 :       else if (VecWidth == 256 && !IsSaturating)
    3076             :         IID = Intrinsic::x86_avx512_vpdpbusd_256;
    3077          24 :       else if (VecWidth == 512 && !IsSaturating)
    3078             :         IID = Intrinsic::x86_avx512_vpdpbusd_512;
    3079          18 :       else if (VecWidth == 128 && IsSaturating)
    3080             :         IID = Intrinsic::x86_avx512_vpdpbusds_128;
    3081          12 :       else if (VecWidth == 256 && IsSaturating)
    3082             :         IID = Intrinsic::x86_avx512_vpdpbusds_256;
    3083           6 :       else if (VecWidth == 512 && IsSaturating)
    3084             :         IID = Intrinsic::x86_avx512_vpdpbusds_512;
    3085             :       else
    3086           0 :         llvm_unreachable("Unexpected intrinsic");
    3087             : 
    3088          36 :       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    3089         108 :                         CI->getArgOperand(2)  };
    3090         108 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
    3091             :                                Args);
    3092          36 :       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
    3093             :                                  : CI->getArgOperand(0);
    3094          72 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    3095         798 :     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
    3096             :                          Name.startswith("avx512.maskz.vpdpwssd.") ||
    3097             :                          Name.startswith("avx512.mask.vpdpwssds.") ||
    3098             :                          Name.startswith("avx512.maskz.vpdpwssds."))) {
    3099          36 :       bool ZeroMask = Name[11] == 'z';
    3100          72 :       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    3101          36 :       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    3102             :       Intrinsic::ID IID;
    3103          36 :       if (VecWidth == 128 && !IsSaturating)
    3104             :         IID = Intrinsic::x86_avx512_vpdpwssd_128;
    3105          30 :       else if (VecWidth == 256 && !IsSaturating)
    3106             :         IID = Intrinsic::x86_avx512_vpdpwssd_256;
    3107          24 :       else if (VecWidth == 512 && !IsSaturating)
    3108             :         IID = Intrinsic::x86_avx512_vpdpwssd_512;
    3109          18 :       else if (VecWidth == 128 && IsSaturating)
    3110             :         IID = Intrinsic::x86_avx512_vpdpwssds_128;
    3111          12 :       else if (VecWidth == 256 && IsSaturating)
    3112             :         IID = Intrinsic::x86_avx512_vpdpwssds_256;
    3113           6 :       else if (VecWidth == 512 && IsSaturating)
    3114             :         IID = Intrinsic::x86_avx512_vpdpwssds_512;
    3115             :       else
    3116           0 :         llvm_unreachable("Unexpected intrinsic");
    3117             : 
    3118          36 :       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
    3119         108 :                         CI->getArgOperand(2)  };
    3120         108 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
    3121             :                                Args);
    3122          36 :       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
    3123             :                                  : CI->getArgOperand(0);
    3124          72 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    3125        1498 :     } else if (IsX86 && Name.startswith("avx512.mask.") &&
    3126         736 :                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    3127             :       // Rep will be updated by the call in the condition.
    3128          26 :     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
    3129           4 :       Value *Arg = CI->getArgOperand(0);
    3130           4 :       Value *Neg = Builder.CreateNeg(Arg, "neg");
    3131           4 :       Value *Cmp = Builder.CreateICmpSGE(
    3132           4 :           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    3133           4 :       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    3134          22 :     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
    3135             :                           Name == "max.ui" || Name == "max.ull")) {
    3136           8 :       Value *Arg0 = CI->getArgOperand(0);
    3137             :       Value *Arg1 = CI->getArgOperand(1);
    3138             :       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
    3139          12 :                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
    3140           8 :                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    3141           8 :       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    3142          14 :     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
    3143             :                           Name == "min.ui" || Name == "min.ull")) {
    3144           8 :       Value *Arg0 = CI->getArgOperand(0);
    3145             :       Value *Arg1 = CI->getArgOperand(1);
    3146             :       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
    3147          12 :                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
    3148           8 :                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    3149           8 :       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    3150           6 :     } else if (IsNVVM && Name == "clz.ll") {
    3151             :       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    3152           2 :       Value *Arg = CI->getArgOperand(0);
    3153          10 :       Value *Ctlz = Builder.CreateCall(
    3154             :           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
    3155           2 :                                     {Arg->getType()}),
    3156           2 :           {Arg, Builder.getFalse()}, "ctlz");
    3157           4 :       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    3158           4 :     } else if (IsNVVM && Name == "popc.ll") {
    3159             :       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    3160             :       // i64.
    3161           4 :       Value *Arg = CI->getArgOperand(0);
    3162           6 :       Value *Popc = Builder.CreateCall(
    3163             :           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
    3164           2 :                                     {Arg->getType()}),
    3165             :           Arg, "ctpop");
    3166           4 :       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    3167           2 :     } else if (IsNVVM && Name == "h2f") {
    3168           8 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
    3169             :                                    F->getParent(), Intrinsic::convert_from_fp16,
    3170           2 :                                    {Builder.getFloatTy()}),
    3171           6 :                                CI->getArgOperand(0), "h2f");
    3172             :     } else {
    3173           0 :       llvm_unreachable("Unknown function for CallInst upgrade.");
    3174             :     }
    3175             : 
    3176        9351 :     if (Rep)
    3177        9345 :       CI->replaceAllUsesWith(Rep);
    3178        9351 :     CI->eraseFromParent();
    3179        9351 :     return;
    3180             :   }
    3181             : 
    3182             :   const auto &DefaultCase = [&NewFn, &CI]() -> void {
    3183             :     // Handle generic mangling change, but nothing else
    3184             :     assert(
    3185             :         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
    3186             :         "Unknown function for CallInst upgrade and isn't just a name change");
    3187             :     CI->setCalledFunction(NewFn);
    3188             :   };
    3189             :   CallInst *NewCall = nullptr;
    3190        1281 :   switch (NewFn->getIntrinsicID()) {
    3191             :   default: {
    3192             :     DefaultCase();
    3193             :     return;
    3194             :   }
    3195             : 
    3196          14 :   case Intrinsic::arm_neon_vld1:
    3197             :   case Intrinsic::arm_neon_vld2:
    3198             :   case Intrinsic::arm_neon_vld3:
    3199             :   case Intrinsic::arm_neon_vld4:
    3200             :   case Intrinsic::arm_neon_vld2lane:
    3201             :   case Intrinsic::arm_neon_vld3lane:
    3202             :   case Intrinsic::arm_neon_vld4lane:
    3203             :   case Intrinsic::arm_neon_vst1:
    3204             :   case Intrinsic::arm_neon_vst2:
    3205             :   case Intrinsic::arm_neon_vst3:
    3206             :   case Intrinsic::arm_neon_vst4:
    3207             :   case Intrinsic::arm_neon_vst2lane:
    3208             :   case Intrinsic::arm_neon_vst3lane:
    3209             :   case Intrinsic::arm_neon_vst4lane: {
    3210          28 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    3211          42 :                                  CI->arg_operands().end());
    3212          14 :     NewCall = Builder.CreateCall(NewFn, Args);
    3213             :     break;
    3214             :   }
    3215             : 
    3216             :   case Intrinsic::bitreverse:
    3217          24 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    3218           8 :     break;
    3219             : 
    3220             :   case Intrinsic::ctlz:
    3221             :   case Intrinsic::cttz:
    3222             :     assert(CI->getNumArgOperands() == 1 &&
    3223             :            "Mismatch between function args and call args");
    3224             :     NewCall =
    3225         272 :         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    3226          68 :     break;
    3227             : 
    3228          75 :   case Intrinsic::objectsize: {
    3229          75 :     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
    3230          75 :                                    ? Builder.getFalse()
    3231             :                                    : CI->getArgOperand(2);
    3232         225 :     NewCall = Builder.CreateCall(
    3233             :         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
    3234          75 :     break;
    3235             :   }
    3236             : 
    3237             :   case Intrinsic::ctpop:
    3238          18 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    3239           6 :     break;
    3240             : 
    3241             :   case Intrinsic::convert_from_fp16:
    3242           0 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    3243           0 :     break;
    3244             : 
    3245         452 :   case Intrinsic::dbg_value:
    3246             :     // Upgrade from the old version that had an extra offset argument.
    3247             :     assert(CI->getNumArgOperands() == 4);
    3248             :     // Drop nonzero offsets instead of attempting to upgrade them.
    3249         452 :     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
    3250         452 :       if (Offset->isZeroValue()) {
    3251        1800 :         NewCall = Builder.CreateCall(
    3252             :             NewFn,
    3253             :             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
    3254         450 :         break;
    3255             :       }
    3256           2 :     CI->eraseFromParent();
    3257           2 :     return;
    3258             : 
    3259             :   case Intrinsic::x86_xop_vfrcz_ss:
    3260             :   case Intrinsic::x86_xop_vfrcz_sd:
    3261           0 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    3262           0 :     break;
    3263             : 
    3264           8 :   case Intrinsic::x86_xop_vpermil2pd:
    3265             :   case Intrinsic::x86_xop_vpermil2ps:
    3266             :   case Intrinsic::x86_xop_vpermil2pd_256:
    3267             :   case Intrinsic::x86_xop_vpermil2ps_256: {
    3268          16 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    3269          24 :                                  CI->arg_operands().end());
    3270           8 :     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    3271           8 :     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    3272          16 :     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    3273           8 :     NewCall = Builder.CreateCall(NewFn, Args);
    3274             :     break;
    3275             :   }
    3276             : 
    3277          12 :   case Intrinsic::x86_sse41_ptestc:
    3278             :   case Intrinsic::x86_sse41_ptestz:
    3279             :   case Intrinsic::x86_sse41_ptestnzc: {
    3280             :     // The arguments for these intrinsics used to be v4f32, and changed
    3281             :     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    3282             :     // So, the only thing required is a bitcast for both arguments.
    3283             :     // First, check the arguments have the old type.
    3284          12 :     Value *Arg0 = CI->getArgOperand(0);
    3285          12 :     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
    3286             :       return;
    3287             : 
    3288             :     // Old intrinsic, add bitcasts
    3289             :     Value *Arg1 = CI->getArgOperand(1);
    3290             : 
    3291          12 :     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
    3292             : 
    3293          12 :     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    3294          12 :     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    3295             : 
    3296          24 :     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    3297          12 :     break;
    3298             :   }
    3299             : 
    3300         111 :   case Intrinsic::x86_sse41_insertps:
    3301             :   case Intrinsic::x86_sse41_dppd:
    3302             :   case Intrinsic::x86_sse41_dpps:
    3303             :   case Intrinsic::x86_sse41_mpsadbw:
    3304             :   case Intrinsic::x86_avx_dp_ps_256:
    3305             :   case Intrinsic::x86_avx2_mpsadbw: {
    3306             :     // Need to truncate the last argument from i32 to i8 -- this argument models
    3307             :     // an inherently 8-bit immediate operand to these x86 instructions.
    3308         222 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    3309         333 :                                  CI->arg_operands().end());
    3310             : 
    3311             :     // Replace the last argument with a trunc.
    3312         222 :     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    3313         111 :     NewCall = Builder.CreateCall(NewFn, Args);
    3314             :     break;
    3315             :   }
    3316             : 
    3317             :   case Intrinsic::thread_pointer: {
    3318           2 :     NewCall = Builder.CreateCall(NewFn, {});
    3319           2 :     break;
    3320             :   }
    3321             : 
    3322          86 :   case Intrinsic::invariant_start:
    3323             :   case Intrinsic::invariant_end:
    3324             :   case Intrinsic::masked_load:
    3325             :   case Intrinsic::masked_store:
    3326             :   case Intrinsic::masked_gather:
    3327             :   case Intrinsic::masked_scatter: {
    3328         172 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    3329         258 :                                  CI->arg_operands().end());
    3330          86 :     NewCall = Builder.CreateCall(NewFn, Args);
    3331             :     break;
    3332             :   }
    3333             : 
    3334         213 :   case Intrinsic::memcpy:
    3335             :   case Intrinsic::memmove:
    3336             :   case Intrinsic::memset: {
    3337             :     // We have to make sure that the call signature is what we're expecting.
    3338             :     // We only want to change the old signatures by removing the alignment arg:
    3339             :     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
    3340             :     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
    3341             :     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    3342             :     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    3343             :     // Note: i8*'s in the above can be any pointer type
    3344         426 :     if (CI->getNumArgOperands() != 5) {
    3345             :       DefaultCase();
    3346          19 :       return;
    3347             :     }
    3348             :     // Remove alignment argument (3), and add alignment attributes to the
    3349             :     // dest/src pointers.
    3350             :     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
    3351         776 :                       CI->getArgOperand(2), CI->getArgOperand(4)};
    3352         194 :     NewCall = Builder.CreateCall(NewFn, Args);
    3353             :     auto *MemCI = cast<MemIntrinsic>(NewCall);
    3354             :     // All mem intrinsics support dest alignment.
    3355             :     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    3356         388 :     MemCI->setDestAlignment(Align->getZExtValue());
    3357             :     // Memcpy/Memmove also support source alignment.
    3358             :     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
    3359         258 :       MTI->setSourceAlignment(Align->getZExtValue());
    3360         194 :     break;
    3361             :   }
    3362             :   }
    3363             :   assert(NewCall && "Should have either set this variable or returned through "
    3364             :                     "the default case");
    3365        2068 :   std::string Name = CI->getName();
    3366        1034 :   if (!Name.empty()) {
    3367         618 :     CI->setName(Name + ".old");
    3368         412 :     NewCall->setName(Name);
    3369             :   }
    3370        1034 :   CI->replaceAllUsesWith(NewCall);
    3371        1034 :   CI->eraseFromParent();
    3372             : }
    3373             : 
    3374      307323 : void llvm::UpgradeCallsToIntrinsic(Function *F) {
    3375             :   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
    3376             : 
    3377             :   // Check if this function should be upgraded and get the replacement function
    3378             :   // if there is one.
    3379             :   Function *NewFn;
    3380      307323 :   if (UpgradeIntrinsicFunction(F, NewFn)) {
    3381             :     // Replace all users of the old function with the new function or new
    3382             :     // instructions. This is not a range loop because the call is deleted.
    3383       15603 :     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
    3384             :       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
    3385       10717 :         UpgradeIntrinsicCall(CI, NewFn);
    3386             : 
    3387             :     // Remove old function, no longer used, from the module.
    3388        4886 :     F->eraseFromParent();
    3389             :   }
    3390      307323 : }
    3391             : 
    3392        4844 : MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
    3393             :   // Check if the tag uses struct-path aware TBAA format.
    3394        4645 :   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    3395             :     return &MD;
    3396             : 
    3397             :   auto &Context = MD.getContext();
    3398         199 :   if (MD.getNumOperands() == 3) {
    3399          64 :     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    3400             :     MDNode *ScalarType = MDNode::get(Context, Elts);
    3401             :     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    3402             :     Metadata *Elts2[] = {ScalarType, ScalarType,
    3403          32 :                          ConstantAsMetadata::get(
    3404          32 :                              Constant::getNullValue(Type::getInt64Ty(Context))),
    3405          96 :                          MD.getOperand(2)};
    3406             :     return MDNode::get(Context, Elts2);
    3407             :   }
    3408             :   // Create a MDNode <MD, MD, offset 0>
    3409         167 :   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
    3410         334 :                                     Type::getInt64Ty(Context)))};
    3411             :   return MDNode::get(Context, Elts);
    3412             : }
    3413             : 
    3414       14778 : Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
    3415             :                                       Instruction *&Temp) {
    3416       14778 :   if (Opc != Instruction::BitCast)
    3417             :     return nullptr;
    3418             : 
    3419       11779 :   Temp = nullptr;
    3420       11779 :   Type *SrcTy = V->getType();
    3421       33367 :   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
    3422             :       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    3423           0 :     LLVMContext &Context = V->getContext();
    3424             : 
    3425             :     // We have no information about target data layout, so we assume that
    3426             :     // the maximum pointer size is 64bit.
    3427           0 :     Type *MidTy = Type::getInt64Ty(Context);
    3428           0 :     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
    3429             : 
    3430           0 :     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
    3431             :   }
    3432             : 
    3433             :   return nullptr;
    3434             : }
    3435             : 
    3436        3633 : Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
    3437        3633 :   if (Opc != Instruction::BitCast)
    3438             :     return nullptr;
    3439             : 
    3440        3331 :   Type *SrcTy = C->getType();
    3441        9911 :   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
    3442             :       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    3443           0 :     LLVMContext &Context = C->getContext();
    3444             : 
    3445             :     // We have no information about target data layout, so we assume that
    3446             :     // the maximum pointer size is 64bit.
    3447           0 :     Type *MidTy = Type::getInt64Ty(Context);
    3448             : 
    3449           0 :     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
    3450           0 :                                      DestTy);
    3451             :   }
    3452             : 
    3453             :   return nullptr;
    3454             : }
    3455             : 
    3456             : /// Check the debug info version number; if it is out-dated, or if the debug
    3457             : /// info is malformed, drop the debug info. Return true if the module is modified.
    3458       36791 : bool llvm::UpgradeDebugInfo(Module &M) {
    3459       36791 :   unsigned Version = getDebugMetadataVersionFromModule(M);
    3460       36791 :   if (Version == DEBUG_METADATA_VERSION) {
    3461        1976 :     bool BrokenDebugInfo = false;
    3462        1976 :     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
    3463           0 :       report_fatal_error("Broken module found, compilation aborted!");
    3464        1976 :     if (!BrokenDebugInfo)
    3465             :       // Everything is ok.
    3466        1930 :       return false;
    3467             :     else {
    3468             :       // Diagnose malformed debug info.
    3469             :       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
    3470          46 :       M.getContext().diagnose(Diag);
    3471             :     }
    3472             :   }
    3473       34861 :   bool Modified = StripDebugInfo(M);
    3474       34861 :   if (Modified && Version != DEBUG_METADATA_VERSION) {
    3475             :     // Diagnose a version mismatch.
    3476             :     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    3477          49 :     M.getContext().diagnose(DiagVersion);
    3478             :   }
    3479             :   return Modified;
    3480             : }
    3481             : 
    3482        2569 : bool llvm::UpgradeRetainReleaseMarker(Module &M) {
    3483             :   bool Changed = false;
    3484             :   NamedMDNode *ModRetainReleaseMarker =
    3485        2569 :       M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
    3486        2569 :   if (ModRetainReleaseMarker) {
    3487           1 :     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    3488           1 :     if (Op) {
    3489             :       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
    3490             :       if (ID) {
    3491             :         SmallVector<StringRef, 4> ValueComp;
    3492           2 :         ID->getString().split(ValueComp, "#");
    3493           1 :         if (ValueComp.size() == 2) {
    3494           4 :           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
    3495           1 :           Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
    3496           1 :           ModRetainReleaseMarker->setOperand(0,
    3497           1 :                                              MDNode::get(M.getContext(), Ops));
    3498             :           Changed = true;
    3499             :         }
    3500             :       }
    3501             :     }
    3502             :   }
    3503        2569 :   return Changed;
    3504             : }
    3505             : 
    3506       35602 : bool llvm::UpgradeModuleFlags(Module &M) {
    3507       35602 :   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
    3508       35602 :   if (!ModFlags)
    3509             :     return false;
    3510             : 
    3511             :   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
    3512        7656 :   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    3513        4999 :     MDNode *Op = ModFlags->getOperand(I);
    3514        4999 :     if (Op->getNumOperands() != 3)
    3515           1 :       continue;
    3516             :     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    3517           3 :     if (!ID)
    3518           3 :       continue;
    3519        4995 :     if (ID->getString() == "Objective-C Image Info Version")
    3520             :       HasObjCFlag = true;
    3521        4995 :     if (ID->getString() == "Objective-C Class Properties")
    3522             :       HasClassProperties = true;
    3523             :     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    3524             :     // fields was Error and is now Max.
    3525        9623 :     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
    3526             :       if (auto *Behavior =
    3527             :               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
    3528         415 :         if (Behavior->getLimitedValue() == Module::Error) {
    3529         280 :           Type *Int32Ty = Type::getInt32Ty(M.getContext());
    3530             :           Metadata *Ops[3] = {
    3531         280 :               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
    3532         280 :               MDString::get(M.getContext(), ID->getString()),
    3533         840 :               Op->getOperand(2)};
    3534         560 :           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
    3535             :           Changed = true;
    3536             :         }
    3537             :       }
    3538             :     }
    3539             :     // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    3540             :     // section name so that llvm-lto will not complain about mismatching
    3541             :     // module flags that are functionally the same.
    3542        4995 :     if (ID->getString() == "Objective-C Image Info Section") {
    3543             :       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
    3544             :         SmallVector<StringRef, 4> ValueComp;
    3545          64 :         Value->getString().split(ValueComp, " ");
    3546          32 :         if (ValueComp.size() != 1) {
    3547             :           std::string NewValue;
    3548         167 :           for (auto &S : ValueComp)
    3549          74 :             NewValue += S.str();
    3550             :           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
    3551          57 :                               MDString::get(M.getContext(), NewValue)};
    3552          38 :           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
    3553             :           Changed = true;
    3554             :         }
    3555             :       }
    3556             :     }
    3557             :   }
    3558             : 
    3559             :   // "Objective-C Class Properties" was recently added for Objective-C. We
    3560             :   // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
    3561             :   // flag of value 0, so we can correctly downgrade this flag when trying to
    3562             :   // link an ObjC bitcode without this module flag with an ObjC bitcode with
    3563             :   // this module flag.
    3564        2657 :   if (HasObjCFlag && !HasClassProperties) {
    3565          18 :     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
    3566             :                     (uint32_t)0);
    3567             :     Changed = true;
    3568             :   }
    3569             : 
    3570             :   return Changed;
    3571             : }
    3572             : 
    3573       33033 : void llvm::UpgradeSectionAttributes(Module &M) {
    3574           2 :   auto TrimSpaces = [](StringRef Section) -> std::string {
    3575             :     SmallVector<StringRef, 5> Components;
    3576           2 :     Section.split(Components, ',');
    3577             : 
    3578             :     SmallString<32> Buffer;
    3579             :     raw_svector_ostream OS(Buffer);
    3580             : 
    3581          18 :     for (auto Component : Components)
    3582           8 :       OS << ',' << Component.trim();
    3583             : 
    3584           8 :     return OS.str().substr(1);
    3585             :   };
    3586             : 
    3587       60269 :   for (auto &GV : M.globals()) {
    3588       27236 :     if (!GV.hasSection())
    3589       26679 :       continue;
    3590             : 
    3591         557 :     StringRef Section = GV.getSection();
    3592             : 
    3593         555 :     if (!Section.startswith("__DATA, __objc_catlist"))
    3594         555 :       continue;
    3595             : 
    3596             :     // __DATA, __objc_catlist, regular, no_dead_strip
    3597             :     // __DATA,__objc_catlist,regular,no_dead_strip
    3598           6 :     GV.setSection(TrimSpaces(Section));
    3599             :   }
    3600       33033 : }
    3601             : 
    3602           4 : static bool isOldLoopArgument(Metadata *MD) {
    3603             :   auto *T = dyn_cast_or_null<MDTuple>(MD);
    3604             :   if (!T)
    3605             :     return false;
    3606           4 :   if (T->getNumOperands() < 1)
    3607             :     return false;
    3608           4 :   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
    3609             :   if (!S)
    3610             :     return false;
    3611           4 :   return S->getString().startswith("llvm.vectorizer.");
    3612             : }
    3613             : 
    3614           8 : static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
    3615             :   StringRef OldPrefix = "llvm.vectorizer.";
    3616             :   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
    3617             : 
    3618             :   if (OldTag == "llvm.vectorizer.unroll")
    3619           2 :     return MDString::get(C, "llvm.loop.interleave.count");
    3620             : 
    3621           6 :   return MDString::get(
    3622          12 :       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
    3623          18 :              .str());
    3624             : }
    3625             : 
    3626          10 : static Metadata *upgradeLoopArgument(Metadata *MD) {
    3627             :   auto *T = dyn_cast_or_null<MDTuple>(MD);
    3628             :   if (!T)
    3629             :     return MD;
    3630          10 :   if (T->getNumOperands() < 1)
    3631             :     return MD;
    3632          10 :   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
    3633             :   if (!OldTag)
    3634             :     return MD;
    3635           8 :   if (!OldTag->getString().startswith("llvm.vectorizer."))
    3636             :     return MD;
    3637             : 
    3638             :   // This has an old tag.  Upgrade it.
    3639             :   SmallVector<Metadata *, 8> Ops;
    3640           8 :   Ops.reserve(T->getNumOperands());
    3641          16 :   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
    3642          16 :   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    3643           8 :     Ops.push_back(T->getOperand(I));
    3644             : 
    3645             :   return MDTuple::get(T->getContext(), Ops);
    3646             : }
    3647             : 
    3648           2 : MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
    3649             :   auto *T = dyn_cast<MDTuple>(&N);
    3650             :   if (!T)
    3651             :     return &N;
    3652             : 
    3653           4 :   if (none_of(T->operands(), isOldLoopArgument))
    3654             :     return &N;
    3655             : 
    3656             :   SmallVector<Metadata *, 8> Ops;
    3657           2 :   Ops.reserve(T->getNumOperands());
    3658          22 :   for (Metadata *MD : T->operands())
    3659          10 :     Ops.push_back(upgradeLoopArgument(MD));
    3660             : 
    3661             :   return MDTuple::get(T->getContext(), Ops);
    3662             : }

Generated by: LCOV version 1.13