LCOV - code coverage report
Current view: top level - lib/IR - AutoUpgrade.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1482 1529 96.9 %
Date: 2017-09-14 15:23:50 Functions: 32 32 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the auto-upgrade helper functions.
      11             : // This is where deprecated IR intrinsics and other IR features are updated to
      12             : // current specifications.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "llvm/IR/AutoUpgrade.h"
      17             : #include "llvm/ADT/StringSwitch.h"
      18             : #include "llvm/IR/CFG.h"
      19             : #include "llvm/IR/CallSite.h"
      20             : #include "llvm/IR/Constants.h"
      21             : #include "llvm/IR/DIBuilder.h"
      22             : #include "llvm/IR/DebugInfo.h"
      23             : #include "llvm/IR/DiagnosticInfo.h"
      24             : #include "llvm/IR/Function.h"
      25             : #include "llvm/IR/IRBuilder.h"
      26             : #include "llvm/IR/Instruction.h"
      27             : #include "llvm/IR/IntrinsicInst.h"
      28             : #include "llvm/IR/LLVMContext.h"
      29             : #include "llvm/IR/Module.h"
      30             : #include "llvm/Support/ErrorHandling.h"
      31             : #include "llvm/Support/Regex.h"
      32             : #include <cstring>
      33             : using namespace llvm;
      34             : 
      35         766 : static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); } // Moves GV aside by appending ".old" to its current name; callers in this file do this right before requesting a replacement declaration.
      36             : 
      37             : // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
      38             : // changed their type from v4f32 to v2i64. Returns true (after renaming F and storing the declaration for IID in NewFn) only when F's first parameter is a 4-element float vector; otherwise returns false and does not write NewFn.
      39         168 : static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
      40             :                                   Function *&NewFn) {
      41             :   // Check whether this is an old version of the function, which received
      42             :   // v4f32 arguments. Only parameter 0 is inspected.
      43         336 :   Type *Arg0Type = F->getFunctionType()->getParamType(0);
      44         168 :   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
      45             :     return false; // First parameter is not <4 x float>: not the old signature.
      46             : 
      47             :   // Yes, it's old, replace it with new version.
      48           8 :   rename(F); // F gets the ".old" suffix before the declaration for IID is requested.
      49           8 :   NewFn = Intrinsic::getDeclaration(F->getParent(), IID); // Declaration is requested in F's own module.
      50           8 :   return true;
      51             : }
      52             : 
      53             : // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
      54             : // arguments have changed their type from i32 to i8. Returns true (after renaming F and storing the declaration for IID in NewFn) only when F's last parameter is i32; otherwise returns false and does not write NewFn.
      55         113 : static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
      56             :                                              Function *&NewFn) {
      57             :   // Check that the last argument is an i32.
      58         113 :   Type *LastArgType = F->getFunctionType()->getParamType(
      59         339 :      F->getFunctionType()->getNumParams() - 1); // Index of the last parameter; NOTE(review): assumes F has at least one parameter - confirm for all callers.
      60         113 :   if (!LastArgType->isIntegerTy(32))
      61             :     return false; // Last parameter is not i32: nothing to upgrade.
      62             : 
      63             :   // Move this function aside and map down.
      64          13 :   rename(F);
      65          13 :   NewFn = Intrinsic::getDeclaration(F->getParent(), IID); // Declaration is requested in F's own module.
      66          13 :   return true;
      67             : }
      68             : 
      69        9006 : static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { // Returns true when Name matches one of the exact names or prefixes listed below; F is consulted only for the xop.vpcom argument-count test. The visible caller passes Name with the leading "x86." already removed.
      70             :   // All of the intrinsics matches below should be marked with which llvm
      71             :   // version started autoupgrading them. At some point in the future we would
      72             :   // like to use this information to remove upgrade code for some older
      73             :   // intrinsics. It is currently undecided how we will determine that future
      74             :   // point.
      75        9006 :   if (Name=="ssse3.pabs.b.128" || // Added in 6.0
      76        8986 :       Name=="ssse3.pabs.w.128" || // Added in 6.0
      77        8966 :       Name=="ssse3.pabs.d.128" || // Added in 6.0
      78        8946 :       Name.startswith("avx2.pabs.") || // Added in 6.0
      79        8907 :       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      80        8887 :       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      81        8886 :       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      82        8886 :       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      83        8886 :       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      84        8886 :       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      85        8872 :       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      86        8858 :       Name == "sse.add.ss" || // Added in 4.0
      87        8853 :       Name == "sse2.add.sd" || // Added in 4.0
      88        8849 :       Name == "sse.sub.ss" || // Added in 4.0
      89        8838 :       Name == "sse2.sub.sd" || // Added in 4.0
      90        8834 :       Name == "sse.mul.ss" || // Added in 4.0
      91        8823 :       Name == "sse2.mul.sd" || // Added in 4.0
      92        8819 :       Name == "sse.div.ss" || // Added in 4.0
      93        8815 :       Name == "sse2.div.sd" || // Added in 4.0
      94        8811 :       Name == "sse41.pmaxsb" || // Added in 3.9
      95        8796 :       Name == "sse2.pmaxs.w" || // Added in 3.9
      96        8780 :       Name == "sse41.pmaxsd" || // Added in 3.9
      97        8760 :       Name == "sse2.pmaxu.b" || // Added in 3.9
      98        8741 :       Name == "sse41.pmaxuw" || // Added in 3.9
      99        8726 :       Name == "sse41.pmaxud" || // Added in 3.9
     100        8709 :       Name == "sse41.pminsb" || // Added in 3.9
     101        8694 :       Name == "sse2.pmins.w" || // Added in 3.9
     102        8677 :       Name == "sse41.pminsd" || // Added in 3.9
     103        8660 :       Name == "sse2.pminu.b" || // Added in 3.9
     104        8644 :       Name == "sse41.pminuw" || // Added in 3.9
     105        8629 :       Name == "sse41.pminud" || // Added in 3.9
     106        8611 :       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
     107        8600 :       Name.startswith("avx2.pmax") || // Added in 3.9
     108        8540 :       Name.startswith("avx2.pmin") || // Added in 3.9
     109        8480 :       Name.startswith("avx512.mask.pmax") || // Added in 4.0
     110        8448 :       Name.startswith("avx512.mask.pmin") || // Added in 4.0
     111        8416 :       Name.startswith("avx2.vbroadcast") || // Added in 3.8
     112        8409 :       Name.startswith("avx2.pbroadcast") || // Added in 3.8
     113        8393 :       Name.startswith("avx.vpermil.") || // Added in 3.1
     114        8361 :       Name.startswith("sse2.pshuf") || // Added in 3.9
     115        8343 :       Name.startswith("avx512.pbroadcast") || // Added in 3.9
     116        8331 :       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
     117        8326 :       Name.startswith("avx512.mask.movddup") || // Added in 3.9
     118        8323 :       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
     119        8320 :       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
     120        8317 :       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
     121        8314 :       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
     122        8310 :       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
     123        8306 :       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
     124        8300 :       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
     125        8294 :       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
     126        8292 :       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
     127        8290 :       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
     128        8276 :       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
     129        8262 :       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
     130        8256 :       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
     131        8250 :       Name.startswith("avx512.mask.pand.") || // Added in 3.9
     132        8246 :       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
     133        8242 :       Name.startswith("avx512.mask.por.") || // Added in 3.9
     134        8238 :       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
     135        8234 :       Name.startswith("avx512.mask.and.") || // Added in 3.9
     136        8231 :       Name.startswith("avx512.mask.andn.") || // Added in 3.9
     137        8228 :       Name.startswith("avx512.mask.or.") || // Added in 3.9
     138        8225 :       Name.startswith("avx512.mask.xor.") || // Added in 3.9
     139        8222 :       Name.startswith("avx512.mask.padd.") || // Added in 4.0
     140        8215 :       Name.startswith("avx512.mask.psub.") || // Added in 4.0
     141        8208 :       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
     142        8201 :       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
     143        8198 :       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
     144        8195 :       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
     145        8192 :       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
     146        8189 :       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
     147        8184 :       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
     148        8179 :       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
     149        8174 :       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
     150        8169 :       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
     151        8165 :       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
     152        8162 :       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
     153        8159 :       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
     154        8155 :       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
     155        8141 :       Name == "avx512.mask.add.pd.128" || // Added in 4.0
     156        8141 :       Name == "avx512.mask.add.pd.256" || // Added in 4.0
     157        8141 :       Name == "avx512.mask.add.ps.128" || // Added in 4.0
     158        8140 :       Name == "avx512.mask.add.ps.256" || // Added in 4.0
     159        8139 :       Name == "avx512.mask.div.pd.128" || // Added in 4.0
     160        8139 :       Name == "avx512.mask.div.pd.256" || // Added in 4.0
     161        8139 :       Name == "avx512.mask.div.ps.128" || // Added in 4.0
     162        8138 :       Name == "avx512.mask.div.ps.256" || // Added in 4.0
     163        8137 :       Name == "avx512.mask.mul.pd.128" || // Added in 4.0
     164        8137 :       Name == "avx512.mask.mul.pd.256" || // Added in 4.0
     165        8137 :       Name == "avx512.mask.mul.ps.128" || // Added in 4.0
     166        8136 :       Name == "avx512.mask.mul.ps.256" || // Added in 4.0
     167        8135 :       Name == "avx512.mask.sub.pd.128" || // Added in 4.0
     168        8135 :       Name == "avx512.mask.sub.pd.256" || // Added in 4.0
     169        8135 :       Name == "avx512.mask.sub.ps.128" || // Added in 4.0
     170        8134 :       Name == "avx512.mask.sub.ps.256" || // Added in 4.0
     171        8133 :       Name == "avx512.mask.max.pd.128" || // Added in 5.0
     172        8133 :       Name == "avx512.mask.max.pd.256" || // Added in 5.0
     173        8133 :       Name == "avx512.mask.max.ps.128" || // Added in 5.0
     174        8132 :       Name == "avx512.mask.max.ps.256" || // Added in 5.0
     175        8131 :       Name == "avx512.mask.min.pd.128" || // Added in 5.0
     176        8131 :       Name == "avx512.mask.min.pd.256" || // Added in 5.0
     177        8131 :       Name == "avx512.mask.min.ps.128" || // Added in 5.0
     178        8130 :       Name == "avx512.mask.min.ps.256" || // Added in 5.0
     179        8129 :       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
     180        8119 :       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
     181        8113 :       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
     182        8110 :       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
     183        8102 :       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
     184        8098 :       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
     185        8092 :       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
     186        8084 :       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
     187        8078 :       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
     188        8072 :       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
     189        8064 :       Name.startswith("avx512.mask.pslli") || // Added in 4.0
     190        8062 :       Name.startswith("avx512.mask.psrai") || // Added in 4.0
     191        8060 :       Name.startswith("avx512.mask.psrli") || // Added in 4.0
     192        8058 :       Name.startswith("avx512.mask.psllv") || // Added in 4.0
     193        8048 :       Name.startswith("avx512.mask.psrav") || // Added in 4.0
     194        8038 :       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
     195        8028 :       Name.startswith("sse41.pmovsx") || // Added in 3.8
     196        8010 :       Name.startswith("sse41.pmovzx") || // Added in 3.9
     197        7987 :       Name.startswith("avx2.pmovsx") || // Added in 3.9
     198        7975 :       Name.startswith("avx2.pmovzx") || // Added in 3.9
     199        7963 :       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
     200        7944 :       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
     201        7925 :       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
     202        7919 :       Name == "sse2.cvtdq2pd" || // Added in 3.9
     203        7914 :       Name == "sse2.cvtps2pd" || // Added in 3.9
     204        7909 :       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
     205        7903 :       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
     206        7900 :       Name.startswith("avx.vinsertf128.") || // Added in 3.7
     207        7889 :       Name == "avx2.vinserti128" || // Added in 3.7
     208        7887 :       Name.startswith("avx512.mask.insert") || // Added in 4.0
     209        7875 :       Name.startswith("avx.vextractf128.") || // Added in 3.7
     210        7862 :       Name == "avx2.vextracti128" || // Added in 3.7
     211        7860 :       Name.startswith("avx512.mask.vextract") || // Added in 4.0
     212        7852 :       Name.startswith("sse4a.movnt.") || // Added in 3.9
     213        7838 :       Name.startswith("avx.movnt.") || // Added in 3.2
     214        7832 :       Name.startswith("avx512.storent.") || // Added in 3.9
     215        7829 :       Name == "sse41.movntdqa" || // Added in 5.0
     216        7818 :       Name == "avx2.movntdqa" || // Added in 5.0
     217        7810 :       Name == "avx512.movntdqa" || // Added in 5.0
     218        7809 :       Name == "sse2.storel.dq" || // Added in 3.9
     219        7808 :       Name.startswith("sse.storeu.") || // Added in 3.9
     220        7803 :       Name.startswith("sse2.storeu.") || // Added in 3.9
     221        7793 :       Name.startswith("avx.storeu.") || // Added in 3.9
     222        7783 :       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
     223        7763 :       Name.startswith("avx512.mask.store.p") || // Added in 3.9
     224        7757 :       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
     225        7757 :       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
     226        7757 :       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
     227        7754 :       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
     228        7751 :       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
     229        7731 :       Name.startswith("avx512.mask.load.") || // Added in 3.9
     230        7719 :       Name == "sse42.crc32.64.8" || // Added in 3.4
     231        7706 :       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
     232        7703 :       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
     233        7699 :       Name.startswith("avx512.mask.valign.") || // Added in 4.0
     234        7693 :       Name.startswith("sse2.psll.dq") || // Added in 3.7
     235        7689 :       Name.startswith("sse2.psrl.dq") || // Added in 3.7
     236        7682 :       Name.startswith("avx2.psll.dq") || // Added in 3.7
     237        7678 :       Name.startswith("avx2.psrl.dq") || // Added in 3.7
     238        7674 :       Name.startswith("avx512.psll.dq") || // Added in 3.9
     239        7672 :       Name.startswith("avx512.psrl.dq") || // Added in 3.9
     240        7670 :       Name == "sse41.pblendw" || // Added in 3.7
     241        7661 :       Name.startswith("sse41.blendp") || // Added in 3.7
     242        7649 :       Name.startswith("avx.blend.p") || // Added in 3.7
     243        7641 :       Name == "avx2.pblendw" || // Added in 3.7
     244        7634 :       Name.startswith("avx2.pblendd.") || // Added in 3.7
     245        7622 :       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
     246        7618 :       Name == "avx2.vbroadcasti128" || // Added in 3.7
     247        7618 :       Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
     248        7616 :       Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
     249        7614 :       Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
     250        7612 :       Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
     251        7610 :       Name == "avx512.mask.broadcastf32x8.512" || // Added in 6.0
     252        7609 :       Name == "avx512.mask.broadcasti32x8.512" || // Added in 6.0
     253        7608 :       Name == "avx512.mask.broadcastf64x4.512" || // Added in 6.0
     254        7607 :       Name == "avx512.mask.broadcasti64x4.512" || // Added in 6.0
     255        7606 :       Name == "xop.vpcmov" || // Added in 3.8
     256        7602 :       Name == "xop.vpcmov.256" || // Added in 5.0
     257        7598 :       Name.startswith("avx512.mask.move.s") || // Added in 4.0
     258        7596 :       Name.startswith("avx512.cvtmask2") || // Added in 5.0
     259        7966 :       (Name.startswith("xop.vpcom") && // Added in 3.2
     260         192 :        F->arg_size() == 2) || // Only the two-argument xop.vpcom* form matches.
     261        7454 :       Name.startswith("sse2.pavg") || // Added in 6.0
     262        7451 :       Name.startswith("avx2.pavg") || // Added in 6.0
     263        7447 :       Name.startswith("avx512.mask.pavg")) // Added in 6.0
     264             :     return true;
     265             : 
     266             :   return false; // Name matched none of the entries above.
     267             : }
     268             : 
     269        9080 : static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
     270             :                                         Function *&NewFn) { // Returns true when F is recognised as needing an upgrade; NewFn is then either a replacement declaration or nullptr (the ShouldUpgradeX86Intrinsic path). Returns false without writing NewFn otherwise.
     271             :   // Only handle intrinsics that start with "x86.".
     272       18086 :   if (!Name.startswith("x86."))
     273             :     return false;
     274             :   // Remove "x86." prefix.
     275        9006 :   Name = Name.substr(4);
     276             : 
     277        9006 :   if (ShouldUpgradeX86Intrinsic(F, Name)) {
     278        1567 :     NewFn = nullptr; // No replacement declaration is created on this path.
     279        1567 :     return true;
     280             :   }
     281             : 
     282             :   // SSE4.1 ptest functions may have an old signature.
     283        7607 :   if (Name.startswith("sse41.ptest")) { // Added in 3.2
     284         399 :     if (Name.substr(11) == "c") // 11 is the length of "sse41.ptest"; the remainder selects the variant.
     285          63 :       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
     286         264 :     if (Name.substr(11) == "z")
     287          54 :       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
     288         153 :     if (Name.substr(11) == "nzc")
     289          51 :       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
     290             :   } // Any other "sse41.ptest*" suffix falls through to the checks below.
     291             :   // Several blend and other instructions with masks used the wrong number of
     292             :   // bits.
     293        7304 :   if (Name == "sse41.insertps") // Added in 3.6
     294             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
     295          33 :                                             NewFn);
     296        7255 :   if (Name == "sse41.dppd") // Added in 3.6
     297             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
     298          17 :                                             NewFn);
     299        7238 :   if (Name == "sse41.dpps") // Added in 3.6
     300             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
     301          17 :                                             NewFn);
     302        7221 :   if (Name == "sse41.mpsadbw") // Added in 3.6
     303             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
     304          17 :                                             NewFn);
     305        7203 :   if (Name == "avx.dp.ps.256") // Added in 3.6
     306             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
     307          16 :                                             NewFn);
     308        7184 :   if (Name == "avx2.mpsadbw") // Added in 3.6
     309             :     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
     310          13 :                                             NewFn);
     311             : 
     312             :   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
     313        7163 :   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { // Only the two-argument form is upgraded.
     314           0 :     rename(F);
     315           0 :     NewFn = Intrinsic::getDeclaration(F->getParent(),
     316             :                                       Intrinsic::x86_xop_vfrcz_ss);
     317           0 :     return true;
     318             :   }
     319        7163 :   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) { // Only the two-argument form is upgraded.
     320           0 :     rename(F);
     321           0 :     NewFn = Intrinsic::getDeclaration(F->getParent(),
     322             :                                       Intrinsic::x86_xop_vfrcz_sd);
     323           0 :     return true;
     324             :   }
     325             :   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
     326        7206 :   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
     327          96 :     auto Idx = F->getFunctionType()->getParamType(2); // Type of the third parameter (index 2).
     328             :     if (Idx->isFPOrFPVectorTy()) {
     329           4 :       rename(F);
     330           4 :       unsigned IdxSize = Idx->getPrimitiveSizeInBits(); // Total bit width of the index type.
     331           4 :       unsigned EltSize = Idx->getScalarSizeInBits(); // Bit width of one element.
     332             :       Intrinsic::ID Permil2ID;
     333           4 :       if (EltSize == 64 && IdxSize == 128)
     334             :         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
     335           3 :       else if (EltSize == 32 && IdxSize == 128)
     336             :         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
     337           2 :       else if (EltSize == 64 && IdxSize == 256)
     338             :         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
     339             :       else // None of the three size combinations above matched.
     340           1 :         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
     341           4 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
     342           4 :       return true;
     343             :     }
     344             :   }
     345             : 
     346             :   return false; // No x86 upgrade rule in this function applied.
     347             : }
     348             : 
     349      264688 : static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     350             :   assert(F && "Illegal to upgrade a non-existent Function.");
     351             : 
     352             :   // Quickly eliminate it, if it's not a candidate.
     353      264688 :   StringRef Name = F->getName();
     354      297757 :   if (Name.size() <= 8 || !Name.startswith("llvm."))
     355             :     return false;
     356       33069 :   Name = Name.substr(5); // Strip off "llvm."
     357             : 
     358       66138 :   switch (Name[0]) {
     359             :   default: break;
     360        6249 :   case 'a': {
     361       12497 :     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
     362           3 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
     363           6 :                                         F->arg_begin()->getType());
     364          26 :       return true;
     365             :     }
     366        6248 :     if (Name.startswith("arm.neon.vclz")) {
     367             :       Type* args[2] = {
     368           2 :         F->arg_begin()->getType(),
     369           2 :         Type::getInt1Ty(F->getContext())
     370           4 :       };
     371             :       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
     372             :       // the end of the name. Change name from llvm.arm.neon.vclz.* to
     373             :       //  llvm.ctlz.*
     374           4 :       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
     375           8 :       NewFn = Function::Create(fType, F->getLinkage(),
     376           6 :                                "llvm.ctlz." + Name.substr(14), F->getParent());
     377             :       return true;
     378             :     }
     379        6246 :     if (Name.startswith("arm.neon.vcnt")) {
     380           2 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
     381           4 :                                         F->arg_begin()->getType());
     382           2 :       return true;
     383             :     }
     384        6242 :     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
     385        6242 :     if (vldRegex.match(Name)) {
     386          14 :       auto fArgs = F->getFunctionType()->params();
     387          21 :       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
     388             :       // Can't use Intrinsic::getDeclaration here as the return types might
     389             :       // then only be structurally equal.
     390          14 :       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
     391          28 :       NewFn = Function::Create(fType, F->getLinkage(),
     392          28 :                                "llvm." + Name + ".p0i8", F->getParent());
     393           7 :       return true;
     394             :     }
     395        6235 :     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
     396        6235 :     if (vstRegex.match(Name)) {
     397             :       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
     398             :                                                 Intrinsic::arm_neon_vst2,
     399             :                                                 Intrinsic::arm_neon_vst3,
     400             :                                                 Intrinsic::arm_neon_vst4};
     401             : 
     402             :       static const Intrinsic::ID StoreLaneInts[] = {
     403             :         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
     404             :         Intrinsic::arm_neon_vst4lane
     405             :       };
     406             : 
     407          14 :       auto fArgs = F->getFunctionType()->params();
     408           7 :       Type *Tys[] = {fArgs[0], fArgs[1]};
     409           7 :       if (Name.find("lane") == StringRef::npos)
     410           4 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     411           4 :                                           StoreInts[fArgs.size() - 3], Tys);
     412             :       else
     413           3 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     414           3 :                                           StoreLaneInts[fArgs.size() - 5], Tys);
     415             :       return true;
     416             :     }
     417       12455 :     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
     418           2 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
     419           2 :       return true;
     420             :     }
     421        6226 :     break;
     422             :   }
     423             : 
     424        2627 :   case 'c': {
     425        3360 :     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
     426          32 :       rename(F);
     427          32 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
     428          64 :                                         F->arg_begin()->getType());
     429          32 :       return true;
     430             :     }
     431        3134 :     if (Name.startswith("cttz.") && F->arg_size() == 1) {
     432          18 :       rename(F);
     433          18 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
     434          36 :                                         F->arg_begin()->getType());
     435          18 :       return true;
     436             :     }
     437             :     break;
     438             :   }
     439         919 :   case 'd': {
     440        1291 :     if (Name == "dbg.value" && F->arg_size() == 4) {
     441         122 :       rename(F);
     442         122 :       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
     443         122 :       return true;
     444             :     }
     445             :     break;
     446             :   }
     447        1086 :   case 'i':
     448             :   case 'l': {
     449        1086 :     bool IsLifetimeStart = Name.startswith("lifetime.start");
     450         771 :     if (IsLifetimeStart || Name.startswith("invariant.start")) {
     451         362 :       Intrinsic::ID ID = IsLifetimeStart ?
     452             :         Intrinsic::lifetime_start : Intrinsic::invariant_start;
     453         724 :       auto Args = F->getFunctionType()->params();
     454         362 :       Type* ObjectPtr[1] = {Args[1]};
     455        1448 :       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
     456          70 :         rename(F);
     457          70 :         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
     458          70 :         return true;
     459             :       }
     460             :     }
     461             : 
     462        1016 :     bool IsLifetimeEnd = Name.startswith("lifetime.end");
     463         724 :     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
     464         331 :       Intrinsic::ID ID = IsLifetimeEnd ?
     465             :         Intrinsic::lifetime_end : Intrinsic::invariant_end;
     466             : 
     467         662 :       auto Args = F->getFunctionType()->params();
     468         662 :       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
     469        1324 :       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
     470          53 :         rename(F);
     471          53 :         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
     472          53 :         return true;
     473             :       }
     474             :     }
     475             :     break;
     476             :   }
     477        3777 :   case 'm': {
     478        3950 :     if (Name.startswith("masked.load.")) {
     479         346 :       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
     480         692 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
     481          10 :         rename(F);
     482          10 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     483             :                                           Intrinsic::masked_load,
     484          10 :                                           Tys);
     485          10 :         return true;
     486             :       }
     487             :     }
     488        3928 :     if (Name.startswith("masked.store.")) {
     489         322 :       auto Args = F->getFunctionType()->params();
     490         161 :       Type *Tys[] = { Args[0], Args[1] };
     491         644 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
     492          10 :         rename(F);
     493          10 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     494             :                                           Intrinsic::masked_store,
     495          10 :                                           Tys);
     496          10 :         return true;
     497             :       }
     498             :     }
     499             :     // Renaming gather/scatter intrinsics with no address space overloading
     500             :     // to the new overload which includes an address space
     501        3892 :     if (Name.startswith("masked.gather.")) {
     502         270 :       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
     503         540 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
     504           3 :         rename(F);
     505           3 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     506           3 :                                           Intrinsic::masked_gather, Tys);
     507           3 :         return true;
     508             :       }
     509             :     }
     510        3862 :     if (Name.startswith("masked.scatter.")) {
     511         216 :       auto Args = F->getFunctionType()->params();
     512         108 :       Type *Tys[] = {Args[0], Args[1]};
     513         432 :       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
     514           3 :         rename(F);
     515           3 :         NewFn = Intrinsic::getDeclaration(F->getParent(),
     516           3 :                                           Intrinsic::masked_scatter, Tys);
     517           3 :         return true;
     518             :       }
     519             :     }
     520             :     break;
     521             :   }
     522         646 :   case 'n': {
     523        1159 :     if (Name.startswith("nvvm.")) {
     524         513 :       Name = Name.substr(5);
     525             : 
     526             :       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
     527         513 :       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
     528        1539 :                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
     529        1539 :                               .Case("clz.i", Intrinsic::ctlz)
     530        1539 :                               .Case("popc.i", Intrinsic::ctpop)
     531        1026 :                               .Default(Intrinsic::not_intrinsic);
     532           8 :       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
     533          16 :         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
     534           8 :                                           {F->getReturnType()});
     535           8 :         return true;
     536             :       }
     537             : 
     538             :       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
     539             :       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
     540             :       //
     541             :       // TODO: We could add lohi.i2d.
     542         505 :       bool Expand = StringSwitch<bool>(Name)
     543        1515 :                         .Cases("abs.i", "abs.ll", true)
     544        1515 :                         .Cases("clz.ll", "popc.ll", "h2f", true)
     545        1515 :                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
     546        1515 :                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
     547        1010 :                         .Default(false);
     548          26 :       if (Expand) {
     549          26 :         NewFn = nullptr;
     550          26 :         return true;
     551             :       }
     552             :     }
     553             :     break;
     554             :   }
     555         123 :   case 'o':
     556             :     // We only need to change the name to match the mangling including the
     557             :     // address space.
     558         246 :     if (Name.startswith("objectsize.")) {
     559         246 :       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
     560         246 :       if (F->arg_size() == 2 ||
     561         334 :           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
     562          37 :         rename(F);
     563          37 :         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
     564          37 :                                           Tys);
     565          37 :         return true;
     566             :       }
     567             :     }
     568             :     break;
     569             : 
     570        1126 :   case 's':
     571        1132 :     if (Name == "stackprotectorcheck") {
     572           6 :       NewFn = nullptr;
     573           6 :       return true;
     574             :     }
     575             :     break;
     576             : 
     577        9080 :   case 'x':
     578        9080 :     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
     579             :       return true;
     580             :   }
     581             :   // Remangle our intrinsic since we upgrade the mangling
     582       31056 :   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
     583       62112 :   if (Result != None) {
     584          83 :     NewFn = Result.getValue();
     585          83 :     return true;
     586             :   }
     587             : 
     588             :   //  This may not belong here. This function is effectively being overloaded
     589             :   //  to both detect an intrinsic which needs upgrading, and to provide the
     590             :   //  upgraded form of the intrinsic. We should perhaps have two separate
     591             :   //  functions for this.
     592             :   return false;
     593             : }
     594             : 
     595      264688 : bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
                         // Entry point for upgrading an intrinsic declaration: clears NewFn, lets
                         // UpgradeIntrinsicFunction1 decide whether F needs upgrading, and then
                         // refreshes the intrinsic attributes. Returns whatever
                         // UpgradeIntrinsicFunction1 returned. NewFn receives the replacement
                         // declaration when one was produced and stays null otherwise (a true
                         // result with a null NewFn is possible).
     596      264688 :   NewFn = nullptr;
     597      264688 :   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
     598             :   assert(F != NewFn && "Intrinsic function upgraded to the same function");
     599             : 
     600             :   // Upgrade intrinsic attributes.  This does not change the function.
                         // The attributes are reset on the replacement declaration when there is
                         // one, otherwise on F itself, and only if it has a non-zero intrinsic ID.
     601      264688 :   if (NewFn)
     602         497 :     F = NewFn;
     603      264688 :   if (Intrinsic::ID id = F->getIntrinsicID())
     604       31092 :     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
     605      264688 :   return Upgraded;
     606             : }
     607             : 
     608       17400 : bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
                         // Placeholder hook: GV is not inspected and false is always returned.
     609             :   // Nothing to do yet.
     610       17400 :   return false;
     611             : }
     612             : 
     613             : // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
     614             : // to byte shuffles.
                       // Op is reinterpreted as a vector of bytes; within every 16-byte lane, result
                       // byte i is taken from byte (i - Shift) of the same lane of Op, or is zero
                       // when i < Shift. The result is bitcast back to Op's original type. A Shift
                       // of 16 or more produces an all-zero vector.
     615          14 : static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
     616             :                                          Value *Op, unsigned Shift) {
     617          14 :   Type *ResultTy = Op->getType();
                         // Byte count of the vector: eight bytes per (64-bit) input element.
     618          14 :   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
     619             : 
     620             :   // Bitcast from a 64-bit element type to a byte element type.
     621          28 :   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
     622          28 :   Op = Builder.CreateBitCast(Op, VecTy, "cast");
     623             : 
     624             :   // We'll be shuffling in zeroes.
     625          14 :   Value *Res = Constant::getNullValue(VecTy);
     626             : 
     627             :   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
     628             :   // we'll just return the zero vector.
     629          14 :   if (Shift < 16) {
                           // Sized for the largest case handled here (64 bytes, i.e. 512 bits).
     630             :     uint32_t Idxs[64];
     631             :     // 256/512-bit version is split into 2/4 16-byte lanes.
                           // The shuffle below takes (zero vector, Op): indices below NumElts pick a
                           // zero byte, indices NumElts and above pick a byte of Op.
     632          86 :     for (unsigned l = 0; l != NumElts; l += 16)
     633        1188 :       for (unsigned i = 0; i != 16; ++i) {
                               // Start by addressing byte (i - Shift) of Op. For i < Shift the
                               // unsigned sum lands below NumElts, so it is rebased to 16 + i - Shift,
                               // which (after adding l) addresses the zero vector instead.
     634         576 :         unsigned Idx = NumElts + i - Shift;
     635         576 :         if (Idx < NumElts)
     636         170 :           Idx -= NumElts - 16; // end of lane, switch operand.
     637         576 :         Idxs[l + i] = Idx + l;
     638             :       }
     639             : 
     640          28 :     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
     641             :   }
     642             : 
     643             :   // Bitcast back to a 64-bit element type.
     644          28 :   return Builder.CreateBitCast(Res, ResultTy, "cast");
     645             : }
     646             : 
     647             : // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
     648             : // to byte shuffles.
                       // Op is reinterpreted as a vector of bytes; within every 16-byte lane, result
                       // byte i is taken from byte (i + Shift) of the same lane of Op, or is zero
                       // when i + Shift reaches 16. The result is bitcast back to Op's original
                       // type. A Shift of 16 or more produces an all-zero vector.
     649         108 : static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
     650             :                                          unsigned Shift) {
     651         108 :   Type *ResultTy = Op->getType();
                         // Byte count of the vector: eight bytes per (64-bit) input element.
     652         108 :   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
     653             : 
     654             :   // Bitcast from a 64-bit element type to a byte element type.
     655         216 :   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
     656         216 :   Op = Builder.CreateBitCast(Op, VecTy, "cast");
     657             : 
     658             :   // We'll be shuffling in zeroes.
     659         108 :   Value *Res = Constant::getNullValue(VecTy);
     660             : 
     661             :   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
     662             :   // we'll just return the zero vector.
     663         108 :   if (Shift < 16) {
                           // Sized for the largest case handled here (64 bytes, i.e. 512 bits).
     664             :     uint32_t Idxs[64];
     665             :     // 256/512-bit version is split into 2/4 16-byte lanes.
                           // The shuffle below takes (Op, zero vector): indices below NumElts pick a
                           // byte of Op, indices NumElts and above pick a zero byte.
     666         368 :     for (unsigned l = 0; l != NumElts; l += 16)
     667        4290 :       for (unsigned i = 0; i != 16; ++i) {
                               // Address byte (i + Shift) of the current lane of Op; once that runs
                               // past the lane, redirect the index into the zero vector.
     668        2080 :         unsigned Idx = i + Shift;
     669        2080 :         if (Idx >= 16)
     670         586 :           Idx += NumElts - 16; // end of lane, switch operand.
     671        2080 :         Idxs[l + i] = Idx + l;
     672             :       }
     673             : 
     674         216 :     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
     675             :   }
     676             : 
     677             :   // Bitcast back to a 64-bit element type.
     678         216 :   return Builder.CreateBitCast(Res, ResultTy, "cast");
     679             : }
     680             : 
     681        1388 : static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
     682             :                             unsigned NumElts) {
                         // Converts an integer mask into a vector of i1 with one element per mask
                         // bit, then, for NumElts < 8, narrows it to its first NumElts elements.
                         // Mask must have an integer type (enforced by the cast<> below).
     683        4164 :   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
     684        1388 :                              cast<IntegerType>(Mask->getType())->getBitWidth());
     685        2776 :   Mask = Builder.CreateBitCast(Mask, MaskTy);
     686             : 
     687             :   // If we have less than 8 elements, then the starting mask was an i8 and
     688             :   // we need to extract down to the right number of elements.
     689        1388 :   if (NumElts < 8) {
                           // NOTE(review): only four slots are available, so this relies on NumElts
                           // being at most 4 whenever it is below 8 - confirm against callers.
     690             :     uint32_t Indices[4];
     691        3169 :     for (unsigned i = 0; i != NumElts; ++i)
     692        1384 :       Indices[i] = i;
                           // Both shuffle operands are the mask itself; indices 0..NumElts-1 simply
                           // keep its leading elements.
     693         802 :     Mask = Builder.CreateShuffleVector(Mask, Mask,
     694             :                                        makeArrayRef(Indices, NumElts),
     695             :                                        "extract");
     696             :   }
     697             : 
     698        1388 :   return Mask;
     699             : }
     700             : 
     701        1611 : static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
     702             :                             Value *Op0, Value *Op1) {
                         // Emits a per-element select between Op0 (mask bit set) and Op1 (mask bit
                         // clear), with the integer Mask converted to an i1 vector that has as
                         // many elements as Op0.
     703             :   // If the mask is all ones the select is a no-op, so just return Op0.
     704         585 :   if (const auto *C = dyn_cast<Constant>(Mask))
     705         585 :     if (C->isAllOnesValue())
     706             :       return Op0;
     707             : 
     708        2052 :   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
     709        1026 :   return Builder.CreateSelect(Mask, Op0, Op1);
     710             : }
     711             : 
     712             : // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
     713             : // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
     714             : // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
                       // Shift must be a ConstantInt (enforced by the cast<> below). The alignment is
                       // emitted as a shuffle of (Op1, Op0) and then blended with Passthru under Mask
                       // via EmitX86Select.
     715          24 : static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
     716             :                                         Value *Op1, Value *Shift,
     717             :                                         Value *Passthru, Value *Mask,
     718             :                                         bool IsVALIGN) {
     719          48 :   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
     720             : 
     721          48 :   unsigned NumElts = Op0->getType()->getVectorNumElements();
     722             :   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
     723             :   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
     724             :   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
     725             : 
     726             :   // Mask the immediate for VALIGN.
     727          24 :   if (IsVALIGN)
     728          12 :     ShiftVal &= (NumElts - 1);
     729             : 
     730             :   // If palignr is shifting the pair of vectors more than the size of two
     731             :   // lanes, emit zero.
     732          24 :   if (ShiftVal >= 32)
     733           0 :     return llvm::Constant::getNullValue(Op0->getType());
     734             : 
     735             :   // If palignr is shifting the pair of input vectors more than one lane,
     736             :   // but less than two lanes, convert to shifting in zeroes.
     737          24 :   if (ShiftVal > 16) {
     738           0 :     ShiftVal -= 16;
     739           0 :     Op1 = Op0;
     740           0 :     Op0 = llvm::Constant::getNullValue(Op0->getType());
     741             :   }
     742             : 
                         // Shuffle indices below NumElts select from Op1 and the rest from Op0 (see
                         // the operand order of the CreateShuffleVector call). The inner loop always
                         // fills 16 entries, but only the first NumElts are handed to the shuffle.
     743             :   uint32_t Indices[64];
     744             :   // 256-bit palignr operates on 128-bit lanes so we need to handle that
     745          69 :   for (unsigned l = 0; l < NumElts; l += 16) {
     746        1485 :     for (unsigned i = 0; i != 16; ++i) {
     747         720 :       unsigned Idx = ShiftVal + i;
     748         720 :       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
     749          66 :         Idx += NumElts - 16; // End of lane, switch operand.
     750         720 :       Indices[l + i] = Idx + l;
     751             :     }
     752             :   }
     753             : 
     754          72 :   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
     755             :                                              makeArrayRef(Indices, NumElts),
     756          24 :                                              "palignr");
     757             : 
     758          24 :   return EmitX86Select(Builder, Mask, Align, Passthru);
     759             : }
     760             : 
     761          64 : static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
     762             :                                  Value *Ptr, Value *Data, Value *Mask,
     763             :                                  bool Aligned) {
                         // Stores the vector Data through Ptr: a plain aligned store when Mask is a
                         // constant with all bits set, otherwise a masked store driven by Mask.
                         // Returns the store instruction / masked-store call that was created.
     764             :   // Cast the pointer to the right type.
     765         128 :   Ptr = Builder.CreateBitCast(Ptr,
     766         128 :                               llvm::PointerType::getUnqual(Data->getType()));
                         // Alignment is the full vector size in bytes when Aligned, otherwise 1.
     767             :   unsigned Align =
     768         112 :     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
     769             : 
     770             :   // If the mask is all ones just emit a regular store.
     771          32 :   if (const auto *C = dyn_cast<Constant>(Mask))
     772          32 :     if (C->isAllOnesValue())
     773          32 :       return Builder.CreateAlignedStore(Data, Ptr, Align);
     774             : 
     775             :   // Convert the mask from an integer type to a vector of i1.
     776          64 :   unsigned NumElts = Data->getType()->getVectorNumElements();
     777          32 :   Mask = getX86MaskVec(Builder, Mask, NumElts);
     778          32 :   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
     779             : }
     780             : 
     781          96 : static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
     782             :                                 Value *Ptr, Value *Passthru, Value *Mask,
     783             :                                 bool Aligned) {
                         // Loads a vector of Passthru's type through Ptr: a plain aligned load when
                         // Mask is a constant with all bits set, otherwise a masked load that
                         // is given Passthru as its pass-through operand.
     784             :   // Cast the pointer to the right type.
     785         192 :   Ptr = Builder.CreateBitCast(Ptr,
     786         192 :                              llvm::PointerType::getUnqual(Passthru->getType()));
                         // Alignment is the full vector size in bytes when Aligned, otherwise 1.
     787             :   unsigned Align =
     788         168 :     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
     789             : 
     790             :   // If the mask is all ones just emit a regular load.
     791          32 :   if (const auto *C = dyn_cast<Constant>(Mask))
     792          32 :     if (C->isAllOnesValue())
     793          64 :       return Builder.CreateAlignedLoad(Ptr, Align);
     794             : 
     795             :   // Convert the mask from an integer type to a vector of i1.
     796         128 :   unsigned NumElts = Passthru->getType()->getVectorNumElements();
     797          64 :   Mask = getX86MaskVec(Builder, Mask, NumElts);
     798         128 :   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
     799             : }
     800             : 
     801         168 : static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
                         // Expands an absolute-value intrinsic call into compare/negate/select on
                         // its first argument. For the three-argument (masked) form, argument 2 is
                         // the mask and argument 1 the value kept where the mask is clear.
     802         168 :   Value *Op0 = CI.getArgOperand(0);
     803         168 :   llvm::Type *Ty = Op0->getType();
     804         168 :   Value *Zero = llvm::Constant::getNullValue(Ty);
                         // abs(x) == (x > 0) ? x : -x, using a signed comparison.
     805         168 :   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
     806         168 :   Value *Neg = Builder.CreateNeg(Op0);
     807         168 :   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
     808             : 
     809         168 :   if (CI.getNumArgOperands() == 3)
     810          84 :     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
     811             : 
     812         168 :   return Res;
     813             : }
     814             : 
     815         634 : static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
     816             :                                ICmpInst::Predicate Pred) {
                         // Expands an integer min/max intrinsic call into icmp + select: the result
                         // is argument 0 where Pred(arg0, arg1) holds and argument 1 otherwise, so
                         // Pred chooses between min/max and signed/unsigned. For the four-argument
                         // (masked) form, argument 3 is the mask and argument 2 the value kept
                         // where the mask is clear.
     817         634 :   Value *Op0 = CI.getArgOperand(0);
     818         634 :   Value *Op1 = CI.getArgOperand(1);
     819         634 :   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
     820         634 :   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
     821             : 
     822         634 :   if (CI.getNumArgOperands() == 4)
     823         248 :     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
     824             : 
     825         634 :   return Res;
     826             : }
     827             : 
     828         504 : static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
     829             :                                    unsigned CC, bool Signed) {
                         // Expands a masked vector integer compare into an icmp (or a constant),
                         // ANDs it with the mask taken from the call's last argument, and returns
                         // the result packed into an integer of max(NumElts, 8) bits.
                         // CC selects the predicate; Signed picks the signed or unsigned flavour of
                         // the ordered predicates.
     830         504 :   Value *Op0 = CI.getArgOperand(0);
     831        1008 :   unsigned NumElts = Op0->getType()->getVectorNumElements();
     832             : 
     833             :   Value *Cmp;
                         // Codes 3 and 7 do not depend on the operands: all-false and all-true.
     834         504 :   if (CC == 3) {
     835         112 :     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
     836         448 :   } else if (CC == 7) {
     837         112 :     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
     838             :   } else {
     839             :     ICmpInst::Predicate Pred;
     840         392 :     switch (CC) {
     841           0 :     default: llvm_unreachable("Unknown condition code");
     842             :     case 0: Pred = ICmpInst::ICMP_EQ;  break;
     843          56 :     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
     844          56 :     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
     845          56 :     case 4: Pred = ICmpInst::ICMP_NE;  break;
     846          56 :     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
     847          84 :     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
     848             :     }
     849         784 :     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
     850             :   }
     851             : 
                         // The mask is always the final argument; skip the AND when it is a
                         // constant with all bits set.
     852        1008 :   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
     853         252 :   const auto *C = dyn_cast<Constant>(Mask);
     854         252 :   if (!C || !C->isAllOnesValue())
     855         252 :     Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
     856             : 
                         // Widen results narrower than 8 elements to exactly 8 so they can be
                         // bitcast to i8: the first NumElts indices keep the compare bits, the
                         // remaining ones are all >= NumElts and therefore read the zero vector.
     857         504 :   if (NumElts < 8) {
     858             :     uint32_t Indices[8];
     859         828 :     for (unsigned i = 0; i != NumElts; ++i)
     860         360 :       Indices[i] = i;
     861        1116 :     for (unsigned i = NumElts; i != 8; ++i)
     862         504 :       Indices[i] = NumElts + i % NumElts;
     863         216 :     Cmp = Builder.CreateShuffleVector(Cmp,
     864         108 :                                       Constant::getNullValue(Cmp->getType()),
     865         108 :                                       Indices);
     866             :   }
     867        1512 :   return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
     868        2016 :                                                      std::max(NumElts, 8U)));
     869             : }
     870             : 
     871             : // Replace a masked intrinsic with an older unmasked intrinsic.
                       // The unmasked intrinsic IID is called with the first two arguments of CI; the
                       // result is then blended with argument 2 (pass-through) under argument 3
                       // (mask) via EmitX86Select.
     872         278 : static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
     873             :                                     Intrinsic::ID IID) {
     874         278 :   Function *F = CI.getCalledFunction();
                         // Declare the replacement in the module that owns the called function.
     875         278 :   Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
     876        1390 :   Value *Rep = Builder.CreateCall(Intrin,
     877         834 :                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
     878         556 :   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
     879             : }
     880             : 
     881           4 : static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
                         // Expands a masked scalar move: element 0 of the result is element 0 of
                         // argument 1 when bit 0 of the mask (argument 3) is set and element 0 of
                         // argument 2 otherwise; all other elements come from argument 0.
     882           4 :   Value* A = CI.getArgOperand(0);
     883           4 :   Value* B = CI.getArgOperand(1);
     884           4 :   Value* Src = CI.getArgOperand(2);
     885           4 :   Value* Mask = CI.getArgOperand(3);
     886             : 
                         // Only the lowest mask bit matters.
                         // NOTE(review): the 8-bit APInt suggests Mask is an i8 - confirm.
     887          12 :   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
     888           4 :   Value* Cmp = Builder.CreateIsNotNull(AndNode);
     889           8 :   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
     890           8 :   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
     891           4 :   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
     892           8 :   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
     893             : }
     894             : 
     895             : 
     896          14 : static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
                         // Expands a mask-to-vector conversion: the integer mask in argument 0 is
                         // turned into an i1 vector with as many elements as the call's result
                         // type and sign-extended to that type, so set bits become all-ones
                         // elements and clear bits become zero elements.
     897          14 :   Value* Op = CI.getArgOperand(0);
     898          14 :   Type* ReturnOp = CI.getType();
     899          28 :   unsigned NumElts = CI.getType()->getVectorNumElements();
     900          14 :   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
     901          28 :   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
     902             : }
     903             : 
     904             : /// Upgrade a call to an old intrinsic. All argument and return casting must be
     905             : /// provided to seamlessly integrate with existing context.
     906        4622 : void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     907        4622 :   Function *F = CI->getCalledFunction();
     908        4622 :   LLVMContext &C = CI->getContext();
     909        9930 :   IRBuilder<> Builder(C);
     910        9244 :   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
     911             : 
     912             :   assert(F && "Intrinsic call is not direct?");
     913             : 
     914        4622 :   if (!NewFn) {
     915             :     // Get the Function's name.
     916        3730 :     StringRef Name = F->getName();
     917             : 
     918             :     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
     919        3730 :     Name = Name.substr(5);
     920             : 
     921        7428 :     bool IsX86 = Name.startswith("x86.");
     922             :     if (IsX86)
     923        3698 :       Name = Name.substr(4);
     924        3756 :     bool IsNVVM = Name.startswith("nvvm.");
     925             :     if (IsNVVM)
     926          26 :       Name = Name.substr(5);
     927             : 
     928        3744 :     if (IsX86 && Name.startswith("sse4a.movnt.")) {
     929          14 :       Module *M = F->getParent();
     930          28 :       SmallVector<Metadata *, 1> Elts;
     931          14 :       Elts.push_back(
     932          42 :           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
     933          28 :       MDNode *Node = MDNode::get(C, Elts);
     934             : 
     935          14 :       Value *Arg0 = CI->getArgOperand(0);
     936          14 :       Value *Arg1 = CI->getArgOperand(1);
     937             : 
     938             :       // Nontemporal (unaligned) store of the 0'th element of the float/double
     939             :       // vector.
     940          28 :       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
     941          14 :       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
     942          28 :       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
     943             :       Value *Extract =
     944          28 :           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
     945             : 
     946          14 :       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
     947          28 :       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
     948             : 
     949             :       // Remove intrinsic.
     950          14 :       CI->eraseFromParent();
     951             :       return;
     952             :     }
     953             : 
     954        7400 :     if (IsX86 && (Name.startswith("avx.movnt.") ||
     955        3678 :                   Name.startswith("avx512.storent."))) {
     956           9 :       Module *M = F->getParent();
     957          18 :       SmallVector<Metadata *, 1> Elts;
     958           9 :       Elts.push_back(
     959          27 :           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
     960          18 :       MDNode *Node = MDNode::get(C, Elts);
     961             : 
     962           9 :       Value *Arg0 = CI->getArgOperand(0);
     963           9 :       Value *Arg1 = CI->getArgOperand(1);
     964             : 
     965             :       // Convert the type of the pointer to a pointer to the stored type.
     966          18 :       Value *BC = Builder.CreateBitCast(Arg0,
     967          18 :                                         PointerType::getUnqual(Arg1->getType()),
     968           9 :                                         "cast");
     969          18 :       VectorType *VTy = cast<VectorType>(Arg1->getType());
     970           9 :       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
     971          18 :                                                  VTy->getBitWidth() / 8);
     972          18 :       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
     973             : 
     974             :       // Remove intrinsic.
     975           9 :       CI->eraseFromParent();
     976             :       return;
     977             :     }
     978             : 
     979        7383 :     if (IsX86 && Name == "sse2.storel.dq") {
     980           1 :       Value *Arg0 = CI->getArgOperand(0);
     981           1 :       Value *Arg1 = CI->getArgOperand(1);
     982             : 
     983           1 :       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
     984           2 :       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
     985           2 :       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
     986           2 :       Value *BC = Builder.CreateBitCast(Arg0,
     987           2 :                                         PointerType::getUnqual(Elt->getType()),
     988           1 :                                         "cast");
     989           1 :       Builder.CreateAlignedStore(Elt, BC, 1);
     990             : 
     991             :       // Remove intrinsic.
     992           1 :       CI->eraseFromParent();
     993           1 :       return;
     994             :     }
     995             : 
     996        7380 :     if (IsX86 && (Name.startswith("sse.storeu.") ||
     997        3669 :                   Name.startswith("sse2.storeu.") ||
     998        3648 :                   Name.startswith("avx.storeu."))) {
     999          56 :       Value *Arg0 = CI->getArgOperand(0);
    1000          56 :       Value *Arg1 = CI->getArgOperand(1);
    1001             : 
    1002         112 :       Arg0 = Builder.CreateBitCast(Arg0,
    1003         112 :                                    PointerType::getUnqual(Arg1->getType()),
    1004             :                                    "cast");
    1005          56 :       Builder.CreateAlignedStore(Arg1, Arg0, 1);
    1006             : 
    1007             :       // Remove intrinsic.
    1008          56 :       CI->eraseFromParent();
    1009          56 :       return;
    1010             :     }
    1011             : 
    1012        7332 :     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
    1013             :       // "avx512.mask.storeu." or "avx512.mask.store."
    1014         128 :       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
    1015         256 :       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
    1016             :                          CI->getArgOperand(2), Aligned);
    1017             : 
    1018             :       // Remove intrinsic.
    1019          64 :       CI->eraseFromParent();
    1020          64 :       return;
    1021             :     }
    1022             : 
    1023             :     Value *Rep;
    1024             :     // Upgrade packed integer vector compare intrinsics to compare instructions.
    1025        7140 :     if (IsX86 && (Name.startswith("sse2.pcmp") ||
    1026        3553 :                   Name.startswith("avx2.pcmp"))) {
    1027             :       // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
    1028           2 :       bool CmpEq = Name[9] == 'e';
    1029           3 :       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
    1030             :                                CI->getArgOperand(0), CI->getArgOperand(1));
    1031           2 :       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    1032       10685 :     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
    1033          11 :       Type *I32Ty = Type::getInt32Ty(C);
    1034          33 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1035          22 :                                                  ConstantInt::get(I32Ty, 0));
    1036          33 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1037          22 :                                                  ConstantInt::get(I32Ty, 0));
    1038          33 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1039             :                                         Builder.CreateFAdd(Elt0, Elt1),
    1040          11 :                                         ConstantInt::get(I32Ty, 0));
    1041       10646 :     } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
    1042          17 :       Type *I32Ty = Type::getInt32Ty(C);
    1043          51 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1044          34 :                                                  ConstantInt::get(I32Ty, 0));
    1045          51 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1046          34 :                                                  ConstantInt::get(I32Ty, 0));
    1047          51 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1048             :                                         Builder.CreateFSub(Elt0, Elt1),
    1049          17 :                                         ConstantInt::get(I32Ty, 0));
    1050       10595 :     } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
    1051          17 :       Type *I32Ty = Type::getInt32Ty(C);
    1052          51 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1053          34 :                                                  ConstantInt::get(I32Ty, 0));
    1054          51 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1055          34 :                                                  ConstantInt::get(I32Ty, 0));
    1056          51 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1057             :                                         Builder.CreateFMul(Elt0, Elt1),
    1058          17 :                                         ConstantInt::get(I32Ty, 0));
    1059       10551 :     } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
    1060          10 :       Type *I32Ty = Type::getInt32Ty(C);
    1061          30 :       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
    1062          20 :                                                  ConstantInt::get(I32Ty, 0));
    1063          30 :       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
    1064          20 :                                                  ConstantInt::get(I32Ty, 0));
    1065          30 :       Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
    1066             :                                         Builder.CreateFDiv(Elt0, Elt1),
    1067          10 :                                         ConstantInt::get(I32Ty, 0));
    1068        7084 :     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
    1069             :       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
    1070         112 :       bool CmpEq = Name[16] == 'e';
    1071          56 :       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    1072        7140 :     } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
    1073         672 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1074         224 :       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    1075        6692 :     } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
    1076         672 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1077         224 :       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    1078        6020 :     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
    1079        2967 :                         Name == "ssse3.pabs.w.128" ||
    1080        2940 :                         Name == "ssse3.pabs.d.128" ||
    1081        2913 :                         Name.startswith("avx2.pabs") ||
    1082        2868 :                         Name.startswith("avx512.mask.pabs"))) {
    1083         168 :       Rep = upgradeAbs(Builder, *CI);
    1084        2858 :     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
    1085        2803 :                          Name == "sse2.pmaxs.w" ||
    1086        2778 :                          Name == "sse41.pmaxsd" ||
    1087        2748 :                          Name.startswith("avx2.pmaxs") ||
    1088        2706 :                          Name.startswith("avx512.mask.pmaxs"))) {
    1089         150 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    1090        2708 :     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
    1091        2611 :                          Name == "sse41.pmaxuw" ||
    1092        2588 :                          Name == "sse41.pmaxud" ||
    1093        2561 :                          Name.startswith("avx2.pmaxu") ||
    1094        2519 :                          Name.startswith("avx512.mask.pmaxu"))) {
    1095         191 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    1096        2517 :     } else if (IsX86 && (Name == "sse41.pminsb" ||
    1097        2462 :                          Name == "sse2.pmins.w" ||
    1098        2436 :                          Name == "sse41.pminsd" ||
    1099        2411 :                          Name.startswith("avx2.pmins") ||
    1100        2369 :                          Name.startswith("avx512.mask.pmins"))) {
    1101         146 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    1102        2371 :     } else if (IsX86 && (Name == "sse2.pminu.b" ||
    1103        2314 :                          Name == "sse41.pminuw" ||
    1104        2291 :                          Name == "sse41.pminud" ||
    1105        2264 :                          Name.startswith("avx2.pminu") ||
    1106        2222 :                          Name.startswith("avx512.mask.pminu"))) {
    1107         147 :       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    1108        2224 :     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
    1109        2187 :                          Name == "sse2.cvtps2pd" ||
    1110        2182 :                          Name == "avx.cvtdq2.pd.256" ||
    1111        2176 :                          Name == "avx.cvt.ps2.pd.256" ||
    1112        2173 :                          Name.startswith("avx512.mask.cvtdq2pd.") ||
    1113        2167 :                          Name.startswith("avx512.mask.cvtudq2pd."))) {
    1114             :       // Lossless i32/float to double conversion.
    1115             :       // Extract the bottom elements if necessary and convert to double vector.
    1116          31 :       Value *Src = CI->getArgOperand(0);
    1117          62 :       VectorType *SrcTy = cast<VectorType>(Src->getType());
    1118          62 :       VectorType *DstTy = cast<VectorType>(CI->getType());
    1119          31 :       Rep = CI->getArgOperand(0);
    1120             : 
    1121          31 :       unsigned NumDstElts = DstTy->getNumElements();
    1122          31 :       if (NumDstElts < SrcTy->getNumElements()) {
    1123             :         assert(NumDstElts == 2 && "Unexpected vector size");
    1124          14 :         uint32_t ShuffleMask[2] = { 0, 1 };
    1125          28 :         Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
    1126          14 :                                           ShuffleMask);
    1127             :       }
    1128             : 
    1129          31 :       bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
    1130          31 :       bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
    1131          31 :       if (SInt2Double)
    1132          34 :         Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
    1133          14 :       else if (UInt2Double)
    1134          12 :         Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
    1135             :       else
    1136          16 :         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    1137             : 
    1138          31 :       if (CI->getNumArgOperands() == 3)
    1139          24 :         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1140             :                             CI->getArgOperand(1));
    1141        4414 :     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
    1142         180 :       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
    1143             :                               CI->getArgOperand(1), CI->getArgOperand(2),
    1144             :                               /*Aligned*/false);
    1145        4270 :     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
    1146         108 :       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
    1147             :                               CI->getArgOperand(1),CI->getArgOperand(2),
    1148             :                               /*Aligned*/true);
    1149        4243 :     } else if (IsX86 && Name.startswith("xop.vpcom")) {
    1150             :       Intrinsic::ID intID;
    1151          81 :       if (Name.endswith("ub"))
    1152             :         intID = Intrinsic::x86_xop_vpcomub;
    1153          71 :       else if (Name.endswith("uw"))
    1154             :         intID = Intrinsic::x86_xop_vpcomuw;
    1155          61 :       else if (Name.endswith("ud"))
    1156             :         intID = Intrinsic::x86_xop_vpcomud;
    1157          51 :       else if (Name.endswith("uq"))
    1158             :         intID = Intrinsic::x86_xop_vpcomuq;
    1159          41 :       else if (Name.endswith("b"))
    1160             :         intID = Intrinsic::x86_xop_vpcomb;
    1161          30 :       else if (Name.endswith("w"))
    1162             :         intID = Intrinsic::x86_xop_vpcomw;
    1163          20 :       else if (Name.endswith("d"))
    1164             :         intID = Intrinsic::x86_xop_vpcomd;
    1165          10 :       else if (Name.endswith("q"))
    1166             :         intID = Intrinsic::x86_xop_vpcomq;
    1167             :       else
    1168           0 :         llvm_unreachable("Unknown suffix");
    1169             : 
    1170          81 :       Name = Name.substr(9); // strip off "xop.vpcom"
    1171             :       unsigned Imm;
    1172          81 :       if (Name.startswith("lt"))
    1173             :         Imm = 0;
    1174          71 :       else if (Name.startswith("le"))
    1175             :         Imm = 1;
    1176          61 :       else if (Name.startswith("gt"))
    1177             :         Imm = 2;
    1178          51 :       else if (Name.startswith("ge"))
    1179             :         Imm = 3;
    1180          41 :       else if (Name.startswith("eq"))
    1181             :         Imm = 4;
    1182          30 :       else if (Name.startswith("ne"))
    1183             :         Imm = 5;
    1184          20 :       else if (Name.startswith("false"))
    1185             :         Imm = 6;
    1186          10 :       else if (Name.startswith("true"))
    1187             :         Imm = 7;
    1188             :       else
    1189           0 :         llvm_unreachable("Unknown condition");
    1190             : 
    1191          81 :       Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
    1192          81 :       Rep =
    1193         486 :           Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
    1194          81 :                                      Builder.getInt8(Imm)});
    1195        4012 :     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
    1196          12 :       Value *Sel = CI->getArgOperand(2);
    1197          12 :       Value *NotSel = Builder.CreateNot(Sel);
    1198          24 :       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
    1199          24 :       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
    1200          12 :       Rep = Builder.CreateOr(Sel0, Sel1);
    1201        3996 :     } else if (IsX86 && Name == "sse42.crc32.64.8") {
    1202          40 :       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
    1203          20 :                                                Intrinsic::x86_sse42_crc32_32_8);
    1204          60 :       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    1205          80 :       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
    1206          40 :       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    1207        3939 :     } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
    1208             :       // Replace broadcasts with a series of insertelements.
    1209           3 :       Type *VecTy = CI->getType();
    1210           3 :       Type *EltTy = VecTy->getVectorElementType();
    1211           3 :       unsigned EltNum = VecTy->getVectorNumElements();
    1212           9 :       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
    1213           6 :                                           EltTy->getPointerTo());
    1214           3 :       Value *Load = Builder.CreateLoad(EltTy, Cast);
    1215           3 :       Type *I32Ty = Type::getInt32Ty(C);
    1216           3 :       Rep = UndefValue::get(VecTy);
    1217          19 :       for (unsigned I = 0; I < EltNum; ++I)
    1218          32 :         Rep = Builder.CreateInsertElement(Rep, Load,
    1219          16 :                                           ConstantInt::get(I32Ty, I));
    1220        3930 :     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
    1221        1931 :                          Name.startswith("sse41.pmovzx") ||
    1222        1907 :                          Name.startswith("avx2.pmovsx") ||
    1223        1895 :                          Name.startswith("avx2.pmovzx") ||
    1224        1883 :                          Name.startswith("avx512.mask.pmovsx") ||
    1225        1826 :                          Name.startswith("avx512.mask.pmovzx"))) {
    1226         360 :       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
    1227         360 :       VectorType *DstTy = cast<VectorType>(CI->getType());
    1228         180 :       unsigned NumDstElts = DstTy->getNumElements();
    1229             : 
    1230             :       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    1231         540 :       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
    1232        1632 :       for (unsigned i = 0; i != NumDstElts; ++i)
    1233        2904 :         ShuffleMask[i] = i;
    1234             : 
    1235         720 :       Value *SV = Builder.CreateShuffleVector(
    1236         360 :           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
    1237             : 
    1238         180 :       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
    1239         627 :       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
    1240         273 :                    : Builder.CreateZExt(SV, DstTy);
    1241             :       // If there are 3 arguments, it's a masked intrinsic so we need a select.
    1242         180 :       if (CI->getNumArgOperands() == 3)
    1243         228 :         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1244             :                             CI->getArgOperand(1));
    1245        3570 :     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
    1246        1765 :                          Name == "avx2.vbroadcasti128")) {
    1247             :       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
    1248          10 :       Type *EltTy = CI->getType()->getVectorElementType();
    1249           5 :       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    1250           5 :       Type *VT = VectorType::get(EltTy, NumSrcElts);
    1251          15 :       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
    1252          10 :                                             PointerType::getUnqual(VT));
    1253          10 :       Value *Load = Builder.CreateAlignedLoad(Op, 1);
    1254           5 :       if (NumSrcElts == 2)
    1255           6 :         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
    1256             :                                           { 0, 1, 0, 1 });
    1257             :       else
    1258           4 :         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
    1259             :                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
    1260        3560 :     } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
    1261        1740 :                          Name.startswith("avx512.mask.broadcasti"))) {
    1262             :       unsigned NumSrcElts =
    1263          96 :                         CI->getArgOperand(0)->getType()->getVectorNumElements();
    1264          96 :       unsigned NumDstElts = CI->getType()->getVectorNumElements();
    1265             : 
    1266         144 :       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
    1267         528 :       for (unsigned i = 0; i != NumDstElts; ++i)
    1268         960 :         ShuffleMask[i] = i % NumSrcElts;
    1269             : 
    1270         192 :       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
    1271             :                                         CI->getArgOperand(0),
    1272             :                                         ShuffleMask);
    1273          96 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1274             :                           CI->getArgOperand(1));
    1275        3464 :     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
    1276        1700 :                          Name.startswith("avx2.vbroadcast") ||
    1277        1694 :                          Name.startswith("avx512.pbroadcast") ||
    1278        1658 :                          Name.startswith("avx512.mask.broadcast.s"))) {
    1279             :       // Replace vp?broadcasts with a vector shuffle.
    1280          73 :       Value *Op = CI->getArgOperand(0);
    1281         146 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    1282          73 :       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
    1283         146 :       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
    1284          73 :                                         Constant::getNullValue(MaskTy));
    1285             : 
    1286          73 :       if (CI->getNumArgOperands() == 3)
    1287         102 :         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1288             :                             CI->getArgOperand(1));
    1289        3330 :     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
    1290          60 :       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
    1291             :                                       CI->getArgOperand(1),
    1292             :                                       CI->getArgOperand(2),
    1293             :                                       CI->getArgOperand(3),
    1294             :                                       CI->getArgOperand(4),
    1295             :                                       false);
    1296        3306 :     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
    1297          60 :       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
    1298             :                                       CI->getArgOperand(1),
    1299             :                                       CI->getArgOperand(2),
    1300             :                                       CI->getArgOperand(3),
    1301             :                                       CI->getArgOperand(4),
    1302             :                                       true);
    1303        3270 :     } else if (IsX86 && (Name == "sse2.psll.dq" ||
    1304        1616 :                          Name == "avx2.psll.dq")) {
    1305             :       // 128/256-bit shift left specified in bits.
    1306          15 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1307          10 :       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
    1308             :                                        Shift / 8); // Shift is in bits.
    1309        3260 :     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
    1310        1517 :                          Name == "avx2.psrl.dq")) {
    1311             :       // 128/256-bit shift right specified in bits.
    1312         297 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1313         198 :       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
    1314             :                                        Shift / 8); // Shift is in bits.
    1315        3062 :     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
    1316        1514 :                          Name == "avx2.psll.dq.bs" ||
    1317        1512 :                          Name == "avx512.psll.dq.512")) {
    1318             :       // 128/256/512-bit shift left specified in bytes.
    1319          27 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1320           9 :       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    1321        3044 :     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
    1322        1505 :                          Name == "avx2.psrl.dq.bs" ||
    1323        1503 :                          Name == "avx512.psrl.dq.512")) {
    1324             :       // 128/256/512-bit shift right specified in bytes.
    1325          27 :       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1326           9 :       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    1327        3026 :     } else if (IsX86 && (Name == "sse41.pblendw" ||
    1328        1486 :                          Name.startswith("sse41.blendp") ||
    1329        1470 :                          Name.startswith("avx.blend.p") ||
    1330        1458 :                          Name == "avx2.pblendw" ||
    1331        1449 :                          Name.startswith("avx2.pblendd."))) {
    1332          64 :       Value *Op0 = CI->getArgOperand(0);
    1333          64 :       Value *Op1 = CI->getArgOperand(1);
    1334         192 :       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1335         128 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1336          64 :       unsigned NumElts = VecTy->getNumElements();
    1337             : 
    1338         192 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    1339         512 :       for (unsigned i = 0; i != NumElts; ++i)
    1340         896 :         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
    1341             : 
    1342         128 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    1343        2898 :     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
    1344        1415 :                          Name == "avx2.vinserti128" ||
    1345        1413 :                          Name.startswith("avx512.mask.insert"))) {
    1346          56 :       Value *Op0 = CI->getArgOperand(0);
    1347          56 :       Value *Op1 = CI->getArgOperand(1);
    1348         168 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1349         112 :       unsigned DstNumElts = CI->getType()->getVectorNumElements();
    1350         112 :       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
    1351          56 :       unsigned Scale = DstNumElts / SrcNumElts;
    1352             : 
    1353             :       // Mask off the high bits of the immediate value; hardware ignores those.
    1354          56 :       Imm = Imm % Scale;
    1355             : 
    1356             :       // Extend the second operand into a vector the size of the destination.
    1357          56 :       Value *UndefV = UndefValue::get(Op1->getType());
    1358         168 :       SmallVector<uint32_t, 8> Idxs(DstNumElts);
    1359         270 :       for (unsigned i = 0; i != SrcNumElts; ++i)
    1360         428 :         Idxs[i] = i;
    1361         628 :       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
    1362         572 :         Idxs[i] = SrcNumElts;
    1363         112 :       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
    1364             : 
    1365             :       // Insert the second operand into the first operand.
    1366             : 
    1367             :       // Note that there is no guarantee that instruction lowering will actually
    1368             :       // produce a vinsertf128 instruction for the created shuffles. In
    1369             :       // particular, the 0 immediate case involves no lane changes, so it can
    1370             :       // be handled as a blend.
    1371             : 
    1372             :       // Example of shuffle mask for 32-bit elements:
    1373             :       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
    1374             :       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
    1375             : 
    1376             :       // First fill with identity mask.
    1377         556 :       for (unsigned i = 0; i != DstNumElts; ++i)
    1378        1000 :         Idxs[i] = i;
    1379             :       // Then replace the elements where we need to insert.
    1380         484 :       for (unsigned i = 0; i != SrcNumElts; ++i)
    1381         428 :         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    1382         112 :       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    1383             : 
    1384             :       // If the intrinsic has a mask operand, handle that.
    1385          56 :       if (CI->getNumArgOperands() == 5)
    1386          72 :         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
    1387             :                             CI->getArgOperand(3));
    1388        2786 :     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
    1389        1354 :                          Name == "avx2.vextracti128" ||
    1390        1352 :                          Name.startswith("avx512.mask.vextract"))) {
    1391          41 :       Value *Op0 = CI->getArgOperand(0);
    1392         123 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1393          82 :       unsigned DstNumElts = CI->getType()->getVectorNumElements();
    1394          82 :       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
    1395          41 :       unsigned Scale = SrcNumElts / DstNumElts;
    1396             : 
    1397             :       // Mask off the high bits of the immediate value; hardware ignores those.
    1398          41 :       Imm = Imm % Scale;
    1399             : 
    1400             :       // Get indexes for the subvector of the input vector.
    1401         123 :       SmallVector<uint32_t, 8> Idxs(DstNumElts);
    1402         189 :       for (unsigned i = 0; i != DstNumElts; ++i) {
    1403         296 :         Idxs[i] = i + (Imm * DstNumElts);
    1404             :       }
    1405          82 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    1406             : 
    1407             :       // If the intrinsic has a mask operand, handle that.
    1408          41 :       if (CI->getNumArgOperands() == 4)
    1409          32 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1410             :                             CI->getArgOperand(2));
    1411        1400 :     } else if (!IsX86 && Name == "stackprotectorcheck") {
    1412             :       Rep = nullptr;
    1413        1362 :     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
    1414        1330 :                          Name.startswith("avx512.mask.perm.di."))) {
    1415          12 :       Value *Op0 = CI->getArgOperand(0);
    1416          36 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1417          24 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1418          12 :       unsigned NumElts = VecTy->getNumElements();
    1419             : 
    1420          36 :       SmallVector<uint32_t, 8> Idxs(NumElts);
    1421          84 :       for (unsigned i = 0; i != NumElts; ++i)
    1422         144 :         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
    1423             : 
    1424          24 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    1425             : 
    1426          12 :       if (CI->getNumArgOperands() == 4)
    1427          24 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1428             :                             CI->getArgOperand(2));
    1429        2674 :     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
    1430        1316 :                          Name == "sse2.pshuf.d" ||
    1431        1255 :                          Name.startswith("avx512.mask.vpermil.p") ||
    1432        1237 :                          Name.startswith("avx512.mask.pshuf.d."))) {
    1433          96 :       Value *Op0 = CI->getArgOperand(0);
    1434         288 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1435         192 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1436          96 :       unsigned NumElts = VecTy->getNumElements();
    1437             :       // Calculate the size of each index in the immediate.
    1438          96 :       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    1439          96 :       unsigned IdxMask = ((1 << IdxSize) - 1);
    1440             : 
    1441         288 :       SmallVector<uint32_t, 8> Idxs(NumElts);
    1442             :       // Lookup the bits for this element, wrapping around the immediate every
    1443             :       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
    1444             :       // to offset by the first index of each group.
    1445         586 :       for (unsigned i = 0; i != NumElts; ++i)
    1446         980 :         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
    1447             : 
    1448         192 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    1449             : 
    1450          96 :       if (CI->getNumArgOperands() == 4)
    1451          54 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1452             :                             CI->getArgOperand(2));
    1453        2482 :     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
    1454        1182 :                          Name.startswith("avx512.mask.pshufl.w."))) {
    1455          58 :       Value *Op0 = CI->getArgOperand(0);
    1456         174 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1457         116 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    1458             : 
    1459         174 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    1460         137 :       for (unsigned l = 0; l != NumElts; l += 8) {
    1461         711 :         for (unsigned i = 0; i != 4; ++i)
    1462         632 :           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
    1463         711 :         for (unsigned i = 4; i != 8; ++i)
    1464         632 :           Idxs[i + l] = i + l;
    1465             :       }
    1466             : 
    1467         116 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    1468             : 
    1469          58 :       if (CI->getNumArgOperands() == 4)
    1470          24 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1471             :                             CI->getArgOperand(2));
    1472        2366 :     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
    1473        1139 :                          Name.startswith("avx512.mask.pshufh.w."))) {
    1474          43 :       Value *Op0 = CI->getArgOperand(0);
    1475         129 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    1476          86 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    1477             : 
    1478         129 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    1479         107 :       for (unsigned l = 0; l != NumElts; l += 8) {
    1480         576 :         for (unsigned i = 0; i != 4; ++i)
    1481         512 :           Idxs[i + l] = i + l;
    1482         576 :         for (unsigned i = 0; i != 4; ++i)
    1483         512 :           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    1484             :       }
    1485             : 
    1486          86 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    1487             : 
    1488          43 :       if (CI->getNumArgOperands() == 4)
    1489          24 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1490             :                             CI->getArgOperand(2));
    1491        2294 :     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
    1492          14 :       Value *Op0 = CI->getArgOperand(0);
    1493          14 :       Value *Op1 = CI->getArgOperand(1);
    1494          42 :       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    1495          28 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    1496             : 
    1497          14 :       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    1498          14 :       unsigned HalfLaneElts = NumLaneElts / 2;
    1499             : 
    1500          42 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    1501         108 :       for (unsigned i = 0; i != NumElts; ++i) {
    1502             :         // Base index is the starting element of the lane.
    1503         188 :         Idxs[i] = i - (i % NumLaneElts);
    1504             :         // If we are half way through the lane switch to the other source.
    1505          94 :         if ((i % NumLaneElts) >= HalfLaneElts)
    1506          94 :           Idxs[i] += NumElts;
    1507             :         // Now select the specific element. By adding HalfLaneElts bits from
    1508             :         // the immediate. Wrapping around the immediate every 8-bits.
    1509         188 :         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    1510             :       }
    1511             : 
    1512          28 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    1513             : 
    1514          28 :       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
    1515             :                           CI->getArgOperand(3));
    1516        2252 :     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
    1517        1104 :                          Name.startswith("avx512.mask.movshdup") ||
    1518        1095 :                          Name.startswith("avx512.mask.movsldup"))) {
    1519          27 :       Value *Op0 = CI->getArgOperand(0);
    1520          54 :       unsigned NumElts = CI->getType()->getVectorNumElements();
    1521          27 :       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    1522             : 
    1523          27 :       unsigned Offset = 0;
    1524          36 :       if (Name.startswith("avx512.mask.movshdup."))
    1525             :         Offset = 1;
    1526             : 
    1527          81 :       SmallVector<uint32_t, 16> Idxs(NumElts);
    1528          90 :       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    1529         273 :         for (unsigned i = 0; i != NumLaneElts; i += 2) {
    1530         210 :           Idxs[i + l + 0] = i + l + Offset;
    1531         210 :           Idxs[i + l + 1] = i + l + Offset;
    1532             :         }
    1533             : 
    1534          54 :       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    1535             : 
    1536          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1537             :                           CI->getArgOperand(1));
    1538        2198 :     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
    1539        1057 :                          Name.startswith("avx512.mask.unpckl."))) {
    1540          41 :       Value *Op0 = CI->getArgOperand(0);
    1541          41 :       Value *Op1 = CI->getArgOperand(1);
    1542          82 :       int NumElts = CI->getType()->getVectorNumElements();
    1543          41 :       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    1544             : 
    1545         123 :       SmallVector<uint32_t, 64> Idxs(NumElts);
    1546         145 :       for (int l = 0; l != NumElts; l += NumLaneElts)
    1547        1512 :         for (int i = 0; i != NumLaneElts; ++i)
    1548        1408 :           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
    1549             : 
    1550          82 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    1551             : 
    1552          82 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1553             :                           CI->getArgOperand(2));
    1554        2116 :     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
    1555        1017 :                          Name.startswith("avx512.mask.unpckh."))) {
    1556          40 :       Value *Op0 = CI->getArgOperand(0);
    1557          40 :       Value *Op1 = CI->getArgOperand(1);
    1558          80 :       int NumElts = CI->getType()->getVectorNumElements();
    1559          40 :       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    1560             : 
    1561         120 :       SmallVector<uint32_t, 64> Idxs(NumElts);
    1562         140 :       for (int l = 0; l != NumElts; l += NumLaneElts)
    1563        1492 :         for (int i = 0; i != NumLaneElts; ++i)
    1564        1392 :           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
    1565             : 
    1566          80 :       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    1567             : 
    1568          80 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1569             :                           CI->getArgOperand(2));
    1570        2058 :     } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
    1571          66 :       Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
    1572          44 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1573             :                           CI->getArgOperand(2));
    1574        2028 :     } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
    1575         144 :       Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
    1576             :                               CI->getArgOperand(1));
    1577          72 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1578             :                           CI->getArgOperand(2));
    1579        1942 :     } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
    1580          66 :       Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
    1581          44 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1582             :                           CI->getArgOperand(2));
    1583        1898 :     } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
    1584          66 :       Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
    1585          44 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1586             :                           CI->getArgOperand(2));
    1587        1859 :     } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
    1588          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    1589          27 :       VectorType *ITy = VectorType::getInteger(FTy);
    1590         189 :       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
    1591             :                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    1592          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    1593          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1594             :                           CI->getArgOperand(2));
    1595        1805 :     } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
    1596          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    1597          27 :       VectorType *ITy = VectorType::getInteger(FTy);
    1598         108 :       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    1599         108 :       Rep = Builder.CreateAnd(Rep,
    1600             :                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    1601          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    1602          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1603             :                           CI->getArgOperand(2));
    1604        1751 :     } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
    1605          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    1606          27 :       VectorType *ITy = VectorType::getInteger(FTy);
    1607         189 :       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
    1608             :                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    1609          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    1610          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1611             :                           CI->getArgOperand(2));
    1612        1697 :     } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
    1613          54 :       VectorType *FTy = cast<VectorType>(CI->getType());
    1614          27 :       VectorType *ITy = VectorType::getInteger(FTy);
    1615         189 :       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
    1616             :                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    1617          54 :       Rep = Builder.CreateBitCast(Rep, FTy);
    1618          54 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1619             :                           CI->getArgOperand(2));
    1620        1670 :     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
    1621         162 :       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    1622         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1623             :                           CI->getArgOperand(2));
    1624        1562 :     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
    1625         162 :       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    1626         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1627             :                           CI->getArgOperand(2));
    1628        1454 :     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
    1629         162 :       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    1630         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1631             :                           CI->getArgOperand(2));
    1632        1298 :     } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
    1633          18 :       Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    1634          12 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1635             :                           CI->getArgOperand(2));
    1636        1286 :     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
    1637          18 :       Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    1638          12 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1639             :                           CI->getArgOperand(2));
    1640        1274 :     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
    1641          18 :       Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    1642          12 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1643             :                           CI->getArgOperand(2));
    1644        1262 :     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
    1645          18 :       Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    1646          12 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1647             :                           CI->getArgOperand(2));
    1648        1257 :     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
    1649          91 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
    1650             :                                                          Intrinsic::ctlz,
    1651          26 :                                                          CI->getType()),
    1652          26 :                                { CI->getArgOperand(0), Builder.getInt1(false) });
    1653          26 :       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
    1654             :                           CI->getArgOperand(1));
    1655        1218 :     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
    1656         590 :                          Name.startswith("avx512.mask.min.p"))) {
    1657          24 :       bool IsMin = Name[13] == 'i';
    1658          24 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1659          12 :       unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
    1660          12 :       unsigned EltWidth = VecTy->getScalarSizeInBits();
    1661             :       Intrinsic::ID IID;
    1662          15 :       if (!IsMin && VecWidth == 128 && EltWidth == 32)
    1663             :         IID = Intrinsic::x86_sse_max_ps;
    1664           9 :       else if (!IsMin && VecWidth == 128 && EltWidth == 64)
    1665             :         IID = Intrinsic::x86_sse2_max_pd;
    1666           9 :       else if (!IsMin && VecWidth == 256 && EltWidth == 32)
    1667             :         IID = Intrinsic::x86_avx_max_ps_256;
    1668           6 :       else if (!IsMin && VecWidth == 256 && EltWidth == 64)
    1669             :         IID = Intrinsic::x86_avx_max_pd_256;
    1670           6 :       else if (IsMin && VecWidth == 128 && EltWidth == 32)
    1671             :         IID = Intrinsic::x86_sse_min_ps;
    1672           3 :       else if (IsMin && VecWidth == 128 && EltWidth == 64)
    1673             :         IID = Intrinsic::x86_sse2_min_pd;
    1674           3 :       else if (IsMin && VecWidth == 256 && EltWidth == 32)
    1675             :         IID = Intrinsic::x86_avx_min_ps_256;
    1676           0 :       else if (IsMin && VecWidth == 256 && EltWidth == 64)
    1677             :         IID = Intrinsic::x86_avx_min_pd_256;
    1678             :       else
    1679           0 :         llvm_unreachable("Unexpected intrinsic");
    1680             : 
    1681          72 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    1682          24 :                                { CI->getArgOperand(0), CI->getArgOperand(1) });
    1683          24 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1684             :                           CI->getArgOperand(2));
    1685        1227 :     } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
    1686          66 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1687             :       Intrinsic::ID IID;
    1688          33 :       if (VecTy->getPrimitiveSizeInBits() == 128)
    1689             :         IID = Intrinsic::x86_ssse3_pshuf_b_128;
    1690          28 :       else if (VecTy->getPrimitiveSizeInBits() == 256)
    1691             :         IID = Intrinsic::x86_avx2_pshuf_b;
    1692          23 :       else if (VecTy->getPrimitiveSizeInBits() == 512)
    1693             :         IID = Intrinsic::x86_avx512_pshuf_b_512;
    1694             :       else
    1695           0 :         llvm_unreachable("Unexpected intrinsic");
    1696             : 
    1697         198 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    1698          66 :                                { CI->getArgOperand(0), CI->getArgOperand(1) });
    1699          66 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1700             :                           CI->getArgOperand(2));
    1701        1128 :     } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
    1702         524 :                          Name.startswith("avx512.mask.pmulu.dq."))) {
    1703         108 :       bool IsUnsigned = Name[16] == 'u';
    1704         108 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1705             :       Intrinsic::ID IID;
    1706          81 :       if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
    1707             :         IID = Intrinsic::x86_sse41_pmuldq;
    1708          45 :       else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
    1709             :         IID = Intrinsic::x86_avx2_pmul_dq;
    1710          36 :       else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
    1711             :         IID = Intrinsic::x86_avx512_pmul_dq_512;
    1712          27 :       else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
    1713             :         IID = Intrinsic::x86_sse2_pmulu_dq;
    1714          18 :       else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
    1715             :         IID = Intrinsic::x86_avx2_pmulu_dq;
    1716           9 :       else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
    1717             :         IID = Intrinsic::x86_avx512_pmulu_dq_512;
    1718             :       else
    1719           0 :         llvm_unreachable("Unexpected intrinsic");
    1720             : 
    1721         324 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    1722         108 :                                { CI->getArgOperand(0), CI->getArgOperand(1) });
    1723         108 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1724             :                           CI->getArgOperand(2));
    1725        1146 :     } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
    1726         252 :       bool IsUnsigned = Name[16] == 'u';
    1727         252 :       bool IsDW = Name[18] == 'd';
    1728         252 :       VectorType *VecTy = cast<VectorType>(CI->getType());
    1729             :       Intrinsic::ID IID;
    1730         126 :       if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
    1731             :         IID = Intrinsic::x86_sse2_packsswb_128;
    1732         120 :       else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
    1733             :         IID = Intrinsic::x86_avx2_packsswb;
    1734         114 :       else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
    1735             :         IID = Intrinsic::x86_avx512_packsswb_512;
    1736         101 :       else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
    1737             :         IID = Intrinsic::x86_sse2_packssdw_128;
    1738          92 :       else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
    1739             :         IID = Intrinsic::x86_avx2_packssdw;
    1740          83 :       else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
    1741             :         IID = Intrinsic::x86_avx512_packssdw_512;
    1742          64 :       else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
    1743             :         IID = Intrinsic::x86_sse2_packuswb_128;
    1744          58 :       else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
    1745             :         IID = Intrinsic::x86_avx2_packuswb;
    1746          52 :       else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
    1747             :         IID = Intrinsic::x86_avx512_packuswb_512;
    1748          39 :       else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
    1749             :         IID = Intrinsic::x86_sse41_packusdw;
    1750          30 :       else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
    1751             :         IID = Intrinsic::x86_avx2_packusdw;
    1752          21 :       else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
    1753             :         IID = Intrinsic::x86_avx512_packusdw_512;
    1754             :       else
    1755           0 :         llvm_unreachable("Unexpected intrinsic");
    1756             : 
    1757         756 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
    1758         252 :                                { CI->getArgOperand(0), CI->getArgOperand(1) });
    1759         252 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1760             :                           CI->getArgOperand(2));
    1761         855 :     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
    1762         255 :       bool IsImmediate = Name[16] == 'i' ||
    1763         231 :                          (Name.size() > 18 && Name[18] == 'i');
    1764          87 :       bool IsVariable = Name[16] == 'v';
    1765         138 :       char Size = Name[16] == '.' ? Name[17] :
    1766          84 :                   Name[17] == '.' ? Name[18] :
    1767          72 :                   Name[18] == '.' ? Name[19] :
    1768          87 :                                     Name[20];
    1769             : 
    1770             :       Intrinsic::ID IID;
    1771         117 :       if (IsVariable && Name[17] != '.') {
    1772          24 :         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
    1773             :           IID = Intrinsic::x86_avx2_psllv_q;
    1774          21 :         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
    1775             :           IID = Intrinsic::x86_avx2_psllv_q_256;
    1776          18 :         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
    1777             :           IID = Intrinsic::x86_avx2_psllv_d;
    1778          15 :         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
    1779             :           IID = Intrinsic::x86_avx2_psllv_d_256;
    1780          12 :         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
    1781             :           IID = Intrinsic::x86_avx512_psllv_w_128;
    1782           9 :         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
    1783             :           IID = Intrinsic::x86_avx512_psllv_w_256;
    1784          12 :         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
    1785             :           IID = Intrinsic::x86_avx512_psllv_w_512;
    1786             :         else
    1787           0 :           llvm_unreachable("Unexpected size");
    1788          75 :       } else if (Name.endswith(".128")) {
    1789          12 :         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
    1790           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
    1791             :                             : Intrinsic::x86_sse2_psll_d;
    1792           6 :         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
    1793           0 :           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
    1794             :                             : Intrinsic::x86_sse2_psll_q;
    1795           6 :         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
    1796           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
    1797             :                             : Intrinsic::x86_sse2_psll_w;
    1798             :         else
    1799           0 :           llvm_unreachable("Unexpected size");
    1800          66 :       } else if (Name.endswith(".256")) {
    1801          15 :         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
    1802           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
    1803             :                             : Intrinsic::x86_avx2_psll_d;
    1804           9 :         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
    1805           3 :           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
    1806             :                             : Intrinsic::x86_avx2_psll_q;
    1807           6 :         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
    1808           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
    1809             :                             : Intrinsic::x86_avx2_psll_w;
    1810             :         else
    1811           0 :           llvm_unreachable("Unexpected size");
    1812             :       } else {
    1813          36 :         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
    1814          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
    1815           6 :                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
    1816             :                               Intrinsic::x86_avx512_psll_d_512;
    1817          24 :         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
    1818          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
    1819           6 :                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
    1820             :                               Intrinsic::x86_avx512_psll_q_512;
    1821          12 :         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
    1822          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
    1823             :                             : Intrinsic::x86_avx512_psll_w_512;
    1824             :         else
    1825           0 :           llvm_unreachable("Unexpected size");
    1826             :       }
    1827             : 
    1828          87 :       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    1829         691 :     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
    1830         285 :       bool IsImmediate = Name[16] == 'i' ||
    1831         261 :                          (Name.size() > 18 && Name[18] == 'i');
    1832          97 :       bool IsVariable = Name[16] == 'v';
    1833         157 :       char Size = Name[16] == '.' ? Name[17] :
    1834          87 :                   Name[17] == '.' ? Name[18] :
    1835          72 :                   Name[18] == '.' ? Name[19] :
    1836          97 :                                     Name[20];
    1837             : 
    1838             :       Intrinsic::ID IID;
    1839         128 :       if (IsVariable && Name[17] != '.') {
    1840          24 :         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
    1841             :           IID = Intrinsic::x86_avx2_psrlv_q;
    1842          21 :         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
    1843             :           IID = Intrinsic::x86_avx2_psrlv_q_256;
    1844          18 :         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
    1845             :           IID = Intrinsic::x86_avx2_psrlv_d;
    1846          15 :         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
    1847             :           IID = Intrinsic::x86_avx2_psrlv_d_256;
    1848          12 :         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
    1849             :           IID = Intrinsic::x86_avx512_psrlv_w_128;
    1850           9 :         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
    1851             :           IID = Intrinsic::x86_avx512_psrlv_w_256;
    1852          12 :         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
    1853             :           IID = Intrinsic::x86_avx512_psrlv_w_512;
    1854             :         else
    1855           0 :           llvm_unreachable("Unexpected size");
    1856          91 :       } else if (Name.endswith(".128")) {
    1857          18 :         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
    1858           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
    1859             :                             : Intrinsic::x86_sse2_psrl_d;
    1860          12 :         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
    1861           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
    1862             :                             : Intrinsic::x86_sse2_psrl_q;
    1863           6 :         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
    1864           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
    1865             :                             : Intrinsic::x86_sse2_psrl_w;
    1866             :         else
    1867           0 :           llvm_unreachable("Unexpected size");
    1868          73 :       } else if (Name.endswith(".256")) {
    1869          18 :         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
    1870           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
    1871             :                             : Intrinsic::x86_avx2_psrl_d;
    1872          12 :         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
    1873           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
    1874             :                             : Intrinsic::x86_avx2_psrl_q;
    1875           6 :         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
    1876           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
    1877             :                             : Intrinsic::x86_avx2_psrl_w;
    1878             :         else
    1879           0 :           llvm_unreachable("Unexpected size");
    1880             :       } else {
    1881          37 :         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
    1882          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
    1883           6 :                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
    1884             :                               Intrinsic::x86_avx512_psrl_d_512;
    1885          25 :         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
    1886          13 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
    1887           7 :                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
    1888             :                               Intrinsic::x86_avx512_psrl_q_512;
    1889          12 :         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
    1890          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
    1891             :                             : Intrinsic::x86_avx512_psrl_w_512;
    1892             :         else
    1893           0 :           llvm_unreachable("Unexpected size");
    1894             :       }
    1895             : 
    1896          97 :       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    1897         494 :     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
    1898         276 :       bool IsImmediate = Name[16] == 'i' ||
    1899         252 :                          (Name.size() > 18 && Name[18] == 'i');
    1900          94 :       bool IsVariable = Name[16] == 'v';
    1901         148 :       char Size = Name[16] == '.' ? Name[17] :
    1902          99 :                   Name[17] == '.' ? Name[18] :
    1903          63 :                   Name[18] == '.' ? Name[19] :
    1904          94 :                                     Name[20];
    1905             : 
    1906             :       Intrinsic::ID IID;
    1907         128 :       if (IsVariable && Name[17] != '.') {
    1908          21 :         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
    1909             :           IID = Intrinsic::x86_avx2_psrav_d;
    1910          18 :         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
    1911             :           IID = Intrinsic::x86_avx2_psrav_d_256;
    1912          14 :         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
    1913             :           IID = Intrinsic::x86_avx512_psrav_w_128;
    1914          11 :         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
    1915             :           IID = Intrinsic::x86_avx512_psrav_w_256;
    1916          16 :         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
    1917             :           IID = Intrinsic::x86_avx512_psrav_w_512;
    1918             :         else
    1919           0 :           llvm_unreachable("Unexpected size");
    1920          92 :       } else if (Name.endswith(".128")) {
    1921          19 :         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
    1922           3 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
    1923             :                             : Intrinsic::x86_sse2_psra_d;
    1924          16 :         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
    1925          10 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
    1926           7 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
    1927             :                               Intrinsic::x86_avx512_psra_q_128;
    1928           6 :         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
    1929           6 :           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
    1930             :                             : Intrinsic::x86_sse2_psra_w;
    1931             :         else
    1932           0 :           llvm_unreachable("Unexpected size");
    1933          72 :       } else if (Name.endswith(".256")) {
    1934          18 :         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
    1935           3 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
    1936             :                             : Intrinsic::x86_avx2_psra_d;
    1937          15 :         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
    1938           9 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
    1939           6 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
    1940             :                               Intrinsic::x86_avx512_psra_q_256;
    1941           6 :         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
    1942           6 :           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
    1943             :                             : Intrinsic::x86_avx2_psra_w;
    1944             :         else
    1945           0 :           llvm_unreachable("Unexpected size");
    1946             :       } else {
    1947          36 :         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
    1948          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
    1949           6 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
    1950             :                               Intrinsic::x86_avx512_psra_d_512;
    1951          24 :         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
    1952          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
    1953           6 :                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
    1954             :                               Intrinsic::x86_avx512_psra_q_512;
    1955          12 :         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
    1956          12 :           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
    1957             :                             : Intrinsic::x86_avx512_psra_w_512;
    1958             :         else
    1959           0 :           llvm_unreachable("Unexpected size");
    1960             :       }
    1961             : 
    1962          94 :       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    1963         216 :     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
    1964           4 :       Rep = upgradeMaskedMove(Builder, *CI);
    1965         218 :     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
    1966          14 :       Rep = UpgradeMaskToInt(Builder, *CI);
    1967         207 :     } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
    1968             :       Intrinsic::ID IID;
    1969          31 :       if (Name.endswith("ps.128"))
    1970             :         IID = Intrinsic::x86_avx_vpermilvar_ps;
    1971          28 :       else if (Name.endswith("pd.128"))
    1972             :         IID = Intrinsic::x86_avx_vpermilvar_pd;
    1973          25 :       else if (Name.endswith("ps.256"))
    1974             :         IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    1975          22 :       else if (Name.endswith("pd.256"))
    1976             :         IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    1977          19 :       else if (Name.endswith("ps.512"))
    1978             :         IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    1979           6 :       else if (Name.endswith("pd.512"))
    1980             :         IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    1981             :       else
    1982           0 :         llvm_unreachable("Unexpected vpermilvar intrinsic");
    1983             : 
    1984          31 :       Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
    1985         155 :       Rep = Builder.CreateCall(Intrin,
    1986          62 :                                { CI->getArgOperand(0), CI->getArgOperand(1) });
    1987          62 :       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    1988             :                           CI->getArgOperand(2));
    1989         134 :     } else if (IsX86 && Name.endswith(".movntdqa")) {
    1990          20 :       Module *M = F->getParent();
    1991          20 :       MDNode *Node = MDNode::get(
    1992          60 :           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    1993             : 
    1994          20 :       Value *Ptr = CI->getArgOperand(0);
    1995          40 :       VectorType *VTy = cast<VectorType>(CI->getType());
    1996             : 
    1997             :       // Convert the type of the pointer to a pointer to the stored type.
    1998             :       Value *BC =
    1999          60 :           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
    2000          60 :       LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
    2001          40 :       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
    2002          20 :       Rep = LI;
    2003          50 :     } else if (IsX86 &&
    2004          44 :                (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
    2005          16 :                 Name.startswith("avx512.mask.pavg"))) {
    2006             :       // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
    2007             :       // llvm.x86.avx512.mask.pavg.b/w
    2008          24 :       Value *A = CI->getArgOperand(0);
    2009          24 :       Value *B = CI->getArgOperand(1);
    2010          48 :       VectorType *ZextType = VectorType::getExtendedElementVectorType(
    2011          24 :           cast<VectorType>(A->getType()));
    2012          48 :       Value *ExtendedA = Builder.CreateZExt(A, ZextType);
    2013          48 :       Value *ExtendedB = Builder.CreateZExt(B, ZextType);
    2014          24 :       Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
    2015          24 :       Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
    2016          24 :       Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
    2017          48 :       Rep = Builder.CreateTrunc(ShiftR, A->getType());
    2018          24 :       if (CI->getNumArgOperands() > 2) {
    2019          32 :         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
    2020             :                             CI->getArgOperand(2));
    2021             :       }
    2022          76 :     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
    2023           4 :       Value *Arg = CI->getArgOperand(0);
    2024           4 :       Value *Neg = Builder.CreateNeg(Arg, "neg");
    2025           8 :       Value *Cmp = Builder.CreateICmpSGE(
    2026           8 :           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    2027           4 :       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    2028          64 :     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
    2029          34 :                           Name == "max.ui" || Name == "max.ull")) {
    2030           8 :       Value *Arg0 = CI->getArgOperand(0);
    2031           8 :       Value *Arg1 = CI->getArgOperand(1);
    2032          22 :       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
    2033          20 :                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
    2034          12 :                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    2035           8 :       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    2036          40 :     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
    2037          18 :                           Name == "min.ui" || Name == "min.ull")) {
    2038           8 :       Value *Arg0 = CI->getArgOperand(0);
    2039           8 :       Value *Arg1 = CI->getArgOperand(1);
    2040          22 :       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
    2041          20 :                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
    2042          12 :                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    2043           8 :       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    2044          14 :     } else if (IsNVVM && Name == "clz.ll") {
    2045             :       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
    2046           2 :       Value *Arg = CI->getArgOperand(0);
    2047          10 :       Value *Ctlz = Builder.CreateCall(
    2048             :           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
    2049             :                                     {Arg->getType()}),
    2050           4 :           {Arg, Builder.getFalse()}, "ctlz");
    2051           6 :       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    2052          10 :     } else if (IsNVVM && Name == "popc.ll") {
    2053             :       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
    2054             :       // i64.
    2055           2 :       Value *Arg = CI->getArgOperand(0);
    2056           8 :       Value *Popc = Builder.CreateCall(
    2057             :           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
    2058           2 :                                     {Arg->getType()}),
    2059           2 :           Arg, "ctpop");
    2060           6 :       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    2061           6 :     } else if (IsNVVM && Name == "h2f") {
    2062          12 :       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
    2063             :                                    F->getParent(), Intrinsic::convert_from_fp16,
    2064           2 :                                    {Builder.getFloatTy()}),
    2065           4 :                                CI->getArgOperand(0), "h2f");
    2066             :     } else {
    2067           0 :       llvm_unreachable("Unknown function for CallInst upgrade.");
    2068             :     }
    2069             : 
    2070        3580 :     if (Rep)
    2071        3580 :       CI->replaceAllUsesWith(Rep);
    2072        3586 :     CI->eraseFromParent();
    2073        3586 :     return;
    2074             :   }
    2075             : 
    2076         892 :   CallInst *NewCall = nullptr;
    2077         892 :   switch (NewFn->getIntrinsicID()) {
    2078         204 :   default: {
    2079             :     // Handle generic mangling change, but nothing else
    2080             :     assert(
    2081             :         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
    2082             :         "Unknown function for CallInst upgrade and isn't just a name change");
    2083             :     CI->setCalledFunction(NewFn);
    2084             :     return;
    2085             :   }
    2086             : 
    2087          14 :   case Intrinsic::arm_neon_vld1:
    2088             :   case Intrinsic::arm_neon_vld2:
    2089             :   case Intrinsic::arm_neon_vld3:
    2090             :   case Intrinsic::arm_neon_vld4:
    2091             :   case Intrinsic::arm_neon_vld2lane:
    2092             :   case Intrinsic::arm_neon_vld3lane:
    2093             :   case Intrinsic::arm_neon_vld4lane:
    2094             :   case Intrinsic::arm_neon_vst1:
    2095             :   case Intrinsic::arm_neon_vst2:
    2096             :   case Intrinsic::arm_neon_vst3:
    2097             :   case Intrinsic::arm_neon_vst4:
    2098             :   case Intrinsic::arm_neon_vst2lane:
    2099             :   case Intrinsic::arm_neon_vst3lane:
    2100             :   case Intrinsic::arm_neon_vst4lane: {
    2101          28 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    2102          70 :                                  CI->arg_operands().end());
    2103          42 :     NewCall = Builder.CreateCall(NewFn, Args);
    2104             :     break;
    2105             :   }
    2106             : 
    2107           8 :   case Intrinsic::bitreverse:
    2108          32 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    2109           8 :     break;
    2110             : 
    2111          54 :   case Intrinsic::ctlz:
    2112             :   case Intrinsic::cttz:
    2113             :     assert(CI->getNumArgOperands() == 1 &&
    2114             :            "Mismatch between function args and call args");
    2115          54 :     NewCall =
    2116         270 :         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    2117          54 :     break;
    2118             : 
    2119          74 :   case Intrinsic::objectsize: {
    2120          74 :     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
    2121          74 :                                    ? Builder.getFalse()
    2122          74 :                                    : CI->getArgOperand(2);
    2123         370 :     NewCall = Builder.CreateCall(
    2124         148 :         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
    2125          74 :     break;
    2126             :   }
    2127             : 
    2128           4 :   case Intrinsic::ctpop:
    2129          16 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    2130           4 :     break;
    2131             : 
    2132           0 :   case Intrinsic::convert_from_fp16:
    2133           0 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    2134           0 :     break;
    2135             : 
    2136         430 :   case Intrinsic::dbg_value:
    2137             :     // Upgrade from the old version that had an extra offset argument.
    2138             :     assert(CI->getNumArgOperands() == 4);
    2139             :     // Drop nonzero offsets instead of attempting to upgrade them.
    2140         860 :     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
    2141         430 :       if (Offset->isZeroValue()) {
    2142        2568 :         NewCall = Builder.CreateCall(
    2143             :             NewFn,
    2144        1284 :             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
    2145         428 :         break;
    2146             :       }
    2147           2 :     CI->eraseFromParent();
    2148           2 :     return;
    2149             : 
    2150           0 :   case Intrinsic::x86_xop_vfrcz_ss:
    2151             :   case Intrinsic::x86_xop_vfrcz_sd:
    2152           0 :     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    2153           0 :     break;
    2154             : 
    2155           8 :   case Intrinsic::x86_xop_vpermil2pd:
    2156             :   case Intrinsic::x86_xop_vpermil2ps:
    2157             :   case Intrinsic::x86_xop_vpermil2pd_256:
    2158             :   case Intrinsic::x86_xop_vpermil2ps_256: {
    2159          16 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    2160          40 :                                  CI->arg_operands().end());
    2161          16 :     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    2162           8 :     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    2163          32 :     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    2164          24 :     NewCall = Builder.CreateCall(NewFn, Args);
    2165             :     break;
    2166             :   }
    2167             : 
    2168          12 :   case Intrinsic::x86_sse41_ptestc:
    2169             :   case Intrinsic::x86_sse41_ptestz:
    2170             :   case Intrinsic::x86_sse41_ptestnzc: {
    2171             :     // The arguments for these intrinsics used to be v4f32, and changed
    2172             :     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    2173             :     // So, the only thing required is a bitcast for both arguments.
    2174             :     // First, check the arguments have the old type.
    2175          12 :     Value *Arg0 = CI->getArgOperand(0);
    2176          12 :     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
    2177             :       return;
    2178             : 
    2179             :     // Old intrinsic, add bitcasts
    2180          12 :     Value *Arg1 = CI->getArgOperand(1);
    2181             : 
    2182          12 :     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
    2183             : 
    2184          24 :     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    2185          24 :     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    2186             : 
    2187          36 :     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    2188          12 :     break;
    2189             :   }
    2190             : 
    2191          47 :   case Intrinsic::x86_sse41_insertps:
    2192             :   case Intrinsic::x86_sse41_dppd:
    2193             :   case Intrinsic::x86_sse41_dpps:
    2194             :   case Intrinsic::x86_sse41_mpsadbw:
    2195             :   case Intrinsic::x86_avx_dp_ps_256:
    2196             :   case Intrinsic::x86_avx2_mpsadbw: {
    2197             :     // Need to truncate the last argument from i32 to i8 -- this argument models
    2198             :     // an inherently 8-bit immediate operand to these x86 instructions.
    2199          94 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    2200         235 :                                  CI->arg_operands().end());
    2201             : 
    2202             :     // Replace the last argument with a trunc.
    2203         188 :     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    2204         141 :     NewCall = Builder.CreateCall(NewFn, Args);
    2205             :     break;
    2206             :   }
    2207             : 
    2208           2 :   case Intrinsic::thread_pointer: {
    2209           4 :     NewCall = Builder.CreateCall(NewFn, {});
    2210           2 :     break;
    2211             :   }
    2212             : 
    2213          35 :   case Intrinsic::invariant_start:
    2214             :   case Intrinsic::invariant_end:
    2215             :   case Intrinsic::masked_load:
    2216             :   case Intrinsic::masked_store:
    2217             :   case Intrinsic::masked_gather:
    2218             :   case Intrinsic::masked_scatter: {
    2219          70 :     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
    2220         175 :                                  CI->arg_operands().end());
    2221         105 :     NewCall = Builder.CreateCall(NewFn, Args);
    2222             :     break;
    2223             :   }
    2224             :   }
    2225             :   assert(NewCall && "Should have either set this variable or returned through "
    2226             :                     "the default case");
    2227        2058 :   std::string Name = CI->getName();
    2228         686 :   if (!Name.empty()) {
    2229         342 :     CI->setName(Name + ".old");
    2230         228 :     NewCall->setName(Name);
    2231             :   }
    2232         686 :   CI->replaceAllUsesWith(NewCall);
    2233         686 :   CI->eraseFromParent();
    2234             : }
    2235             : 
    2236      233510 : void llvm::UpgradeCallsToIntrinsic(Function *F) {
    2237             :   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
    2238             : 
    2239             :   // Check if this function should be upgraded and get the replacement function
    2240             :   // if there is one.
    2241             :   Function *NewFn;
    2242      233510 :   if (UpgradeIntrinsicFunction(F, NewFn)) {
    2243             :     // Replace all users of the old function with the new function or new
    2244             :     // instructions. This is not a range loop because the call is deleted.
    2245       12738 :     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
    2246       17864 :       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
    2247        4466 :         UpgradeIntrinsicCall(CI, NewFn);
    2248             : 
    2249             :     // Remove old function, no longer used, from the module.
    2250        2068 :     F->eraseFromParent();
    2251             :   }
    2252      233510 : }
    2253             : 
    2254        3364 : MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
    2255             :   // Check if the tag uses struct-path aware TBAA format.
    2256        6485 :   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    2257             :     return &MD;
    2258             : 
    2259         243 :   auto &Context = MD.getContext();
    2260         243 :   if (MD.getNumOperands() == 3) {
    2261         128 :     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    2262          64 :     MDNode *ScalarType = MDNode::get(Context, Elts);
    2263             :     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    2264             :     Metadata *Elts2[] = {ScalarType, ScalarType,
    2265          64 :                          ConstantAsMetadata::get(
    2266          32 :                              Constant::getNullValue(Type::getInt64Ty(Context))),
    2267         128 :                          MD.getOperand(2)};
    2268          64 :     return MDNode::get(Context, Elts2);
    2269             :   }
    2270             :   // Create a MDNode <MD, MD, offset 0>
    2271         422 :   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
    2272         422 :                                     Type::getInt64Ty(Context)))};
    2273         422 :   return MDNode::get(Context, Elts);
    2274             : }
    2275             : 
    2276        5981 : Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
    2277             :                                       Instruction *&Temp) {
    2278        5981 :   if (Opc != Instruction::BitCast)
    2279             :     return nullptr;
    2280             : 
    2281        4792 :   Temp = nullptr;
    2282        4792 :   Type *SrcTy = V->getType();
    2283       12536 :   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
    2284        7744 :       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    2285           0 :     LLVMContext &Context = V->getContext();
    2286             : 
    2287             :     // We have no information about target data layout, so we assume that
    2288             :     // the maximum pointer size is 64bit.
    2289           0 :     Type *MidTy = Type::getInt64Ty(Context);
    2290           0 :     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
    2291             : 
    2292           0 :     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
    2293             :   }
    2294             : 
    2295             :   return nullptr;
    2296             : }
    2297             : 
    2298        1489 : Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
    2299        1489 :   if (Opc != Instruction::BitCast)
    2300             :     return nullptr;
    2301             : 
    2302        1262 :   Type *SrcTy = C->getType();
    2303        3738 :   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
    2304        2476 :       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    2305           0 :     LLVMContext &Context = C->getContext();
    2306             : 
    2307             :     // We have no information about target data layout, so we assume that
    2308             :     // the maximum pointer size is 64bit.
    2309           0 :     Type *MidTy = Type::getInt64Ty(Context);
    2310             : 
    2311           0 :     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
    2312           0 :                                      DestTy);
    2313             :   }
    2314             : 
    2315             :   return nullptr;
    2316             : }
    2317             : 
    2318             : /// Check the module's debug info version number; if it differs from
    2319             : /// DEBUG_METADATA_VERSION, drop the debug info. Returns the result of
                       /// StripDebugInfo (true if the module was modified), or false when the
                       /// version already matches.
    2320       31088 : bool llvm::UpgradeDebugInfo(Module &M) {
    2321       31088 :   unsigned Version = getDebugMetadataVersionFromModule(M);
                         // Matching version: leave the module untouched.
    2322       31088 :   if (Version == DEBUG_METADATA_VERSION)
    2323             :     return false;
    2324             : 
                         // Version mismatch: strip the debug info and, if StripDebugInfo reports a
                         // change, emit a DiagnosticInfoDebugMetadataVersion on the module's
                         // context carrying the version that was found.
    2325       29481 :   bool RetCode = StripDebugInfo(M);
    2326       29481 :   if (RetCode) {
    2327         108 :     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    2328          54 :     M.getContext().diagnose(DiagVersion);
    2329             :   }
    2330             :   return RetCode;
    2331             : }
    2332             : 
    2333       30171 : bool llvm::UpgradeModuleFlags(Module &M) {
                         // Upgrades the module-flags metadata of M in place. Returns true if any
                         // flag was rewritten or added, and false otherwise (including when M has
                         // no module-flags metadata at all).
    2334       30171 :   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
    2335       30171 :   if (!ModFlags)
    2336             :     return false;
    2337             : 
    2338        2036 :   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
    2339        5674 :   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    2340        3638 :     MDNode *Op = ModFlags->getOperand(I);
                           // Only three-operand entries are examined: operand 0 is read as the
                           // behavior, operand 1 as the flag ID and operand 2 as the value.
    2341        3638 :     if (Op->getNumOperands() != 3)
    2342           1 :       continue;
                           // Skip entries whose ID operand is null or not an MDString.
    2343        7271 :     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    2344           3 :     if (!ID)
    2345           3 :       continue;
                           // Record which Objective-C flags are present; used after the loop.
    2346        3666 :     if (ID->getString() == "Objective-C Image Info Version")
    2347             :       HasObjCFlag = true;
    2348        3648 :     if (ID->getString() == "Objective-C Class Properties")
    2349             :       HasClassProperties = true;
    2350             :     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    2351             :     // flags was Error and is now Max.
    2352        7004 :     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
    2353             :       if (auto *Behavior =
    2354         594 :               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
    2355         297 :         if (Behavior->getLimitedValue() == Module::Error) {
                                   // Replace the flag node with one whose behavior is Max; the ID
                                   // string and the value operand are carried over.
    2356         255 :           Type *Int32Ty = Type::getInt32Ty(M.getContext());
    2357             :           Metadata *Ops[3] = {
    2358         510 :               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
    2359         255 :               MDString::get(M.getContext(), ID->getString()),
    2360        1020 :               Op->getOperand(2)};
    2361         510 :           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
    2362         255 :           Changed = true;
    2363             :         }
    2364             :       }
    2365             :     }
    2366             :   }
    2367             : 
    2368             :   // "Objective-C Class Properties" was recently added for Objective-C. We
    2369             :   // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
    2370             :   // flag of value 0, so we can correctly downgrade this flag when trying to
    2371             :   // link an ObjC bitcode without this module flag with an ObjC bitcode with
    2372             :   // this module flag.
    2373        2036 :   if (HasObjCFlag && !HasClassProperties) {
    2374          18 :     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
    2375             :                     (uint32_t)0);
    2376          18 :     Changed = true;
    2377             :   }
    2378             : 
    2379             :   return Changed;
    2380             : }
    2381             : 
    2382           4 : static bool isOldLoopArgument(Metadata *MD) {
                         // Returns true if MD is an MDTuple with at least one operand whose first
                         // operand is an MDString starting with "llvm.vectorizer.". A null MD, or
                         // any other shape, yields false.
    2383           4 :   auto *T = dyn_cast_or_null<MDTuple>(MD);
    2384             :   if (!T)
    2385             :     return false;
    2386           4 :   if (T->getNumOperands() < 1)
    2387             :     return false;
    2388          10 :   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
    2389             :   if (!S)
    2390             :     return false;
    2391           4 :   return S->getString().startswith("llvm.vectorizer.");
    2392             : }
    2393             : 
    2394           8 : static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
                         // Maps an old "llvm.vectorizer." tag to its replacement MDString in
                         // context C. OldTag must start with that prefix (checked by assert).
    2395           8 :   StringRef OldPrefix = "llvm.vectorizer.";
    2396             :   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
    2397             : 
                         // Special case: the old unroll tag becomes the interleave count tag.
    2398          10 :   if (OldTag == "llvm.vectorizer.unroll")
    2399           2 :     return MDString::get(C, "llvm.loop.interleave.count");
    2400             : 
                         // Every other tag keeps its suffix; only the prefix is swapped for
                         // "llvm.loop.vectorize.".
    2401          12 :   return MDString::get(
    2402          36 :       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
    2403          18 :              .str());
    2404             : }
    2405             : 
    2406          10 : static Metadata *upgradeLoopArgument(Metadata *MD) {
                         // Returns MD itself unless it is an MDTuple with at least one operand
                         // whose first operand is an MDString starting with "llvm.vectorizer.".
                         // In that case returns an MDTuple whose first operand is the upgraded tag
                         // and whose remaining operands are those of the original tuple.
    2407          10 :   auto *T = dyn_cast_or_null<MDTuple>(MD);
    2408             :   if (!T)
    2409             :     return MD;
    2410          10 :   if (T->getNumOperands() < 1)
    2411             :     return MD;
    2412          28 :   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
    2413             :   if (!OldTag)
    2414             :     return MD;
    2415           8 :   if (!OldTag->getString().startswith("llvm.vectorizer."))
    2416             :     return MD;
    2417             : 
    2418             :   // This has an old tag.  Upgrade it.
    2419           8 :   SmallVector<Metadata *, 8> Ops;
    2420           8 :   Ops.reserve(T->getNumOperands());
    2421          16 :   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
                         // Operands after the tag are carried over unchanged.
    2422          16 :   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    2423          24 :     Ops.push_back(T->getOperand(I));
    2424             : 
    2425          24 :   return MDTuple::get(T->getContext(), Ops);
    2426             : }
    2427             : 
    2428           2 : MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
                         // Upgrades old-style loop arguments found among the operands of N.
                         // Returns &N when N is not an MDTuple or none of its operands satisfies
                         // isOldLoopArgument; otherwise returns an MDTuple built from every
                         // operand of N passed through upgradeLoopArgument.
    2429           2 :   auto *T = dyn_cast<MDTuple>(&N);
    2430             :   if (!T)
    2431             :     return &N;
    2432             : 
                         // Nothing to upgrade: hand the original node back.
    2433           6 :   if (none_of(T->operands(), isOldLoopArgument))
    2434             :     return &N;
    2435             : 
    2436           2 :   SmallVector<Metadata *, 8> Ops;
    2437           2 :   Ops.reserve(T->getNumOperands());
    2438          24 :   for (Metadata *MD : T->operands())
    2439          10 :     Ops.push_back(upgradeLoopArgument(MD));
    2440             : 
    2441           6 :   return MDTuple::get(T->getContext(), Ops);
    2442             : }

Generated by: LCOV version 1.13