LLVM  mainline
AutoUpgrade.cpp
Go to the documentation of this file.
00001 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the auto-upgrade helper functions.
00011 // This is where deprecated IR intrinsics and other IR features are updated to
00012 // current specifications.
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "llvm/IR/AutoUpgrade.h"
00017 #include "llvm/IR/CFG.h"
00018 #include "llvm/IR/CallSite.h"
00019 #include "llvm/IR/Constants.h"
00020 #include "llvm/IR/DIBuilder.h"
00021 #include "llvm/IR/DebugInfo.h"
00022 #include "llvm/IR/DiagnosticInfo.h"
00023 #include "llvm/IR/Function.h"
00024 #include "llvm/IR/IRBuilder.h"
00025 #include "llvm/IR/Instruction.h"
00026 #include "llvm/IR/IntrinsicInst.h"
00027 #include "llvm/IR/LLVMContext.h"
00028 #include "llvm/IR/Module.h"
00029 #include "llvm/Support/ErrorHandling.h"
00030 #include "llvm/Support/Regex.h"
00031 #include <cstring>
00032 using namespace llvm;
00033 
00034 // Upgrade the declarations of the SSE4.1 functions whose arguments have
00035 // changed their type from v4f32 to v2i64.
00036 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
00037                                  Function *&NewFn) {
00038   // Check whether this is an old version of the function, which received
00039   // v4f32 arguments.
00040   Type *Arg0Type = F->getFunctionType()->getParamType(0);
00041   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
00042     return false;
00043 
00044   // Yes, it's old, replace it with new version.
00045   F->setName(F->getName() + ".old");
00046   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
00047   return true;
00048 }
00049 
00050 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
00051 // arguments have changed their type from i32 to i8.
00052 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
00053                                              Function *&NewFn) {
00054   // Check that the last argument is an i32.
00055   Type *LastArgType = F->getFunctionType()->getParamType(
00056      F->getFunctionType()->getNumParams() - 1);
00057   if (!LastArgType->isIntegerTy(32))
00058     return false;
00059 
00060   // Move this function aside and map down.
00061   F->setName(F->getName() + ".old");
00062   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
00063   return true;
00064 }
00065 
00066 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
00067   assert(F && "Illegal to upgrade a non-existent Function.");
00068 
00069   // Quickly eliminate it, if it's not a candidate.
00070   StringRef Name = F->getName();
00071   if (Name.size() <= 8 || !Name.startswith("llvm."))
00072     return false;
00073   Name = Name.substr(5); // Strip off "llvm."
00074 
00075   switch (Name[0]) {
00076   default: break;
00077   case 'a': {
00078     if (Name.startswith("arm.neon.vclz")) {
00079       Type* args[2] = {
00080         F->arg_begin()->getType(),
00081         Type::getInt1Ty(F->getContext())
00082       };
00083       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
00084       // the end of the name. Change name from llvm.arm.neon.vclz.* to
00085       //  llvm.ctlz.*
00086       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
00087       NewFn = Function::Create(fType, F->getLinkage(),
00088                                "llvm.ctlz." + Name.substr(14), F->getParent());
00089       return true;
00090     }
00091     if (Name.startswith("arm.neon.vcnt")) {
00092       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
00093                                         F->arg_begin()->getType());
00094       return true;
00095     }
00096     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
00097     if (vldRegex.match(Name)) {
00098       auto fArgs = F->getFunctionType()->params();
00099       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
00100       // Can't use Intrinsic::getDeclaration here as the return types might
00101       // then only be structurally equal.
00102       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
00103       NewFn = Function::Create(fType, F->getLinkage(),
00104                                "llvm." + Name + ".p0i8", F->getParent());
00105       return true;
00106     }
00107     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
00108     if (vstRegex.match(Name)) {
00109       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
00110                                                 Intrinsic::arm_neon_vst2,
00111                                                 Intrinsic::arm_neon_vst3,
00112                                                 Intrinsic::arm_neon_vst4};
00113 
00114       static const Intrinsic::ID StoreLaneInts[] = {
00115         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
00116         Intrinsic::arm_neon_vst4lane
00117       };
00118 
00119       auto fArgs = F->getFunctionType()->params();
00120       Type *Tys[] = {fArgs[0], fArgs[1]};
00121       if (Name.find("lane") == StringRef::npos)
00122         NewFn = Intrinsic::getDeclaration(F->getParent(),
00123                                           StoreInts[fArgs.size() - 3], Tys);
00124       else
00125         NewFn = Intrinsic::getDeclaration(F->getParent(),
00126                                           StoreLaneInts[fArgs.size() - 5], Tys);
00127       return true;
00128     }
00129     break;
00130   }
00131 
00132   case 'c': {
00133     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
00134       F->setName(Name + ".old");
00135       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
00136                                         F->arg_begin()->getType());
00137       return true;
00138     }
00139     if (Name.startswith("cttz.") && F->arg_size() == 1) {
00140       F->setName(Name + ".old");
00141       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
00142                                         F->arg_begin()->getType());
00143       return true;
00144     }
00145     break;
00146   }
00147 
00148   case 'o':
00149     // We only need to change the name to match the mangling including the
00150     // address space.
00151     if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
00152       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
00153       if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
00154         F->setName(Name + ".old");
00155         NewFn = Intrinsic::getDeclaration(F->getParent(),
00156                                           Intrinsic::objectsize, Tys);
00157         return true;
00158       }
00159     }
00160     break;
00161 
00162   case 'x': {
00163     if (Name.startswith("x86.sse2.pcmpeq.") ||
00164         Name.startswith("x86.sse2.pcmpgt.") ||
00165         Name.startswith("x86.avx2.pcmpeq.") ||
00166         Name.startswith("x86.avx2.pcmpgt.") ||
00167         Name.startswith("x86.avx2.vbroadcast") ||
00168         Name.startswith("x86.avx2.pbroadcast") ||
00169         Name.startswith("x86.avx.vpermil.") ||
00170         Name.startswith("x86.sse41.pmovsx") ||
00171         Name == "x86.avx.vinsertf128.pd.256" ||
00172         Name == "x86.avx.vinsertf128.ps.256" ||
00173         Name == "x86.avx.vinsertf128.si.256" ||
00174         Name == "x86.avx2.vinserti128" ||
00175         Name == "x86.avx.vextractf128.pd.256" ||
00176         Name == "x86.avx.vextractf128.ps.256" ||
00177         Name == "x86.avx.vextractf128.si.256" ||
00178         Name == "x86.avx2.vextracti128" ||
00179         Name == "x86.avx.movnt.dq.256" ||
00180         Name == "x86.avx.movnt.pd.256" ||
00181         Name == "x86.avx.movnt.ps.256" ||
00182         Name == "x86.sse42.crc32.64.8" ||
00183         Name == "x86.avx.vbroadcast.ss" ||
00184         Name == "x86.avx.vbroadcast.ss.256" ||
00185         Name == "x86.avx.vbroadcast.sd.256" ||
00186         Name == "x86.sse2.psll.dq" ||
00187         Name == "x86.sse2.psrl.dq" ||
00188         Name == "x86.avx2.psll.dq" ||
00189         Name == "x86.avx2.psrl.dq" ||
00190         Name == "x86.sse2.psll.dq.bs" ||
00191         Name == "x86.sse2.psrl.dq.bs" ||
00192         Name == "x86.avx2.psll.dq.bs" ||
00193         Name == "x86.avx2.psrl.dq.bs" ||
00194         Name == "x86.sse41.pblendw" ||
00195         Name == "x86.sse41.blendpd" ||
00196         Name == "x86.sse41.blendps" ||
00197         Name == "x86.avx.blend.pd.256" ||
00198         Name == "x86.avx.blend.ps.256" ||
00199         Name == "x86.avx2.pblendw" ||
00200         Name == "x86.avx2.pblendd.128" ||
00201         Name == "x86.avx2.pblendd.256" ||
00202         Name == "x86.avx2.vbroadcasti128" ||
00203         Name == "x86.xop.vpcmov" ||
00204         (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
00205       NewFn = nullptr;
00206       return true;
00207     }
00208     // SSE4.1 ptest functions may have an old signature.
00209     if (Name.startswith("x86.sse41.ptest")) {
00210       if (Name == "x86.sse41.ptestc")
00211         return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
00212       if (Name == "x86.sse41.ptestz")
00213         return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
00214       if (Name == "x86.sse41.ptestnzc")
00215         return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
00216     }
00217     // Several blend and other instructions with masks used the wrong number of
00218     // bits.
00219     if (Name == "x86.sse41.insertps")
00220       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
00221                                               NewFn);
00222     if (Name == "x86.sse41.dppd")
00223       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
00224                                               NewFn);
00225     if (Name == "x86.sse41.dpps")
00226       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
00227                                               NewFn);
00228     if (Name == "x86.sse41.mpsadbw")
00229       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
00230                                               NewFn);
00231     if (Name == "x86.avx.dp.ps.256")
00232       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
00233                                               NewFn);
00234     if (Name == "x86.avx2.mpsadbw")
00235       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
00236                                               NewFn);
00237 
00238     // frcz.ss/sd may need to have an argument dropped
00239     if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
00240       F->setName(Name + ".old");
00241       NewFn = Intrinsic::getDeclaration(F->getParent(),
00242                                         Intrinsic::x86_xop_vfrcz_ss);
00243       return true;
00244     }
00245     if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
00246       F->setName(Name + ".old");
00247       NewFn = Intrinsic::getDeclaration(F->getParent(),
00248                                         Intrinsic::x86_xop_vfrcz_sd);
00249       return true;
00250     }
00251     // Fix the FMA4 intrinsics to remove the 4
00252     if (Name.startswith("x86.fma4.")) {
00253       F->setName("llvm.x86.fma" + Name.substr(8));
00254       NewFn = F;
00255       return true;
00256     }
00257     break;
00258   }
00259   }
00260 
00261   //  This may not belong here. This function is effectively being overloaded
00262   //  to both detect an intrinsic which needs upgrading, and to provide the
00263   //  upgraded form of the intrinsic. We should perhaps have two separate
00264   //  functions for this.
00265   return false;
00266 }
00267 
00268 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
00269   NewFn = nullptr;
00270   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
00271   assert(F != NewFn && "Intrinsic function upgraded to the same function");
00272 
00273   // Upgrade intrinsic attributes.  This does not change the function.
00274   if (NewFn)
00275     F = NewFn;
00276   if (Intrinsic::ID id = F->getIntrinsicID())
00277     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
00278   return Upgraded;
00279 }
00280 
00281 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
00282   // Nothing to do yet.
00283   return false;
00284 }
00285 
00286 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
00287 // to byte shuffles.
00288 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
00289                                          Value *Op, unsigned NumLanes,
00290                                          unsigned Shift) {
00291   // Each lane is 16 bytes.
00292   unsigned NumElts = NumLanes * 16;
00293 
00294   // Bitcast from a 64-bit element type to a byte element type.
00295   Op = Builder.CreateBitCast(Op,
00296                              VectorType::get(Type::getInt8Ty(C), NumElts),
00297                              "cast");
00298   // We'll be shuffling in zeroes.
00299   Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
00300 
00301   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
00302   // we'll just return the zero vector.
00303   if (Shift < 16) {
00304     SmallVector<Constant*, 32> Idxs;
00305     // 256-bit version is split into two 16-byte lanes.
00306     for (unsigned l = 0; l != NumElts; l += 16)
00307       for (unsigned i = 0; i != 16; ++i) {
00308         unsigned Idx = NumElts + i - Shift;
00309         if (Idx < NumElts)
00310           Idx -= NumElts - 16; // end of lane, switch operand.
00311         Idxs.push_back(Builder.getInt32(Idx + l));
00312       }
00313 
00314     Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
00315   }
00316 
00317   // Bitcast back to a 64-bit element type.
00318   return Builder.CreateBitCast(Res,
00319                                VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
00320                                "cast");
00321 }
00322 
00323 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
00324 // to byte shuffles.
00325 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
00326                                          Value *Op, unsigned NumLanes,
00327                                          unsigned Shift) {
00328   // Each lane is 16 bytes.
00329   unsigned NumElts = NumLanes * 16;
00330 
00331   // Bitcast from a 64-bit element type to a byte element type.
00332   Op = Builder.CreateBitCast(Op,
00333                              VectorType::get(Type::getInt8Ty(C), NumElts),
00334                              "cast");
00335   // We'll be shuffling in zeroes.
00336   Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
00337 
00338   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
00339   // we'll just return the zero vector.
00340   if (Shift < 16) {
00341     SmallVector<Constant*, 32> Idxs;
00342     // 256-bit version is split into two 16-byte lanes.
00343     for (unsigned l = 0; l != NumElts; l += 16)
00344       for (unsigned i = 0; i != 16; ++i) {
00345         unsigned Idx = i + Shift;
00346         if (Idx >= 16)
00347           Idx += NumElts - 16; // end of lane, switch operand.
00348         Idxs.push_back(Builder.getInt32(Idx + l));
00349       }
00350 
00351     Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
00352   }
00353 
00354   // Bitcast back to a 64-bit element type.
00355   return Builder.CreateBitCast(Res,
00356                                VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
00357                                "cast");
00358 }
00359 
00360 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic into a call to
00361 // the upgraded intrinsic. All argument and return casting must be provided in
00362 // order to seamlessly integrate with existing context.
00363 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
00364   Function *F = CI->getCalledFunction();
00365   LLVMContext &C = CI->getContext();
00366   IRBuilder<> Builder(C);
00367   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
00368 
00369   assert(F && "Intrinsic call is not direct?");
00370 
00371   if (!NewFn) {
00372     // Get the Function's name.
00373     StringRef Name = F->getName();
00374 
00375     Value *Rep;
00376     // Upgrade packed integer vector compares intrinsics to compare instructions
00377     if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
00378         Name.startswith("llvm.x86.avx2.pcmpeq.")) {
00379       Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
00380                                  "pcmpeq");
00381       // need to sign extend since icmp returns vector of i1
00382       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
00383     } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
00384                Name.startswith("llvm.x86.avx2.pcmpgt.")) {
00385       Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
00386                                   "pcmpgt");
00387       // need to sign extend since icmp returns vector of i1
00388       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
00389     } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
00390                Name == "llvm.x86.avx.movnt.ps.256" ||
00391                Name == "llvm.x86.avx.movnt.pd.256") {
00392       IRBuilder<> Builder(C);
00393       Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
00394 
00395       Module *M = F->getParent();
00396       SmallVector<Metadata *, 1> Elts;
00397       Elts.push_back(
00398           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
00399       MDNode *Node = MDNode::get(C, Elts);
00400 
00401       Value *Arg0 = CI->getArgOperand(0);
00402       Value *Arg1 = CI->getArgOperand(1);
00403 
00404       // Convert the type of the pointer to a pointer to the stored type.
00405       Value *BC = Builder.CreateBitCast(Arg0,
00406                                         PointerType::getUnqual(Arg1->getType()),
00407                                         "cast");
00408       StoreInst *SI = Builder.CreateStore(Arg1, BC);
00409       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
00410       SI->setAlignment(32);
00411 
00412       // Remove intrinsic.
00413       CI->eraseFromParent();
00414       return;
00415     } else if (Name.startswith("llvm.x86.xop.vpcom")) {
00416       Intrinsic::ID intID;
00417       if (Name.endswith("ub"))
00418         intID = Intrinsic::x86_xop_vpcomub;
00419       else if (Name.endswith("uw"))
00420         intID = Intrinsic::x86_xop_vpcomuw;
00421       else if (Name.endswith("ud"))
00422         intID = Intrinsic::x86_xop_vpcomud;
00423       else if (Name.endswith("uq"))
00424         intID = Intrinsic::x86_xop_vpcomuq;
00425       else if (Name.endswith("b"))
00426         intID = Intrinsic::x86_xop_vpcomb;
00427       else if (Name.endswith("w"))
00428         intID = Intrinsic::x86_xop_vpcomw;
00429       else if (Name.endswith("d"))
00430         intID = Intrinsic::x86_xop_vpcomd;
00431       else if (Name.endswith("q"))
00432         intID = Intrinsic::x86_xop_vpcomq;
00433       else
00434         llvm_unreachable("Unknown suffix");
00435 
00436       Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
00437       unsigned Imm;
00438       if (Name.startswith("lt"))
00439         Imm = 0;
00440       else if (Name.startswith("le"))
00441         Imm = 1;
00442       else if (Name.startswith("gt"))
00443         Imm = 2;
00444       else if (Name.startswith("ge"))
00445         Imm = 3;
00446       else if (Name.startswith("eq"))
00447         Imm = 4;
00448       else if (Name.startswith("ne"))
00449         Imm = 5;
00450       else if (Name.startswith("false"))
00451         Imm = 6;
00452       else if (Name.startswith("true"))
00453         Imm = 7;
00454       else
00455         llvm_unreachable("Unknown condition");
00456 
00457       Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
00458       Rep =
00459           Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
00460                                      Builder.getInt8(Imm)});
00461     } else if (Name == "llvm.x86.xop.vpcmov") {
00462       Value *Arg0 = CI->getArgOperand(0);
00463       Value *Arg1 = CI->getArgOperand(1);
00464       Value *Sel = CI->getArgOperand(2);
00465       unsigned NumElts = CI->getType()->getVectorNumElements();
00466       Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
00467       Value *NotSel = Builder.CreateXor(Sel, MinusOne);
00468       Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
00469       Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
00470       Rep = Builder.CreateOr(Sel0, Sel1);
00471     } else if (Name == "llvm.x86.sse42.crc32.64.8") {
00472       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
00473                                                Intrinsic::x86_sse42_crc32_32_8);
00474       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
00475       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
00476       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
00477     } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
00478       // Replace broadcasts with a series of insertelements.
00479       Type *VecTy = CI->getType();
00480       Type *EltTy = VecTy->getVectorElementType();
00481       unsigned EltNum = VecTy->getVectorNumElements();
00482       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
00483                                           EltTy->getPointerTo());
00484       Value *Load = Builder.CreateLoad(EltTy, Cast);
00485       Type *I32Ty = Type::getInt32Ty(C);
00486       Rep = UndefValue::get(VecTy);
00487       for (unsigned I = 0; I < EltNum; ++I)
00488         Rep = Builder.CreateInsertElement(Rep, Load,
00489                                           ConstantInt::get(I32Ty, I));
00490     } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
00491       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
00492       VectorType *DstTy = cast<VectorType>(CI->getType());
00493       unsigned NumDstElts = DstTy->getNumElements();
00494 
00495       // Extract a subvector of the first NumDstElts lanes and sign extend.
00496       SmallVector<int, 8> ShuffleMask;
00497       for (int i = 0; i != (int)NumDstElts; ++i)
00498         ShuffleMask.push_back(i);
00499 
00500       Value *SV = Builder.CreateShuffleVector(
00501           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
00502       Rep = Builder.CreateSExt(SV, DstTy);
00503     } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
00504       // Replace vbroadcasts with a vector shuffle.
00505       Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
00506       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
00507                                             PointerType::getUnqual(VT));
00508       Value *Load = Builder.CreateLoad(VT, Op);
00509       const int Idxs[4] = { 0, 1, 0, 1 };
00510       Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
00511                                         Idxs);
00512     } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
00513                Name.startswith("llvm.x86.avx2.vbroadcast")) {
00514       // Replace vp?broadcasts with a vector shuffle.
00515       Value *Op = CI->getArgOperand(0);
00516       unsigned NumElts = CI->getType()->getVectorNumElements();
00517       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
00518       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
00519                                         Constant::getNullValue(MaskTy));
00520     } else if (Name == "llvm.x86.sse2.psll.dq") {
00521       // 128-bit shift left specified in bits.
00522       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00523       Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
00524                                        Shift / 8); // Shift is in bits.
00525     } else if (Name == "llvm.x86.sse2.psrl.dq") {
00526       // 128-bit shift right specified in bits.
00527       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00528       Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
00529                                        Shift / 8); // Shift is in bits.
00530     } else if (Name == "llvm.x86.avx2.psll.dq") {
00531       // 256-bit shift left specified in bits.
00532       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00533       Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
00534                                        Shift / 8); // Shift is in bits.
00535     } else if (Name == "llvm.x86.avx2.psrl.dq") {
00536       // 256-bit shift right specified in bits.
00537       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00538       Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
00539                                        Shift / 8); // Shift is in bits.
00540     } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
00541       // 128-bit shift left specified in bytes.
00542       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00543       Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
00544                                        Shift);
00545     } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
00546       // 128-bit shift right specified in bytes.
00547       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00548       Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
00549                                        Shift);
00550     } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
00551       // 256-bit shift left specified in bytes.
00552       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00553       Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
00554                                        Shift);
00555     } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
00556       // 256-bit shift right specified in bytes.
00557       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00558       Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
00559                                        Shift);
00560     } else if (Name == "llvm.x86.sse41.pblendw" ||
00561                Name == "llvm.x86.sse41.blendpd" ||
00562                Name == "llvm.x86.sse41.blendps" ||
00563                Name == "llvm.x86.avx.blend.pd.256" ||
00564                Name == "llvm.x86.avx.blend.ps.256" ||
00565                Name == "llvm.x86.avx2.pblendw" ||
00566                Name == "llvm.x86.avx2.pblendd.128" ||
00567                Name == "llvm.x86.avx2.pblendd.256") {
00568       Value *Op0 = CI->getArgOperand(0);
00569       Value *Op1 = CI->getArgOperand(1);
00570       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
00571       VectorType *VecTy = cast<VectorType>(CI->getType());
00572       unsigned NumElts = VecTy->getNumElements();
00573 
00574       SmallVector<Constant*, 16> Idxs;
00575       for (unsigned i = 0; i != NumElts; ++i) {
00576         unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
00577         Idxs.push_back(Builder.getInt32(Idx));
00578       }
00579 
00580       Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
00581     } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
00582                Name == "llvm.x86.avx.vinsertf128.ps.256" ||
00583                Name == "llvm.x86.avx.vinsertf128.si.256" ||
00584                Name == "llvm.x86.avx2.vinserti128") {
00585       Value *Op0 = CI->getArgOperand(0);
00586       Value *Op1 = CI->getArgOperand(1);
00587       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
00588       VectorType *VecTy = cast<VectorType>(CI->getType());
00589       unsigned NumElts = VecTy->getNumElements();
00590 
00591       // Mask off the high bits of the immediate value; hardware ignores those.
00592       Imm = Imm & 1;
00593 
00594       // Extend the second operand into a vector that is twice as big.
00595       Value *UndefV = UndefValue::get(Op1->getType());
00596       SmallVector<Constant*, 8> Idxs;
00597       for (unsigned i = 0; i != NumElts; ++i) {
00598         Idxs.push_back(Builder.getInt32(i));
00599       }
00600       Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
00601 
00602       // Insert the second operand into the first operand.
00603 
00604       // Note that there is no guarantee that instruction lowering will actually
00605       // produce a vinsertf128 instruction for the created shuffles. In
00606       // particular, the 0 immediate case involves no lane changes, so it can
00607       // be handled as a blend.
00608 
00609       // Example of shuffle mask for 32-bit elements:
00610       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
00611       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
00612 
00613       SmallVector<Constant*, 8> Idxs2;
00614       // The low half of the result is either the low half of the 1st operand
00615       // or the low half of the 2nd operand (the inserted vector).
00616       for (unsigned i = 0; i != NumElts / 2; ++i) {
00617         unsigned Idx = Imm ? i : (i + NumElts);
00618         Idxs2.push_back(Builder.getInt32(Idx));
00619       }
00620       // The high half of the result is either the low half of the 2nd operand
00621       // (the inserted vector) or the high half of the 1st operand.
00622       for (unsigned i = NumElts / 2; i != NumElts; ++i) {
00623         unsigned Idx = Imm ? (i + NumElts / 2) : i;
00624         Idxs2.push_back(Builder.getInt32(Idx));
00625       }
00626       Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
00627     } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
00628                Name == "llvm.x86.avx.vextractf128.ps.256" ||
00629                Name == "llvm.x86.avx.vextractf128.si.256" ||
00630                Name == "llvm.x86.avx2.vextracti128") {
00631       Value *Op0 = CI->getArgOperand(0);
00632       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00633       VectorType *VecTy = cast<VectorType>(CI->getType());
00634       unsigned NumElts = VecTy->getNumElements();
00635 
00636       // Mask off the high bits of the immediate value; hardware ignores those.
00637       Imm = Imm & 1;
00638 
00639       // Get indexes for either the high half or low half of the input vector.
00640       SmallVector<Constant*, 4> Idxs(NumElts);
00641       for (unsigned i = 0; i != NumElts; ++i) {
00642         unsigned Idx = Imm ? (i + NumElts) : i;
00643         Idxs[i] = Builder.getInt32(Idx);
00644       }
00645 
00646       Value *UndefV = UndefValue::get(Op0->getType());
00647       Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
00648     } else {
00649       bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
00650       if (Name == "llvm.x86.avx.vpermil.pd.256")
00651         PD256 = true;
00652       else if (Name == "llvm.x86.avx.vpermil.pd")
00653         PD128 = true;
00654       else if (Name == "llvm.x86.avx.vpermil.ps.256")
00655         PS256 = true;
00656       else if (Name == "llvm.x86.avx.vpermil.ps")
00657         PS128 = true;
00658 
00659       if (PD256 || PD128 || PS256 || PS128) {
00660         Value *Op0 = CI->getArgOperand(0);
00661         unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
00662         SmallVector<Constant*, 8> Idxs;
00663 
00664         if (PD128)
00665           for (unsigned i = 0; i != 2; ++i)
00666             Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
00667         else if (PD256)
00668           for (unsigned l = 0; l != 4; l+=2)
00669             for (unsigned i = 0; i != 2; ++i)
00670               Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
00671         else if (PS128)
00672           for (unsigned i = 0; i != 4; ++i)
00673             Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
00674         else if (PS256)
00675           for (unsigned l = 0; l != 8; l+=4)
00676             for (unsigned i = 0; i != 4; ++i)
00677               Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
00678         else
00679           llvm_unreachable("Unexpected function");
00680 
00681         Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
00682       } else {
00683         llvm_unreachable("Unknown function for CallInst upgrade.");
00684       }
00685     }
00686 
00687     CI->replaceAllUsesWith(Rep);
00688     CI->eraseFromParent();
00689     return;
00690   }
00691 
00692   std::string Name = CI->getName();
00693   if (!Name.empty())
00694     CI->setName(Name + ".old");
00695 
00696   switch (NewFn->getIntrinsicID()) {
00697   default:
00698     llvm_unreachable("Unknown function for CallInst upgrade.");
00699 
00700   case Intrinsic::arm_neon_vld1:
00701   case Intrinsic::arm_neon_vld2:
00702   case Intrinsic::arm_neon_vld3:
00703   case Intrinsic::arm_neon_vld4:
00704   case Intrinsic::arm_neon_vld2lane:
00705   case Intrinsic::arm_neon_vld3lane:
00706   case Intrinsic::arm_neon_vld4lane:
00707   case Intrinsic::arm_neon_vst1:
00708   case Intrinsic::arm_neon_vst2:
00709   case Intrinsic::arm_neon_vst3:
00710   case Intrinsic::arm_neon_vst4:
00711   case Intrinsic::arm_neon_vst2lane:
00712   case Intrinsic::arm_neon_vst3lane:
00713   case Intrinsic::arm_neon_vst4lane: {
00714     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
00715                                  CI->arg_operands().end());
00716     CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
00717     CI->eraseFromParent();
00718     return;
00719   }
00720 
00721   case Intrinsic::ctlz:
00722   case Intrinsic::cttz:
00723     assert(CI->getNumArgOperands() == 1 &&
00724            "Mismatch between function args and call args");
00725     CI->replaceAllUsesWith(Builder.CreateCall(
00726         NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
00727     CI->eraseFromParent();
00728     return;
00729 
00730   case Intrinsic::objectsize:
00731     CI->replaceAllUsesWith(Builder.CreateCall(
00732         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
00733     CI->eraseFromParent();
00734     return;
00735 
00736   case Intrinsic::ctpop: {
00737     CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
00738     CI->eraseFromParent();
00739     return;
00740   }
00741 
00742   case Intrinsic::x86_xop_vfrcz_ss:
00743   case Intrinsic::x86_xop_vfrcz_sd:
00744     CI->replaceAllUsesWith(
00745         Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
00746     CI->eraseFromParent();
00747     return;
00748 
00749   case Intrinsic::x86_sse41_ptestc:
00750   case Intrinsic::x86_sse41_ptestz:
00751   case Intrinsic::x86_sse41_ptestnzc: {
00752     // The arguments for these intrinsics used to be v4f32, and changed
00753     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
00754     // So, the only thing required is a bitcast for both arguments.
00755     // First, check the arguments have the old type.
00756     Value *Arg0 = CI->getArgOperand(0);
00757     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
00758       return;
00759 
00760     // Old intrinsic, add bitcasts
00761     Value *Arg1 = CI->getArgOperand(1);
00762 
00763     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
00764 
00765     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
00766     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
00767 
00768     CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
00769     CI->replaceAllUsesWith(NewCall);
00770     CI->eraseFromParent();
00771     return;
00772   }
00773 
00774   case Intrinsic::x86_sse41_insertps:
00775   case Intrinsic::x86_sse41_dppd:
00776   case Intrinsic::x86_sse41_dpps:
00777   case Intrinsic::x86_sse41_mpsadbw:
00778   case Intrinsic::x86_avx_dp_ps_256:
00779   case Intrinsic::x86_avx2_mpsadbw: {
00780     // Need to truncate the last argument from i32 to i8 -- this argument models
00781     // an inherently 8-bit immediate operand to these x86 instructions.
00782     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
00783                                  CI->arg_operands().end());
00784 
00785     // Replace the last argument with a trunc.
00786     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
00787 
00788     CallInst *NewCall = Builder.CreateCall(NewFn, Args);
00789     CI->replaceAllUsesWith(NewCall);
00790     CI->eraseFromParent();
00791     return;
00792   }
00793   }
00794 }
00795 
00796 // This tests each Function to determine if it needs upgrading. When we find
00797 // one we are interested in, we then upgrade all calls to reflect the new
00798 // function.
00799 void llvm::UpgradeCallsToIntrinsic(Function* F) {
00800   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
00801 
00802   // Upgrade the function and check if it is a totaly new function.
00803   Function *NewFn;
00804   if (UpgradeIntrinsicFunction(F, NewFn)) {
00805     // Replace all uses to the old function with the new one if necessary.
00806     for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
00807          UI != UE;) {
00808       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
00809         UpgradeIntrinsicCall(CI, NewFn);
00810     }
00811     // Remove old function, no longer used, from the module.
00812     F->eraseFromParent();
00813   }
00814 }
00815 
00816 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
00817   MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
00818   assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
00819   // Check if the tag uses struct-path aware TBAA format.
00820   if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
00821     return;
00822 
00823   if (MD->getNumOperands() == 3) {
00824     Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
00825     MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
00826     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
00827     Metadata *Elts2[] = {ScalarType, ScalarType,
00828                          ConstantAsMetadata::get(Constant::getNullValue(
00829                              Type::getInt64Ty(I->getContext()))),
00830                          MD->getOperand(2)};
00831     I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
00832   } else {
00833     // Create a MDNode <MD, MD, offset 0>
00834     Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
00835                                     Type::getInt64Ty(I->getContext())))};
00836     I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
00837   }
00838 }
00839 
00840 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
00841                                       Instruction *&Temp) {
00842   if (Opc != Instruction::BitCast)
00843     return nullptr;
00844 
00845   Temp = nullptr;
00846   Type *SrcTy = V->getType();
00847   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
00848       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
00849     LLVMContext &Context = V->getContext();
00850 
00851     // We have no information about target data layout, so we assume that
00852     // the maximum pointer size is 64bit.
00853     Type *MidTy = Type::getInt64Ty(Context);
00854     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
00855 
00856     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
00857   }
00858 
00859   return nullptr;
00860 }
00861 
00862 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
00863   if (Opc != Instruction::BitCast)
00864     return nullptr;
00865 
00866   Type *SrcTy = C->getType();
00867   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
00868       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
00869     LLVMContext &Context = C->getContext();
00870 
00871     // We have no information about target data layout, so we assume that
00872     // the maximum pointer size is 64bit.
00873     Type *MidTy = Type::getInt64Ty(Context);
00874 
00875     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
00876                                      DestTy);
00877   }
00878 
00879   return nullptr;
00880 }
00881 
00882 /// Check the debug info version number, if it is out-dated, drop the debug
00883 /// info. Return true if module is modified.
00884 bool llvm::UpgradeDebugInfo(Module &M) {
00885   unsigned Version = getDebugMetadataVersionFromModule(M);
00886   if (Version == DEBUG_METADATA_VERSION)
00887     return false;
00888 
00889   bool RetCode = StripDebugInfo(M);
00890   if (RetCode) {
00891     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
00892     M.getContext().diagnose(DiagVersion);
00893   }
00894   return RetCode;
00895 }
00896 
/// Rewrite a legacy "llvm.vectorizer." metadata string in place.
///
/// The exact string "llvm.vectorizer.unroll" becomes
/// "llvm.loop.interleave.count". Any other string that starts with
/// "llvm.vectorizer." has that prefix replaced by "llvm.loop.vectorize.".
/// All other strings are left unchanged.
void llvm::UpgradeMDStringConstant(std::string &String) {
  // This one name is replaced wholesale, so handle it before the generic
  // prefix rewrite.
  if (String == "llvm.vectorizer.unroll") {
    String = "llvm.loop.interleave.count";
    return;
  }

  const std::string LegacyPrefix = "llvm.vectorizer.";
  // Anchored comparison: the prefix only counts at the very start.
  if (String.compare(0, LegacyPrefix.size(), LegacyPrefix) != 0)
    return;

  String.replace(0, LegacyPrefix.size(), "llvm.loop.vectorize.");
}