LLVM  mainline
ExternalFunctions.cpp
Go to the documentation of this file.
00001 //===-- ExternalFunctions.cpp - Implement External Functions --------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file contains code to deal with invoking "external" functions, and
00011 //  also code that implements "exported" external functions.
00012 //
00013 //  There are currently two mechanisms for handling external functions in the
00014 //  Interpreter.  The first is to implement lle_* wrapper functions that are
00015 //  specific to well-known library functions which manually translate the
00016 //  arguments from GenericValues and make the call.  If such a wrapper does
00017 //  not exist, and libffi is available, then the Interpreter will attempt to
00018 //  invoke the function using libffi, after finding its address.
00019 //
00020 //===----------------------------------------------------------------------===//
00021 
00022 #include "Interpreter.h"
00023 #include "llvm/Config/config.h"     // Detect libffi
00024 #include "llvm/IR/DataLayout.h"
00025 #include "llvm/IR/DerivedTypes.h"
00026 #include "llvm/IR/Module.h"
00027 #include "llvm/Support/DynamicLibrary.h"
00028 #include "llvm/Support/ErrorHandling.h"
00029 #include "llvm/Support/ManagedStatic.h"
00030 #include "llvm/Support/Mutex.h"
00031 #include "llvm/Support/UniqueLock.h"
00032 #include <cmath>
00033 #include <csignal>
00034 #include <cstdio>
00035 #include <cstring>
00036 #include <map>
00037 
00038 #ifdef HAVE_FFI_CALL
00039 #ifdef HAVE_FFI_H
00040 #include <ffi.h>
00041 #define USE_LIBFFI
00042 #elif HAVE_FFI_FFI_H
00043 #include <ffi/ffi.h>
00044 #define USE_LIBFFI
00045 #endif
00046 #endif
00047 
00048 using namespace llvm;
00049 
00050 static ManagedStatic<sys::Mutex> FunctionsLock;
00051 
00052 typedef GenericValue (*ExFunc)(FunctionType *, ArrayRef<GenericValue>);
00053 static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
00054 static ManagedStatic<std::map<std::string, ExFunc> > FuncNames;
00055 
00056 #ifdef USE_LIBFFI
00057 typedef void (*RawFunc)();
00058 static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions;
00059 #endif
00060 
00061 static Interpreter *TheInterpreter;
00062 
00063 static char getTypeID(Type *Ty) {
00064   switch (Ty->getTypeID()) {
00065   case Type::VoidTyID:    return 'V';
00066   case Type::IntegerTyID:
00067     switch (cast<IntegerType>(Ty)->getBitWidth()) {
00068       case 1:  return 'o';
00069       case 8:  return 'B';
00070       case 16: return 'S';
00071       case 32: return 'I';
00072       case 64: return 'L';
00073       default: return 'N';
00074     }
00075   case Type::FloatTyID:   return 'F';
00076   case Type::DoubleTyID:  return 'D';
00077   case Type::PointerTyID: return 'P';
00078   case Type::FunctionTyID:return 'M';
00079   case Type::StructTyID:  return 'T';
00080   case Type::ArrayTyID:   return 'A';
00081   default: return 'U';
00082   }
00083 }
00084 
00085 // Try to find address of external function given a Function object.
00086 // Please note, that interpreter doesn't know how to assemble a
00087 // real call in general case (this is JIT job), that's why it assumes,
00088 // that all external functions has the same (and pretty "general") signature.
00089 // The typical example of such functions are "lle_X_" ones.
00090 static ExFunc lookupFunction(const Function *F) {
00091   // Function not found, look it up... start by figuring out what the
00092   // composite function name should be.
00093   std::string ExtName = "lle_";
00094   FunctionType *FT = F->getFunctionType();
00095   for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
00096     ExtName += getTypeID(FT->getContainedType(i));
00097   ExtName += ("_" + F->getName()).str();
00098 
00099   sys::ScopedLock Writer(*FunctionsLock);
00100   ExFunc FnPtr = (*FuncNames)[ExtName];
00101   if (!FnPtr)
00102     FnPtr = (*FuncNames)[("lle_X_" + F->getName()).str()];
00103   if (!FnPtr)  // Try calling a generic function... if it exists...
00104     FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
00105         ("lle_X_" + F->getName()).str());
00106   if (FnPtr)
00107     ExportedFunctions->insert(std::make_pair(F, FnPtr));  // Cache for later
00108   return FnPtr;
00109 }
00110 
00111 #ifdef USE_LIBFFI
00112 static ffi_type *ffiTypeFor(Type *Ty) {
00113   switch (Ty->getTypeID()) {
00114     case Type::VoidTyID: return &ffi_type_void;
00115     case Type::IntegerTyID:
00116       switch (cast<IntegerType>(Ty)->getBitWidth()) {
00117         case 8:  return &ffi_type_sint8;
00118         case 16: return &ffi_type_sint16;
00119         case 32: return &ffi_type_sint32;
00120         case 64: return &ffi_type_sint64;
00121       }
00122     case Type::FloatTyID:   return &ffi_type_float;
00123     case Type::DoubleTyID:  return &ffi_type_double;
00124     case Type::PointerTyID: return &ffi_type_pointer;
00125     default: break;
00126   }
00127   // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
00128   report_fatal_error("Type could not be mapped for use with libffi.");
00129   return NULL;
00130 }
00131 
00132 static void *ffiValueFor(Type *Ty, const GenericValue &AV,
00133                          void *ArgDataPtr) {
00134   switch (Ty->getTypeID()) {
00135     case Type::IntegerTyID:
00136       switch (cast<IntegerType>(Ty)->getBitWidth()) {
00137         case 8: {
00138           int8_t *I8Ptr = (int8_t *) ArgDataPtr;
00139           *I8Ptr = (int8_t) AV.IntVal.getZExtValue();
00140           return ArgDataPtr;
00141         }
00142         case 16: {
00143           int16_t *I16Ptr = (int16_t *) ArgDataPtr;
00144           *I16Ptr = (int16_t) AV.IntVal.getZExtValue();
00145           return ArgDataPtr;
00146         }
00147         case 32: {
00148           int32_t *I32Ptr = (int32_t *) ArgDataPtr;
00149           *I32Ptr = (int32_t) AV.IntVal.getZExtValue();
00150           return ArgDataPtr;
00151         }
00152         case 64: {
00153           int64_t *I64Ptr = (int64_t *) ArgDataPtr;
00154           *I64Ptr = (int64_t) AV.IntVal.getZExtValue();
00155           return ArgDataPtr;
00156         }
00157       }
00158     case Type::FloatTyID: {
00159       float *FloatPtr = (float *) ArgDataPtr;
00160       *FloatPtr = AV.FloatVal;
00161       return ArgDataPtr;
00162     }
00163     case Type::DoubleTyID: {
00164       double *DoublePtr = (double *) ArgDataPtr;
00165       *DoublePtr = AV.DoubleVal;
00166       return ArgDataPtr;
00167     }
00168     case Type::PointerTyID: {
00169       void **PtrPtr = (void **) ArgDataPtr;
00170       *PtrPtr = GVTOP(AV);
00171       return ArgDataPtr;
00172     }
00173     default: break;
00174   }
00175   // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
00176   report_fatal_error("Type value could not be mapped for use with libffi.");
00177   return NULL;
00178 }
00179 
00180 static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
00181                       const DataLayout *TD, GenericValue &Result) {
00182   ffi_cif cif;
00183   FunctionType *FTy = F->getFunctionType();
00184   const unsigned NumArgs = F->arg_size();
00185 
00186   // TODO: We don't have type information about the remaining arguments, because
00187   // this information is never passed into ExecutionEngine::runFunction().
00188   if (ArgVals.size() > NumArgs && F->isVarArg()) {
00189     report_fatal_error("Calling external var arg function '" + F->getName()
00190                       + "' is not supported by the Interpreter.");
00191   }
00192 
00193   unsigned ArgBytes = 0;
00194 
00195   std::vector<ffi_type*> args(NumArgs);
00196   for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
00197        A != E; ++A) {
00198     const unsigned ArgNo = A->getArgNo();
00199     Type *ArgTy = FTy->getParamType(ArgNo);
00200     args[ArgNo] = ffiTypeFor(ArgTy);
00201     ArgBytes += TD->getTypeStoreSize(ArgTy);
00202   }
00203 
00204   SmallVector<uint8_t, 128> ArgData;
00205   ArgData.resize(ArgBytes);
00206   uint8_t *ArgDataPtr = ArgData.data();
00207   SmallVector<void*, 16> values(NumArgs);
00208   for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end();
00209        A != E; ++A) {
00210     const unsigned ArgNo = A->getArgNo();
00211     Type *ArgTy = FTy->getParamType(ArgNo);
00212     values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr);
00213     ArgDataPtr += TD->getTypeStoreSize(ArgTy);
00214   }
00215 
00216   Type *RetTy = FTy->getReturnType();
00217   ffi_type *rtype = ffiTypeFor(RetTy);
00218 
00219   if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
00220     SmallVector<uint8_t, 128> ret;
00221     if (RetTy->getTypeID() != Type::VoidTyID)
00222       ret.resize(TD->getTypeStoreSize(RetTy));
00223     ffi_call(&cif, Fn, ret.data(), values.data());
00224     switch (RetTy->getTypeID()) {
00225       case Type::IntegerTyID:
00226         switch (cast<IntegerType>(RetTy)->getBitWidth()) {
00227           case 8:  Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break;
00228           case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break;
00229           case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break;
00230           case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break;
00231         }
00232         break;
00233       case Type::FloatTyID:   Result.FloatVal   = *(float *) ret.data(); break;
00234       case Type::DoubleTyID:  Result.DoubleVal  = *(double*) ret.data(); break;
00235       case Type::PointerTyID: Result.PointerVal = *(void **) ret.data(); break;
00236       default: break;
00237     }
00238     return true;
00239   }
00240 
00241   return false;
00242 }
00243 #endif // USE_LIBFFI
00244 
00245 GenericValue Interpreter::callExternalFunction(Function *F,
00246                                                ArrayRef<GenericValue> ArgVals) {
00247   TheInterpreter = this;
00248 
00249   unique_lock<sys::Mutex> Guard(*FunctionsLock);
00250 
00251   // Do a lookup to see if the function is in our cache... this should just be a
00252   // deferred annotation!
00253   std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F);
00254   if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F)
00255                                                    : FI->second) {
00256     Guard.unlock();
00257     return Fn(F->getFunctionType(), ArgVals);
00258   }
00259 
00260 #ifdef USE_LIBFFI
00261   std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F);
00262   RawFunc RawFn;
00263   if (RF == RawFunctions->end()) {
00264     RawFn = (RawFunc)(intptr_t)
00265       sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
00266     if (!RawFn)
00267       RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
00268     if (RawFn != 0)
00269       RawFunctions->insert(std::make_pair(F, RawFn));  // Cache for later
00270   } else {
00271     RawFn = RF->second;
00272   }
00273 
00274   Guard.unlock();
00275 
00276   GenericValue Result;
00277   if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getDataLayout(), Result))
00278     return Result;
00279 #endif // USE_LIBFFI
00280 
00281   if (F->getName() == "__main")
00282     errs() << "Tried to execute an unknown external function: "
00283       << *F->getType() << " __main\n";
00284   else
00285     report_fatal_error("Tried to execute an unknown external function: " +
00286                        F->getName());
00287 #ifndef USE_LIBFFI
00288   errs() << "Recompiling LLVM with --enable-libffi might help.\n";
00289 #endif
00290   return GenericValue();
00291 }
00292 
00293 
00294 //===----------------------------------------------------------------------===//
00295 //  Functions "exported" to the running application...
00296 //
00297 
00298 // void atexit(Function*)
00299 static GenericValue lle_X_atexit(FunctionType *FT,
00300                                  ArrayRef<GenericValue> Args) {
00301   assert(Args.size() == 1);
00302   TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0]));
00303   GenericValue GV;
00304   GV.IntVal = 0;
00305   return GV;
00306 }
00307 
00308 // void exit(int)
00309 static GenericValue lle_X_exit(FunctionType *FT, ArrayRef<GenericValue> Args) {
00310   TheInterpreter->exitCalled(Args[0]);
00311   return GenericValue();
00312 }
00313 
00314 // void abort(void)
00315 static GenericValue lle_X_abort(FunctionType *FT, ArrayRef<GenericValue> Args) {
00316   //FIXME: should we report or raise here?
00317   //report_fatal_error("Interpreted program raised SIGABRT");
00318   raise (SIGABRT);
00319   return GenericValue();
00320 }
00321 
00322 // int sprintf(char *, const char *, ...) - a very rough implementation to make
00323 // output useful.
00324 static GenericValue lle_X_sprintf(FunctionType *FT,
00325                                   ArrayRef<GenericValue> Args) {
00326   char *OutputBuffer = (char *)GVTOP(Args[0]);
00327   const char *FmtStr = (const char *)GVTOP(Args[1]);
00328   unsigned ArgNo = 2;
00329 
00330   // printf should return # chars printed.  This is completely incorrect, but
00331   // close enough for now.
00332   GenericValue GV;
00333   GV.IntVal = APInt(32, strlen(FmtStr));
00334   while (1) {
00335     switch (*FmtStr) {
00336     case 0: return GV;             // Null terminator...
00337     default:                       // Normal nonspecial character
00338       sprintf(OutputBuffer++, "%c", *FmtStr++);
00339       break;
00340     case '\\': {                   // Handle escape codes
00341       sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1));
00342       FmtStr += 2; OutputBuffer += 2;
00343       break;
00344     }
00345     case '%': {                    // Handle format specifiers
00346       char FmtBuf[100] = "", Buffer[1000] = "";
00347       char *FB = FmtBuf;
00348       *FB++ = *FmtStr++;
00349       char Last = *FB++ = *FmtStr++;
00350       unsigned HowLong = 0;
00351       while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' &&
00352              Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' &&
00353              Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' &&
00354              Last != 'p' && Last != 's' && Last != '%') {
00355         if (Last == 'l' || Last == 'L') HowLong++;  // Keep track of l's
00356         Last = *FB++ = *FmtStr++;
00357       }
00358       *FB = 0;
00359 
00360       switch (Last) {
00361       case '%':
00362         memcpy(Buffer, "%", 2); break;
00363       case 'c':
00364         sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
00365         break;
00366       case 'd': case 'i':
00367       case 'u': case 'o':
00368       case 'x': case 'X':
00369         if (HowLong >= 1) {
00370           if (HowLong == 1 &&
00371               TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
00372               sizeof(long) < sizeof(int64_t)) {
00373             // Make sure we use %lld with a 64 bit argument because we might be
00374             // compiling LLI on a 32 bit compiler.
00375             unsigned Size = strlen(FmtBuf);
00376             FmtBuf[Size] = FmtBuf[Size-1];
00377             FmtBuf[Size+1] = 0;
00378             FmtBuf[Size-1] = 'l';
00379           }
00380           sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue());
00381         } else
00382           sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue()));
00383         break;
00384       case 'e': case 'E': case 'g': case 'G': case 'f':
00385         sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break;
00386       case 'p':
00387         sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break;
00388       case 's':
00389         sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break;
00390       default:
00391         errs() << "<unknown printf code '" << *FmtStr << "'!>";
00392         ArgNo++; break;
00393       }
00394       size_t Len = strlen(Buffer);
00395       memcpy(OutputBuffer, Buffer, Len + 1);
00396       OutputBuffer += Len;
00397       }
00398       break;
00399     }
00400   }
00401   return GV;
00402 }
00403 
00404 // int printf(const char *, ...) - a very rough implementation to make output
00405 // useful.
00406 static GenericValue lle_X_printf(FunctionType *FT,
00407                                  ArrayRef<GenericValue> Args) {
00408   char Buffer[10000];
00409   std::vector<GenericValue> NewArgs;
00410   NewArgs.push_back(PTOGV((void*)&Buffer[0]));
00411   NewArgs.insert(NewArgs.end(), Args.begin(), Args.end());
00412   GenericValue GV = lle_X_sprintf(FT, NewArgs);
00413   outs() << Buffer;
00414   return GV;
00415 }
00416 
00417 // int sscanf(const char *format, ...);
00418 static GenericValue lle_X_sscanf(FunctionType *FT,
00419                                  ArrayRef<GenericValue> args) {
00420   assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
00421 
00422   char *Args[10];
00423   for (unsigned i = 0; i < args.size(); ++i)
00424     Args[i] = (char*)GVTOP(args[i]);
00425 
00426   GenericValue GV;
00427   GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
00428                     Args[5], Args[6], Args[7], Args[8], Args[9]));
00429   return GV;
00430 }
00431 
00432 // int scanf(const char *format, ...);
00433 static GenericValue lle_X_scanf(FunctionType *FT, ArrayRef<GenericValue> args) {
00434   assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
00435 
00436   char *Args[10];
00437   for (unsigned i = 0; i < args.size(); ++i)
00438     Args[i] = (char*)GVTOP(args[i]);
00439 
00440   GenericValue GV;
00441   GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
00442                     Args[5], Args[6], Args[7], Args[8], Args[9]));
00443   return GV;
00444 }
00445 
00446 // int fprintf(FILE *, const char *, ...) - a very rough implementation to make
00447 // output useful.
00448 static GenericValue lle_X_fprintf(FunctionType *FT,
00449                                   ArrayRef<GenericValue> Args) {
00450   assert(Args.size() >= 2);
00451   char Buffer[10000];
00452   std::vector<GenericValue> NewArgs;
00453   NewArgs.push_back(PTOGV(Buffer));
00454   NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end());
00455   GenericValue GV = lle_X_sprintf(FT, NewArgs);
00456 
00457   fputs(Buffer, (FILE *) GVTOP(Args[0]));
00458   return GV;
00459 }
00460 
00461 static GenericValue lle_X_memset(FunctionType *FT,
00462                                  ArrayRef<GenericValue> Args) {
00463   int val = (int)Args[1].IntVal.getSExtValue();
00464   size_t len = (size_t)Args[2].IntVal.getZExtValue();
00465   memset((void *)GVTOP(Args[0]), val, len);
00466   // llvm.memset.* returns void, lle_X_* returns GenericValue,
00467   // so here we return GenericValue with IntVal set to zero
00468   GenericValue GV;
00469   GV.IntVal = 0;
00470   return GV;
00471 }
00472 
00473 static GenericValue lle_X_memcpy(FunctionType *FT,
00474                                  ArrayRef<GenericValue> Args) {
00475   memcpy(GVTOP(Args[0]), GVTOP(Args[1]),
00476          (size_t)(Args[2].IntVal.getLimitedValue()));
00477 
00478   // llvm.memcpy* returns void, lle_X_* returns GenericValue,
00479   // so here we return GenericValue with IntVal set to zero
00480   GenericValue GV;
00481   GV.IntVal = 0;
00482   return GV;
00483 }
00484 
00485 void Interpreter::initializeExternalFunctions() {
00486   sys::ScopedLock Writer(*FunctionsLock);
00487   (*FuncNames)["lle_X_atexit"]       = lle_X_atexit;
00488   (*FuncNames)["lle_X_exit"]         = lle_X_exit;
00489   (*FuncNames)["lle_X_abort"]        = lle_X_abort;
00490 
00491   (*FuncNames)["lle_X_printf"]       = lle_X_printf;
00492   (*FuncNames)["lle_X_sprintf"]      = lle_X_sprintf;
00493   (*FuncNames)["lle_X_sscanf"]       = lle_X_sscanf;
00494   (*FuncNames)["lle_X_scanf"]        = lle_X_scanf;
00495   (*FuncNames)["lle_X_fprintf"]      = lle_X_fprintf;
00496   (*FuncNames)["lle_X_memset"]       = lle_X_memset;
00497   (*FuncNames)["lle_X_memcpy"]       = lle_X_memcpy;
00498 }