LLVM API Documentation

ArchiveWriter.cpp
Go to the documentation of this file.
00001 //===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Builds up an LLVM archive file (.a) containing LLVM bitcode.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/Bitcode/Archive.h"
00015 #include "ArchiveInternals.h"
00016 #include "llvm/ADT/OwningPtr.h"
00017 #include "llvm/Bitcode/ReaderWriter.h"
00018 #include "llvm/IR/Module.h"
00019 #include "llvm/Support/FileSystem.h"
00020 #include "llvm/Support/MemoryBuffer.h"
00021 #include "llvm/Support/Process.h"
00022 #include "llvm/Support/Signals.h"
00023 #include "llvm/Support/system_error.h"
00024 #include <fstream>
00025 #include <iomanip>
00026 #include <ostream>
00027 using namespace llvm;
00028 
// Emit num to ARFile in variable bit rate (VBR) form: seven payload bits per
// byte, least significant group first, with the high bit set on every byte
// except the last one. This keeps symbol table entries a few bytes smaller.
static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
  // Each group that still has more significant bits above it is written with
  // the continuation flag (0x80) set.
  for (; num >= 0x80; num >>= 7)
    ARFile << static_cast<unsigned char>(0x80 | (num & 0x7F));

  // Whatever remains fits in seven bits and terminates the sequence.
  ARFile << static_cast<unsigned char>(num);
}
00044 
// Report how many bytes writeInteger-style VBR encoding needs for num. Each
// output byte carries seven bits of payload, so the answer grows by one at
// every multiple of seven bits. The symbol table size is pre-computed from
// these counts before the table itself is written.
static inline unsigned numVbrBytes(unsigned num) {
  // Walk the thresholds from smallest to largest; the first one that num
  // stays below determines the encoded length.
  if (num < (1u << 7))
    return 1;
  if (num < (1u << 14))
    return 2;
  if (num < (1u << 21))
    return 3;
  if (num < (1u << 28))
    return 4;

  // Anything >= 2^28 takes 5 bytes.
  return 5;
}
00068 
00069 // Create an empty archive.
00070 Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) {
00071   Archive* result = new Archive(FilePath, C);
00072   return result;
00073 }
00074 
00075 // Fill the ArchiveMemberHeader with the information from a member. If
00076 // TruncateNames is true, names are flattened to 15 chars or less. The sz field
00077 // is provided here instead of coming from the mbr because the member might be
00078 // stored compressed and the compressed size is not the ArchiveMember's size.
00079 // Furthermore compressed files have negative size fields to identify them as
00080 // compressed.
00081 bool
00082 Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
00083                     int sz, bool TruncateNames) const {
00084 
00085   // Set the permissions mode, uid and gid
00086   hdr.init();
00087   char buffer[32];
00088   sprintf(buffer, "%-8o", mbr.getMode());
00089   memcpy(hdr.mode,buffer,8);
00090   sprintf(buffer,  "%-6u", mbr.getUser());
00091   memcpy(hdr.uid,buffer,6);
00092   sprintf(buffer,  "%-6u", mbr.getGroup());
00093   memcpy(hdr.gid,buffer,6);
00094 
00095   // Set the last modification date
00096   uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime();
00097   sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
00098   memcpy(hdr.date,buffer,12);
00099 
00100   // Get rid of trailing blanks in the name
00101   std::string mbrPath = mbr.getPath().str();
00102   size_t mbrLen = mbrPath.length();
00103   while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') {
00104     mbrPath.erase(mbrLen-1,1);
00105     mbrLen--;
00106   }
00107 
00108   // Set the name field in one of its various flavors.
00109   bool writeLongName = false;
00110   if (mbr.isStringTable()) {
00111     memcpy(hdr.name,ARFILE_STRTAB_NAME,16);
00112   } else if (mbr.isSVR4SymbolTable()) {
00113     memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16);
00114   } else if (mbr.isBSD4SymbolTable()) {
00115     memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16);
00116   } else if (mbr.isLLVMSymbolTable()) {
00117     memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
00118   } else if (TruncateNames) {
00119     const char* nm = mbrPath.c_str();
00120     unsigned len = mbrPath.length();
00121     size_t slashpos = mbrPath.rfind('/');
00122     if (slashpos != std::string::npos) {
00123       nm += slashpos + 1;
00124       len -= slashpos +1;
00125     }
00126     if (len > 15)
00127       len = 15;
00128     memcpy(hdr.name,nm,len);
00129     hdr.name[len] = '/';
00130   } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) {
00131     memcpy(hdr.name,mbrPath.c_str(),mbrPath.length());
00132     hdr.name[mbrPath.length()] = '/';
00133   } else {
00134     std::string nm = "#1/";
00135     nm += utostr(mbrPath.length());
00136     memcpy(hdr.name,nm.data(),nm.length());
00137     if (sz < 0)
00138       sz -= mbrPath.length();
00139     else
00140       sz += mbrPath.length();
00141     writeLongName = true;
00142   }
00143 
00144   // Set the size field
00145   if (sz < 0) {
00146     buffer[0] = '-';
00147     sprintf(&buffer[1],"%-9u",(unsigned)-sz);
00148   } else {
00149     sprintf(buffer, "%-10u", (unsigned)sz);
00150   }
00151   memcpy(hdr.size,buffer,10);
00152 
00153   return writeLongName;
00154 }
00155 
00156 // Insert a file into the archive before some other member. This also takes care
00157 // of extracting the necessary flags and information from the file.
00158 bool
00159 Archive::addFileBefore(const sys::Path& filePath, iterator where,
00160                         std::string* ErrMsg) {
00161   bool Exists;
00162   if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
00163     if (ErrMsg)
00164       *ErrMsg = "Can not add a non-existent file to archive";
00165     return true;
00166   }
00167 
00168   ArchiveMember* mbr = new ArchiveMember(this);
00169 
00170   mbr->data = 0;
00171   mbr->path = filePath;
00172   const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
00173   if (!FSInfo) {
00174     delete mbr;
00175     return true;
00176   }
00177   mbr->info = *FSInfo;
00178 
00179   unsigned flags = 0;
00180   bool hasSlash = filePath.str().find('/') != std::string::npos;
00181   if (hasSlash)
00182     flags |= ArchiveMember::HasPathFlag;
00183   if (hasSlash || filePath.str().length() > 15)
00184     flags |= ArchiveMember::HasLongFilenameFlag;
00185 
00186   sys::fs::file_magic type;
00187   if (sys::fs::identify_magic(mbr->path.str(), type))
00188     type = sys::fs::file_magic::unknown;
00189   switch (type) {
00190     case sys::fs::file_magic::bitcode:
00191       flags |= ArchiveMember::BitcodeFlag;
00192       break;
00193     default:
00194       break;
00195   }
00196   mbr->flags = flags;
00197   members.insert(where,mbr);
00198   return false;
00199 }
00200 
00201 // Write one member out to the file.
00202 bool
00203 Archive::writeMember(
00204   const ArchiveMember& member,
00205   std::ofstream& ARFile,
00206   bool CreateSymbolTable,
00207   bool TruncateNames,
00208   std::string* ErrMsg
00209 ) {
00210 
00211   unsigned filepos = ARFile.tellp();
00212   filepos -= 8;
00213 
00214   // Get the data and its size either from the
00215   // member's in-memory data or directly from the file.
00216   size_t fSize = member.getSize();
00217   const char *data = (const char*)member.getData();
00218   MemoryBuffer *mFile = 0;
00219   if (!data) {
00220     OwningPtr<MemoryBuffer> File;
00221     if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
00222       if (ErrMsg)
00223         *ErrMsg = ec.message();
00224       return true;
00225     }
00226     mFile = File.take();
00227     data = mFile->getBufferStart();
00228     fSize = mFile->getBufferSize();
00229   }
00230 
00231   // Now that we have the data in memory, update the
00232   // symbol table if it's a bitcode file.
00233   if (CreateSymbolTable && member.isBitcode()) {
00234     std::vector<std::string> symbols;
00235     std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
00236       + ")";
00237     Module* M =
00238       GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
00239 
00240     // If the bitcode parsed successfully
00241     if ( M ) {
00242       for (std::vector<std::string>::iterator SI = symbols.begin(),
00243            SE = symbols.end(); SI != SE; ++SI) {
00244 
00245         std::pair<SymTabType::iterator,bool> Res =
00246           symTab.insert(std::make_pair(*SI,filepos));
00247 
00248         if (Res.second) {
00249           symTabSize += SI->length() +
00250                         numVbrBytes(SI->length()) +
00251                         numVbrBytes(filepos);
00252         }
00253       }
00254       // We don't need this module any more.
00255       delete M;
00256     } else {
00257       delete mFile;
00258       if (ErrMsg)
00259         *ErrMsg = "Can't parse bitcode member: " + member.getPath().str()
00260           + ": " + *ErrMsg;
00261       return true;
00262     }
00263   }
00264 
00265   int hdrSize = fSize;
00266 
00267   // Compute the fields of the header
00268   ArchiveMemberHeader Hdr;
00269   bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames);
00270 
00271   // Write header to archive file
00272   ARFile.write((char*)&Hdr, sizeof(Hdr));
00273 
00274   // Write the long filename if its long
00275   if (writeLongName) {
00276     ARFile.write(member.getPath().str().data(),
00277                  member.getPath().str().length());
00278   }
00279 
00280   // Write the (possibly compressed) member's content to the file.
00281   ARFile.write(data,fSize);
00282 
00283   // Make sure the member is an even length
00284   if ((ARFile.tellp() & 1) == 1)
00285     ARFile << ARFILE_PAD;
00286 
00287   // Close the mapped file if it was opened
00288   delete mFile;
00289   return false;
00290 }
00291 
00292 // Write out the LLVM symbol table as an archive member to the file.
00293 void
00294 Archive::writeSymbolTable(std::ofstream& ARFile) {
00295 
00296   // Construct the symbol table's header
00297   ArchiveMemberHeader Hdr;
00298   Hdr.init();
00299   memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16);
00300   uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime();
00301   char buffer[32];
00302   sprintf(buffer, "%-8o", 0644);
00303   memcpy(Hdr.mode,buffer,8);
00304   sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId());
00305   memcpy(Hdr.uid,buffer,6);
00306   sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId());
00307   memcpy(Hdr.gid,buffer,6);
00308   sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch));
00309   memcpy(Hdr.date,buffer,12);
00310   sprintf(buffer,"%-10u",symTabSize);
00311   memcpy(Hdr.size,buffer,10);
00312 
00313   // Write the header
00314   ARFile.write((char*)&Hdr, sizeof(Hdr));
00315 
00316 #ifndef NDEBUG
00317   // Save the starting position of the symbol tables data content.
00318   unsigned startpos = ARFile.tellp();
00319 #endif
00320 
00321   // Write out the symbols sequentially
00322   for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end();
00323         I != E; ++I)
00324   {
00325     // Write out the file index
00326     writeInteger(I->second, ARFile);
00327     // Write out the length of the symbol
00328     writeInteger(I->first.length(), ARFile);
00329     // Write out the symbol
00330     ARFile.write(I->first.data(), I->first.length());
00331   }
00332 
00333 #ifndef NDEBUG
00334   // Now that we're done with the symbol table, get the ending file position
00335   unsigned endpos = ARFile.tellp();
00336 #endif
00337 
00338   // Make sure that the amount we wrote is what we pre-computed. This is
00339   // critical for file integrity purposes.
00340   assert(endpos - startpos == symTabSize && "Invalid symTabSize computation");
00341 
00342   // Make sure the symbol table is even sized
00343   if (symTabSize % 2 != 0 )
00344     ARFile << ARFILE_PAD;
00345 }
00346 
00347 // Write the entire archive to the file specified when the archive was created.
00348 // This writes to a temporary file first. Options are for creating a symbol
00349 // table, flattening the file names (no directories, 15 chars max) and
00350 // compressing each archive member.
00351 bool
00352 Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames,
00353                      std::string* ErrMsg)
00354 {
00355   // Make sure they haven't opened up the file, not loaded it,
00356   // but are now trying to write it which would wipe out the file.
00357   if (members.empty() && mapfile && mapfile->getBufferSize() > 8) {
00358     if (ErrMsg)
00359       *ErrMsg = "Can't write an archive not opened for writing";
00360     return true;
00361   }
00362 
00363   // Create a temporary file to store the archive in
00364   sys::Path TmpArchive = archPath;
00365   if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
00366     return true;
00367 
00368   // Make sure the temporary gets removed if we crash
00369   sys::RemoveFileOnSignal(TmpArchive);
00370 
00371   // Create archive file for output.
00372   std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
00373                                std::ios::binary;
00374   std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
00375 
00376   // Check for errors opening or creating archive file.
00377   if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
00378     TmpArchive.eraseFromDisk();
00379     if (ErrMsg)
00380       *ErrMsg = "Error opening archive file: " + archPath.str();
00381     return true;
00382   }
00383 
00384   // If we're creating a symbol table, reset it now
00385   if (CreateSymbolTable) {
00386     symTabSize = 0;
00387     symTab.clear();
00388   }
00389 
00390   // Write magic string to archive.
00391   ArchiveFile << ARFILE_MAGIC;
00392 
00393   // Loop over all member files, and write them out. Note that this also
00394   // builds the symbol table, symTab.
00395   for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
00396     if (writeMember(*I, ArchiveFile, CreateSymbolTable,
00397                      TruncateNames, ErrMsg)) {
00398       TmpArchive.eraseFromDisk();
00399       ArchiveFile.close();
00400       return true;
00401     }
00402   }
00403 
00404   // Close archive file.
00405   ArchiveFile.close();
00406 
00407   // Write the symbol table
00408   if (CreateSymbolTable) {
00409     // At this point we have written a file that is a legal archive but it
00410     // doesn't have a symbol table in it. To aid in faster reading and to
00411     // ensure compatibility with other archivers we need to put the symbol
00412     // table first in the file. Unfortunately, this means mapping the file
00413     // we just wrote back in and copying it to the destination file.
00414     sys::Path FinalFilePath = archPath;
00415 
00416     // Map in the archive we just wrote.
00417     {
00418     OwningPtr<MemoryBuffer> arch;
00419     if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) {
00420       if (ErrMsg)
00421         *ErrMsg = ec.message();
00422       return true;
00423     }
00424     const char* base = arch->getBufferStart();
00425 
00426     // Open another temporary file in order to avoid invalidating the
00427     // mmapped data
00428     if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
00429       return true;
00430     sys::RemoveFileOnSignal(FinalFilePath);
00431 
00432     std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
00433     if (!FinalFile.is_open() || FinalFile.bad()) {
00434       TmpArchive.eraseFromDisk();
00435       if (ErrMsg)
00436         *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
00437       return true;
00438     }
00439 
00440     // Write the file magic number
00441     FinalFile << ARFILE_MAGIC;
00442 
00443     // If there is a foreign symbol table, put it into the file now. Most
00444     // ar(1) implementations require the symbol table to be first but llvm-ar
00445     // can deal with it being after a foreign symbol table. This ensures
00446     // compatibility with other ar(1) implementations as well as allowing the
00447     // archive to store both native .o and LLVM .bc files, both indexed.
00448     if (foreignST) {
00449       if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) {
00450         FinalFile.close();
00451         TmpArchive.eraseFromDisk();
00452         return true;
00453       }
00454     }
00455 
00456     // Put out the LLVM symbol table now.
00457     writeSymbolTable(FinalFile);
00458 
00459     // Copy the temporary file contents being sure to skip the file's magic
00460     // number.
00461     FinalFile.write(base + sizeof(ARFILE_MAGIC)-1,
00462       arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1);
00463 
00464     // Close up shop
00465     FinalFile.close();
00466     } // free arch.
00467 
00468     // Move the final file over top of TmpArchive
00469     if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
00470       return true;
00471   }
00472 
00473   // Before we replace the actual archive, we need to forget all the
00474   // members, since they point to data in that old archive. We need to do
00475   // this because we cannot replace an open file on Windows.
00476   cleanUpMemory();
00477 
00478   if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
00479     return true;
00480 
00481   // Set correct read and write permissions after temporary file is moved
00482   // to final destination path.
00483   if (archPath.makeReadableOnDisk(ErrMsg))
00484     return true;
00485   if (archPath.makeWriteableOnDisk(ErrMsg))
00486     return true;
00487 
00488   return false;
00489 }