LLVM  3.7.0
Archive.cpp
Go to the documentation of this file.
1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Archive class, which reads ar archive files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/Endian.h"
20 #include "llvm/Support/Path.h"
21 
22 using namespace llvm;
23 using namespace object;
24 using namespace llvm::support::endian;
25 
26 static const char *const Magic = "!<arch>\n";
27 static const char *const ThinMagic = "!<thin>\n";
28 
29 void Archive::anchor() { }
30 
32  char EndCond;
33  if (Name[0] == '/' || Name[0] == '#')
34  EndCond = ' ';
35  else
36  EndCond = '/';
38  llvm::StringRef(Name, sizeof(Name)).find(EndCond);
39  if (end == llvm::StringRef::npos)
40  end = sizeof(Name);
41  assert(end <= sizeof(Name) && end > 0);
42  // Don't include the EndCond if there is one.
43  return llvm::StringRef(Name, end);
44 }
45 
46 uint32_t ArchiveMemberHeader::getSize() const {
47  uint32_t Ret;
48  if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
49  llvm_unreachable("Size is not a decimal number.");
50  return Ret;
51 }
52 
54  unsigned Ret;
55  if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
56  llvm_unreachable("Access mode is not an octal number.");
57  return static_cast<sys::fs::perms>(Ret);
58 }
59 
61  unsigned Seconds;
62  if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
63  .getAsInteger(10, Seconds))
64  llvm_unreachable("Last modified time not a decimal number.");
65 
67  Ret.fromEpochTime(Seconds);
68  return Ret;
69 }
70 
71 unsigned ArchiveMemberHeader::getUID() const {
72  unsigned Ret;
73  if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
74  llvm_unreachable("UID time not a decimal number.");
75  return Ret;
76 }
77 
78 unsigned ArchiveMemberHeader::getGID() const {
79  unsigned Ret;
80  if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
81  llvm_unreachable("GID time not a decimal number.");
82  return Ret;
83 }
84 
85 Archive::Child::Child(const Archive *Parent, const char *Start)
86  : Parent(Parent) {
87  if (!Start)
88  return;
89 
90  const ArchiveMemberHeader *Header =
91  reinterpret_cast<const ArchiveMemberHeader *>(Start);
92  uint64_t Size = sizeof(ArchiveMemberHeader);
93  if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//")
94  Size += Header->getSize();
95  Data = StringRef(Start, Size);
96 
97  // Setup StartOfFile and PaddingBytes.
98  StartOfFile = sizeof(ArchiveMemberHeader);
99  // Don't include attached name.
100  StringRef Name = Header->getName();
101  if (Name.startswith("#1/")) {
102  uint64_t NameSize;
103  if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
104  llvm_unreachable("Long name length is not an integer");
105  StartOfFile += NameSize;
106  }
107 }
108 
109 uint64_t Archive::Child::getSize() const {
110  if (Parent->IsThin)
111  return getHeader()->getSize();
112  return Data.size() - StartOfFile;
113 }
114 
115 uint64_t Archive::Child::getRawSize() const {
116  return getHeader()->getSize();
117 }
118 
120  if (!Parent->IsThin)
121  return StringRef(Data.data() + StartOfFile, getSize());
123  if (std::error_code EC = Name.getError())
124  return EC;
125  SmallString<128> FullName =
126  Parent->getMemoryBufferRef().getBufferIdentifier();
127  sys::path::remove_filename(FullName);
128  sys::path::append(FullName, *Name);
130  if (std::error_code EC = Buf.getError())
131  return EC;
132  Parent->ThinBuffers.push_back(std::move(*Buf));
133  return Parent->ThinBuffers.back()->getBuffer();
134 }
135 
137  size_t SpaceToSkip = Data.size();
138  // If it's odd, add 1 to make it even.
139  if (SpaceToSkip & 1)
140  ++SpaceToSkip;
141 
142  const char *NextLoc = Data.data() + SpaceToSkip;
143 
144  // Check to see if this is past the end of the archive.
145  if (NextLoc >= Parent->Data.getBufferEnd())
146  return Child(Parent, nullptr);
147 
148  return Child(Parent, NextLoc);
149 }
150 
152  const char *a = Parent->Data.getBuffer().data();
153  const char *c = Data.data();
154  uint64_t offset = c - a;
155  return offset;
156 }
157 
159  StringRef name = getRawName();
160  // Check if it's a special name.
161  if (name[0] == '/') {
162  if (name.size() == 1) // Linker member.
163  return name;
164  if (name.size() == 2 && name[1] == '/') // String table.
165  return name;
166  // It's a long name.
167  // Get the offset.
168  std::size_t offset;
169  if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
170  llvm_unreachable("Long name offset is not an integer");
171  const char *addr = Parent->StringTable->Data.begin()
172  + sizeof(ArchiveMemberHeader)
173  + offset;
174  // Verify it.
175  if (Parent->StringTable == Parent->child_end()
176  || addr < (Parent->StringTable->Data.begin()
177  + sizeof(ArchiveMemberHeader))
178  || addr > (Parent->StringTable->Data.begin()
179  + sizeof(ArchiveMemberHeader)
180  + Parent->StringTable->getSize()))
182 
183  // GNU long file names end with a "/\n".
184  if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
185  StringRef::size_type End = StringRef(addr).find('\n');
186  return StringRef(addr, End - 1);
187  }
188  return StringRef(addr);
189  } else if (name.startswith("#1/")) {
190  uint64_t name_size;
191  if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
192  llvm_unreachable("Long name length is not an ingeter");
193  return Data.substr(sizeof(ArchiveMemberHeader), name_size)
194  .rtrim(StringRef("\0", 1));
195  }
196  // It's a simple name.
197  if (name[name.size() - 1] == '/')
198  return name.substr(0, name.size() - 1);
199  return name;
200 }
201 
203  ErrorOr<StringRef> NameOrErr = getName();
204  if (std::error_code EC = NameOrErr.getError())
205  return EC;
206  StringRef Name = NameOrErr.get();
207  ErrorOr<StringRef> Buf = getBuffer();
208  if (std::error_code EC = Buf.getError())
209  return EC;
210  return MemoryBufferRef(*Buf, Name);
211 }
212 
216  if (std::error_code EC = BuffOrErr.getError())
217  return EC;
218 
219  return createBinary(BuffOrErr.get(), Context);
220 }
221 
223  std::error_code EC;
224  std::unique_ptr<Archive> Ret(new Archive(Source, EC));
225  if (EC)
226  return EC;
227  return std::move(Ret);
228 }
229 
231  : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()),
232  StringTable(child_end()), FirstRegular(child_end()) {
233  StringRef Buffer = Data.getBuffer();
234  // Check for sufficient magic.
235  if (Buffer.startswith(ThinMagic)) {
236  IsThin = true;
237  } else if (Buffer.startswith(Magic)) {
238  IsThin = false;
239  } else {
241  return;
242  }
243 
244  // Get the special members.
245  child_iterator i = child_begin(false);
247 
248  if (i == e) {
249  ec = std::error_code();
250  return;
251  }
252 
253  StringRef Name = i->getRawName();
254 
255  // Below is the pattern that is used to figure out the archive format
256  // GNU archive format
257  // First member : / (may exist, if it exists, points to the symbol table )
258  // Second member : // (may exist, if it exists, points to the string table)
259  // Note : The string table is used if the filename exceeds 15 characters
260  // BSD archive format
261  // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
262  // There is no string table. If the filename exceeds 15 characters or has an
263  // embedded space, the filename is #1/<size>, where <size> is the length
264  // of the filename that needs to be read after the archive header.
265  // COFF archive format
266  // First member : /
267  // Second member : / (provides a directory of symbols)
268  // Third member : // (may exist, if it exists, contains the string table)
269  // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
270  // even if the string table is empty. However, lib.exe does not in fact
271  // seem to create the third member if there's no member whose filename
272  // exceeds 15 characters. So the third member is optional.
273 
274  if (Name == "__.SYMDEF") {
275  Format = K_BSD;
276  SymbolTable = i;
277  ++i;
278  FirstRegular = i;
279  ec = std::error_code();
280  return;
281  }
282 
283  if (Name.startswith("#1/")) {
284  Format = K_BSD;
285  // We know this is BSD, so getName will work since there is no string table.
286  ErrorOr<StringRef> NameOrErr = i->getName();
287  ec = NameOrErr.getError();
288  if (ec)
289  return;
290  Name = NameOrErr.get();
291  if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
292  SymbolTable = i;
293  ++i;
294  }
295  FirstRegular = i;
296  return;
297  }
298 
299  // MIPS 64-bit ELF archives use a special format of a symbol table.
300  // This format is marked by `ar_name` field equals to "/SYM64/".
301  // For detailed description see page 96 in the following document:
302  // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
303 
304  bool has64SymTable = false;
305  if (Name == "/" || Name == "/SYM64/") {
306  SymbolTable = i;
307  if (Name == "/SYM64/")
308  has64SymTable = true;
309 
310  ++i;
311  if (i == e) {
312  ec = std::error_code();
313  return;
314  }
315  Name = i->getRawName();
316  }
317 
318  if (Name == "//") {
319  Format = has64SymTable ? K_MIPS64 : K_GNU;
320  StringTable = i;
321  ++i;
322  FirstRegular = i;
323  ec = std::error_code();
324  return;
325  }
326 
327  if (Name[0] != '/') {
328  Format = has64SymTable ? K_MIPS64 : K_GNU;
329  FirstRegular = i;
330  ec = std::error_code();
331  return;
332  }
333 
334  if (Name != "/") {
336  return;
337  }
338 
339  Format = K_COFF;
340  SymbolTable = i;
341 
342  ++i;
343  if (i == e) {
344  FirstRegular = i;
345  ec = std::error_code();
346  return;
347  }
348 
349  Name = i->getRawName();
350 
351  if (Name == "//") {
352  StringTable = i;
353  ++i;
354  }
355 
356  FirstRegular = i;
357  ec = std::error_code();
358 }
359 
361  if (Data.getBufferSize() == 8) // empty archive.
362  return child_end();
363 
364  if (SkipInternal)
365  return FirstRegular;
366 
367  const char *Loc = Data.getBufferStart() + strlen(Magic);
368  Child c(this, Loc);
369  return c;
370 }
371 
373  return Child(this, nullptr);
374 }
375 
377  return Parent->getSymbolTable().begin() + StringIndex;
378 }
379 
381  const char *Buf = Parent->getSymbolTable().begin();
382  const char *Offsets = Buf;
383  if (Parent->kind() == K_MIPS64)
384  Offsets += sizeof(uint64_t);
385  else
386  Offsets += sizeof(uint32_t);
387  uint32_t Offset = 0;
388  if (Parent->kind() == K_GNU) {
389  Offset = read32be(Offsets + SymbolIndex * 4);
390  } else if (Parent->kind() == K_MIPS64) {
391  Offset = read64be(Offsets + SymbolIndex * 8);
392  } else if (Parent->kind() == K_BSD) {
393  // The SymbolIndex is an index into the ranlib structs that start at
394  // Offsets (the first uint32_t is the number of bytes of the ranlib
395  // structs). The ranlib structs are a pair of uint32_t's the first
396  // being a string table offset and the second being the offset into
397  // the archive of the member that defines the symbol. Which is what
398  // is needed here.
399  Offset = read32le(Offsets + SymbolIndex * 8 + 4);
400  } else {
401  // Skip offsets.
402  uint32_t MemberCount = read32le(Buf);
403  Buf += MemberCount * 4 + 4;
404 
405  uint32_t SymbolCount = read32le(Buf);
406  if (SymbolIndex >= SymbolCount)
408 
409  // Skip SymbolCount to get to the indices table.
410  const char *Indices = Buf + 4;
411 
412  // Get the index of the offset in the file member offset table for this
413  // symbol.
414  uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
415  // Subtract 1 since OffsetIndex is 1 based.
416  --OffsetIndex;
417 
418  if (OffsetIndex >= MemberCount)
420 
421  Offset = read32le(Offsets + OffsetIndex * 4);
422  }
423 
424  const char *Loc = Parent->getData().begin() + Offset;
425  child_iterator Iter(Child(Parent, Loc));
426  return Iter;
427 }
428 
430  Symbol t(*this);
431  if (Parent->kind() == K_BSD) {
432  // t.StringIndex is an offset from the start of the __.SYMDEF or
433  // "__.SYMDEF SORTED" member into the string table for the ranlib
434  // struct indexed by t.SymbolIndex . To change t.StringIndex to the
435  // offset in the string table for t.SymbolIndex+1 we subtract the
436  // offset from the start of the string table for t.SymbolIndex
437  // and add the offset of the string table for t.SymbolIndex+1.
438 
439  // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
440  // which is the number of bytes of ranlib structs that follow. The ranlib
441  // structs are a pair of uint32_t's the first being a string table offset
442  // and the second being the offset into the archive of the member that
443  // defines the symbol. After that the next uint32_t is the byte count of
444  // the string table followed by the string table.
445  const char *Buf = Parent->getSymbolTable().begin();
446  uint32_t RanlibCount = 0;
447  RanlibCount = read32le(Buf) / 8;
448  // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
449  // don't change the t.StringIndex as we don't want to reference a ranlib
450  // past RanlibCount.
451  if (t.SymbolIndex + 1 < RanlibCount) {
452  const char *Ranlibs = Buf + 4;
453  uint32_t CurRanStrx = 0;
454  uint32_t NextRanStrx = 0;
455  CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
456  NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
457  t.StringIndex -= CurRanStrx;
458  t.StringIndex += NextRanStrx;
459  }
460  } else {
461  // Go to one past next null.
462  t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
463  }
464  ++t.SymbolIndex;
465  return t;
466 }
467 
469  if (!hasSymbolTable())
470  return symbol_iterator(Symbol(this, 0, 0));
471 
472  const char *buf = getSymbolTable().begin();
473  if (kind() == K_GNU) {
474  uint32_t symbol_count = 0;
475  symbol_count = read32be(buf);
476  buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
477  } else if (kind() == K_MIPS64) {
478  uint64_t symbol_count = read64be(buf);
479  buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
480  } else if (kind() == K_BSD) {
481  // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
482  // which is the number of bytes of ranlib structs that follow. The ranlib
483  // structs are a pair of uint32_t's the first being a string table offset
484  // and the second being the offset into the archive of the member that
485  // defines the symbol. After that the next uint32_t is the byte count of
486  // the string table followed by the string table.
487  uint32_t ranlib_count = 0;
488  ranlib_count = read32le(buf) / 8;
489  const char *ranlibs = buf + 4;
490  uint32_t ran_strx = 0;
491  ran_strx = read32le(ranlibs);
492  buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
493  // Skip the byte count of the string table.
494  buf += sizeof(uint32_t);
495  buf += ran_strx;
496  } else {
497  uint32_t member_count = 0;
498  uint32_t symbol_count = 0;
499  member_count = read32le(buf);
500  buf += 4 + (member_count * 4); // Skip offsets.
501  symbol_count = read32le(buf);
502  buf += 4 + (symbol_count * 2); // Skip indices.
503  }
504  uint32_t string_start_offset = buf - getSymbolTable().begin();
505  return symbol_iterator(Symbol(this, 0, string_start_offset));
506 }
507 
509  if (!hasSymbolTable())
510  return symbol_iterator(Symbol(this, 0, 0));
511  return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
512 }
513 
514 uint32_t Archive::getNumberOfSymbols() const {
515  const char *buf = getSymbolTable().begin();
516  if (kind() == K_GNU)
517  return read32be(buf);
518  if (kind() == K_MIPS64)
519  return read64be(buf);
520  if (kind() == K_BSD)
521  return read32le(buf) / 8;
522  uint32_t member_count = 0;
523  member_count = read32le(buf);
524  buf += 4 + (member_count * 4); // Skip offsets.
525  return read32le(buf);
526 }
527 
531 
532  for (; bs != es; ++bs) {
533  StringRef SymName = bs->getName();
534  if (SymName == name) {
535  ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
536  // FIXME: Should we really eat the error?
537  if (ResultOrErr.getError())
538  return child_end();
539  return ResultOrErr.get();
540  }
541  }
542  return child_end();
543 }
544 
546  return SymbolTable != child_end();
547 }
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:347
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
std::error_code getError() const
Definition: ErrorOr.h:178
Represents either an error or a value T.
Definition: ErrorOr.h:82
Archive(MemoryBufferRef Source, std::error_code &EC)
Definition: Archive.cpp:230
symbol_iterator symbol_end() const
Definition: Archive.cpp:508
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
static const char *const ThinMagic
Definition: Archive.cpp:27
ErrorOr< std::unique_ptr< Binary > > getAsBinary(LLVMContext *Context=nullptr) const
Definition: Archive.cpp:214
Kind kind() const
Definition: Archive.h:183
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:240
uint64_t getRawSize() const
Definition: Archive.cpp:115
uint64_t getChildOffset() const
Definition: Archive.cpp:151
child_iterator child_begin(bool SkipInternal=true) const
Definition: Archive.cpp:360
StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:405
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:378
uint32_t getSize() const
Members are not larger than 4GB.
Definition: Archive.cpp:46
const char * getBufferStart() const
Definition: MemoryBuffer.h:161
StringRef getSymbolTable() const
Definition: Archive.h:209
uint64_t read64be(const void *p)
Definition: Endian.h:216
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:443
StringRef rtrim(StringRef Chars=" \t\n\v\f\r") const
Return string with consecutive characters in Chars starting from the right removed.
Definition: StringRef.h:517
void remove_filename(SmallVectorImpl< char > &path)
Remove the last component from path unless it is the root dir.
Definition: Path.cpp:498
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
ErrorOr< MemoryBufferRef > getMemoryBufferRef() const
Definition: Archive.cpp:202
static StringRef getName(Value *V)
This file implements a class to represent arbitrary precision integral constant values and operations...
MemoryBufferRef getMemoryBufferRef() const
Definition: Binary.cpp:37
ErrorOr< child_iterator > getMember() const
Definition: Archive.cpp:380
iterator begin() const
Definition: StringRef.h:90
bool hasSymbolTable() const
Definition: Archive.cpp:545
ErrorOr< std::unique_ptr< Binary > > createBinary(MemoryBufferRef Source, LLVMContext *Context=nullptr)
Create a Binary from Source, autodetecting the file type.
Definition: Binary.cpp:39
uint32_t read32le(const void *p)
Definition: Endian.h:212
child_iterator child_end() const
Definition: Archive.cpp:372
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
uint32_t read32be(const void *p)
Definition: Endian.h:215
StringRef getName() const
Definition: Archive.cpp:376
Child(const Archive *Parent, const char *Start)
Definition: Archive.cpp:85
sys::fs::perms getAccessMode() const
Definition: Archive.cpp:53
sys::TimeValue getLastModified() const
Definition: Archive.cpp:60
child_iterator findSym(StringRef name) const
Definition: Archive.cpp:528
size_t getBufferSize() const
Definition: MemoryBuffer.h:163
static const char *const Magic
Definition: Archive.cpp:26
symbol_iterator symbol_begin() const
Definition: Archive.cpp:468
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:215
StringRef getBuffer() const
Definition: MemoryBuffer.h:157
void fromEpochTime(SecondsType seconds)
Converts the seconds argument from PosixTime to the corresponding TimeValue and assigns that value to...
Definition: TimeValue.h:334
MemoryBufferRef Data
Definition: Binary.h:37
static ErrorOr< std::unique_ptr< Archive > > create(MemoryBufferRef Source)
Definition: Archive.cpp:222
StringRef getRawName() const
Definition: Archive.h:80
uint64_t getSize() const
Definition: Archive.cpp:109
ErrorOr< StringRef > getName() const
Definition: Archive.cpp:158
static const size_t npos
Definition: StringRef.h:44
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatileSize=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
uint16_t read16le(const void *p)
Definition: Endian.h:211
size_t size_type
Definition: StringRef.h:45
llvm::StringRef getName() const
Get the name without looking up long names.
Definition: Archive.cpp:31
uint32_t getNumberOfSymbols() const
Definition: Archive.cpp:514
static const char * name
This class is used where a precise fixed point in time is required.
Definition: TimeValue.h:31
ErrorOr< StringRef > getBuffer() const
Definition: Archive.cpp:119
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
reference get()
Definition: ErrorOr.h:175
void * addr