LLVM  6.0.0svn
Magic.cpp
Go to the documentation of this file.
1 //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 
12 #include "llvm/BinaryFormat/COFF.h"
13 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/Support/Endian.h"
17 
18 #if !defined(_MSC_VER) && !defined(__MINGW32__)
19 #include <unistd.h>
20 #else
21 #include <io.h>
22 #endif
23 
24 using namespace llvm;
25 using namespace llvm::support::endian;
26 using namespace llvm::sys::fs;
27 
28 template <size_t N>
29 static bool startswith(StringRef Magic, const char (&S)[N]) {
30  return Magic.startswith(StringRef(S, N - 1));
31 }
32 
33 /// @brief Identify the file type from the leading bytes held in Magic.
///
/// Yields file_magic::unknown when fewer than 4 bytes are supplied or when
/// none of the signatures below match; otherwise the first byte selects which
/// family of signatures is examined.
///
/// NOTE(review): this listing appears to have lost lines -- the function
/// signature and most `return` statements are not visible here -- so the
/// comments below describe only the checks that can actually be seen.
35  if (Magic.size() < 4)
36  return file_magic::unknown;
    // Dispatch on the first byte, viewed as unsigned so the 0x80..0xFF case
    // labels can match.
37  switch ((unsigned char)Magic[0]) {
38  case 0x00: {
39  // COFF bigobj, CL.exe's LTO object file, or short import library file
40  if (startswith(Magic, "\0\0\xFF\xFF")) {
    // Smallest buffer that still contains a complete magic value at the
    // offset of the UUID field.
41  size_t MinSize =
42  offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
43  if (Magic.size() < MinSize)
45 
    // Start points at the UUID field, which is compared against both known
    // magic values.
46  const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
47  if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
    // NOTE(review): the length used here is sizeof(COFF::BigObjMagic), not
    // sizeof(COFF::ClGlObjMagic) -- presumably both arrays have the same
    // size; confirm.
49  if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
52  }
53  // Windows resource file
54  if (Magic.size() >= sizeof(COFF::WinResMagic) &&
55  memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
57  // 0x0000 = COFF unknown machine type
58  if (Magic[1] == 0)
60  if (startswith(Magic, "\0asm"))
62  break;
63  }
64  case 0xDE: // 0x0B17C0DE = BC wrapper
65  if (startswith(Magic, "\xDE\xC0\x17\x0B"))
66  return file_magic::bitcode;
67  break;
68  case 'B':
69  if (startswith(Magic, "BC\xC0\xDE"))
70  return file_magic::bitcode;
71  break;
72  case '!':
73  if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
74  return file_magic::archive;
75  break;
76 
77  case '\177':
    // Bytes 16 and 17 are read below, hence the 18-byte minimum.
78  if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
    // Byte 5 chooses which of bytes 16/17 is treated as the high byte of the
    // two-byte field: a value of 2 means the high byte comes first.
79  bool Data2MSB = Magic[5] == 2;
80  unsigned high = Data2MSB ? 16 : 17;
81  unsigned low = Data2MSB ? 17 : 16;
    // Only values that fit in the low byte are classified individually.
82  if (Magic[high] == 0) {
83  switch (Magic[low]) {
84  default:
85  return file_magic::elf;
86  case 1:
88  case 2:
90  case 3:
92  case 4:
93  return file_magic::elf_core;
94  }
95  }
96  // It's still some type of ELF file.
97  return file_magic::elf;
98  }
99  break;
100 
101  case 0xCA:
102  if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
103  startswith(Magic, "\xCA\xFE\xBA\xBF")) {
104  // This is complicated by an overlap with Java class files.
105  // See the Mach-O section in /usr/share/file/magic for details.
106  if (Magic.size() >= 8 && Magic[7] < 43)
108  }
109  break;
110 
111  // The two magic numbers for mach-o are:
112  // 0xfeedface - 32-bit mach-o
113  // 0xfeedfacf - 64-bit mach-o
114  case 0xFE:
115  case 0xCE:
116  case 0xCF: {
    // type stays 0 when neither byte order matches or the buffer is shorter
    // than the header; 0 then lands in the `default` arm of the switch below.
117  uint16_t type = 0;
118  if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
119  startswith(Magic, "\xFE\xED\xFA\xCF")) {
120  /* Native endian */
121  size_t MinSize;
    // The last magic byte tells the two header layouts apart (0xCE vs 0xCF).
122  if (Magic[3] == char(0xCE))
123  MinSize = sizeof(MachO::mach_header);
124  else
125  MinSize = sizeof(MachO::mach_header_64);
126  if (Magic.size() >= MinSize)
    // Assemble the field stored in bytes 12..15, first byte most significant.
    // NOTE(review): the shift of 12 applied to Magic[13] breaks the 24/16/8/0
    // pattern and looks like a typo for 16 -- confirm. The result is also
    // narrowed to uint16_t, and the operands are plain char values, which may
    // be sign-extended on platforms where char is signed.
127  type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
128  } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
129  startswith(Magic, "\xCF\xFA\xED\xFE")) {
130  /* Reverse endian */
131  size_t MinSize;
    // In the byte-swapped form the distinguishing byte comes first.
132  if (Magic[0] == char(0xCE))
133  MinSize = sizeof(MachO::mach_header);
134  else
135  MinSize = sizeof(MachO::mach_header_64);
136  if (Magic.size() >= MinSize)
    // Same field, bytes taken in the opposite order.
    // NOTE(review): same suspicious shift of 12 (on Magic[14]) as in the
    // branch above -- confirm.
137  type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
138  }
139  switch (type) {
140  default:
141  break;
142  case 1:
144  case 2:
146  case 3:
148  case 4:
149  return file_magic::macho_core;
150  case 5:
152  case 6:
154  case 7:
156  case 8:
158  case 9:
160  case 10:
162  case 11:
164  }
165  break;
166  }
167  case 0xF0: // PowerPC Windows
168  case 0x83: // Alpha 32-bit
169  case 0x84: // Alpha 64-bit
170  case 0x66: // MIPS R4000 Windows
171  case 0x50: // mc68K
172  case 0x4c: // 80386 Windows
173  case 0xc4: // ARMNT Windows
174  if (Magic[1] == 0x01)
    // NOTE(review): no `break` is visible before the next group of labels, so
    // control appears to continue into the 0x90/0x68 check below; an explicit
    // fallthrough marker may have been on a line missing from this listing.
177 
178  case 0x90: // PA-RISC Windows
179  case 0x68: // mc68K Windows
180  if (Magic[1] == 0x02)
182  break;
183 
184  case 'M': // Possible MS-DOS stub on Windows PE file
    // The 4-byte little-endian value at offset 0x3c is read below, so the
    // buffer must extend at least that far.
185  if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
    // off comes straight from the input data and is used as the position at
    // which the next signature is looked for.
186  uint32_t off = read32le(Magic.data() + 0x3c);
187  // PE/COFF file, either EXE or DLL.
188  if (Magic.substr(off).startswith(
191  }
192  break;
193 
194  case 0x64: // x86-64 or ARM64 Windows.
195  if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
197  break;
198 
199  default:
200  break;
201  }
    // Nothing matched.
202  return file_magic::unknown;
203 }
204 
205 std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
206  int FD;
207  if (std::error_code EC = openFileForRead(Path, FD))
208  return EC;
209 
210  char Buffer[32];
211  int Length = read(FD, Buffer, sizeof(Buffer));
212  if (close(FD) != 0 || Length < 0)
213  return std::error_code(errno, std::generic_category());
214 
215  Result = identify_magic(StringRef(Buffer, Length));
216  return std::error_code();
217 }
Mach-O Object file.
Definition: Magic.h:31
ELF core image.
Definition: Magic.h:30
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Microsoft cl.exe's intermediate code file.
Definition: Magic.h:43
ELF dynamically linked shared lib.
Definition: Magic.h:29
std::error_code openFileForRead(const Twine &Name, int &ResultFD, SmallVectorImpl< char > *RealPath=nullptr)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
Windows compiled resource file (.res)
Definition: Magic.h:47
Mach-O Bundle file.
Definition: Magic.h:38
Mach-O Preloaded Executable.
Definition: Magic.h:35
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
static const char BigObjMagic[]
Definition: COFF.h:39
Bitcode file.
Definition: Magic.h:24
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:34
The Mach-O dynamic linker.
Definition: Magic.h:37
ELF Relocatable object file.
Definition: Magic.h:27
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:29
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
COFF import library.
Definition: Magic.h:45
Mach-O universal binary.
Definition: Magic.h:42
static const char ClGlObjMagic[]
Definition: COFF.h:44
Mach-O Core File.
Definition: Magic.h:34
Mach-O dSYM companion file.
Definition: Magic.h:40
PECOFF executable file.
Definition: Magic.h:46
COFF object file.
Definition: Magic.h:44
static const char *const Magic
Definition: Archive.cpp:42
Mach-O kext bundle file.
Definition: Magic.h:41
ar style archive file
Definition: Magic.h:25
ELF Unknown type.
Definition: Magic.h:26
ELF Executable image.
Definition: Magic.h:28
WebAssembly Object file.
Definition: Magic.h:48
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:649
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:66
#define N
uint32_t read32le(const void *P)
Definition: Endian.h:369
static const char PEMagic[]
Definition: COFF.h:37
static const char WinResMagic[]
Definition: COFF.h:50
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Mach-O Executable.
Definition: Magic.h:32
Unrecognized file.
Definition: Magic.h:23
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:21