LLVM  16.0.0git
Magic.cpp
Go to the documentation of this file.
1 //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/BinaryFormat/COFF.h"
14 #include "llvm/Support/Endian.h"
16 
17 #if !defined(_MSC_VER) && !defined(__MINGW32__)
18 #include <unistd.h>
19 #else
20 #include <io.h>
21 #endif
22 
23 using namespace llvm;
24 using namespace llvm::support::endian;
25 using namespace llvm::sys::fs;
26 
27 template <size_t N>
28 static bool startswith(StringRef Magic, const char (&S)[N]) {
29  return Magic.startswith(StringRef(S, N - 1));
30 }
31 
32 /// Identify the file type from the leading bytes in \p Magic.
34  if (Magic.size() < 4)
35  return file_magic::unknown;
36  switch ((unsigned char)Magic[0]) {
37  case 0x00: {
38  // COFF bigobj, CL.exe's LTO object file, or short import library file
39  if (startswith(Magic, "\0\0\xFF\xFF")) {
40  size_t MinSize =
42  if (Magic.size() < MinSize)
44 
45  const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
46  if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
48  if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
51  }
52  // Windows resource file
53  if (Magic.size() >= sizeof(COFF::WinResMagic) &&
54  memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
56  // 0x0000 = COFF unknown machine type
57  if (Magic[1] == 0)
59  if (startswith(Magic, "\0asm"))
61  break;
62  }
63 
64  case 0x01:
65  // XCOFF format
66  if (startswith(Magic, "\x01\xDF"))
68  if (startswith(Magic, "\x01\xF7"))
70  break;
71 
72  case 0x03:
73  if (startswith(Magic, "\x03\xF0\x00"))
75  break;
76 
77  case 0x10:
78  if (startswith(Magic, "\x10\xFF\x10\xAD"))
80  break;
81 
82  case 0xDE: // 0x0B17C0DE = BC wrapper
83  if (startswith(Magic, "\xDE\xC0\x17\x0B"))
84  return file_magic::bitcode;
85  break;
86  case 'B':
87  if (startswith(Magic, "BC\xC0\xDE"))
88  return file_magic::bitcode;
89  break;
90  case '!':
91  if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
92  return file_magic::archive;
93  break;
94  case '<':
95  if (startswith(Magic, "<bigaf>\n"))
96  return file_magic::archive;
97  break;
98  case '\177':
99  if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
100  bool Data2MSB = Magic[5] == 2;
101  unsigned high = Data2MSB ? 16 : 17;
102  unsigned low = Data2MSB ? 17 : 16;
103  if (Magic[high] == 0) {
104  switch (Magic[low]) {
105  default:
106  return file_magic::elf;
107  case 1:
109  case 2:
111  case 3:
113  case 4:
114  return file_magic::elf_core;
115  }
116  }
117  // It's still some type of ELF file.
118  return file_magic::elf;
119  }
120  break;
121 
122  case 0xCA:
123  if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
124  startswith(Magic, "\xCA\xFE\xBA\xBF")) {
125  // This is complicated by an overlap with Java class files.
126  // See the Mach-O section in /usr/share/file/magic for details.
127  if (Magic.size() >= 8 && Magic[7] < 43)
129  }
130  break;
131 
132  // The two magic numbers for mach-o are:
133  // 0xfeedface - 32-bit mach-o
134  // 0xfeedfacf - 64-bit mach-o
135  case 0xFE:
136  case 0xCE:
137  case 0xCF: {
138  uint16_t type = 0;
139  if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
140  startswith(Magic, "\xFE\xED\xFA\xCF")) {
141  /* Native endian */
142  size_t MinSize;
143  if (Magic[3] == char(0xCE))
144  MinSize = sizeof(MachO::mach_header);
145  else
146  MinSize = sizeof(MachO::mach_header_64);
147  if (Magic.size() >= MinSize)
148  type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
149  } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
150  startswith(Magic, "\xCF\xFA\xED\xFE")) {
151  /* Reverse endian */
152  size_t MinSize;
153  if (Magic[0] == char(0xCE))
154  MinSize = sizeof(MachO::mach_header);
155  else
156  MinSize = sizeof(MachO::mach_header_64);
157  if (Magic.size() >= MinSize)
158  type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
159  }
160  switch (type) {
161  default:
162  break;
163  case 1:
165  case 2:
167  case 3:
169  case 4:
170  return file_magic::macho_core;
171  case 5:
173  case 6:
175  case 7:
177  case 8:
179  case 9:
181  case 10:
183  case 11:
185  case 12:
187  }
188  break;
189  }
190  case 0xF0: // PowerPC Windows
191  case 0x83: // Alpha 32-bit
192  case 0x84: // Alpha 64-bit
193  case 0x66: // MIPS R4000 Windows
194  case 0x50: // mc68K
195  if (startswith(Magic, "\x50\xed\x55\xba"))
197  [[fallthrough]];
198 
199  case 0x4c: // 80386 Windows
200  case 0xc4: // ARMNT Windows
201  if (Magic[1] == 0x01)
203  [[fallthrough]];
204 
205  case 0x90: // PA-RISC Windows
206  case 0x68: // mc68K Windows
207  if (Magic[1] == 0x02)
209  break;
210 
211  case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
212  // Minidump file.
213  if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
214  uint32_t off = read32le(Magic.data() + 0x3c);
215  // PE/COFF file, either EXE or DLL.
216  if (Magic.substr(off).startswith(
219  }
220  if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
221  return file_magic::pdb;
222  if (startswith(Magic, "MDMP"))
223  return file_magic::minidump;
224  break;
225 
226  case 0x64: // x86-64 or ARM64 Windows.
227  if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
229  break;
230 
231  case 0x2d: // YAML '-'
232  if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
233  return file_magic::tapi_file;
234  break;
235 
236  case 'D': // DirectX container file - DXBC
237  if (startswith(Magic, "DXBC"))
239  break;
240 
241  case 0x41: // ARM64EC windows
242  if (Magic[1] == char(0xA6))
244  break;
245 
246  default:
247  break;
248  }
249  return file_magic::unknown;
250 }
251 
252 std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
253  auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
254  /*RequiresNullTerminator=*/false);
255  if (!FileOrError)
256  return FileOrError.getError();
257 
258  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
259  Result = identify_magic(FileBuffer->getBuffer());
260 
261  return std::error_code();
262 }
MemoryBuffer.h
llvm::file_magic::unknown
@ unknown
Unrecognized file.
Definition: Magic.h:22
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::file_magic
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:20
llvm::file_magic::cuda_fatbinary
@ cuda_fatbinary
CUDA Fatbinary object file.
Definition: Magic.h:55
StringRef.h
llvm::file_magic::elf_core
@ elf_core
ELF core image.
Definition: Magic.h:29
llvm::file_magic::pdb
@ pdb
Windows PDB debug info file.
Definition: Magic.h:53
offsetof
#define offsetof(TYPE, MEMBER)
Definition: AMDHSAKernelDescriptor.h:23
llvm::file_magic::macho_dynamic_linker
@ macho_dynamic_linker
The Mach-O dynamic linker.
Definition: Magic.h:37
llvm::file_magic::elf_shared_object
@ elf_shared_object
ELF dynamically linked shared lib.
Definition: Magic.h:28
memcmp
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:903
COFF.h
llvm::file_magic::goff_object
@ goff_object
GOFF object file.
Definition: Magic.h:30
startswith
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
llvm::file_magic::offload_binary
@ offload_binary
LLVM offload object file.
Definition: Magic.h:56
llvm::COFF::BigObjHeader
Definition: COFF.h:74
llvm::file_magic::minidump
@ minidump
Windows minidump file.
Definition: Magic.h:44
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, Optional< Align > Alignment=None)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:248
llvm::file_magic::macho_file_set
@ macho_file_set
Mach-O file set binary.
Definition: Magic.h:43
llvm::COFF::PEMagic
static const char PEMagic[]
Definition: COFF.h:35
llvm::file_magic::bitcode
@ bitcode
Bitcode file.
Definition: Magic.h:23
llvm::support::endian
Definition: Endian.h:42
Magic.h
Twine.h
llvm::file_magic::xcoff_object_64
@ xcoff_object_64
64-bit XCOFF object file
Definition: Magic.h:51
llvm::file_magic::macho_preload_executable
@ macho_preload_executable
Mach-O Preloaded Executable.
Definition: Magic.h:35
llvm::file_magic::macho_bundle
@ macho_bundle
Mach-O Bundle file.
Definition: Magic.h:38
llvm::file_magic::macho_core
@ macho_core
Mach-O Core File.
Definition: Magic.h:34
llvm::file_magic::macho_dsym_companion
@ macho_dsym_companion
Mach-O dSYM companion file.
Definition: Magic.h:40
llvm::file_magic::dxcontainer_object
@ dxcontainer_object
DirectX container file.
Definition: Magic.h:57
llvm::COFF::WinResMagic
static const char WinResMagic[]
Definition: COFF.h:48
llvm::sys::fs
Definition: UniqueID.h:24
llvm::file_magic::elf_relocatable
@ elf_relocatable
ELF Relocatable object file.
Definition: Magic.h:26
llvm::file_magic::coff_import_library
@ coff_import_library
COFF import library.
Definition: Magic.h:47
llvm::MachO::mach_header_64
Definition: MachO.h:526
llvm::file_magic::pecoff_executable
@ pecoff_executable
PECOFF executable file.
Definition: Magic.h:48
llvm::file_magic::macho_universal_binary
@ macho_universal_binary
Mach-O universal binary.
Definition: Magic.h:42
llvm::file_magic::macho_dynamically_linked_shared_lib
@ macho_dynamically_linked_shared_lib
Mach-O dynlinked shared lib.
Definition: Magic.h:36
type
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference and DH registers in an instruction requiring REX prefix divb and mulb both produce results in AH If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a r8b r15b To get around isel emits a CopyFromReg from AX and then right shift it down by and truncate it It s not pretty but it works We need some register allocation magic to make the hack go which would often require a callee saved register Callees usually need to keep this value live for most of their body so it doesn t add a significant burden on them We currently implement this in however this is suboptimal because it means that it would be quite awkward to implement the optimization for callers A better implementation would be to relax the LLVM IR rules for sret arguments to allow a function with an sret argument to have a non void return type
Definition: README-X86-64.txt:70
llvm::file_magic::macho_dynamically_linked_shared_lib_stub
@ macho_dynamically_linked_shared_lib_stub
Mach-O Shared lib stub.
Definition: Magic.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::file_magic::coff_object
@ coff_object
COFF object file.
Definition: Magic.h:46
UUID
std::pair< llvm::MachO::Target, std::string > UUID
Definition: TextStubCommon.h:23
llvm::COFF::BigObjMagic
static const char BigObjMagic[]
Definition: COFF.h:37
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::msf::Magic
static const char Magic[]
Definition: MSFCommon.h:23
llvm::file_magic::macho_kext_bundle
@ macho_kext_bundle
Mach-O kext bundle file.
Definition: Magic.h:41
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::file_magic::macho_fixed_virtual_memory_shared_lib
@ macho_fixed_virtual_memory_shared_lib
Mach-O Shared Lib, FVM.
Definition: Magic.h:33
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
uint16_t
llvm::file_magic::elf
@ elf
ELF Unknown type.
Definition: Magic.h:25
llvm::file_magic::archive
@ archive
ar style archive file
Definition: Magic.h:24
llvm::file_magic::elf_executable
@ elf_executable
ELF Executable image.
Definition: Magic.h:27
llvm::MachO::mach_header
Definition: MachO.h:516
llvm::support::endian::read32le
uint32_t read32le(const void *P)
Definition: Endian.h:381
llvm::COFF::ClGlObjMagic
static const char ClGlObjMagic[]
Definition: COFF.h:42
N
#define N
llvm::file_magic::wasm_object
@ wasm_object
WebAssembly Object file.
Definition: Magic.h:52
llvm::file_magic::coff_cl_gl_object
@ coff_cl_gl_object
Microsoft cl.exe's intermediate code file.
Definition: Magic.h:45
llvm::file_magic::macho_executable
@ macho_executable
Mach-O Executable.
Definition: Magic.h:32
MachO.h
llvm::file_magic::tapi_file
@ tapi_file
Text-based Dynamic Library Stub file.
Definition: Magic.h:54
llvm::identify_magic
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:33
Endian.h
llvm::file_magic::windows_resource
@ windows_resource
Windows compiled resource file (.res)
Definition: Magic.h:49
llvm::file_magic::macho_object
@ macho_object
Mach-O Object file.
Definition: Magic.h:31
llvm::file_magic::xcoff_object_32
@ xcoff_object_32
32-bit XCOFF object file
Definition: Magic.h:50