LLVM  14.0.0git
X86InstrFMA3Info.cpp
Go to the documentation of this file.
1 //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of the classes providing information
10 // about existing X86 FMA3 opcodes, classifying and grouping them.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "X86InstrFMA3Info.h"
#include "X86InstrInfo.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <cassert>
#include <cstdint>
20 
21 using namespace llvm;
22 
// Emits a single table row: the 132, 213 and 231 opcode forms built as
// X86::<Name><form><Suf>, followed by the row's attribute bits. The trailing
// comma allows invocations to be placed back to back inside an array
// initializer.
#define FMA3GROUP(Name, Suf, Attrs)                                            \
  { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },
25 
// Emits three rows for one opcode family: the plain form, the "k" suffixed
// form tagged KMergeMasked, and the "kz" suffixed form tagged KZeroMasked.
#define FMA3GROUP_MASKED(Name, Suf, Attrs)                                     \
  FMA3GROUP(Name, Suf, Attrs)                                                  \
  FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked)             \
  FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
30 
// Emits the masked row triples for the Z128, Z256 and Z packed suffixes of
// one element type, each in memory (m) and register (r) operand form.
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs)                            \
  FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs)                                    \
  FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs)                                    \
  FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs)                                    \
  FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs)                                    \
  FMA3GROUP_MASKED(Name, Suf##Zm, Attrs)                                       \
  FMA3GROUP_MASKED(Name, Suf##Zr, Attrs)
38 
// Emits every packed row for one element type: the unmasked Y and
// unsuffixed forms plus all Z-suffixed masked forms. The expansion order is
// kept as is because the resulting table is binary-searched by opcode.
#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs)                          \
  FMA3GROUP(Name, Suf##Ym, Attrs)                                              \
  FMA3GROUP(Name, Suf##Yr, Attrs)                                              \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs)                                  \
  FMA3GROUP(Name, Suf##m, Attrs)                                               \
  FMA3GROUP(Name, Suf##r, Attrs)
45 
// Emits the packed rows of one operation for the PD, PH and PS element
// types. PH is expanded with the Z-suffixed forms only.
#define FMA3GROUP_PACKED(Name, Attrs)                                          \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs)                                 \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs)                                   \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
50 
// Emits the Z-suffixed scalar rows for one element type: the plain Zm/Zr
// forms, and the masked Zm_Int/Zr_Int forms which additionally carry the
// Intrinsic attribute.
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs)                            \
  FMA3GROUP(Name, Suf##Zm, Attrs)                                              \
  FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic)    \
  FMA3GROUP(Name, Suf##Zr, Attrs)                                              \
  FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic)
56 
// Emits every scalar row for one element type: the Z-suffixed forms followed
// by the unsuffixed m/r forms and their _Int variants (tagged Intrinsic).
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs)                          \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs)                                  \
  FMA3GROUP(Name, Suf##m, Attrs)                                               \
  FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic)            \
  FMA3GROUP(Name, Suf##r, Attrs)                                               \
  FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
63 
// Emits the scalar rows of one operation for the SD, SH and SS element
// types. SH is expanded with the Z-suffixed forms only.
#define FMA3GROUP_SCALAR(Name, Attrs)                                          \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs)                                 \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs)                                   \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
68 
// Emits all packed rows followed by all scalar rows of one operation.
#define FMA3GROUP_FULL(Name, Attrs)                                            \
  FMA3GROUP_PACKED(Name, Attrs)                                                \
  FMA3GROUP_SCALAR(Name, Attrs)
72 
73 static const X86InstrFMA3Group Groups[] = {
74  FMA3GROUP_FULL(VFMADD, 0)
75  FMA3GROUP_PACKED(VFMADDSUB, 0)
76  FMA3GROUP_FULL(VFMSUB, 0)
77  FMA3GROUP_PACKED(VFMSUBADD, 0)
78  FMA3GROUP_FULL(VFNMADD, 0)
79  FMA3GROUP_FULL(VFNMSUB, 0)
80 };
81 
// Emits the masked row triples of one element type (Type) for the Z128, Z256
// and Z suffixes, each followed by the operand-form suffix Suf.
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs)                 \
  FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs)                               \
  FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs)                               \
  FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
86 
// Emits the Z128/Z256/Z masked rows of one operation for the PD, PH and PS
// element types with the given operand-form suffix.
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs)                              \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs)                         \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs)                         \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
91 
// Emits the masked row triples for the PDZ, PHZ and PSZ packed forms of one
// operation with the given suffix (only the Z width is expanded here).
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs)                        \
  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs)                                      \
  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs)                                      \
  FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
96 
// Emits the SDZ, SHZ and SSZ scalar rows of one operation with the given
// suffix: for each type, one plain row followed by the masked triple of its
// _Int variant. Attrs is applied unchanged to every row.
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs)                        \
  FMA3GROUP(Name, SDZ##Suf, Attrs)                                             \
  FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs)                                \
  FMA3GROUP(Name, SHZ##Suf, Attrs)                                             \
  FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs)                                \
  FMA3GROUP(Name, SSZ##Suf, Attrs)                                             \
  FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
104 
106  FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
107  FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
108  FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
109  FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
110  FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
111  FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
112 };
113 
114 static const X86InstrFMA3Group RoundGroups[] = {
115  FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
117  FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
118  FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
120  FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
121  FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
123  FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
125 };
126 
127 static void verifyTables() {
128 #ifndef NDEBUG
129  static std::atomic<bool> TableChecked(false);
130  if (!TableChecked.load(std::memory_order_relaxed)) {
132  llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
133  TableChecked.store(true, std::memory_order_relaxed);
134  }
135 #endif
136 }
137 
138 /// Returns a reference to a group of FMA3 opcodes to where the given
139 /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
140 /// and not included into any FMA3 group, then nullptr is returned.
141 const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
142 
143  // FMA3 instructions have a well defined encoding pattern we can exploit.
144  uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
145  bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
146  (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
147  (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
148  bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
149  (TSFlags & X86II::OpMapMask) == X86II::T8) ||
150  ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
151  ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
152  (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
153  bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
154  if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
155  return nullptr;
156 
157  verifyTables();
158 
160  if (TSFlags & X86II::EVEX_RC)
161  Table = makeArrayRef(RoundGroups);
162  else if (TSFlags & X86II::EVEX_B)
163  Table = makeArrayRef(BroadcastGroups);
164  else
165  Table = makeArrayRef(Groups);
166 
167  // FMA 132 instructions have an opcode of 0x96-0x9F
168  // FMA 213 instructions have an opcode of 0xA6-0xAF
169  // FMA 231 instructions have an opcode of 0xB6-0xBF
170  unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
171 
172  auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
173  return Group.Opcodes[FormIndex] < Opcode;
174  });
175  assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
176  "Couldn't find FMA3 opcode!");
177  return I;
178 }
X86InstrFMA3Info.h
llvm
The top-level namespace that contains the LLVM APIs used in this file.
Definition: AllocatorList.h:23
ManagedStatic.h
llvm::X86II::PD
@ PD
Definition: X86BaseInfo.h:782
llvm::X86II::getBaseOpcodeFor
uint8_t getBaseOpcodeFor(uint64_t TSFlags)
Definition: X86BaseInfo.h:980
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73
llvm::X86II::EVEX_RC
@ EVEX_RC
Definition: X86BaseInfo.h:957
llvm::X86II::EncodingMask
@ EncodingMask
Definition: X86BaseInfo.h:900
BroadcastGroups
static const X86InstrFMA3Group BroadcastGroups[]
Definition: X86InstrFMA3Info.cpp:105
FMA3GROUP_FULL
#define FMA3GROUP_FULL(Name, Attrs)
Definition: X86InstrFMA3Info.cpp:69
llvm::X86InstrFMA3Group
This class is used to group {132, 213, 231} forms of FMA opcodes together.
Definition: X86InstrFMA3Info.h:24
FMA3GROUP_SCALAR_AVX512_ROUND
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs)
Definition: X86InstrFMA3Info.cpp:97
llvm::X86II::OpMapMask
@ OpMapMask
Definition: X86BaseInfo.h:793
verifyTables
static void verifyTables()
Definition: X86InstrFMA3Info.cpp:127
Threading.h
uint64_t
FMA3GROUP_PACKED_AVX512
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs)
Definition: X86InstrFMA3Info.cpp:87
llvm::getFMA3Group
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to a group of FMA3 opcodes to where the given Opcode is included.
Definition: X86InstrFMA3Info.cpp:141
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::X86II::T_MAP6
@ T_MAP6
Definition: X86BaseInfo.h:824
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::X86II::VEX
@ VEX
Definition: X86BaseInfo.h:903
llvm::X86II::EVEX
@ EVEX
Definition: X86BaseInfo.h:912
llvm::X86InstrFMA3Group::Opcodes
uint16_t Opcodes[3]
An array holding 3 forms of FMA opcodes.
Definition: X86InstrFMA3Info.h:26
FMA3GROUP_PACKED_AVX512_ROUND
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs)
Definition: X86InstrFMA3Info.cpp:92
llvm::X86InstrFMA3Group::Intrinsic
@ Intrinsic
This bit must be set in the 'Attributes' field of FMA group if such group of FMA opcodes consists of ...
Definition: X86InstrFMA3Info.h:41
FMA3GROUP_PACKED
#define FMA3GROUP_PACKED(Name, Attrs)
Definition: X86InstrFMA3Info.cpp:46
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::X86II::OpPrefixMask
@ OpPrefixMask
Definition: X86BaseInfo.h:778
llvm::X86II::T8
@ T8
Definition: X86BaseInfo.h:803
llvm::partition_point
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
Definition: STLExtras.h:1699
RoundGroups
static const X86InstrFMA3Group RoundGroups[]
Definition: X86InstrFMA3Info.cpp:114
llvm::is_sorted
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1622
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
X86InstrInfo.h
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::X86II::EVEX_B
@ EVEX_B
Definition: X86BaseInfo.h:949