LLVM 20.0.0git
X86InstrFMA3Info.cpp
Go to the documentation of this file.
//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the classes providing information
// about existing X86 FMA3 opcodes, classifying and grouping them.
//
//===----------------------------------------------------------------------===//
13
14#include "X86InstrFMA3Info.h"
15#include "X86InstrInfo.h"
16#include <atomic>
17#include <cassert>
18#include <cstdint>
19
20using namespace llvm;
21
// Table-building macros for the FMA3 opcode groups.
//
// Each macro expands to one or more brace-initialized X86InstrFMA3Group
// entries, every one followed by a trailing comma, so invocations can be
// listed back to back inside an array initializer without separators.

// One group: the 132, 213 and 231 forms of X86::<Name><form><Suf>, tagged
// with the attribute bits in Attrs.
#define FMA3GROUP(Name, Suf, Attrs) \
  { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },

// Three groups: the plain opcode, its "k" suffixed form (tagged KMergeMasked)
// and its "kz" suffixed form (tagged KZeroMasked).
#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
  FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)

// Masked groups for the Z128/Z256/Z suffixed packed opcodes, memory ("m") and
// register ("r") operand forms.
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr, Attrs)

// All packed widths: the unmasked "Y" and unsuffixed-width opcodes plus the
// Z-suffixed masked ones. The expansion order is deliberate: the resulting
// table is binary-searched by opcode value (see getFMA3Group), so entries
// must come out in ascending opcode order.
#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Ym, Attrs) \
  FMA3GROUP(Name, Suf##Yr, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##r, Attrs)

// Packed PD, PH and PS variants; PH only has the Z-suffixed opcodes.
#define FMA3GROUP_PACKED_DHS(Name, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)

// Packed NEPBF16 variants (Z-suffixed opcodes only).
#define FMA3GROUP_PACKED_BF16(Name, Attrs) \
  FMA3GROUP_PACKED_WIDTHS_Z(Name, NEPBF16, Attrs)

// Z-suffixed scalar opcodes: the plain m/r forms plus the masked "_Int" forms,
// the latter tagged with the Intrinsic attribute.
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##Zm, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##Zr, Attrs) \
  FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic)

// Every scalar opcode: the Z-suffixed set plus the unsuffixed m/r forms and
// their (unmasked) "_Int" counterparts.
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  FMA3GROUP(Name, Suf##m, Attrs) \
  FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  FMA3GROUP(Name, Suf##r, Attrs) \
  FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)

// Scalar SD, SH and SS variants; SH only has the Z-suffixed opcodes.
#define FMA3GROUP_SCALAR(Name, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
  FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)

// Everything for one mnemonic: packed BF16, packed PD/PH/PS, then scalar.
#define FMA3GROUP_FULL(Name, Attrs) \
  FMA3GROUP_PACKED_BF16(Name, Attrs) \
  FMA3GROUP_PACKED_DHS(Name, Attrs) \
  FMA3GROUP_SCALAR(Name, Attrs)
75
76static const X86InstrFMA3Group Groups[] = {
77 FMA3GROUP_FULL(VFMADD, 0)
78 FMA3GROUP_PACKED_DHS(VFMADDSUB, 0)
79 FMA3GROUP_FULL(VFMSUB, 0)
80 FMA3GROUP_PACKED_DHS(VFMSUBADD, 0)
81 FMA3GROUP_FULL(VFNMADD, 0)
82 FMA3GROUP_FULL(VFNMSUB, 0)
83};
84
// Table-building macros for the AVX512-only opcode forms (the tables below
// instantiate them with the "mb" and "rb" suffixes). They are layered on
// FMA3GROUP / FMA3GROUP_MASKED, so every row also gets its "k" and "kz"
// masked variants where FMA3GROUP_MASKED is used.

// Masked groups for one element type at the Z128, Z256 and Z widths.
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)

// All packed element types: NEPBF16, PD, PH and PS.
#define FMA3GROUP_PACKED_AVX512_ALL(Name, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, NEPBF16, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)

// Packed PD, PH and PS only (no NEPBF16).
#define FMA3GROUP_PACKED_AVX512_DHS(Name, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)

// Packed PD/PH/PS at the Z256 and Z widths only (no Z128 entries).
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PDZ256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PHZ256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PSZ256##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)

// Scalar SD/SH/SS: the plain opcode followed by its masked "_Int" forms.
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
  FMA3GROUP(Name, SDZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
  FMA3GROUP(Name, SHZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
  FMA3GROUP(Name, SSZ##Suf, Attrs) \
  FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
116
118 FMA3GROUP_PACKED_AVX512_ALL(VFMADD, mb, 0)
119 FMA3GROUP_PACKED_AVX512_DHS(VFMADDSUB, mb, 0)
120 FMA3GROUP_PACKED_AVX512_ALL(VFMSUB, mb, 0)
121 FMA3GROUP_PACKED_AVX512_DHS(VFMSUBADD, mb, 0)
122 FMA3GROUP_PACKED_AVX512_ALL(VFNMADD, mb, 0)
123 FMA3GROUP_PACKED_AVX512_ALL(VFNMSUB, mb, 0)
124};
125
127 FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
129 FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
130 FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
132 FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
133 FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
135 FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
137};
138
139static void verifyTables() {
140#ifndef NDEBUG
141 static std::atomic<bool> TableChecked(false);
142 if (!TableChecked.load(std::memory_order_relaxed)) {
144 llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
145 TableChecked.store(true, std::memory_order_relaxed);
146 }
147#endif
148}
149
150/// Returns a reference to a group of FMA3 opcodes to where the given
151/// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
152/// and not included into any FMA3 group, then nullptr is returned.
153const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
154
155 // FMA3 instructions have a well defined encoding pattern we can exploit.
156 uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
157 bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
158 (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
159 (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
160 bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
161 (TSFlags & X86II::OpMapMask) == X86II::T8) ||
162 ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
163 ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
164 (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
165 bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD ||
166 (TSFlags & X86II::OpPrefixMask) == 0; // X86II::PS
167 if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
168 return nullptr;
169
170 verifyTables();
171
173 if (TSFlags & X86II::EVEX_RC)
174 Table = ArrayRef(RoundGroups);
175 else if (TSFlags & X86II::EVEX_B)
176 Table = ArrayRef(BroadcastGroups);
177 else
178 Table = ArrayRef(Groups);
179
180 // FMA 132 instructions have an opcode of 0x96-0x9F
181 // FMA 213 instructions have an opcode of 0xA6-0xAF
182 // FMA 231 instructions have an opcode of 0xB6-0xBF
183 unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
184
185 auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
186 return Group.Opcodes[FormIndex] < Opcode;
187 });
188 assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
189 "Couldn't find FMA3 opcode!");
190 return I;
191}
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void verifyTables()
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs)
#define FMA3GROUP_PACKED_AVX512_ALL(Name, Suf, Attrs)
#define FMA3GROUP_PACKED_AVX512_DHS(Name, Suf, Attrs)
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs)
static const X86InstrFMA3Group RoundGroups[]
static const X86InstrFMA3Group Groups[]
static const X86InstrFMA3Group BroadcastGroups[]
#define FMA3GROUP_PACKED_DHS(Name, Attrs)
#define FMA3GROUP_FULL(Name, Attrs)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
Definition: X86BaseInfo.h:825
@ VEX
VEX - encoding using 0xC4/0xC5.
Definition: X86BaseInfo.h:818
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
Definition: X86BaseInfo.h:721
@ T8
T8, TA - Prefix after the 0x0F prefix.
Definition: X86BaseInfo.h:737
uint8_t getBaseOpcodeFor(uint64_t TSFlags)
Definition: X86BaseInfo.h:893
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
Definition: STLExtras.h:2050
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to a group of FMA3 opcodes to where the given Opcode is included.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1926
This class is used to group {132, 213, 231} forms of FMA opcodes together.
@ Intrinsic
This bit must be set in the 'Attributes' field of FMA group if such group of FMA opcodes consists of ...
uint16_t Opcodes[3]
An array holding 3 forms of FMA opcodes.