LLVM 20.0.0git
ARMFixCortexA57AES1742098Pass.cpp
Go to the documentation of this file.
//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass works around a Cortex Core Fused AES erratum:
// - Cortex-A57 Erratum 1742098
// - Cortex-A72 Erratum 1655431
//
// The erratum may be triggered if an input vector register to AESE or AESD was
// last written by an instruction that only updated 32 bits of it. This can
// occur for either of the input registers.
//
// The workaround chosen is to update the input register using `r = VORRq r, r`,
// as this updates all 128 bits of the register unconditionally, but does not
// change the values observed in `r`, making the input safe.
//
// This pass has to be conservative in a few cases:
// - an input vector register to the AES instruction is defined outside the
//   current function, where we have to assume the register was updated in an
//   unsafe way; and
// - an input vector register to the AES instruction is updated along multiple
//   different control-flow paths, where we have to ensure all the register
//   updating instructions are safe.
//
// Both of these cases may apply to an input vector register. In either case,
// we need to ensure that, when the pass is finished, there exists a safe
// instruction between every unsafe register updating instruction and the AES
// instruction.
//
//===----------------------------------------------------------------------===//
34
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <assert.h>
#include <stdint.h>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
67
68//===----------------------------------------------------------------------===//
69
70namespace {
71class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
72public:
73 static char ID;
74 explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
76 }
77
79
82 MachineFunctionProperties::Property::NoVRegs);
83 }
84
85 StringRef getPassName() const override {
86 return "ARM fix for Cortex-A57 AES Erratum 1742098";
87 }
88
89 void getAnalysisUsage(AnalysisUsage &AU) const override {
91 AU.setPreservesCFG();
93 }
94
95private:
96 // This is the information needed to insert the fixup in the right place.
97 struct AESFixupLocation {
98 MachineBasicBlock *Block;
99 // The fixup instruction will be inserted *before* InsertionPt.
100 MachineInstr *InsertionPt;
101 MachineOperand *MOp;
102 };
103
104 void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
106 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
107
108 void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
109 const ARMBaseRegisterInfo *TRI) const;
110
111 static bool isFirstAESPairInstr(MachineInstr &MI);
112 static bool isSafeAESInput(MachineInstr &MI);
113};
114char ARMFixCortexA57AES1742098::ID = 0;
115
116} // end anonymous namespace
117
118INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
119 "ARM fix for Cortex-A57 AES Erratum 1742098", false,
120 false)
122INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
123 "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
124
125//===----------------------------------------------------------------------===//
126
127bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
128 unsigned Opc = MI.getOpcode();
129 return Opc == ARM::AESD || Opc == ARM::AESE;
130}
131
132bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
133 auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
134 int CCIdx = MI.findFirstPredOperandIdx();
135 if (CCIdx == -1)
136 return false;
137 return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
138 };
139
140 switch (MI.getOpcode()) {
141 // Unknown: Assume not safe.
142 default:
143 return false;
144 // 128-bit wide AES instructions
145 case ARM::AESD:
146 case ARM::AESE:
147 case ARM::AESMC:
148 case ARM::AESIMC:
149 // No CondCode.
150 return true;
151 // 128-bit and 64-bit wide bitwise ops (when condition = al)
152 case ARM::VANDd:
153 case ARM::VANDq:
154 case ARM::VORRd:
155 case ARM::VORRq:
156 case ARM::VEORd:
157 case ARM::VEORq:
158 case ARM::VMVNd:
159 case ARM::VMVNq:
160 // VMOV of 64-bit value between D registers (when condition = al)
161 case ARM::VMOVD:
162 // VMOV of 64 bit value from GPRs (when condition = al)
163 case ARM::VMOVDRR:
164 // VMOV of immediate into D or Q registers (when condition = al)
165 case ARM::VMOVv2i64:
166 case ARM::VMOVv1i64:
167 case ARM::VMOVv2f32:
168 case ARM::VMOVv4f32:
169 case ARM::VMOVv2i32:
170 case ARM::VMOVv4i32:
171 case ARM::VMOVv4i16:
172 case ARM::VMOVv8i16:
173 case ARM::VMOVv8i8:
174 case ARM::VMOVv16i8:
175 // Loads (when condition = al)
176 // VLD Dn, [Rn, #imm]
177 case ARM::VLDRD:
178 // VLDM
179 case ARM::VLDMDDB_UPD:
180 case ARM::VLDMDIA_UPD:
181 case ARM::VLDMDIA:
182 // VLDn to all lanes.
183 case ARM::VLD1d64:
184 case ARM::VLD1q64:
185 case ARM::VLD1d32:
186 case ARM::VLD1q32:
187 case ARM::VLD2b32:
188 case ARM::VLD2d32:
189 case ARM::VLD2q32:
190 case ARM::VLD1d16:
191 case ARM::VLD1q16:
192 case ARM::VLD2d16:
193 case ARM::VLD2q16:
194 case ARM::VLD1d8:
195 case ARM::VLD1q8:
196 case ARM::VLD2b8:
197 case ARM::VLD2d8:
198 case ARM::VLD2q8:
199 case ARM::VLD3d32:
200 case ARM::VLD3q32:
201 case ARM::VLD3d16:
202 case ARM::VLD3q16:
203 case ARM::VLD3d8:
204 case ARM::VLD3q8:
205 case ARM::VLD4d32:
206 case ARM::VLD4q32:
207 case ARM::VLD4d16:
208 case ARM::VLD4q16:
209 case ARM::VLD4d8:
210 case ARM::VLD4q8:
211 // VLD1 (single element to one lane)
212 case ARM::VLD1LNd32:
213 case ARM::VLD1LNd32_UPD:
214 case ARM::VLD1LNd8:
215 case ARM::VLD1LNd8_UPD:
216 case ARM::VLD1LNd16:
217 case ARM::VLD1LNd16_UPD:
218 // VLD1 (single element to all lanes)
219 case ARM::VLD1DUPd32:
220 case ARM::VLD1DUPd32wb_fixed:
221 case ARM::VLD1DUPd32wb_register:
222 case ARM::VLD1DUPd16:
223 case ARM::VLD1DUPd16wb_fixed:
224 case ARM::VLD1DUPd16wb_register:
225 case ARM::VLD1DUPd8:
226 case ARM::VLD1DUPd8wb_fixed:
227 case ARM::VLD1DUPd8wb_register:
228 case ARM::VLD1DUPq32:
229 case ARM::VLD1DUPq32wb_fixed:
230 case ARM::VLD1DUPq32wb_register:
231 case ARM::VLD1DUPq16:
232 case ARM::VLD1DUPq16wb_fixed:
233 case ARM::VLD1DUPq16wb_register:
234 case ARM::VLD1DUPq8:
235 case ARM::VLD1DUPq8wb_fixed:
236 case ARM::VLD1DUPq8wb_register:
237 // VMOV
238 case ARM::VSETLNi32:
239 case ARM::VSETLNi16:
240 case ARM::VSETLNi8:
241 return CondCodeIsAL(MI);
242 };
243
244 return false;
245}
246
247bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
248 LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
249 auto &STI = F.getSubtarget<ARMSubtarget>();
250
251 // Fix not requested or AES instructions not present: skip pass.
252 if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
253 return false;
254
255 const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
256 const ARMBaseInstrInfo *TII = STI.getInstrInfo();
257
258 auto &RDA = getAnalysis<ReachingDefAnalysis>();
259
260 // Analyze whole function to find instructions which need fixing up...
261 SmallVector<AESFixupLocation> FixupLocsForFn{};
262 analyzeMF(F, RDA, TRI, FixupLocsForFn);
263
264 // ... and fix the instructions up all at the same time.
265 bool Changed = false;
266 LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
267 for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
268 insertAESFixup(FixupLoc, TII, TRI);
269 Changed |= true;
270 }
271
272 return Changed;
273}
274
275void ARMFixCortexA57AES1742098::analyzeMF(
278 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
279 unsigned MaxAllowedFixups = 0;
280
281 for (MachineBasicBlock &MBB : MF) {
282 for (MachineInstr &MI : MBB) {
283 if (!isFirstAESPairInstr(MI))
284 continue;
285
286 // Found an instruction to check the operands of.
287 LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
288 assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
289 "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
290
291 // A maximum of two fixups should be inserted for each AES pair (one per
292 // register use).
293 MaxAllowedFixups += 2;
294
295 // Inspect all operands, choosing whether to insert a fixup.
296 for (MachineOperand &MOp : MI.uses()) {
298 RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
299
300 // Planned Fixup: This should be added to FixupLocsForFn at most once.
301 AESFixupLocation NewLoc{&MBB, &MI, &MOp};
302
303 // In small functions with loops, this operand may be both a live-in and
304 // have definitions within the function itself. These will need a fixup.
305 bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
306
307 // If the register doesn't have defining instructions, and is not a
308 // live-in, then something is wrong and the fixup must always be
309 // inserted to be safe.
310 if (!IsLiveIn && AllDefs.size() == 0) {
312 << "Fixup Planned: No Defining Instrs found, not live-in: "
313 << printReg(MOp.getReg(), TRI) << "\n");
314 FixupLocsForFn.emplace_back(NewLoc);
315 continue;
316 }
317
318 auto IsUnsafe = [](MachineInstr *MI) -> bool {
319 return !isSafeAESInput(*MI);
320 };
321 size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
322
323 // If there are no unsafe definitions...
324 if (UnsafeCount == 0) {
325 // ... and the register is not live-in ...
326 if (!IsLiveIn) {
327 // ... then skip the fixup.
328 LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
329 << printReg(MOp.getReg(), TRI) << "\n");
330 continue;
331 }
332
333 // Otherwise, the only unsafe "definition" is a live-in, so insert the
334 // fixup at the start of the function.
336 << "Fixup Planned: Live-In (with safe defining instrs): "
337 << printReg(MOp.getReg(), TRI) << "\n");
338 NewLoc.Block = &MF.front();
339 NewLoc.InsertionPt = &*NewLoc.Block->begin();
340 LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
341 << *NewLoc.InsertionPt);
342 FixupLocsForFn.emplace_back(NewLoc);
343 continue;
344 }
345
346 // If a fixup is needed in more than one place, then the best place to
347 // insert it is adjacent to the use rather than introducing a fixup
348 // adjacent to each def.
349 //
350 // FIXME: It might be better to hoist this to the start of the BB, if
351 // possible.
352 if (IsLiveIn || UnsafeCount > 1) {
353 LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
354 "(including live-ins): "
355 << printReg(MOp.getReg(), TRI) << "\n");
356 FixupLocsForFn.emplace_back(NewLoc);
357 continue;
358 }
359
360 assert(UnsafeCount == 1 && !IsLiveIn &&
361 "At this point, there should be one unsafe defining instrs "
362 "and the defined register should not be a live-in.");
364 llvm::find_if(AllDefs, IsUnsafe);
365 assert(It != AllDefs.end() &&
366 "UnsafeCount == 1 but No Unsafe MachineInstr found.");
367 MachineInstr *DefMI = *It;
368
370 dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
371 << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
372
373 // There is one unsafe defining instruction, which needs a fixup. It is
374 // generally good to hoist the fixup to be adjacent to the defining
375 // instruction rather than the using instruction, as the using
376 // instruction may be inside a loop when the defining instruction is
377 // not.
379 ++DefIt;
380 if (DefIt != DefMI->getParent()->end()) {
381 LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
382 << "And immediately before " << *DefIt);
383 NewLoc.Block = DefIt->getParent();
384 NewLoc.InsertionPt = &*DefIt;
385 }
386
387 FixupLocsForFn.emplace_back(NewLoc);
388 }
389 }
390 }
391
392 assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
393 "Inserted too many fixups for this function.");
394 (void)MaxAllowedFixups;
395}
396
397void ARMFixCortexA57AES1742098::insertAESFixup(
398 AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
399 const ARMBaseRegisterInfo *TRI) const {
400 MachineOperand *OperandToFixup = FixupLoc.MOp;
401
402 assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
403 Register RegToFixup = OperandToFixup->getReg();
404
405 LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
406 << " before: " << *FixupLoc.InsertionPt);
407
408 // Insert the new `VORRq qN, qN, qN`. There are a few details here:
409 //
410 // The uses are marked as killed, even if the original use of OperandToFixup
411 // is not killed, as the new instruction is clobbering the register. This is
412 // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
413 // (it is inserted for microarchitectural reasons).
414 //
415 // The def and the uses are still marked as Renamable if the original register
416 // was, to avoid having to rummage through all the other uses and defs and
417 // unset their renamable bits.
418 unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
419 BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
420 TII->get(ARM::VORRq))
421 .addReg(RegToFixup, RegState::Define | Renamable)
422 .addReg(RegToFixup, RegState::Kill | Renamable)
423 .addReg(RegToFixup, RegState::Kill | Renamable)
425 .addReg(ARM::NoRegister);
426}
427
428// Factory function used by AArch64TargetMachine to add the pass to
429// the passmanager.
431 return new ARMFixCortexA57AES1742098();
432}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder MachineInstrBuilder & DefMI
ReachingDefAnalysis & RDA
MachineBasicBlock & MBB
arm execution domain fix
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
Register getReg() const
getReg - Returns the register number.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
This class provides the reaching def analysis.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
Definition: SmallPtrSet.h:295
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1928
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &)
FunctionPass * createARMFixCortexA57AES1742098Pass()
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.