LLVM 20.0.0git
ARMFixCortexA57AES1742098Pass.cpp
Go to the documentation of this file.
1//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass works around a Cortex Core Fused AES erratum:
9// - Cortex-A57 Erratum 1742098
10// - Cortex-A72 Erratum 1655431
11//
12// The erratum may be triggered if an input vector register to AESE or AESD was
13// last written by an instruction that only updated 32 bits of it. This can
14// occur for either of the input registers.
15//
16// The workaround chosen is to update the input register using `r = VORRq r, r`,
17// as this updates all 128 bits of the register unconditionally, but does not
18// change the values observed in `r`, making the input safe.
19//
20// This pass has to be conservative in a few cases:
21// - an input vector register to the AES instruction is defined outside the
22// current function, where we have to assume the register was updated in an
23// unsafe way; and
24// - an input vector register to the AES instruction is updated along multiple
25// different control-flow paths, where we have to ensure all the register
26// updating instructions are safe.
27//
28// Both of these cases may apply to an input vector register. In either case,
29// we need to ensure that, when the pass is finished, there exists a safe
30// instruction between every unsafe register updating instruction and the AES
31// instruction.
32//
33//===----------------------------------------------------------------------===//
34
35#include "ARM.h"
36#include "ARMBaseInstrInfo.h"
37#include "ARMBaseRegisterInfo.h"
38#include "ARMSubtarget.h"
39#include "Utils/ARMBaseInfo.h"
40#include "llvm/ADT/STLExtras.h"
43#include "llvm/ADT/StringRef.h"
54#include "llvm/IR/DebugLoc.h"
56#include "llvm/Pass.h"
57#include "llvm/PassRegistry.h"
58#include "llvm/Support/Debug.h"
60#include <assert.h>
61#include <stdint.h>
62
63using namespace llvm;
64
65#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
66
67//===----------------------------------------------------------------------===//
68
69namespace {
// Machine-function pass implementing the AES erratum workaround described in
// the file header: analyzeMF() plans the fixup locations for a function and
// insertAESFixup() materialises each planned fixup.
70class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
71public:
72 static char ID;
73 explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
// NOTE(review): listing line 74 (the constructor body) is missing from this
// extract.
75 }
76
// NOTE(review): listing lines 77 and 79-80 are missing from this extract; the
// declaration that the NoVRegs line below belongs to is not visible here.
78
81 MachineFunctionProperties::Property::NoVRegs);
82 }
83
 // Human-readable pass name.
84 StringRef getPassName() const override {
85 return "ARM fix for Cortex-A57 AES Erratum 1742098";
86 }
87
 // Declares that this pass preserves the CFG.
 // NOTE(review): listing lines 89 and 91 are missing from this extract, so
 // any further analysis-usage declarations are not visible here.
88 void getAnalysisUsage(AnalysisUsage &AU) const override {
90 AU.setPreservesCFG();
92 }
93
94private:
95 // This is the information needed to insert the fixup in the right place.
96 struct AESFixupLocation {
 // Block that the fixup instruction is inserted into.
97 MachineBasicBlock *Block;
98 // The fixup instruction will be inserted *before* InsertionPt.
99 MachineInstr *InsertionPt;
 // Operand of the AES instruction whose register the fixup rewrites.
100 MachineOperand *MOp;
101 };
102
 // Walks every instruction of MF and appends one AESFixupLocation to
 // FixupLocsForFn for each AESD/AESE input operand that needs a fixup.
 // NOTE(review): listing line 104 (the middle of this parameter list) is
 // missing from this extract.
103 void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
105 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
106
 // Builds the `VORRq` fixup for FixupLoc.MOp's register immediately before
 // FixupLoc.InsertionPt in FixupLoc.Block.
107 void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
108 const ARMBaseRegisterInfo *TRI) const;
109
 // True when MI's opcode is AESD or AESE.
110 static bool isFirstAESPairInstr(MachineInstr &MI);
 // True when MI is one of the opcodes this pass accepts as a safe writer of
 // an AES input register.
111 static bool isSafeAESInput(MachineInstr &MI);
112};
113char ARMFixCortexA57AES1742098::ID = 0;
114
115} // end anonymous namespace
116
// Pass registration. DEBUG_TYPE is passed as the pass argument, followed by
// the descriptive name; the two trailing `false` values fill the macro's
// `cfg` and `analysis` parameters.
// NOTE(review): listing line 120, between the BEGIN and END macros, is missing
// from this extract.
117INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
118 "ARM fix for Cortex-A57 AES Erratum 1742098", false,
119 false)
121INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
122 "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
123
124//===----------------------------------------------------------------------===//
125
126bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
127 unsigned Opc = MI.getOpcode();
128 return Opc == ARM::AESD || Opc == ARM::AESE;
129}
130
131bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
132 auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
133 int CCIdx = MI.findFirstPredOperandIdx();
134 if (CCIdx == -1)
135 return false;
136 return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
137 };
138
139 switch (MI.getOpcode()) {
140 // Unknown: Assume not safe.
141 default:
142 return false;
143 // 128-bit wide AES instructions
144 case ARM::AESD:
145 case ARM::AESE:
146 case ARM::AESMC:
147 case ARM::AESIMC:
148 // No CondCode.
149 return true;
150 // 128-bit and 64-bit wide bitwise ops (when condition = al)
151 case ARM::VANDd:
152 case ARM::VANDq:
153 case ARM::VORRd:
154 case ARM::VORRq:
155 case ARM::VEORd:
156 case ARM::VEORq:
157 case ARM::VMVNd:
158 case ARM::VMVNq:
159 // VMOV of 64-bit value between D registers (when condition = al)
160 case ARM::VMOVD:
161 // VMOV of 64 bit value from GPRs (when condition = al)
162 case ARM::VMOVDRR:
163 // VMOV of immediate into D or Q registers (when condition = al)
164 case ARM::VMOVv2i64:
165 case ARM::VMOVv1i64:
166 case ARM::VMOVv2f32:
167 case ARM::VMOVv4f32:
168 case ARM::VMOVv2i32:
169 case ARM::VMOVv4i32:
170 case ARM::VMOVv4i16:
171 case ARM::VMOVv8i16:
172 case ARM::VMOVv8i8:
173 case ARM::VMOVv16i8:
174 // Loads (when condition = al)
175 // VLD Dn, [Rn, #imm]
176 case ARM::VLDRD:
177 // VLDM
178 case ARM::VLDMDDB_UPD:
179 case ARM::VLDMDIA_UPD:
180 case ARM::VLDMDIA:
181 // VLDn to all lanes.
182 case ARM::VLD1d64:
183 case ARM::VLD1q64:
184 case ARM::VLD1d32:
185 case ARM::VLD1q32:
186 case ARM::VLD2b32:
187 case ARM::VLD2d32:
188 case ARM::VLD2q32:
189 case ARM::VLD1d16:
190 case ARM::VLD1q16:
191 case ARM::VLD2d16:
192 case ARM::VLD2q16:
193 case ARM::VLD1d8:
194 case ARM::VLD1q8:
195 case ARM::VLD2b8:
196 case ARM::VLD2d8:
197 case ARM::VLD2q8:
198 case ARM::VLD3d32:
199 case ARM::VLD3q32:
200 case ARM::VLD3d16:
201 case ARM::VLD3q16:
202 case ARM::VLD3d8:
203 case ARM::VLD3q8:
204 case ARM::VLD4d32:
205 case ARM::VLD4q32:
206 case ARM::VLD4d16:
207 case ARM::VLD4q16:
208 case ARM::VLD4d8:
209 case ARM::VLD4q8:
210 // VLD1 (single element to one lane)
211 case ARM::VLD1LNd32:
212 case ARM::VLD1LNd32_UPD:
213 case ARM::VLD1LNd8:
214 case ARM::VLD1LNd8_UPD:
215 case ARM::VLD1LNd16:
216 case ARM::VLD1LNd16_UPD:
217 // VLD1 (single element to all lanes)
218 case ARM::VLD1DUPd32:
219 case ARM::VLD1DUPd32wb_fixed:
220 case ARM::VLD1DUPd32wb_register:
221 case ARM::VLD1DUPd16:
222 case ARM::VLD1DUPd16wb_fixed:
223 case ARM::VLD1DUPd16wb_register:
224 case ARM::VLD1DUPd8:
225 case ARM::VLD1DUPd8wb_fixed:
226 case ARM::VLD1DUPd8wb_register:
227 case ARM::VLD1DUPq32:
228 case ARM::VLD1DUPq32wb_fixed:
229 case ARM::VLD1DUPq32wb_register:
230 case ARM::VLD1DUPq16:
231 case ARM::VLD1DUPq16wb_fixed:
232 case ARM::VLD1DUPq16wb_register:
233 case ARM::VLD1DUPq8:
234 case ARM::VLD1DUPq8wb_fixed:
235 case ARM::VLD1DUPq8wb_register:
236 // VMOV
237 case ARM::VSETLNi32:
238 case ARM::VSETLNi16:
239 case ARM::VSETLNi8:
240 return CondCodeIsAL(MI);
241 };
242
243 return false;
244}
245
246bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
247 LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
248 auto &STI = F.getSubtarget<ARMSubtarget>();
249
250 // Fix not requested or AES instructions not present: skip pass.
251 if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
252 return false;
253
254 const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
255 const ARMBaseInstrInfo *TII = STI.getInstrInfo();
256
257 auto &RDA = getAnalysis<ReachingDefAnalysis>();
258
259 // Analyze whole function to find instructions which need fixing up...
260 SmallVector<AESFixupLocation> FixupLocsForFn{};
261 analyzeMF(F, RDA, TRI, FixupLocsForFn);
262
263 // ... and fix the instructions up all at the same time.
264 bool Changed = false;
265 LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
266 for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
267 insertAESFixup(FixupLoc, TII, TRI);
268 Changed |= true;
269 }
270
271 return Changed;
272}
273
// Scans every instruction of MF. For each AESD/AESE instruction (see
// isFirstAESPairInstr) it inspects the instruction's use operands and appends
// a planned AESFixupLocation to FixupLocsForFn for each operand that needs a
// fixup. Nothing is inserted here; the caller performs the insertion.
274void ARMFixCortexA57AES1742098::analyzeMF(
// NOTE(review): listing lines 275-276 (the leading parameters) are missing
// from this extract.
277 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
 // Upper bound on planned fixups; only checked by the assert at the end.
278 unsigned MaxAllowedFixups = 0;
279
280 for (MachineBasicBlock &MBB : MF) {
281 for (MachineInstr &MI : MBB) {
282 if (!isFirstAESPairInstr(MI))
283 continue;
284
285 // Found an instruction to check the operands of.
286 LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
287 assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
288 "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
289
290 // A maximum of two fixups should be inserted for each AES pair (one per
291 // register use).
292 MaxAllowedFixups += 2;
293
294 // Inspect all operands, choosing whether to insert a fixup.
295 for (MachineOperand &MOp : MI.uses()) {
 // NOTE(review): listing line 296 is missing from this extract; the
 // declaration of AllDefs is not visible here.
297 RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
298
299 // Planned Fixup: This should be added to FixupLocsForFn at most once.
 // By default the fixup goes immediately before the AES instruction.
300 AESFixupLocation NewLoc{&MBB, &MI, &MOp};
301
302 // In small functions with loops, this operand may be both a live-in and
303 // have definitions within the function itself. These will need a fixup.
304 bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
305
306 // If the register doesn't have defining instructions, and is not a
307 // live-in, then something is wrong and the fixup must always be
308 // inserted to be safe.
309 if (!IsLiveIn && AllDefs.size() == 0) {
 // NOTE(review): listing line 310 (the start of this debug statement) is
 // missing from this extract.
311 << "Fixup Planned: No Defining Instrs found, not live-in: "
312 << printReg(MOp.getReg(), TRI) << "\n");
313 FixupLocsForFn.emplace_back(NewLoc);
314 continue;
315 }
316
 // Count the reaching definitions that isSafeAESInput rejects.
317 auto IsUnsafe = [](MachineInstr *MI) -> bool {
318 return !isSafeAESInput(*MI);
319 };
320 size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
321
322 // If there are no unsafe definitions...
323 if (UnsafeCount == 0) {
324 // ... and the register is not live-in ...
325 if (!IsLiveIn) {
326 // ... then skip the fixup.
327 LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
328 << printReg(MOp.getReg(), TRI) << "\n");
329 continue;
330 }
331
332 // Otherwise, the only unsafe "definition" is a live-in, so insert the
333 // fixup at the start of the function.
 // NOTE(review): listing line 334 (the start of this debug statement) is
 // missing from this extract.
335 << "Fixup Planned: Live-In (with safe defining instrs): "
336 << printReg(MOp.getReg(), TRI) << "\n");
337 NewLoc.Block = &MF.front();
338 NewLoc.InsertionPt = &*NewLoc.Block->begin();
339 LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
340 << *NewLoc.InsertionPt);
341 FixupLocsForFn.emplace_back(NewLoc);
342 continue;
343 }
344
345 // If a fixup is needed in more than one place, then the best place to
346 // insert it is adjacent to the use rather than introducing a fixup
347 // adjacent to each def.
348 //
349 // FIXME: It might be better to hoist this to the start of the BB, if
350 // possible.
351 if (IsLiveIn || UnsafeCount > 1) {
352 LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
353 "(including live-ins): "
354 << printReg(MOp.getReg(), TRI) << "\n");
355 FixupLocsForFn.emplace_back(NewLoc);
356 continue;
357 }
358
359 assert(UnsafeCount == 1 && !IsLiveIn &&
360 "At this point, there should be one unsafe defining instrs "
361 "and the defined register should not be a live-in.");
 // NOTE(review): listing line 362 (the declaration of It) is missing from
 // this extract.
363 llvm::find_if(AllDefs, IsUnsafe);
364 assert(It != AllDefs.end() &&
365 "UnsafeCount == 1 but No Unsafe MachineInstr found.");
366 MachineInstr *DefMI = *It;
367
 // NOTE(review): listing line 368 (the start of this debug statement) is
 // missing from this extract.
369 dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
370 << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
371
372 // There is one unsafe defining instruction, which needs a fixup. It is
373 // generally good to hoist the fixup to be adjacent to the defining
374 // instruction rather than the using instruction, as the using
375 // instruction may be inside a loop when the defining instruction is
376 // not.
 // NOTE(review): listing line 377 (the declaration of DefIt) is missing
 // from this extract.
378 ++DefIt;
 // When DefMI is the last instruction of its block, the default location
 // (immediately before the AES instruction) is kept.
379 if (DefIt != DefMI->getParent()->end()) {
380 LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
381 << "And immediately before " << *DefIt);
382 NewLoc.Block = DefIt->getParent();
383 NewLoc.InsertionPt = &*DefIt;
384 }
385
386 FixupLocsForFn.emplace_back(NewLoc);
387 }
388 }
389 }
390
391 assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
392 "Inserted too many fixups for this function.");
 // MaxAllowedFixups is otherwise only read inside the assert above.
393 (void)MaxAllowedFixups;
394}
395
// Inserts one `VORRq r, r, r` for the register of FixupLoc.MOp, placed in
// FixupLoc.Block immediately before FixupLoc.InsertionPt. TII supplies the
// VORRq instruction description; TRI is only used for debug printing.
396void ARMFixCortexA57AES1742098::insertAESFixup(
397 AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
398 const ARMBaseRegisterInfo *TRI) const {
399 MachineOperand *OperandToFixup = FixupLoc.MOp;
400
401 assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
402 Register RegToFixup = OperandToFixup->getReg();
403
404 LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
405 << " before: " << *FixupLoc.InsertionPt);
406
407 // Insert the new `VORRq qN, qN, qN`. There are a few details here:
408 //
409 // The uses are marked as killed, even if the original use of OperandToFixup
410 // is not killed, as the new instruction is clobbering the register. This is
411 // safe even if there are other uses of `qN`, as the VORRq is value-wise a
412 // no-op (it is inserted for microarchitectural reasons).
413 //
414 // The def and the uses are still marked as Renamable if the original register
415 // was, to avoid having to rummage through all the other uses and defs and
416 // unset their renamable bits.
417 unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
 // The new instruction is built with an empty DebugLoc.
418 BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
419 TII->get(ARM::VORRq))
420 .addReg(RegToFixup, RegState::Define | Renamable)
421 .addReg(RegToFixup, RegState::Kill | Renamable)
422 .addReg(RegToFixup, RegState::Kill | Renamable)
 // NOTE(review): listing line 423 is missing from this extract; one builder
 // call between the last register use and the NoRegister operand is not
 // visible here.
424 .addReg(ARM::NoRegister);
425}
426
427// Factory function used by ARMTargetMachine to add the pass to
428// the pass manager.
430 return new ARMFixCortexA57AES1742098();
431}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder MachineInstrBuilder & DefMI
ReachingDefAnalysis & RDA
MachineBasicBlock & MBB
arm execution domain fix
#define LLVM_DEBUG(...)
Definition: Debug.h:106
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
Register getReg() const
getReg - Returns the register number.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
This class provides the reaching def analysis.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
Definition: SmallPtrSet.h:312
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &)
FunctionPass * createARMFixCortexA57AES1742098Pass()
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.