LLVM  14.0.0git
AArch64FalkorHWPFFix.cpp
Go to the documentation of this file.
1 //===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
9 /// that may inhibit the HW prefetching. This is done in two steps. Before
10 /// ISel, we mark strided loads (i.e. those that will likely benefit from
11 /// prefetching) with metadata. Then, after opcodes have been finalized, we
12 /// insert MOVs and re-write loads to prevent unintentional tag collisions.
13 // ===---------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64InstrInfo.h"
17 #include "AArch64Subtarget.h"
18 #include "AArch64TargetMachine.h"
19 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/None.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/IR/DebugLoc.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/Function.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/InitializePasses.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
51 #include <cassert>
52 #include <iterator>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "aarch64-falkor-hwpf-fix"
58 
59 STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
60 STATISTIC(NumCollisionsAvoided,
61  "Number of HW prefetch tag collisions avoided");
62 STATISTIC(NumCollisionsNotAvoided,
63  "Number of HW prefetch tag collisions not avoided due to lack of registers");
64 DEBUG_COUNTER(FixCounter, "falkor-hwpf",
65  "Controls which tag collisions are avoided");
66 
67 namespace {
68 
69 class FalkorMarkStridedAccesses {
70 public:
71  FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
72  : LI(LI), SE(SE) {}
73 
74  bool run();
75 
76 private:
77  bool runOnLoop(Loop &L);
78 
79  LoopInfo &LI;
80  ScalarEvolution &SE;
81 };
82 
83 class FalkorMarkStridedAccessesLegacy : public FunctionPass {
84 public:
85  static char ID; // Pass ID, replacement for typeid
86 
87  FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
90  }
91 
92  void getAnalysisUsage(AnalysisUsage &AU) const override {
99  }
100 
101  bool runOnFunction(Function &F) override;
102 };
103 
104 } // end anonymous namespace
105 
107 
108 INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
109  "Falkor HW Prefetch Fix", false, false)
113 INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
114  "Falkor HW Prefetch Fix", false, false)
115 
117  return new FalkorMarkStridedAccessesLegacy();
118 }
119 
121  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
122  const AArch64Subtarget *ST =
123  TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
124  if (ST->getProcFamily() != AArch64Subtarget::Falkor)
125  return false;
126 
127  if (skipFunction(F))
128  return false;
129 
130  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
131  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
132 
133  FalkorMarkStridedAccesses LDP(LI, SE);
134  return LDP.run();
135 }
136 
137 bool FalkorMarkStridedAccesses::run() {
138  bool MadeChange = false;
139 
140  for (Loop *L : LI)
141  for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt)
142  MadeChange |= runOnLoop(**LIt);
143 
144  return MadeChange;
145 }
146 
147 bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
148  // Only mark strided loads in the inner-most loop
149  if (!L.isInnermost())
150  return false;
151 
152  bool MadeChange = false;
153 
154  for (BasicBlock *BB : L.blocks()) {
155  for (Instruction &I : *BB) {
156  LoadInst *LoadI = dyn_cast<LoadInst>(&I);
157  if (!LoadI)
158  continue;
159 
160  Value *PtrValue = LoadI->getPointerOperand();
161  if (L.isLoopInvariant(PtrValue))
162  continue;
163 
164  const SCEV *LSCEV = SE.getSCEV(PtrValue);
165  const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
166  if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
167  continue;
168 
170  MDNode::get(LoadI->getContext(), {}));
171  ++NumStridedLoadsMarked;
172  LLVM_DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
173  MadeChange = true;
174  }
175  }
176 
177  return MadeChange;
178 }
179 
180 namespace {
181 
182 class FalkorHWPFFix : public MachineFunctionPass {
183 public:
184  static char ID;
185 
186  FalkorHWPFFix() : MachineFunctionPass(ID) {
188  }
189 
190  bool runOnMachineFunction(MachineFunction &Fn) override;
191 
192  void getAnalysisUsage(AnalysisUsage &AU) const override {
193  AU.setPreservesCFG();
196  }
197 
198  MachineFunctionProperties getRequiredProperties() const override {
201  }
202 
203 private:
204  void runOnLoop(MachineLoop &L, MachineFunction &Fn);
205 
206  const AArch64InstrInfo *TII;
207  const TargetRegisterInfo *TRI;
209  bool Modified;
210 };
211 
212 /// Bits from load opcodes used to compute HW prefetcher instruction tags.
213 struct LoadInfo {
214  LoadInfo() = default;
215 
216  Register DestReg;
217  Register BaseReg;
218  int BaseRegIdx = -1;
219  const MachineOperand *OffsetOpnd = nullptr;
220  bool IsPrePost = false;
221 };
222 
223 } // end anonymous namespace
224 
225 char FalkorHWPFFix::ID = 0;
226 
227 INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
228  "Falkor HW Prefetch Fix Late Phase", false, false)
230 INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
231  "Falkor HW Prefetch Fix Late Phase", false, false)
232 
/// Pack the three tag fields into one value:
///   bits [3:0]  - low 4 bits of \p Dest
///   bits [7:4]  - low 4 bits of \p Base
///   bits [13:8] - low 6 bits of \p Offset
static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
  const unsigned DestField = Dest & 0xf;
  const unsigned BaseField = (Base & 0xf) << 4;
  const unsigned OffsetField = (Offset & 0x3f) << 8;
  return DestField | BaseField | OffsetField;
}
236 
238  int DestRegIdx;
239  int BaseRegIdx;
240  int OffsetIdx;
241  bool IsPrePost;
242 
243  switch (MI.getOpcode()) {
244  default:
245  return None;
246 
247  case AArch64::LD1i64:
248  case AArch64::LD2i64:
249  DestRegIdx = 0;
250  BaseRegIdx = 3;
251  OffsetIdx = -1;
252  IsPrePost = false;
253  break;
254 
255  case AArch64::LD1i8:
256  case AArch64::LD1i16:
257  case AArch64::LD1i32:
258  case AArch64::LD2i8:
259  case AArch64::LD2i16:
260  case AArch64::LD2i32:
261  case AArch64::LD3i8:
262  case AArch64::LD3i16:
263  case AArch64::LD3i32:
264  case AArch64::LD3i64:
265  case AArch64::LD4i8:
266  case AArch64::LD4i16:
267  case AArch64::LD4i32:
268  case AArch64::LD4i64:
269  DestRegIdx = -1;
270  BaseRegIdx = 3;
271  OffsetIdx = -1;
272  IsPrePost = false;
273  break;
274 
275  case AArch64::LD1Onev1d:
276  case AArch64::LD1Onev2s:
277  case AArch64::LD1Onev4h:
278  case AArch64::LD1Onev8b:
279  case AArch64::LD1Onev2d:
280  case AArch64::LD1Onev4s:
281  case AArch64::LD1Onev8h:
282  case AArch64::LD1Onev16b:
283  case AArch64::LD1Rv1d:
284  case AArch64::LD1Rv2s:
285  case AArch64::LD1Rv4h:
286  case AArch64::LD1Rv8b:
287  case AArch64::LD1Rv2d:
288  case AArch64::LD1Rv4s:
289  case AArch64::LD1Rv8h:
290  case AArch64::LD1Rv16b:
291  DestRegIdx = 0;
292  BaseRegIdx = 1;
293  OffsetIdx = -1;
294  IsPrePost = false;
295  break;
296 
297  case AArch64::LD1Twov1d:
298  case AArch64::LD1Twov2s:
299  case AArch64::LD1Twov4h:
300  case AArch64::LD1Twov8b:
301  case AArch64::LD1Twov2d:
302  case AArch64::LD1Twov4s:
303  case AArch64::LD1Twov8h:
304  case AArch64::LD1Twov16b:
305  case AArch64::LD1Threev1d:
306  case AArch64::LD1Threev2s:
307  case AArch64::LD1Threev4h:
308  case AArch64::LD1Threev8b:
309  case AArch64::LD1Threev2d:
310  case AArch64::LD1Threev4s:
311  case AArch64::LD1Threev8h:
312  case AArch64::LD1Threev16b:
313  case AArch64::LD1Fourv1d:
314  case AArch64::LD1Fourv2s:
315  case AArch64::LD1Fourv4h:
316  case AArch64::LD1Fourv8b:
317  case AArch64::LD1Fourv2d:
318  case AArch64::LD1Fourv4s:
319  case AArch64::LD1Fourv8h:
320  case AArch64::LD1Fourv16b:
321  case AArch64::LD2Twov2s:
322  case AArch64::LD2Twov4s:
323  case AArch64::LD2Twov8b:
324  case AArch64::LD2Twov2d:
325  case AArch64::LD2Twov4h:
326  case AArch64::LD2Twov8h:
327  case AArch64::LD2Twov16b:
328  case AArch64::LD2Rv1d:
329  case AArch64::LD2Rv2s:
330  case AArch64::LD2Rv4s:
331  case AArch64::LD2Rv8b:
332  case AArch64::LD2Rv2d:
333  case AArch64::LD2Rv4h:
334  case AArch64::LD2Rv8h:
335  case AArch64::LD2Rv16b:
336  case AArch64::LD3Threev2s:
337  case AArch64::LD3Threev4h:
338  case AArch64::LD3Threev8b:
339  case AArch64::LD3Threev2d:
340  case AArch64::LD3Threev4s:
341  case AArch64::LD3Threev8h:
342  case AArch64::LD3Threev16b:
343  case AArch64::LD3Rv1d:
344  case AArch64::LD3Rv2s:
345  case AArch64::LD3Rv4h:
346  case AArch64::LD3Rv8b:
347  case AArch64::LD3Rv2d:
348  case AArch64::LD3Rv4s:
349  case AArch64::LD3Rv8h:
350  case AArch64::LD3Rv16b:
351  case AArch64::LD4Fourv2s:
352  case AArch64::LD4Fourv4h:
353  case AArch64::LD4Fourv8b:
354  case AArch64::LD4Fourv2d:
355  case AArch64::LD4Fourv4s:
356  case AArch64::LD4Fourv8h:
357  case AArch64::LD4Fourv16b:
358  case AArch64::LD4Rv1d:
359  case AArch64::LD4Rv2s:
360  case AArch64::LD4Rv4h:
361  case AArch64::LD4Rv8b:
362  case AArch64::LD4Rv2d:
363  case AArch64::LD4Rv4s:
364  case AArch64::LD4Rv8h:
365  case AArch64::LD4Rv16b:
366  DestRegIdx = -1;
367  BaseRegIdx = 1;
368  OffsetIdx = -1;
369  IsPrePost = false;
370  break;
371 
372  case AArch64::LD1i64_POST:
373  case AArch64::LD2i64_POST:
374  DestRegIdx = 1;
375  BaseRegIdx = 4;
376  OffsetIdx = 5;
377  IsPrePost = true;
378  break;
379 
380  case AArch64::LD1i8_POST:
381  case AArch64::LD1i16_POST:
382  case AArch64::LD1i32_POST:
383  case AArch64::LD2i8_POST:
384  case AArch64::LD2i16_POST:
385  case AArch64::LD2i32_POST:
386  case AArch64::LD3i8_POST:
387  case AArch64::LD3i16_POST:
388  case AArch64::LD3i32_POST:
389  case AArch64::LD3i64_POST:
390  case AArch64::LD4i8_POST:
391  case AArch64::LD4i16_POST:
392  case AArch64::LD4i32_POST:
393  case AArch64::LD4i64_POST:
394  DestRegIdx = -1;
395  BaseRegIdx = 4;
396  OffsetIdx = 5;
397  IsPrePost = true;
398  break;
399 
400  case AArch64::LD1Onev1d_POST:
401  case AArch64::LD1Onev2s_POST:
402  case AArch64::LD1Onev4h_POST:
403  case AArch64::LD1Onev8b_POST:
404  case AArch64::LD1Onev2d_POST:
405  case AArch64::LD1Onev4s_POST:
406  case AArch64::LD1Onev8h_POST:
407  case AArch64::LD1Onev16b_POST:
408  case AArch64::LD1Rv1d_POST:
409  case AArch64::LD1Rv2s_POST:
410  case AArch64::LD1Rv4h_POST:
411  case AArch64::LD1Rv8b_POST:
412  case AArch64::LD1Rv2d_POST:
413  case AArch64::LD1Rv4s_POST:
414  case AArch64::LD1Rv8h_POST:
415  case AArch64::LD1Rv16b_POST:
416  DestRegIdx = 1;
417  BaseRegIdx = 2;
418  OffsetIdx = 3;
419  IsPrePost = true;
420  break;
421 
422  case AArch64::LD1Twov1d_POST:
423  case AArch64::LD1Twov2s_POST:
424  case AArch64::LD1Twov4h_POST:
425  case AArch64::LD1Twov8b_POST:
426  case AArch64::LD1Twov2d_POST:
427  case AArch64::LD1Twov4s_POST:
428  case AArch64::LD1Twov8h_POST:
429  case AArch64::LD1Twov16b_POST:
430  case AArch64::LD1Threev1d_POST:
431  case AArch64::LD1Threev2s_POST:
432  case AArch64::LD1Threev4h_POST:
433  case AArch64::LD1Threev8b_POST:
434  case AArch64::LD1Threev2d_POST:
435  case AArch64::LD1Threev4s_POST:
436  case AArch64::LD1Threev8h_POST:
437  case AArch64::LD1Threev16b_POST:
438  case AArch64::LD1Fourv1d_POST:
439  case AArch64::LD1Fourv2s_POST:
440  case AArch64::LD1Fourv4h_POST:
441  case AArch64::LD1Fourv8b_POST:
442  case AArch64::LD1Fourv2d_POST:
443  case AArch64::LD1Fourv4s_POST:
444  case AArch64::LD1Fourv8h_POST:
445  case AArch64::LD1Fourv16b_POST:
446  case AArch64::LD2Twov2s_POST:
447  case AArch64::LD2Twov4s_POST:
448  case AArch64::LD2Twov8b_POST:
449  case AArch64::LD2Twov2d_POST:
450  case AArch64::LD2Twov4h_POST:
451  case AArch64::LD2Twov8h_POST:
452  case AArch64::LD2Twov16b_POST:
453  case AArch64::LD2Rv1d_POST:
454  case AArch64::LD2Rv2s_POST:
455  case AArch64::LD2Rv4s_POST:
456  case AArch64::LD2Rv8b_POST:
457  case AArch64::LD2Rv2d_POST:
458  case AArch64::LD2Rv4h_POST:
459  case AArch64::LD2Rv8h_POST:
460  case AArch64::LD2Rv16b_POST:
461  case AArch64::LD3Threev2s_POST:
462  case AArch64::LD3Threev4h_POST:
463  case AArch64::LD3Threev8b_POST:
464  case AArch64::LD3Threev2d_POST:
465  case AArch64::LD3Threev4s_POST:
466  case AArch64::LD3Threev8h_POST:
467  case AArch64::LD3Threev16b_POST:
468  case AArch64::LD3Rv1d_POST:
469  case AArch64::LD3Rv2s_POST:
470  case AArch64::LD3Rv4h_POST:
471  case AArch64::LD3Rv8b_POST:
472  case AArch64::LD3Rv2d_POST:
473  case AArch64::LD3Rv4s_POST:
474  case AArch64::LD3Rv8h_POST:
475  case AArch64::LD3Rv16b_POST:
476  case AArch64::LD4Fourv2s_POST:
477  case AArch64::LD4Fourv4h_POST:
478  case AArch64::LD4Fourv8b_POST:
479  case AArch64::LD4Fourv2d_POST:
480  case AArch64::LD4Fourv4s_POST:
481  case AArch64::LD4Fourv8h_POST:
482  case AArch64::LD4Fourv16b_POST:
483  case AArch64::LD4Rv1d_POST:
484  case AArch64::LD4Rv2s_POST:
485  case AArch64::LD4Rv4h_POST:
486  case AArch64::LD4Rv8b_POST:
487  case AArch64::LD4Rv2d_POST:
488  case AArch64::LD4Rv4s_POST:
489  case AArch64::LD4Rv8h_POST:
490  case AArch64::LD4Rv16b_POST:
491  DestRegIdx = -1;
492  BaseRegIdx = 2;
493  OffsetIdx = 3;
494  IsPrePost = true;
495  break;
496 
497  case AArch64::LDRBBroW:
498  case AArch64::LDRBBroX:
499  case AArch64::LDRBBui:
500  case AArch64::LDRBroW:
501  case AArch64::LDRBroX:
502  case AArch64::LDRBui:
503  case AArch64::LDRDl:
504  case AArch64::LDRDroW:
505  case AArch64::LDRDroX:
506  case AArch64::LDRDui:
507  case AArch64::LDRHHroW:
508  case AArch64::LDRHHroX:
509  case AArch64::LDRHHui:
510  case AArch64::LDRHroW:
511  case AArch64::LDRHroX:
512  case AArch64::LDRHui:
513  case AArch64::LDRQl:
514  case AArch64::LDRQroW:
515  case AArch64::LDRQroX:
516  case AArch64::LDRQui:
517  case AArch64::LDRSBWroW:
518  case AArch64::LDRSBWroX:
519  case AArch64::LDRSBWui:
520  case AArch64::LDRSBXroW:
521  case AArch64::LDRSBXroX:
522  case AArch64::LDRSBXui:
523  case AArch64::LDRSHWroW:
524  case AArch64::LDRSHWroX:
525  case AArch64::LDRSHWui:
526  case AArch64::LDRSHXroW:
527  case AArch64::LDRSHXroX:
528  case AArch64::LDRSHXui:
529  case AArch64::LDRSWl:
530  case AArch64::LDRSWroW:
531  case AArch64::LDRSWroX:
532  case AArch64::LDRSWui:
533  case AArch64::LDRSl:
534  case AArch64::LDRSroW:
535  case AArch64::LDRSroX:
536  case AArch64::LDRSui:
537  case AArch64::LDRWl:
538  case AArch64::LDRWroW:
539  case AArch64::LDRWroX:
540  case AArch64::LDRWui:
541  case AArch64::LDRXl:
542  case AArch64::LDRXroW:
543  case AArch64::LDRXroX:
544  case AArch64::LDRXui:
545  case AArch64::LDURBBi:
546  case AArch64::LDURBi:
547  case AArch64::LDURDi:
548  case AArch64::LDURHHi:
549  case AArch64::LDURHi:
550  case AArch64::LDURQi:
551  case AArch64::LDURSBWi:
552  case AArch64::LDURSBXi:
553  case AArch64::LDURSHWi:
554  case AArch64::LDURSHXi:
555  case AArch64::LDURSWi:
556  case AArch64::LDURSi:
557  case AArch64::LDURWi:
558  case AArch64::LDURXi:
559  DestRegIdx = 0;
560  BaseRegIdx = 1;
561  OffsetIdx = 2;
562  IsPrePost = false;
563  break;
564 
565  case AArch64::LDRBBpost:
566  case AArch64::LDRBBpre:
567  case AArch64::LDRBpost:
568  case AArch64::LDRBpre:
569  case AArch64::LDRDpost:
570  case AArch64::LDRDpre:
571  case AArch64::LDRHHpost:
572  case AArch64::LDRHHpre:
573  case AArch64::LDRHpost:
574  case AArch64::LDRHpre:
575  case AArch64::LDRQpost:
576  case AArch64::LDRQpre:
577  case AArch64::LDRSBWpost:
578  case AArch64::LDRSBWpre:
579  case AArch64::LDRSBXpost:
580  case AArch64::LDRSBXpre:
581  case AArch64::LDRSHWpost:
582  case AArch64::LDRSHWpre:
583  case AArch64::LDRSHXpost:
584  case AArch64::LDRSHXpre:
585  case AArch64::LDRSWpost:
586  case AArch64::LDRSWpre:
587  case AArch64::LDRSpost:
588  case AArch64::LDRSpre:
589  case AArch64::LDRWpost:
590  case AArch64::LDRWpre:
591  case AArch64::LDRXpost:
592  case AArch64::LDRXpre:
593  DestRegIdx = 1;
594  BaseRegIdx = 2;
595  OffsetIdx = 3;
596  IsPrePost = true;
597  break;
598 
599  case AArch64::LDNPDi:
600  case AArch64::LDNPQi:
601  case AArch64::LDNPSi:
602  case AArch64::LDPQi:
603  case AArch64::LDPDi:
604  case AArch64::LDPSi:
605  DestRegIdx = -1;
606  BaseRegIdx = 2;
607  OffsetIdx = 3;
608  IsPrePost = false;
609  break;
610 
611  case AArch64::LDPSWi:
612  case AArch64::LDPWi:
613  case AArch64::LDPXi:
614  DestRegIdx = 0;
615  BaseRegIdx = 2;
616  OffsetIdx = 3;
617  IsPrePost = false;
618  break;
619 
620  case AArch64::LDPQpost:
621  case AArch64::LDPQpre:
622  case AArch64::LDPDpost:
623  case AArch64::LDPDpre:
624  case AArch64::LDPSpost:
625  case AArch64::LDPSpre:
626  DestRegIdx = -1;
627  BaseRegIdx = 3;
628  OffsetIdx = 4;
629  IsPrePost = true;
630  break;
631 
632  case AArch64::LDPSWpost:
633  case AArch64::LDPSWpre:
634  case AArch64::LDPWpost:
635  case AArch64::LDPWpre:
636  case AArch64::LDPXpost:
637  case AArch64::LDPXpre:
638  DestRegIdx = 1;
639  BaseRegIdx = 3;
640  OffsetIdx = 4;
641  IsPrePost = true;
642  break;
643  }
644 
645  // Loads from the stack pointer don't get prefetched.
646  Register BaseReg = MI.getOperand(BaseRegIdx).getReg();
647  if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP)
648  return None;
649 
650  LoadInfo LI;
651  LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg();
652  LI.BaseReg = BaseReg;
653  LI.BaseRegIdx = BaseRegIdx;
654  LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
655  LI.IsPrePost = IsPrePost;
656  return LI;
657 }
658 
660  const MachineInstr &MI, const LoadInfo &LI) {
661  unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0;
662  unsigned Base = TRI->getEncodingValue(LI.BaseReg);
663  unsigned Off;
664  if (LI.OffsetOpnd == nullptr)
665  Off = 0;
666  else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() ||
667  LI.OffsetOpnd->isCPI())
668  return None;
669  else if (LI.OffsetOpnd->isReg())
670  Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg());
671  else
672  Off = LI.OffsetOpnd->getImm() >> 2;
673 
674  return makeTag(Dest, Base, Off);
675 }
676 
677 void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
678  // Build the initial tag map for the whole loop.
679  TagMap.clear();
680  for (MachineBasicBlock *MBB : L.getBlocks())
681  for (MachineInstr &MI : *MBB) {
683  if (!LInfo)
684  continue;
685  Optional<unsigned> Tag = getTag(TRI, MI, *LInfo);
686  if (!Tag)
687  continue;
688  TagMap[*Tag].push_back(&MI);
689  }
690 
691  bool AnyCollisions = false;
692  for (auto &P : TagMap) {
693  auto Size = P.second.size();
694  if (Size > 1) {
695  for (auto *MI : P.second) {
696  if (TII->isStridedAccess(*MI)) {
697  AnyCollisions = true;
698  break;
699  }
700  }
701  }
702  if (AnyCollisions)
703  break;
704  }
705  // Nothing to fix.
706  if (!AnyCollisions)
707  return;
708 
710 
711  // Go through all the basic blocks in the current loop and fix any streaming
712  // loads to avoid collisions with any other loads.
713  LiveRegUnits LR(*TRI);
714  for (MachineBasicBlock *MBB : L.getBlocks()) {
715  LR.clear();
716  LR.addLiveOuts(*MBB);
717  for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) {
718  MachineInstr &MI = *I;
719  if (!TII->isStridedAccess(MI))
720  continue;
721 
723  if (!OptLdI)
724  continue;
725  LoadInfo LdI = *OptLdI;
726  Optional<unsigned> OptOldTag = getTag(TRI, MI, LdI);
727  if (!OptOldTag)
728  continue;
729  auto &OldCollisions = TagMap[*OptOldTag];
730  if (OldCollisions.size() <= 1)
731  continue;
732 
733  bool Fixed = false;
734  LLVM_DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
735 
736  if (!DebugCounter::shouldExecute(FixCounter)) {
737  LLVM_DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI);
738  continue;
739  }
740 
741  // Add the non-base registers of MI as live so we don't use them as
742  // scratch registers.
743  for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
744  if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
745  continue;
746  MachineOperand &MO = MI.getOperand(OpI);
747  if (MO.isReg() && MO.readsReg())
748  LR.addReg(MO.getReg());
749  }
750 
751  for (unsigned ScratchReg : AArch64::GPR64RegClass) {
752  if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
753  continue;
754 
755  LoadInfo NewLdI(LdI);
756  NewLdI.BaseReg = ScratchReg;
757  unsigned NewTag = *getTag(TRI, MI, NewLdI);
758  // Scratch reg tag would collide too, so don't use it.
759  if (TagMap.count(NewTag))
760  continue;
761 
762  LLVM_DEBUG(dbgs() << "Changing base reg to: "
763  << printReg(ScratchReg, TRI) << '\n');
764 
765  // Rewrite:
766  // Xd = LOAD Xb, off
767  // to:
768  // Xc = MOV Xb
769  // Xd = LOAD Xc, off
770  DebugLoc DL = MI.getDebugLoc();
771  BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg)
772  .addReg(AArch64::XZR)
773  .addReg(LdI.BaseReg)
774  .addImm(0);
775  MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx);
776  BaseOpnd.setReg(ScratchReg);
777 
778  // If the load does a pre/post increment, then insert a MOV after as
779  // well to update the real base register.
780  if (LdI.IsPrePost) {
781  LLVM_DEBUG(dbgs() << "Doing post MOV of incremented reg: "
782  << printReg(ScratchReg, TRI) << '\n');
783  MI.getOperand(0).setReg(
784  ScratchReg); // Change tied operand pre/post update dest.
786  TII->get(AArch64::ORRXrs), LdI.BaseReg)
787  .addReg(AArch64::XZR)
788  .addReg(ScratchReg)
789  .addImm(0);
790  }
791 
792  for (int I = 0, E = OldCollisions.size(); I != E; ++I)
793  if (OldCollisions[I] == &MI) {
794  std::swap(OldCollisions[I], OldCollisions[E - 1]);
795  OldCollisions.pop_back();
796  break;
797  }
798 
799  // Update TagMap to reflect instruction changes to reduce the number
800  // of later MOVs to be inserted. This needs to be done after
801  // OldCollisions is updated since it may be relocated by this
802  // insertion.
803  TagMap[NewTag].push_back(&MI);
804  ++NumCollisionsAvoided;
805  Fixed = true;
806  Modified = true;
807  break;
808  }
809  if (!Fixed)
810  ++NumCollisionsNotAvoided;
811  }
812  }
813 }
814 
815 bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
816  auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
817  if (ST.getProcFamily() != AArch64Subtarget::Falkor)
818  return false;
819 
820  if (skipFunction(Fn.getFunction()))
821  return false;
822 
823  TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
824  TRI = ST.getRegisterInfo();
825 
826  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
827 
828  Modified = false;
829 
830  for (MachineLoop *I : LI)
831  for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
832  // Only process inner-loops
833  if (L->isInnermost())
834  runOnLoop(**L, Fn);
835 
836  return Modified;
837 }
838 
839 FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
FALKOR_STRIDED_ACCESS_MD
#define FALKOR_STRIDED_ACCESS_MD
Definition: AArch64InstrInfo.h:36
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
Phase
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
Definition: AArch64FalkorHWPFFix.cpp:231
AArch64.h
Optional.h
Metadata.h
llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition: ScalarEvolutionExpressions.h:379
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AArch64FalkorHWPFFix.cpp:57
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
fix
arm execution domain fix
Definition: ARMTargetMachine.cpp:391
Pass.h
Statistic.h
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::df_end
df_iterator< T > df_end(const T &G)
Definition: DepthFirstIterator.h:223
MachineBasicBlock.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::MCRegisterInfo::getEncodingValue
uint16_t getEncodingValue(MCRegister RegNo) const
Returns the encoding for RegNo.
Definition: MCRegisterInfo.h:553
ScalarEvolution.h
DenseMap.h
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:446
late
aarch64 falkor hwpf fix late
Definition: AArch64FalkorHWPFFix.cpp:230
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::Optional
Definition: APInt.h:33
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
getLoadInfo
static Optional< LoadInfo > getLoadInfo(const MachineInstr &MI)
Definition: AArch64FalkorHWPFFix.cpp:237
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:104
getTag
static Optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
Definition: AArch64FalkorHWPFFix.cpp:659
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::initializeFalkorHWPFFixPass
void initializeFalkorHWPFFixPass(PassRegistry &)
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
MachineRegisterInfo.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:640
MachineLoopInfo.h
AArch64TargetMachine.h
AArch64InstrInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AArch64InstrInfo
Definition: AArch64InstrInfo.h:38
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineRegisterInfo::isReserved
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
Definition: MachineRegisterInfo.h:928
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
Prefetch
A record for a potential prefetch made during the initial scan of the loop.
Definition: LoopDataPrefetch.cpp:235
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:173
llvm::Instruction
Definition: Instruction.h:45
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:278
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::AArch64TargetMachine
Definition: AArch64TargetMachine.h:25
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
LoopDeletionResult::Modified
@ Modified
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
DEBUG_COUNTER
DEBUG_COUNTER(FixCounter, "falkor-hwpf", "Controls which tag collisions are avoided")
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2084
DebugLoc.h
llvm::LiveRegUnits
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, "Falkor HW Prefetch Fix", false, false) INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy
llvm::None
const NoneType None
Definition: None.h:23
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
makeTag
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late static false unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset)
Definition: AArch64FalkorHWPFFix.cpp:233
llvm::MachineFunctionProperties::Property::NoVRegs
@ NoVRegs
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4090
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:630
llvm::createFalkorHWPFFixPass
FunctionPass * createFalkorHWPFFixPass()
Definition: AArch64FalkorHWPFFix.cpp:839
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::LoadInfo
Definition: GVNHoist.cpp:182
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
TargetPassConfig.h
llvm::df_begin
df_iterator< T > df_begin(const T &G)
Definition: DepthFirstIterator.h:218
MachineFunctionPass.h
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::LoopInfo
Definition: LoopInfo.h:1083
None.h
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:990
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetPassConfig::getTM
TMC & getTM() const
Get the right type of TargetMachine for this target.
Definition: TargetPassConfig.h:151
LiveRegUnits.h
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:596
llvm::initializeFalkorMarkStridedAccessesLegacyPass
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry &)
llvm::MachineOperand::readsReg
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
Definition: MachineOperand.h:458
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
Casting.h
Function.h
DebugCounter.h
ScalarEvolutionExpressions.h
Instructions.h
AArch64Subtarget.h
SmallVector.h
MachineInstrBuilder.h
Dominators.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
Fix
Falkor HW Prefetch Fix
Definition: AArch64FalkorHWPFFix.cpp:114
MachineOperand.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38
raw_ostream.h
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::createFalkorMarkStridedAccessesPass
FunctionPass * createFalkorMarkStridedAccessesPass()
Definition: AArch64FalkorHWPFFix.cpp:116
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AArch64Subtarget::Falkor
@ Falkor
Definition: AArch64Subtarget.h:66
TargetRegisterInfo.h
Debug.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::FloatStyle::Fixed
@ Fixed
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37