LLVM  9.0.0svn
SystemZTDC.cpp
Go to the documentation of this file.
1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
11 //
12 // Roughly, the following rules are recognized:
13 //
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24 //
25 // The pass works in 4 steps:
26 //
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in
29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is
30 // a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used
33 // as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 // replacing them will actually simplify the final code) are replaced
36 // with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
38 //
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "SystemZ.h"
47 #include "llvm/ADT/MapVector.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/IRBuilder.h"
50 #include "llvm/IR/InstIterator.h"
51 #include "llvm/IR/Instructions.h"
52 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Module.h"
55 #include <deque>
56 #include <set>
57 
58 using namespace llvm;
59 
60 namespace llvm {
62 }
63 
64 namespace {
65 
66 class SystemZTDCPass : public FunctionPass {
67 public:
68  static char ID;
69  SystemZTDCPass() : FunctionPass(ID) {
71  }
72 
73  bool runOnFunction(Function &F) override;
74 private:
75  // Maps seen instructions that can be mapped to a TDC, values are
76  // (TDC operand, TDC mask, worthy flag) triples.
78  // The queue of and/or/xor i1 instructions to be potentially folded.
79  std::vector<BinaryOperator *> LogicOpsWorklist;
80  // Instructions matched while folding, to be removed at the end if unused.
81  std::set<Instruction *> PossibleJunk;
82 
83  // Tries to convert a fcmp instruction.
84  void convertFCmp(CmpInst &I);
85 
86  // Tries to convert an icmp instruction.
87  void convertICmp(CmpInst &I);
88 
89  // Tries to convert an i1 and/or/xor instruction, whose both operands
90  // have been already converted.
91  void convertLogicOp(BinaryOperator &I);
92 
93  // Marks an instruction as converted - adds it to ConvertedInsts and adds
94  // any and/or/xor i1 users to the queue.
95  void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
96  ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
97  auto &M = *I->getFunction()->getParent();
98  auto &Ctx = M.getContext();
99  for (auto *U : I->users()) {
100  auto *LI = dyn_cast<BinaryOperator>(U);
101  if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
102  (LI->getOpcode() == Instruction::And ||
103  LI->getOpcode() == Instruction::Or ||
104  LI->getOpcode() == Instruction::Xor)) {
105  LogicOpsWorklist.push_back(LI);
106  }
107  }
108  }
109 };
110 
111 } // end anonymous namespace
112 
113 char SystemZTDCPass::ID = 0;
114 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
115  "SystemZ Test Data Class optimization", false, false)
116 
118  return new SystemZTDCPass();
119 }
120 
121 void SystemZTDCPass::convertFCmp(CmpInst &I) {
122  Value *Op0 = I.getOperand(0);
123  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
124  auto Pred = I.getPredicate();
125  // Only comparisons with consts are interesting.
126  if (!Const)
127  return;
128  // Compute the smallest normal number (and its negation).
129  auto &Sem = Op0->getType()->getFltSemantics();
130  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
131  APFloat NegSmallest = Smallest;
132  NegSmallest.changeSign();
133  // Check if Const is one of our recognized consts.
134  int WhichConst;
135  if (Const->isZero()) {
136  // All comparisons with 0 can be converted.
137  WhichConst = 0;
138  } else if (Const->isInfinity()) {
139  // Likewise for infinities.
140  WhichConst = Const->isNegative() ? 2 : 1;
141  } else if (Const->isExactlyValue(Smallest)) {
142  // For Smallest, we cannot do EQ separately from GT.
143  if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
144  (Pred & CmpInst::FCMP_OGE) != 0)
145  return;
146  WhichConst = 3;
147  } else if (Const->isExactlyValue(NegSmallest)) {
148  // Likewise for NegSmallest, we cannot do EQ separately from LT.
149  if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
150  (Pred & CmpInst::FCMP_OLE) != 0)
151  return;
152  WhichConst = 4;
153  } else {
154  // Not one of our special constants.
155  return;
156  }
157  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
158  static const int Masks[][4] = {
159  { // 0
160  SystemZ::TDCMASK_ZERO, // eq
163  SystemZ::TDCMASK_NAN, // un
164  },
165  { // inf
167  0, // gt
172  SystemZ::TDCMASK_NAN, // un
173  },
174  { // -inf
180  0, // lt
181  SystemZ::TDCMASK_NAN, // un
182  },
183  { // minnorm
184  0, // eq (unsupported)
186  SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
190  SystemZ::TDCMASK_NAN, // un
191  },
192  { // -minnorm
193  0, // eq (unsupported)
198  SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
199  SystemZ::TDCMASK_NAN, // un
200  }
201  };
202  // Construct the mask as a combination of the partial masks.
203  int Mask = 0;
204  if (Pred & CmpInst::FCMP_OEQ)
205  Mask |= Masks[WhichConst][0];
206  if (Pred & CmpInst::FCMP_OGT)
207  Mask |= Masks[WhichConst][1];
208  if (Pred & CmpInst::FCMP_OLT)
209  Mask |= Masks[WhichConst][2];
210  if (Pred & CmpInst::FCMP_UNO)
211  Mask |= Masks[WhichConst][3];
212  // A lone fcmp is unworthy of tdc conversion on its own, but may become
213  // worthy if combined with fabs.
214  bool Worthy = false;
215  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
216  Function *F = CI->getCalledFunction();
217  if (F && F->getIntrinsicID() == Intrinsic::fabs) {
218  // Fold with fabs - adjust the mask appropriately.
219  Mask &= SystemZ::TDCMASK_PLUS;
220  Mask |= Mask >> 1;
221  Op0 = CI->getArgOperand(0);
222  // A combination of fcmp with fabs is a win, unless the constant
223  // involved is 0 (which is handled by later passes).
224  Worthy = WhichConst != 0;
225  PossibleJunk.insert(CI);
226  }
227  }
228  converted(&I, Op0, Mask, Worthy);
229 }
230 
231 void SystemZTDCPass::convertICmp(CmpInst &I) {
232  Value *Op0 = I.getOperand(0);
233  auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
234  auto Pred = I.getPredicate();
235  // All our icmp rules involve comparisons with consts.
236  if (!Const)
237  return;
238  if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
239  // Check for icmp+bitcast used for signbit.
240  if (!Cast->getSrcTy()->isFloatTy() &&
241  !Cast->getSrcTy()->isDoubleTy() &&
242  !Cast->getSrcTy()->isFP128Ty())
243  return;
244  Value *V = Cast->getOperand(0);
245  int Mask;
246  if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
247  // icmp slt (bitcast X), 0 - set if sign bit true
248  Mask = SystemZ::TDCMASK_MINUS;
249  } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
250  // icmp sgt (bitcast X), -1 - set if sign bit false
251  Mask = SystemZ::TDCMASK_PLUS;
252  } else {
253  // Not a sign bit check.
254  return;
255  }
256  PossibleJunk.insert(Cast);
257  converted(&I, V, Mask, true);
258  } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
259  // Check if this is a pre-existing call of our tdc intrinsic.
260  Function *F = CI->getCalledFunction();
261  if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
262  return;
263  if (!Const->isZero())
264  return;
265  Value *V = CI->getArgOperand(0);
266  auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
267  // Bail if the mask is not a constant.
268  if (!MaskC)
269  return;
270  int Mask = MaskC->getZExtValue();
271  Mask &= SystemZ::TDCMASK_ALL;
272  if (Pred == CmpInst::ICMP_NE) {
273  // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
274  } else if (Pred == CmpInst::ICMP_EQ) {
275  // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
276  Mask ^= SystemZ::TDCMASK_ALL;
277  } else {
278  // An unknown comparison - ignore.
279  return;
280  }
281  PossibleJunk.insert(CI);
282  converted(&I, V, Mask, false);
283  }
284 }
285 
286 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
287  Value *Op0, *Op1;
288  int Mask0, Mask1;
289  bool Worthy0, Worthy1;
290  std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
291  std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
292  if (Op0 != Op1)
293  return;
294  int Mask;
295  switch (I.getOpcode()) {
296  case Instruction::And:
297  Mask = Mask0 & Mask1;
298  break;
299  case Instruction::Or:
300  Mask = Mask0 | Mask1;
301  break;
302  case Instruction::Xor:
303  Mask = Mask0 ^ Mask1;
304  break;
305  default:
306  llvm_unreachable("Unknown op in convertLogicOp");
307  }
308  converted(&I, Op0, Mask, true);
309 }
310 
312  ConvertedInsts.clear();
313  LogicOpsWorklist.clear();
314  PossibleJunk.clear();
315 
316  // Look for icmp+fcmp instructions.
317  for (auto &I : instructions(F)) {
318  if (I.getOpcode() == Instruction::FCmp)
319  convertFCmp(cast<CmpInst>(I));
320  else if (I.getOpcode() == Instruction::ICmp)
321  convertICmp(cast<CmpInst>(I));
322  }
323 
324  // If none found, bail already.
325  if (ConvertedInsts.empty())
326  return false;
327 
328  // Process the queue of logic instructions.
329  while (!LogicOpsWorklist.empty()) {
330  BinaryOperator *Op = LogicOpsWorklist.back();
331  LogicOpsWorklist.pop_back();
332  // If both operands mapped, and the instruction itself not yet mapped,
333  // convert it.
334  if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
335  ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
336  !ConvertedInsts.count(Op))
337  convertLogicOp(*Op);
338  }
339 
340  // Time to actually replace the instructions. Do it in the reverse order
341  // of finding them, since there's a good chance the earlier ones will be
342  // unused (due to being folded into later ones).
343  Module &M = *F.getParent();
344  auto &Ctx = M.getContext();
345  Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
346  bool MadeChange = false;
347  for (auto &It : reverse(ConvertedInsts)) {
348  Instruction *I = It.first;
349  Value *V;
350  int Mask;
351  bool Worthy;
352  std::tie(V, Mask, Worthy) = It.second;
353  if (!I->user_empty()) {
354  // If used and unworthy of conversion, skip it.
355  if (!Worthy)
356  continue;
357  // Call the intrinsic, compare result with 0.
358  Function *TDCFunc =
359  Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
360  IRBuilder<> IRB(I);
361  Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
362  Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
363  Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
364  I->replaceAllUsesWith(ICmp);
365  }
366  // If unused, or used and converted, remove it.
367  I->eraseFromParent();
368  MadeChange = true;
369  }
370 
371  if (!MadeChange)
372  return false;
373 
374  // We've actually done something - now clear misc accumulated junk (fabs,
375  // bitcast).
376  for (auto *I : PossibleJunk)
377  if (I->user_empty())
378  I->eraseFromParent();
379 
380  return true;
381 }
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:636
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:172
void clear()
Definition: MapVector.h:88
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:316
const unsigned TDCMASK_ZERO
Definition: SystemZ.h:131
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool user_empty() const
Definition: Value.h:363
void initializeSystemZTDCPassPass(PassRegistry &)
const unsigned TDCMASK_NEGATIVE
Definition: SystemZ.h:135
This class represents a function call, abstracting a target machine&#39;s calling convention.
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:652
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
void changeSign()
Definition: APFloat.h:1049
const unsigned TDCMASK_POSITIVE
Definition: SystemZ.h:132
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:656
FunctionPass * createSystemZTDCPass()
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:653
bool empty() const
Definition: MapVector.h:79
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1018
Value * getOperand(unsigned i) const
Definition: User.h:169
static bool runOnFunction(Function &F, bool PostInlining)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:120
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:123
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:59
const unsigned TDCMASK_NAN
Definition: SystemZ.h:138
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:124
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
signed greater than
Definition: InstrTypes.h:673
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:650
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
const unsigned TDCMASK_ALL
Definition: SystemZ.h:150
Module.h This file contains the declarations for the Module class.
size_type count(const KeyT &Key) const
Definition: MapVector.h:142
signed less than
Definition: InstrTypes.h:675
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
iterator_range< user_iterator > users()
Definition: Value.h:399
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", "SystemZ Test Data Class optimization", false, false) FunctionPass *llvm
Definition: SystemZTDC.cpp:114
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:923
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:649
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:125
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:121
inst_range instructions(Function *F)
Definition: InstIterator.h:133
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:651
const unsigned TDCMASK_PLUS
Definition: SystemZ.h:142
const unsigned TDCMASK_MINUS
Definition: SystemZ.h:146
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:122
const fltSemantics & getFltSemantics() const
Definition: Type.h:168