LLVM 20.0.0git
SystemZTDC.cpp
Go to the documentation of this file.
1//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass looks for instructions that can be replaced by a Test Data Class
10// instruction, and replaces them when profitable.
11//
12// Roughly, the following rules are recognized:
13//
14// 1: fcmp pred X, 0 -> tdc X, mask
15// 2: fcmp pred X, +-inf -> tdc X, mask
16// 3: fcmp pred X, +-minnorm -> tdc X, mask
17// 4: tdc (fabs X), mask -> tdc X, newmask
18// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24//
25// The pass works in 4 steps:
26//
27// 1. All fcmp and icmp instructions in a function are checked for a match
28// with rules 1-3 and 5-7. Their TDC equivalents are stored in
29// the ConvertedInsts mapping. If the operand of a fcmp instruction is
30// a fabs, it's also folded according to rule 4.
31// 2. All and/or/xor i1 instructions whose both operands have been already
32// mapped are mapped according to rules 8-10. LogicOpsWorklist is used
33// as a queue of instructions to check.
34// 3. All mapped instructions that are considered worthy of conversion (ie.
35// replacing them will actually simplify the final code) are replaced
36// with a call to the s390.tdc intrinsic.
37// 4. All intermediate results of replaced instructions are removed if unused.
38//
39// Instructions that match rules 1-3 are considered unworthy of conversion
40// on their own (since a comparison instruction is superior), but are mapped
41// in the hopes of folding the result using rules 4 and 8-10 (likely removing
42// the original comparison in the process).
43//
44//===----------------------------------------------------------------------===//
45
46#include "SystemZ.h"
47#include "SystemZSubtarget.h"
48#include "llvm/ADT/MapVector.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/IRBuilder.h"
54#include "llvm/IR/IntrinsicsS390.h"
56#include "llvm/IR/Module.h"
58#include <set>
59
60using namespace llvm;
61
62namespace {
63
// Function pass that looks for fcmp/icmp instructions (and i1 and/or/xor
// combinations of them) that can be expressed as a Test Data Class test, and
// replaces the ones considered worthy with calls to the s390.tdc intrinsic.
64class SystemZTDCPass : public FunctionPass {
65public:
// Pass identification member; handed to the FunctionPass base constructor.
66 static char ID;
67 SystemZTDCPass() : FunctionPass(ID) {
// NOTE(review): the constructor body line is not visible in this listing.
69 }
70
// Per-function entry point: maps candidate instructions, folds logic ops,
// then performs the replacements.
71 bool runOnFunction(Function &F) override;
72
73 void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the body line is not visible in this listing; presumably it
// declares the TargetPassConfig analysis that runOnFunction fetches - confirm.
75 }
76
77private:
78 // Maps seen instructions that can be mapped to a TDC, values are
79 // (TDC operand, TDC mask, worthy flag) triples.
// NOTE(review): the ConvertedInsts declaration that the two lines above
// describe is not visible in this listing.
81 // The queue of and/or/xor i1 instructions to be potentially folded.
82 std::vector<BinaryOperator *> LogicOpsWorklist;
83 // Instructions matched while folding, to be removed at the end if unused.
84 std::set<Instruction *> PossibleJunk;
85
86 // Tries to convert a fcmp instruction.
87 void convertFCmp(CmpInst &I);
88
89 // Tries to convert an icmp instruction.
90 void convertICmp(CmpInst &I);
91
92 // Tries to convert an i1 and/or/xor instruction, whose both operands
93 // have been already converted.
94 void convertLogicOp(BinaryOperator &I);
95
96 // Marks an instruction as converted - adds it to ConvertedInsts and adds
97 // any and/or/xor i1 users to the queue.
//   I      - the instruction being recorded.
//   V      - the value the equivalent TDC would test.
//   Mask   - the TDC mask equivalent to I.
//   Worthy - whether replacing I with a TDC is considered a win on its own.
98 void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
99 ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
100 auto &M = *I->getFunction()->getParent();
101 auto &Ctx = M.getContext();
// Queue every user that is an i1-typed and/or/xor. Users are queued without
// checking their other operand; that check happens when the worklist is
// drained in runOnFunction, so the same user may be queued more than once.
102 for (auto *U : I->users()) {
103 auto *LI = dyn_cast<BinaryOperator>(U);
104 if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
105 (LI->getOpcode() == Instruction::And ||
106 LI->getOpcode() == Instruction::Or ||
107 LI->getOpcode() == Instruction::Xor)) {
108 LogicOpsWorklist.push_back(LI);
109 }
110 }
111 }
112};
113
114} // end anonymous namespace
115
// Definition of the pass identification member that the SystemZTDCPass
// constructor hands to FunctionPass.
116char SystemZTDCPass::ID = 0;
// Pass registration: "systemz-tdc" is the macro's `arg` parameter, the
// description string is its `name` parameter, and the two trailing `false`
// values are its `cfg` and `analysis` parameters.
117INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
118 "SystemZ Test Data Class optimization", false, false)
119
121 return new SystemZTDCPass();
122}
123
// Tries to map a floating-point comparison against a recognized constant
// (zero, +-infinity, +-smallest normalized number) to a TDC mask, folding a
// fabs on the compared value into the mask when present. On success the
// result is recorded through converted(); otherwise the function returns
// without recording anything.
//   I - the fcmp instruction to examine.
124void SystemZTDCPass::convertFCmp(CmpInst &I) {
125 Value *Op0 = I.getOperand(0);
126 auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
127 auto Pred = I.getPredicate();
128 // Only comparisons with consts are interesting.
129 if (!Const)
130 return;
131 // Compute the smallest normal number (and its negation).
132 auto &Sem = Op0->getType()->getFltSemantics();
// NOTE(review): the definition of Smallest is not visible in this listing;
// presumably it is built from Sem via APFloat::getSmallestNormalized - confirm.
134 APFloat NegSmallest = Smallest;
135 NegSmallest.changeSign();
136 // Check if Const is one of our recognized consts.
// WhichConst selects the row of the Masks table below:
// 0 = zero, 1 = +inf, 2 = -inf, 3 = minnorm, 4 = -minnorm.
137 int WhichConst;
138 if (Const->isZero()) {
139 // All comparisons with 0 can be converted.
140 WhichConst = 0;
141 } else if (Const->isInfinity()) {
142 // Likewise for infinities.
143 WhichConst = Const->isNegative() ? 2 : 1;
144 } else if (Const->isExactlyValue(Smallest)) {
145 // For Smallest, we cannot do EQ separately from GT.
// FCMP_OGE is the GT and EQ predicate bits together, so this bails out when
// exactly one of those two bits is set in Pred.
146 if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
147 (Pred & CmpInst::FCMP_OGE) != 0)
148 return;
149 WhichConst = 3;
150 } else if (Const->isExactlyValue(NegSmallest)) {
151 // Likewise for NegSmallest, we cannot do EQ separately from LT.
// FCMP_OLE is the LT and EQ predicate bits together, so this bails out when
// exactly one of those two bits is set in Pred.
152 if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
153 (Pred & CmpInst::FCMP_OLE) != 0)
154 return;
155 WhichConst = 4;
156 } else {
157 // Not one of our special constants.
158 return;
159 }
160 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
// Rows are indexed by WhichConst, columns by the EQ/GT/LT/UN order above.
// NOTE(review): most table entries are not visible in this listing; only the
// entries shown below can be confirmed from here.
161 static const int Masks[][4] = {
162 { // 0
167 },
168 { // inf
170 0, // gt
176 },
177 { // -inf
183 0, // lt
185 },
186 { // minnorm
187 0, // eq (unsupported)
189 SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
194 },
195 { // -minnorm
196 0, // eq (unsupported)
201 SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
203 }
204 };
205 // Construct the mask as a combination of the partial masks.
// Each predicate bit (EQ, GT, LT, UN) that is set in Pred contributes the
// matching column of the selected row.
206 int Mask = 0;
207 if (Pred & CmpInst::FCMP_OEQ)
208 Mask |= Masks[WhichConst][0];
209 if (Pred & CmpInst::FCMP_OGT)
210 Mask |= Masks[WhichConst][1];
211 if (Pred & CmpInst::FCMP_OLT)
212 Mask |= Masks[WhichConst][2];
213 if (Pred & CmpInst::FCMP_UNO)
214 Mask |= Masks[WhichConst][3];
215 // A lone fcmp is unworthy of tdc conversion on its own, but may become
216 // worthy if combined with fabs.
217 bool Worthy = false;
218 if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
219 Function *F = CI->getCalledFunction();
220 if (F && F->getIntrinsicID() == Intrinsic::fabs) {
221 // Fold with fabs - adjust the mask appropriately.
// NOTE(review): the statement that precedes the shift is not visible in this
// listing. The shift presumably mirrors each positive-class bit onto its
// negative-class neighbour - confirm against the TDCMASK_* bit layout.
223 Mask |= Mask >> 1;
// From here on the TDC tests the fabs argument rather than the fabs result.
224 Op0 = CI->getArgOperand(0);
225 // A combination of fcmp with fabs is a win, unless the constant
226 // involved is 0 (which is handled by later passes).
227 Worthy = WhichConst != 0;
// The fabs call may become dead after replacement; remember it for cleanup.
228 PossibleJunk.insert(CI);
229 }
230 }
231 converted(&I, Op0, Mask, Worthy);
232}
233
// Tries to map an integer comparison against a constant to a TDC test. Two
// shapes are handled: a sign-bit check on a bitcast floating-point value, and
// an eq/ne comparison against 0 of an existing s390.tdc intrinsic call with a
// constant mask. On success the result is recorded through converted();
// otherwise the function returns without recording anything.
//   I - the icmp instruction to examine.
234void SystemZTDCPass::convertICmp(CmpInst &I) {
235 Value *Op0 = I.getOperand(0);
236 auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
237 auto Pred = I.getPredicate();
238 // All our icmp rules involve comparisons with consts.
239 if (!Const)
240 return;
241 if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
242 // Check for icmp+bitcast used for signbit.
// Only float, double and fp128 sources are accepted.
243 if (!Cast->getSrcTy()->isFloatTy() &&
244 !Cast->getSrcTy()->isDoubleTy() &&
245 !Cast->getSrcTy()->isFP128Ty())
246 return;
247 Value *V = Cast->getOperand(0);
// NOTE(review): the statements that assign Mask in the two matching branches
// below are not visible in this listing.
248 int Mask;
249 if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
250 // icmp slt (bitcast X), 0 - set if sign bit true
252 } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
253 // icmp sgt (bitcast X), -1 - set if sign bit false
255 } else {
256 // Not a sign bit check.
257 return;
258 }
// The bitcast may become dead after replacement; remember it for cleanup.
259 PossibleJunk.insert(Cast);
// A sign-bit check is recorded as worthy of conversion on its own.
260 converted(&I, V, Mask, true);
261 } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
262 // Check if this is a pre-existing call of our tdc intrinsic.
263 Function *F = CI->getCalledFunction();
264 if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
265 return;
// Only comparisons of the intrinsic result against 0 are handled.
266 if (!Const->isZero())
267 return;
268 Value *V = CI->getArgOperand(0);
269 auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
270 // Bail if the mask is not a constant.
271 if (!MaskC)
272 return;
273 int Mask = MaskC->getZExtValue();
// NOTE(review): one line following the Mask initialization, and the statement
// inside the ICMP_EQ branch, are not visible in this listing.
275 if (Pred == CmpInst::ICMP_NE) {
276 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
277 } else if (Pred == CmpInst::ICMP_EQ) {
278 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
280 } else {
281 // An unknown comparison - ignore.
282 return;
283 }
// The original intrinsic call may become dead; remember it for cleanup.
284 PossibleJunk.insert(CI);
// Recorded as not worthy on its own; it can still take part in and/or/xor
// folding, which records its result as worthy.
285 converted(&I, V, Mask, false);
286 }
287}
288
289void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
290 Value *Op0, *Op1;
291 int Mask0, Mask1;
292 bool Worthy0, Worthy1;
293 std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
294 std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
295 if (Op0 != Op1)
296 return;
297 int Mask;
298 switch (I.getOpcode()) {
299 case Instruction::And:
300 Mask = Mask0 & Mask1;
301 break;
302 case Instruction::Or:
303 Mask = Mask0 | Mask1;
304 break;
305 case Instruction::Xor:
306 Mask = Mask0 ^ Mask1;
307 break;
308 default:
309 llvm_unreachable("Unknown op in convertLogicOp");
310 }
311 converted(&I, Op0, Mask, true);
312}
313
// Runs the pass over F: maps candidate fcmp/icmp instructions, folds i1
// and/or/xor instructions over them, replaces mapped instructions with a
// call to the s390.tdc intrinsic compared against 0, and finally erases
// helper instructions left without users.
// Returns true if at least one mapped instruction was erased, false otherwise.
314bool SystemZTDCPass::runOnFunction(Function &F) {
315 auto &TPC = getAnalysis<TargetPassConfig>();
// Do nothing when hasSoftFloat() reports true.
// NOTE(review): the middle line of this call chain is not visible in this
// listing; presumably it fetches the subtarget for F - confirm.
316 if (TPC.getTM<TargetMachine>()
318 .hasSoftFloat())
319 return false;
320
// Start from empty per-function state.
321 ConvertedInsts.clear();
322 LogicOpsWorklist.clear();
323 PossibleJunk.clear();
324
325 // Look for icmp+fcmp instructions.
326 for (auto &I : instructions(F)) {
327 if (I.getOpcode() == Instruction::FCmp)
328 convertFCmp(cast<CmpInst>(I));
329 else if (I.getOpcode() == Instruction::ICmp)
330 convertICmp(cast<CmpInst>(I));
331 }
332
333 // If none found, bail already.
334 if (ConvertedInsts.empty())
335 return false;
336
337 // Process the queue of logic instructions.
// Entries are taken from the back of the vector. Converting one may push its
// own and/or/xor users onto the worklist, so chains of logic ops fold
// transitively.
338 while (!LogicOpsWorklist.empty()) {
339 BinaryOperator *Op = LogicOpsWorklist.back();
340 LogicOpsWorklist.pop_back();
341 // If both operands mapped, and the instruction itself not yet mapped,
342 // convert it.
343 if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
344 ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
345 !ConvertedInsts.count(Op))
346 convertLogicOp(*Op);
347 }
348
349 // Time to actually replace the instructions. Do it in the reverse order
350 // of finding them, since there's a good chance the earlier ones will be
351 // unused (due to being folded into later ones).
352 Module &M = *F.getParent();
353 auto &Ctx = M.getContext();
// The i32 zero constant that each new TDC result is compared against.
354 Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
355 bool MadeChange = false;
356 for (auto &It : reverse(ConvertedInsts)) {
357 Instruction *I = It.first;
358 Value *V;
359 int Mask;
360 bool Worthy;
361 std::tie(V, Mask, Worthy) = It.second;
362 if (!I->user_empty()) {
363 // If used and unworthy of conversion, skip it.
364 if (!Worthy)
365 continue;
366 // Call the intrinsic, compare result with 0.
367 IRBuilder<> IRB(I);
368 Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
// NOTE(review): the line that declares TDC (receiving the result of the
// CreateIntrinsic call below) is not visible in this listing.
370 IRB.CreateIntrinsic(Intrinsic::s390_tdc, V->getType(), {V, MaskVal});
371 Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
372 I->replaceAllUsesWith(ICmp);
373 }
374 // If unused, or used and converted, remove it.
// A mapped instruction with no users is erased here regardless of Worthy.
375 I->eraseFromParent();
376 MadeChange = true;
377 }
378
379 if (!MadeChange)
380 return false;
381
382 // We've actually done something - now clear misc accumulated junk (fabs,
383 // bitcast).
// Only entries that ended up without users are erased.
384 for (auto *I : PossibleJunk)
385 if (I->user_empty())
386 I->eraseFromParent();
387
388 return true;
389}
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Target-Independent Code Generator Pass Configuration Options pass.
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1155
void changeSign()
Definition: APFloat.h:1294
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
size_type count(const KeyT &Key) const
Definition: MapVector.h:165
bool empty() const
Definition: MapVector.h:79
void clear()
Definition: MapVector.h:88
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
const fltSemantics & getFltSemantics() const
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
const unsigned TDCMASK_NEGATIVE
Definition: SystemZ.h:136
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned TDCMASK_MINUS
Definition: SystemZ.h:147
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned TDCMASK_ZERO
Definition: SystemZ.h:132
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned TDCMASK_ALL
Definition: SystemZ.h:151
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
const unsigned TDCMASK_POSITIVE
Definition: SystemZ.h:133
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned TDCMASK_NAN
Definition: SystemZ.h:139
const unsigned TDCMASK_PLUS
Definition: SystemZ.h:143
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
FunctionPass * createSystemZTDCPass()
void initializeSystemZTDCPassPass(PassRegistry &)