Bug Summary

File: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Warning: line 391, column 32
The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::APInt::WordType'
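
The flagged operation right-shifts an llvm::APInt::WordType (a 64-bit
unsigned word) by an amount that can reach 64. In C++, shifting by an
amount greater than or equal to the bit width of the (promoted) left
operand is undefined behavior, which is what the analyzer reports at
line 391. A minimal standalone sketch of the problem and the usual
guarded form (the names below are illustrative, not taken from
DAGCombiner.cpp):

    #include <cstdint>

    // Mirrors llvm::APInt::WordType, a 64-bit unsigned integer.
    using WordType = std::uint64_t;

    // Unsafe: undefined behavior when ShiftAmt >= 64, the pattern the
    // analyzer flags.
    WordType lshrUnsafe(WordType Word, unsigned ShiftAmt) {
      return Word >> ShiftAmt;
    }

    // Safe: map full-width (or larger) shifts to zero explicitly, so the
    // shift amount handed to the hardware is always in the range [0, 63].
    WordType lshrSafe(WordType Word, unsigned ShiftAmt) {
      return ShiftAmt >= 64 ? WordType(0) : Word >> ShiftAmt;
    }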

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn326246/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-7~svn326246/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-7~svn326246/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn326246/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn326246/build-llvm/lib/CodeGen/SelectionDAG -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-02-28-041547-14988-1 -x c++ /build/llvm-toolchain-snapshot-7~svn326246/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/llvm-toolchain-snapshot-7~svn326246/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11// both before and after the DAG is legalized.
12//
13// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14// primarily intended to handle simplification opportunities that are implicit
15// in the LLVM IR and exposed by the various codegen lowering phases.
16//
17//===----------------------------------------------------------------------===//
18
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallBitVector.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Statistic.h"
32#include "llvm/Analysis/AliasAnalysis.h"
33#include "llvm/Analysis/MemoryLocation.h"
34#include "llvm/CodeGen/DAGCombine.h"
35#include "llvm/CodeGen/ISDOpcodes.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineMemOperand.h"
39#include "llvm/CodeGen/MachineValueType.h"
40#include "llvm/CodeGen/RuntimeLibcalls.h"
41#include "llvm/CodeGen/SelectionDAG.h"
42#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43#include "llvm/CodeGen/SelectionDAGNodes.h"
44#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetRegisterInfo.h"
47#include "llvm/CodeGen/TargetSubtargetInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constant.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/LLVMContext.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/Support/Casting.h"
57#include "llvm/Support/CodeGen.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Compiler.h"
60#include "llvm/Support/Debug.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/KnownBits.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Support/raw_ostream.h"
65#include "llvm/Target/TargetMachine.h"
66#include "llvm/Target/TargetOptions.h"
67#include <algorithm>
68#include <cassert>
69#include <cstdint>
70#include <functional>
71#include <iterator>
72#include <string>
73#include <tuple>
74#include <utility>
75#include <vector>
76
77using namespace llvm;
78
79#define DEBUG_TYPE "dagcombine"
80
81STATISTIC(NodesCombined , "Number of dag nodes combined");
82STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86STATISTIC(SlicedLoads, "Number of load sliced");
87
88static cl::opt<bool>
89CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92static cl::opt<bool>
93UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
95
96#ifndef NDEBUG
97static cl::opt<std::string>
98CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
100 " function"));
101#endif
102
103/// Hidden option to stress test load slicing, i.e., when this option
104/// is enabled, load slicing bypasses most of its profitability guards.
105static cl::opt<bool>
106StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
108 cl::init(false));
109
110static cl::opt<bool>
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
113
114namespace {
115
116 class DAGCombiner {
117 SelectionDAG &DAG;
118 const TargetLowering &TLI;
119 CombineLevel Level;
120 CodeGenOpt::Level OptLevel;
121 bool LegalOperations = false;
122 bool LegalTypes = false;
123 bool ForCodeSize;
124
125 /// \brief Worklist of all of the nodes that need to be simplified.
126 ///
127 /// This must behave as a stack -- new nodes to process are pushed onto the
128 /// back and when processing we pop off of the back.
129 ///
130 /// The worklist will not contain duplicates but may contain null entries
131 /// due to nodes being deleted from the underlying DAG.
132 SmallVector<SDNode *, 64> Worklist;
133
134 /// \brief Mapping from an SDNode to its position on the worklist.
135 ///
136 /// This is used to find and remove nodes from the worklist (by nulling
137 /// them) when they are deleted from the underlying DAG. It relies on
138 /// stable indices of nodes within the worklist.
139 DenseMap<SDNode *, unsigned> WorklistMap;
140
141 /// \brief Set of nodes which have been combined (at least once).
142 ///
143 /// This is used to allow us to reliably add any operands of a DAG node
144 /// which have not yet been combined to the worklist.
145 SmallPtrSet<SDNode *, 32> CombinedNodes;
146
147 // AA - Used for DAG load/store alias analysis.
148 AliasAnalysis *AA;
149
150 /// When an instruction is simplified, add all users of the instruction to
151 /// the work lists because they might get more simplified now.
152 void AddUsersToWorklist(SDNode *N) {
153 for (SDNode *Node : N->uses())
154 AddToWorklist(Node);
155 }
156
157 /// Call the node-specific routine that folds each particular type of node.
158 SDValue visit(SDNode *N);
159
160 public:
161 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163 OptLevel(OL), AA(AA) {
164 ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165
166 MaximumLegalStoreInBits = 0;
167 for (MVT VT : MVT::all_valuetypes())
168 if (EVT(VT).isSimple() && VT != MVT::Other &&
169 TLI.isTypeLegal(EVT(VT)) &&
170 VT.getSizeInBits() >= MaximumLegalStoreInBits)
171 MaximumLegalStoreInBits = VT.getSizeInBits();
172 }
173
174 /// Add to the worklist making sure its instance is at the back (next to be
175 /// processed).
176 void AddToWorklist(SDNode *N) {
177 assert(N->getOpcode() != ISD::DELETED_NODE &&
178        "Deleted Node added to Worklist");
179
180 // Skip handle nodes as they can't usefully be combined and confuse the
181 // zero-use deletion strategy.
182 if (N->getOpcode() == ISD::HANDLENODE)
183 return;
184
185 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186 Worklist.push_back(N);
187 }
188
189 /// Remove all instances of N from the worklist.
190 void removeFromWorklist(SDNode *N) {
191 CombinedNodes.erase(N);
192
193 auto It = WorklistMap.find(N);
194 if (It == WorklistMap.end())
195 return; // Not in the worklist.
196
197 // Null out the entry rather than erasing it to avoid a linear operation.
198 Worklist[It->second] = nullptr;
199 WorklistMap.erase(It);
200 }
201
202 void deleteAndRecombine(SDNode *N);
203 bool recursivelyDeleteUnusedNodes(SDNode *N);
204
205 /// Replaces all uses of the results of one DAG node with new values.
206 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207 bool AddTo = true);
208
209 /// Replaces all uses of the results of one DAG node with new values.
210 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211 return CombineTo(N, &Res, 1, AddTo);
212 }
213
214 /// Replaces all uses of the results of one DAG node with new values.
215 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216 bool AddTo = true) {
217 SDValue To[] = { Res0, Res1 };
218 return CombineTo(N, To, 2, AddTo);
219 }
220
221 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222
223 private:
224 unsigned MaximumLegalStoreInBits;
225
226 /// Check the specified integer node value to see if it can be simplified or
227 /// if things it uses can be simplified by bit propagation.
228 /// If so, return true.
229 bool SimplifyDemandedBits(SDValue Op) {
230 unsigned BitWidth = Op.getScalarValueSizeInBits();
231 APInt Demanded = APInt::getAllOnesValue(BitWidth);
232 return SimplifyDemandedBits(Op, Demanded);
233 }
234
235 /// Check the specified vector node value to see if it can be simplified or
236 /// if things it uses can be simplified as it only uses some of the
237 /// elements. If so, return true.
238 bool SimplifyDemandedVectorElts(SDValue Op) {
239 unsigned NumElts = Op.getValueType().getVectorNumElements();
240 APInt Demanded = APInt::getAllOnesValue(NumElts);
241 return SimplifyDemandedVectorElts(Op, Demanded);
242 }
243
244 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
246
247 bool CombineToPreIndexedLoadStore(SDNode *N);
248 bool CombineToPostIndexedLoadStore(SDNode *N);
249 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
250 bool SliceUpLoad(SDNode *N);
251
252 /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
253 /// load.
254 ///
255 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
256 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
257 /// \param EltNo index of the vector element to load.
258 /// \param OriginalLoad load that EVE came from to be replaced.
259 /// \returns EVE on success SDValue() on failure.
260 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
261 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
262 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
263 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
264 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
265 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
266 SDValue PromoteIntBinOp(SDValue Op);
267 SDValue PromoteIntShiftOp(SDValue Op);
268 SDValue PromoteExtend(SDValue Op);
269 bool PromoteLoad(SDValue Op);
270
271 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
272 SDValue OrigLoad, SDValue ExtLoad,
273 const SDLoc &DL,
274 ISD::NodeType ExtType);
275
276 /// Call the node-specific routine that knows how to fold each
277 /// particular type of node. If that doesn't do anything, try the
278 /// target-specific DAG combines.
279 SDValue combine(SDNode *N);
280
281 // Visitation implementation - Implement dag node combining for different
282 // node types. The semantics are as follows:
283 // Return Value:
284 // SDValue.getNode() == 0 - No change was made
285 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
286 // otherwise - N should be replaced by the returned Operand.
287 //
288 SDValue visitTokenFactor(SDNode *N);
289 SDValue visitMERGE_VALUES(SDNode *N);
290 SDValue visitADD(SDNode *N);
291 SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
292 SDValue visitSUB(SDNode *N);
293 SDValue visitADDC(SDNode *N);
294 SDValue visitUADDO(SDNode *N);
295 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
296 SDValue visitSUBC(SDNode *N);
297 SDValue visitUSUBO(SDNode *N);
298 SDValue visitADDE(SDNode *N);
299 SDValue visitADDCARRY(SDNode *N);
300 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
301 SDValue visitSUBE(SDNode *N);
302 SDValue visitSUBCARRY(SDNode *N);
303 SDValue visitMUL(SDNode *N);
304 SDValue useDivRem(SDNode *N);
305 SDValue visitSDIV(SDNode *N);
306 SDValue visitUDIV(SDNode *N);
307 SDValue visitREM(SDNode *N);
308 SDValue visitMULHU(SDNode *N);
309 SDValue visitMULHS(SDNode *N);
310 SDValue visitSMUL_LOHI(SDNode *N);
311 SDValue visitUMUL_LOHI(SDNode *N);
312 SDValue visitSMULO(SDNode *N);
313 SDValue visitUMULO(SDNode *N);
314 SDValue visitIMINMAX(SDNode *N);
315 SDValue visitAND(SDNode *N);
316 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
317 SDValue visitOR(SDNode *N);
318 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
319 SDValue visitXOR(SDNode *N);
320 SDValue SimplifyVBinOp(SDNode *N);
321 SDValue visitSHL(SDNode *N);
322 SDValue visitSRA(SDNode *N);
323 SDValue visitSRL(SDNode *N);
324 SDValue visitRotate(SDNode *N);
325 SDValue visitABS(SDNode *N);
326 SDValue visitBSWAP(SDNode *N);
327 SDValue visitBITREVERSE(SDNode *N);
328 SDValue visitCTLZ(SDNode *N);
329 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
330 SDValue visitCTTZ(SDNode *N);
331 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
332 SDValue visitCTPOP(SDNode *N);
333 SDValue visitSELECT(SDNode *N);
334 SDValue visitVSELECT(SDNode *N);
335 SDValue visitSELECT_CC(SDNode *N);
336 SDValue visitSETCC(SDNode *N);
337 SDValue visitSETCCE(SDNode *N);
338 SDValue visitSETCCCARRY(SDNode *N);
339 SDValue visitSIGN_EXTEND(SDNode *N);
340 SDValue visitZERO_EXTEND(SDNode *N);
341 SDValue visitANY_EXTEND(SDNode *N);
342 SDValue visitAssertExt(SDNode *N);
343 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
344 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
345 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
346 SDValue visitTRUNCATE(SDNode *N);
347 SDValue visitBITCAST(SDNode *N);
348 SDValue visitBUILD_PAIR(SDNode *N);
349 SDValue visitFADD(SDNode *N);
350 SDValue visitFSUB(SDNode *N);
351 SDValue visitFMUL(SDNode *N);
352 SDValue visitFMA(SDNode *N);
353 SDValue visitFDIV(SDNode *N);
354 SDValue visitFREM(SDNode *N);
355 SDValue visitFSQRT(SDNode *N);
356 SDValue visitFCOPYSIGN(SDNode *N);
357 SDValue visitSINT_TO_FP(SDNode *N);
358 SDValue visitUINT_TO_FP(SDNode *N);
359 SDValue visitFP_TO_SINT(SDNode *N);
360 SDValue visitFP_TO_UINT(SDNode *N);
361 SDValue visitFP_ROUND(SDNode *N);
362 SDValue visitFP_ROUND_INREG(SDNode *N);
363 SDValue visitFP_EXTEND(SDNode *N);
364 SDValue visitFNEG(SDNode *N);
365 SDValue visitFABS(SDNode *N);
366 SDValue visitFCEIL(SDNode *N);
367 SDValue visitFTRUNC(SDNode *N);
368 SDValue visitFFLOOR(SDNode *N);
369 SDValue visitFMINNUM(SDNode *N);
370 SDValue visitFMAXNUM(SDNode *N);
371 SDValue visitBRCOND(SDNode *N);
372 SDValue visitBR_CC(SDNode *N);
373 SDValue visitLOAD(SDNode *N);
374
375 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
376 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
377
378 SDValue visitSTORE(SDNode *N);
379 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
380 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
381 SDValue visitBUILD_VECTOR(SDNode *N);
382 SDValue visitCONCAT_VECTORS(SDNode *N);
383 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
384 SDValue visitVECTOR_SHUFFLE(SDNode *N);
385 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
386 SDValue visitINSERT_SUBVECTOR(SDNode *N);
387 SDValue visitMLOAD(SDNode *N);
388 SDValue visitMSTORE(SDNode *N);
389 SDValue visitMGATHER(SDNode *N);
390 SDValue visitMSCATTER(SDNode *N);
391 SDValue visitFP_TO_FP16(SDNode *N);
392 SDValue visitFP16_TO_FP(SDNode *N);
393
394 SDValue visitFADDForFMACombine(SDNode *N);
395 SDValue visitFSUBForFMACombine(SDNode *N);
396 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
397
398 SDValue XformToShuffleWithZero(SDNode *N);
399 SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
400 SDValue RHS);
401
402 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
403
404 SDValue foldSelectOfConstants(SDNode *N);
405 SDValue foldVSelectOfConstants(SDNode *N);
406 SDValue foldBinOpIntoSelect(SDNode *BO);
407 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
408 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
409 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
410 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
411 SDValue N2, SDValue N3, ISD::CondCode CC,
412 bool NotExtCompare = false);
413 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
414 SDValue N2, SDValue N3, ISD::CondCode CC);
415 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
416 const SDLoc &DL);
417 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
418 const SDLoc &DL, bool foldBooleans);
419 SDValue rebuildSetCC(SDValue N);
420
421 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
422 SDValue &CC) const;
423 bool isOneUseSetCC(SDValue N) const;
424
425 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
426 unsigned HiOp);
427 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
428 SDValue CombineExtLoad(SDNode *N);
429 SDValue combineRepeatedFPDivisors(SDNode *N);
430 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
431 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
432 SDValue BuildSDIV(SDNode *N);
433 SDValue BuildSDIVPow2(SDNode *N);
434 SDValue BuildUDIV(SDNode *N);
435 SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
436 SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
437 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
438 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
439 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
440 SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
441 SDNodeFlags Flags, bool Reciprocal);
442 SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
443 SDNodeFlags Flags, bool Reciprocal);
444 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
445 bool DemandHighBits = true);
446 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
447 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
448 SDValue InnerPos, SDValue InnerNeg,
449 unsigned PosOpcode, unsigned NegOpcode,
450 const SDLoc &DL);
451 SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
452 SDValue MatchLoadCombine(SDNode *N);
453 SDValue ReduceLoadWidth(SDNode *N);
454 SDValue ReduceLoadOpStoreWidth(SDNode *N);
455 SDValue splitMergedValStore(StoreSDNode *ST);
456 SDValue TransformFPLoadStorePair(SDNode *N);
457 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
458 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
459 SDValue reduceBuildVecToShuffle(SDNode *N);
460 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
461 ArrayRef<int> VectorMask, SDValue VecIn1,
462 SDValue VecIn2, unsigned LeftIdx);
463 SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
464
465 /// Walk up chain skipping non-aliasing memory nodes,
466 /// looking for aliasing nodes and adding them to the Aliases vector.
467 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
468 SmallVectorImpl<SDValue> &Aliases);
469
470 /// Return true if there is any possibility that the two addresses overlap.
471 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
472
473 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
474 /// chain (aliasing node.)
475 SDValue FindBetterChain(SDNode *N, SDValue Chain);
476
477 /// Try to replace a store and any possibly adjacent stores on
478 /// consecutive chains with better chains. Return true only if St is
479 /// replaced.
480 ///
481 /// Notice that other chains may still be replaced even if the function
482 /// returns false.
483 bool findBetterNeighborChains(StoreSDNode *St);
484
485 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
486 bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
487
488 /// Holds a pointer to an LSBaseSDNode as well as information on where it
489 /// is located in a sequence of memory operations connected by a chain.
490 struct MemOpLink {
491 // Ptr to the mem node.
492 LSBaseSDNode *MemNode;
493
494 // Offset from the base ptr.
495 int64_t OffsetFromBase;
496
497 MemOpLink(LSBaseSDNode *N, int64_t Offset)
498 : MemNode(N), OffsetFromBase(Offset) {}
499 };
500
501 /// This is a helper function for visitMUL to check the profitability
502 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
503 /// MulNode is the original multiply, AddNode is (add x, c1),
504 /// and ConstNode is c2.
505 bool isMulAddWithConstProfitable(SDNode *MulNode,
506 SDValue &AddNode,
507 SDValue &ConstNode);
508
509 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
510 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
511 /// the type of the loaded value to be extended.
512 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
513 EVT LoadResultTy, EVT &ExtVT);
514
515 /// Helper function to calculate whether the given Load can have its
516 /// width reduced to ExtVT.
517 bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
518 EVT &ExtVT, unsigned ShAmt = 0);
519
520 /// Used by BackwardsPropagateMask to find suitable loads.
521 bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
522 SmallPtrSetImpl<SDNode*> &NodeWithConsts,
523 ConstantSDNode *Mask, SDNode *&UncombinedNode);
524 /// Attempt to propagate a given AND node back to load leaves so that they
525 /// can be combined into narrow loads.
526 bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
527
528 /// Helper function for MergeConsecutiveStores which merges the
529 /// component store chains.
530 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
531 unsigned NumStores);
532
533 /// This is a helper function for MergeConsecutiveStores. When the
534 /// source elements of the consecutive stores are all constants or
535 /// all extracted vector elements, try to merge them into one
536 /// larger store introducing bitcasts if necessary. \return True
537 /// if a merged store was created.
538 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
539 EVT MemVT, unsigned NumStores,
540 bool IsConstantSrc, bool UseVector,
541 bool UseTrunc);
542
543 /// This is a helper function for MergeConsecutiveStores. Stores
544 /// that potentially may be merged with St are placed in
545 /// StoreNodes.
546 void getStoreMergeCandidates(StoreSDNode *St,
547 SmallVectorImpl<MemOpLink> &StoreNodes);
548
549 /// Helper function for MergeConsecutiveStores. Checks if
550 /// candidate stores have indirect dependency through their
551 /// operands. \return True if safe to merge.
552 bool checkMergeStoreCandidatesForDependencies(
553 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
554
555 /// Merge consecutive store operations into a wide store.
556 /// This optimization uses wide integers or vectors when possible.
557 /// \return number of stores that were merged into a merged store (the
558 /// affected nodes are stored as a prefix in \p StoreNodes).
559 bool MergeConsecutiveStores(StoreSDNode *N);
560
561 /// \brief Try to transform a truncation where C is a constant:
562 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
563 ///
564 /// \p N needs to be a truncation and its first operand an AND. Other
565 /// requirements are checked by the function (e.g. that trunc is
566 /// single-use) and if missed an empty SDValue is returned.
567 SDValue distributeTruncateThroughAnd(SDNode *N);
568
569 public:
570 /// Runs the dag combiner on all nodes in the work list
571 void Run(CombineLevel AtLevel);
572
573 SelectionDAG &getDAG() const { return DAG; }
574
575 /// Returns a type large enough to hold any valid shift amount - before type
576 /// legalization these can be huge.
577 EVT getShiftAmountTy(EVT LHSTy) {
578 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
579 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
580 }
581
582 /// This method returns true if we are running before type legalization or
583 /// if the specified VT is legal.
584 bool isTypeLegal(const EVT &VT) {
585 if (!LegalTypes) return true;
586 return TLI.isTypeLegal(VT);
587 }
588
589 /// Convenience wrapper around TargetLowering::getSetCCResultType
590 EVT getSetCCResultType(EVT VT) const {
591 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
592 }
593 };
594
595/// This class is a DAGUpdateListener that removes any deleted
596/// nodes from the worklist.
597class WorklistRemover : public SelectionDAG::DAGUpdateListener {
598 DAGCombiner &DC;
599
600public:
601 explicit WorklistRemover(DAGCombiner &dc)
602 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
603
604 void NodeDeleted(SDNode *N, SDNode *E) override {
605 DC.removeFromWorklist(N);
606 }
607};
608
609} // end anonymous namespace
610
611//===----------------------------------------------------------------------===//
612// TargetLowering::DAGCombinerInfo implementation
613//===----------------------------------------------------------------------===//
614
615void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
616 ((DAGCombiner*)DC)->AddToWorklist(N);
617}
618
619SDValue TargetLowering::DAGCombinerInfo::
620CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
621 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
622}
623
624SDValue TargetLowering::DAGCombinerInfo::
625CombineTo(SDNode *N, SDValue Res, bool AddTo) {
626 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
627}
628
629SDValue TargetLowering::DAGCombinerInfo::
630CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
631 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
632}
633
634void TargetLowering::DAGCombinerInfo::
635CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
636 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
637}
638
639//===----------------------------------------------------------------------===//
640// Helper Functions
641//===----------------------------------------------------------------------===//
642
643void DAGCombiner::deleteAndRecombine(SDNode *N) {
644 removeFromWorklist(N);
645
646 // If the operands of this node are only used by the node, they will now be
647 // dead. Make sure to re-visit them and recursively delete dead nodes.
648 for (const SDValue &Op : N->ops())
649 // For an operand generating multiple values, one of the values may
650 // become dead allowing further simplification (e.g. split index
651 // arithmetic from an indexed load).
652 if (Op->hasOneUse() || Op->getNumValues() > 1)
653 AddToWorklist(Op.getNode());
654
655 DAG.DeleteNode(N);
656}
657
658/// Return 1 if we can compute the negated form of the specified expression for
659/// the same cost as the expression itself, or 2 if we can compute the negated
660/// form more cheaply than the expression itself.
661static char isNegatibleForFree(SDValue Op, bool LegalOperations,
662 const TargetLowering &TLI,
663 const TargetOptions *Options,
664 unsigned Depth = 0) {
665 // fneg is removable even if it has multiple uses.
666 if (Op.getOpcode() == ISD::FNEG) return 2;
667
668 // Don't allow anything with multiple uses.
669 if (!Op.hasOneUse()) return 0;
670
671 // Don't recurse exponentially.
672 if (Depth > 6) return 0;
673
674 switch (Op.getOpcode()) {
675 default: return false;
676 case ISD::ConstantFP: {
677 if (!LegalOperations)
678 return 1;
679
680 // Don't invert constant FP values after legalization unless the target says
681 // the negated constant is legal.
682 EVT VT = Op.getValueType();
683 return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
684 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
685 }
686 case ISD::FADD:
687 // FIXME: determine better conditions for this xform.
688 if (!Options->UnsafeFPMath) return 0;
689
690 // After operation legalization, it might not be legal to create new FSUBs.
691 if (LegalOperations &&
692 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
693 return 0;
694
695 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
696 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
697 Options, Depth + 1))
698 return V;
699 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
700 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
701 Depth + 1);
702 case ISD::FSUB:
703 // We can't turn -(A-B) into B-A when we honor signed zeros.
704 if (!Options->NoSignedZerosFPMath &&
705 !Op.getNode()->getFlags().hasNoSignedZeros())
706 return 0;
707
708 // fold (fneg (fsub A, B)) -> (fsub B, A)
709 return 1;
710
711 case ISD::FMUL:
712 case ISD::FDIV:
713 if (Options->HonorSignDependentRoundingFPMath()) return 0;
714
715 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
716 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
717 Options, Depth + 1))
718 return V;
719
720 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
721 Depth + 1);
722
723 case ISD::FP_EXTEND:
724 case ISD::FP_ROUND:
725 case ISD::FSIN:
726 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
727 Depth + 1);
728 }
729}
730
731/// If isNegatibleForFree returns true, return the newly negated expression.
732static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
733 bool LegalOperations, unsigned Depth = 0) {
734 const TargetOptions &Options = DAG.getTarget().Options;
735 // fneg is removable even if it has multiple uses.
736 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
737
738 // Don't allow anything with multiple uses.
739 assert(Op.hasOneUse() && "Unknown reuse!");
740
741 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
742
743 const SDNodeFlags Flags = Op.getNode()->getFlags();
744
745 switch (Op.getOpcode()) {
746 default: llvm_unreachable("Unknown code");
747 case ISD::ConstantFP: {
748 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
749 V.changeSign();
750 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
751 }
752 case ISD::FADD:
753 // FIXME: determine better conditions for this xform.
754 assert(Options.UnsafeFPMath);
755
756 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
757 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
758 DAG.getTargetLoweringInfo(), &Options, Depth+1))
759 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
760 GetNegatedExpression(Op.getOperand(0), DAG,
761 LegalOperations, Depth+1),
762 Op.getOperand(1), Flags);
763 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
764 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
765 GetNegatedExpression(Op.getOperand(1), DAG,
766 LegalOperations, Depth+1),
767 Op.getOperand(0), Flags);
768 case ISD::FSUB:
769 // fold (fneg (fsub 0, B)) -> B
770 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
771 if (N0CFP->isZero())
772 return Op.getOperand(1);
773
774 // fold (fneg (fsub A, B)) -> (fsub B, A)
775 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
776 Op.getOperand(1), Op.getOperand(0), Flags);
777
778 case ISD::FMUL:
779 case ISD::FDIV:
780 assert(!Options.HonorSignDependentRoundingFPMath());
781
782 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
783 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
784 DAG.getTargetLoweringInfo(), &Options, Depth+1))
785 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
786 GetNegatedExpression(Op.getOperand(0), DAG,
787 LegalOperations, Depth+1),
788 Op.getOperand(1), Flags);
789
790 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
791 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
792 Op.getOperand(0),
793 GetNegatedExpression(Op.getOperand(1), DAG,
794 LegalOperations, Depth+1), Flags);
795
796 case ISD::FP_EXTEND:
797 case ISD::FSIN:
798 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
799 GetNegatedExpression(Op.getOperand(0), DAG,
800 LegalOperations, Depth+1));
801 case ISD::FP_ROUND:
802 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
803 GetNegatedExpression(Op.getOperand(0), DAG,
804 LegalOperations, Depth+1),
805 Op.getOperand(1));
806 }
807}
808
809// APInts must be the same size for most operations, this helper
810// function zero extends the shorter of the pair so that they match.
811// We provide an Offset so that we can create bitwidths that won't overflow.
812static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
813 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
814 LHS = LHS.zextOrSelf(Bits);
815 RHS = RHS.zextOrSelf(Bits);
816}
817
818// Return true if this node is a setcc, or is a select_cc
819// that selects between the target values used for true and false, making it
820// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
821// the appropriate nodes based on the type of node we are checking. This
822// simplifies life a bit for the callers.
823bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
824 SDValue &CC) const {
825 if (N.getOpcode() == ISD::SETCC) {
826 LHS = N.getOperand(0);
827 RHS = N.getOperand(1);
828 CC = N.getOperand(2);
829 return true;
830 }
831
832 if (N.getOpcode() != ISD::SELECT_CC ||
833 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
834 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
835 return false;
836
837 if (TLI.getBooleanContents(N.getValueType()) ==
838 TargetLowering::UndefinedBooleanContent)
839 return false;
840
841 LHS = N.getOperand(0);
842 RHS = N.getOperand(1);
843 CC = N.getOperand(4);
844 return true;
845}
846
847/// Return true if this is a SetCC-equivalent operation with only one use.
848/// If this is true, it allows the users to invert the operation for free when
849/// it is profitable to do so.
850bool DAGCombiner::isOneUseSetCC(SDValue N) const {
851 SDValue N0, N1, N2;
852 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
853 return true;
854 return false;
855}
856
857// \brief Returns the SDNode if it is a constant float BuildVector
858// or constant float.
859static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
860 if (isa<ConstantFPSDNode>(N))
861 return N.getNode();
862 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
863 return N.getNode();
864 return nullptr;
865}
866
867// Determines if it is a constant integer or a build vector of constant
868// integers (and undefs).
869// Do not permit build vector implicit truncation.
870static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
871 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
872 return !(Const->isOpaque() && NoOpaques);
873 if (N.getOpcode() != ISD::BUILD_VECTOR)
874 return false;
875 unsigned BitWidth = N.getScalarValueSizeInBits();
876 for (const SDValue &Op : N->op_values()) {
877 if (Op.isUndef())
878 continue;
879 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
880 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
881 (Const->isOpaque() && NoOpaques))
882 return false;
883 }
884 return true;
885}
886
887// Determines if it is a constant null integer or a splatted vector of a
888// constant null integer (with no undefs).
889// Build vector implicit truncation is not an issue for null values.
890static bool isNullConstantOrNullSplatConstant(SDValue N) {
891 if (ConstantSDNode *Splat = isConstOrConstSplat(N))
892 return Splat->isNullValue();
893 return false;
894}
895
896// Determines if it is a constant integer of one or a splatted vector of a
897// constant integer of one (with no undefs).
898// Do not permit build vector implicit truncation.
899static bool isOneConstantOrOneSplatConstant(SDValue N) {
900 unsigned BitWidth = N.getScalarValueSizeInBits();
901 if (ConstantSDNode *Splat = isConstOrConstSplat(N))
902 return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
903 return false;
904}
905
906// Determines if it is a constant integer of all ones or a splatted vector of a
907// constant integer of all ones (with no undefs).
908// Do not permit build vector implicit truncation.
909static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
910 unsigned BitWidth = N.getScalarValueSizeInBits();
911 if (ConstantSDNode *Splat = isConstOrConstSplat(N))
912 return Splat->isAllOnesValue() &&
913 Splat->getAPIntValue().getBitWidth() == BitWidth;
914 return false;
915}
916
917// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
918// undef's.
919static bool isAnyConstantBuildVector(const SDNode *N) {
920 return ISD::isBuildVectorOfConstantSDNodes(N) ||
921 ISD::isBuildVectorOfConstantFPSDNodes(N);
922}
923
924SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
925 SDValue N1) {
926 EVT VT = N0.getValueType();
927 if (N0.getOpcode() == Opc) {
928 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
929 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
930 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
931 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
932 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
933 return SDValue();
934 }
935 if (N0.hasOneUse()) {
936 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
937 // use
938 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
939 if (!OpNode.getNode())
940 return SDValue();
941 AddToWorklist(OpNode.getNode());
942 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
943 }
944 }
945 }
946
947 if (N1.getOpcode() == Opc) {
948 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
949 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
950 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
951 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
952 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
953 return SDValue();
954 }
955 if (N1.hasOneUse()) {
956 // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
957 // use
958 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
959 if (!OpNode.getNode())
960 return SDValue();
961 AddToWorklist(OpNode.getNode());
962 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
963 }
964 }
965 }
966
967 return SDValue();
968}
969
970SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
971 bool AddTo) {
972 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
973 ++NodesCombined;
974 DEBUG(dbgs() << "\nReplacing.1 ";
975       N->dump(&DAG);
976       dbgs() << "\nWith: ";
977       To[0].getNode()->dump(&DAG);
978       dbgs() << " and " << NumTo-1 << " other values\n");
979 for (unsigned i = 0, e = NumTo; i != e; ++i)
980 assert((!To[i].getNode() ||
981         N->getValueType(i) == To[i].getValueType()) &&
982        "Cannot combine value to value of different type!");
983
984 WorklistRemover DeadNodes(*this);
985 DAG.ReplaceAllUsesWith(N, To);
986 if (AddTo) {
987 // Push the new nodes and any users onto the worklist
988 for (unsigned i = 0, e = NumTo; i != e; ++i) {
989 if (To[i].getNode()) {
990 AddToWorklist(To[i].getNode());
991 AddUsersToWorklist(To[i].getNode());
992 }
993 }
994 }
995
996 // Finally, if the node is now dead, remove it from the graph. The node
997 // may not be dead if the replacement process recursively simplified to
998 // something else needing this node.
999 if (N->use_empty())
1000 deleteAndRecombine(N);
1001 return SDValue(N, 0);
1002}
1003
1004void DAGCombiner::
1005CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1006 // Replace all uses. If any nodes become isomorphic to other nodes and
1007 // are deleted, make sure to remove them from our worklist.
1008 WorklistRemover DeadNodes(*this);
1009 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1010
1011 // Push the new node and any (possibly new) users onto the worklist.
1012 AddToWorklist(TLO.New.getNode());
1013 AddUsersToWorklist(TLO.New.getNode());
1014
1015 // Finally, if the node is now dead, remove it from the graph. The node
1016 // may not be dead if the replacement process recursively simplified to
1017 // something else needing this node.
1018 if (TLO.Old.getNode()->use_empty())
1019 deleteAndRecombine(TLO.Old.getNode());
1020}
1021
1022/// Check the specified integer node value to see if it can be simplified or if
1023/// things it uses can be simplified by bit propagation. If so, return true.
1024bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1025 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1026 KnownBits Known;
1027 if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1028 return false;
1029
1030 // Revisit the node.
1031 AddToWorklist(Op.getNode());
1032
1033 // Replace the old value with the new one.
1034 ++NodesCombined;
1035 DEBUG(dbgs() << "\nReplacing.2 ";
1036       TLO.Old.getNode()->dump(&DAG);
1037       dbgs() << "\nWith: ";
1038       TLO.New.getNode()->dump(&DAG);
1039       dbgs() << '\n');
1040
1041 CommitTargetLoweringOpt(TLO);
1042 return true;
1043}
1044
1045/// Check the specified vector node value to see if it can be simplified or
1046/// if things it uses can be simplified as it only uses some of the elements.
1047/// If so, return true.
1048bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1049 const APInt &Demanded) {
1050 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1051 APInt KnownUndef, KnownZero;
1052 if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
1053 return false;
1054
1055 // Revisit the node.
1056 AddToWorklist(Op.getNode());
1057
1058 // Replace the old value with the new one.
1059 ++NodesCombined;
1060 DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1061       dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); dbgs() << '\n');
1062
1063 CommitTargetLoweringOpt(TLO);
1064 return true;
1065}
1066
1067void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1068 SDLoc DL(Load);
1069 EVT VT = Load->getValueType(0);
1070 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1071
1072 DEBUG(dbgs() << "\nReplacing.9 ";
1073       Load->dump(&DAG);
1074       dbgs() << "\nWith: ";
1075       Trunc.getNode()->dump(&DAG);
1076       dbgs() << '\n');
1077 WorklistRemover DeadNodes(*this);
1078 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1079 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1080 deleteAndRecombine(Load);
1081 AddToWorklist(Trunc.getNode());
1082}
1083
1084SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1085 Replace = false;
1086 SDLoc DL(Op);
1087 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1088 LoadSDNode *LD = cast<LoadSDNode>(Op);
1089 EVT MemVT = LD->getMemoryVT();
1090 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1091 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1092 : ISD::EXTLOAD)
1093 : LD->getExtensionType();
1094 Replace = true;
1095 return DAG.getExtLoad(ExtType, DL, PVT,
1096 LD->getChain(), LD->getBasePtr(),
1097 MemVT, LD->getMemOperand());
1098 }
1099
1100 unsigned Opc = Op.getOpcode();
1101 switch (Opc) {
1102 default: break;
1103 case ISD::AssertSext:
1104 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1105 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1106 break;
1107 case ISD::AssertZext:
1108 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1109 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1110 break;
1111 case ISD::Constant: {
1112 unsigned ExtOpc =
1113 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1114 return DAG.getNode(ExtOpc, DL, PVT, Op);
1115 }
1116 }
1117
1118 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1119 return SDValue();
1120 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1121}
1122
1123SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1124 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1125 return SDValue();
1126 EVT OldVT = Op.getValueType();
1127 SDLoc DL(Op);
1128 bool Replace = false;
1129 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1130 if (!NewOp.getNode())
1131 return SDValue();
1132 AddToWorklist(NewOp.getNode());
1133
1134 if (Replace)
1135 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1136 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1137 DAG.getValueType(OldVT));
1138}
1139
1140SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1141 EVT OldVT = Op.getValueType();
1142 SDLoc DL(Op);
1143 bool Replace = false;
1144 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1145 if (!NewOp.getNode())
1146 return SDValue();
1147 AddToWorklist(NewOp.getNode());
1148
1149 if (Replace)
1150 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1151 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1152}
1153
1154/// Promote the specified integer binary operation if the target indicates it is
1155/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1156/// i32 since i16 instructions are longer.
1157SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1158 if (!LegalOperations)
1159 return SDValue();
1160
1161 EVT VT = Op.getValueType();
1162 if (VT.isVector() || !VT.isInteger())
1163 return SDValue();
1164
1165 // If operation type is 'undesirable', e.g. i16 on x86, consider
1166 // promoting it.
1167 unsigned Opc = Op.getOpcode();
1168 if (TLI.isTypeDesirableForOp(Opc, VT))
1169 return SDValue();
1170
1171 EVT PVT = VT;
1172 // Consult target whether it is a good idea to promote this operation and
1173 // what's the right type to promote it to.
1174 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1175 assert(PVT != VT && "Don't know what type to promote to!");
1176
1177 DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1178
1179 bool Replace0 = false;
1180 SDValue N0 = Op.getOperand(0);
1181 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1182
1183 bool Replace1 = false;
1184 SDValue N1 = Op.getOperand(1);
1185 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1186 SDLoc DL(Op);
1187
1188 SDValue RV =
1189 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1190
1191 // We are always replacing N0/N1's use in N and only need
1192 // additional replacements if there are additional uses.
1193 Replace0 &= !N0->hasOneUse();
1194 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1195
1196 // Combine Op here so it is preserved past replacements.
1197 CombineTo(Op.getNode(), RV);
1198
1199 // If the operands have a use ordering, make sure we deal with
1200 // the predecessor first.
1201 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1202 std::swap(N0, N1);
1203 std::swap(NN0, NN1);
1204 }
1205
1206 if (Replace0) {
1207 AddToWorklist(NN0.getNode());
1208 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1209 }
1210 if (Replace1) {
1211 AddToWorklist(NN1.getNode());
1212 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1213 }
1214 return Op;
1215 }
1216 return SDValue();
1217}
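// Worked example (hypothetical i16 add on x86, PVT = i32): the binop is
// computed in the wider type and truncated back, so existing users still see
// the original type:
//   (add i16 %x, %y)
//     -> NN0 = (any_extend i32 %x), NN1 = (any_extend i32 %y)
//     -> RV  = (truncate i16 (add i32 NN0, NN1))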
1218
1219/// Promote the specified integer shift operation if the target indicates it is
1220/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1221/// i32 since i16 instructions are longer.
1222SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1223 if (!LegalOperations)
1224 return SDValue();
1225
1226 EVT VT = Op.getValueType();
1227 if (VT.isVector() || !VT.isInteger())
1228 return SDValue();
1229
1230 // If operation type is 'undesirable', e.g. i16 on x86, consider
1231 // promoting it.
1232 unsigned Opc = Op.getOpcode();
1233 if (TLI.isTypeDesirableForOp(Opc, VT))
1234 return SDValue();
1235
1236 EVT PVT = VT;
1237 // Consult target whether it is a good idea to promote this operation and
1238 // what's the right type to promote it to.
1239 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1240 assert(PVT != VT && "Don't know what type to promote to!");
1241
1242 DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1243
1244 bool Replace = false;
1245 SDValue N0 = Op.getOperand(0);
1246 SDValue N1 = Op.getOperand(1);
1247 if (Opc == ISD::SRA)
1248 N0 = SExtPromoteOperand(N0, PVT);
1249 else if (Opc == ISD::SRL)
1250 N0 = ZExtPromoteOperand(N0, PVT);
1251 else
1252 N0 = PromoteOperand(N0, PVT, Replace);
1253
1254 if (!N0.getNode())
1255 return SDValue();
1256
1257 SDLoc DL(Op);
1258 SDValue RV =
1259 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1260
1261 AddToWorklist(N0.getNode());
1262 if (Replace)
1263 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1264
1265 // Deal with Op being deleted.
1266 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1267 return RV;
1268 }
1269 return SDValue();
1270}
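// Only the shifted value is promoted; the shift amount keeps its type. The
// extension kind matters for right shifts because the promoted high bits flow
// into the low result bits. A sketch, assuming i16 -> i32 (hypothetical):
//   (sra i16 %x, %c) -> (trunc i16 (sra i32 (sext_inreg %x, i16), %c))
//   (srl i16 %x, %c) -> (trunc i16 (srl i32 (and %x, 0xFFFF), %c))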
1271
1272SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1273 if (!LegalOperations)
1274 return SDValue();
1275
1276 EVT VT = Op.getValueType();
1277 if (VT.isVector() || !VT.isInteger())
1278 return SDValue();
1279
1280 // If operation type is 'undesirable', e.g. i16 on x86, consider
1281 // promoting it.
1282 unsigned Opc = Op.getOpcode();
1283 if (TLI.isTypeDesirableForOp(Opc, VT))
1284 return SDValue();
1285
1286 EVT PVT = VT;
1287 // Consult target whether it is a good idea to promote this operation and
1288 // what's the right type to promote it to.
1289 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1290 assert(PVT != VT && "Don't know what type to promote to!");
1291 // fold (aext (aext x)) -> (aext x)
1292 // fold (aext (zext x)) -> (zext x)
1293 // fold (aext (sext x)) -> (sext x)
1294 DEBUG(dbgs() << "\nPromoting ";
1295       Op.getNode()->dump(&DAG));
1296 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1297 }
1298 return SDValue();
1299}
1300
1301bool DAGCombiner::PromoteLoad(SDValue Op) {
1302 if (!LegalOperations)
1303 return false;
1304
1305 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1306 return false;
1307
1308 EVT VT = Op.getValueType();
1309 if (VT.isVector() || !VT.isInteger())
1310 return false;
1311
1312 // If operation type is 'undesirable', e.g. i16 on x86, consider
1313 // promoting it.
1314 unsigned Opc = Op.getOpcode();
1315 if (TLI.isTypeDesirableForOp(Opc, VT))
1316 return false;
1317
1318 EVT PVT = VT;
1319 // Consult target whether it is a good idea to promote this operation and
1320 // what's the right type to promote it to.
1321 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1322 assert(PVT != VT && "Don't know what type to promote to!");
1323
1324 SDLoc DL(Op);
1325 SDNode *N = Op.getNode();
1326 LoadSDNode *LD = cast<LoadSDNode>(N);
1327 EVT MemVT = LD->getMemoryVT();
1328 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1329 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1330 : ISD::EXTLOAD)
1331 : LD->getExtensionType();
1332 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1333 LD->getChain(), LD->getBasePtr(),
1334 MemVT, LD->getMemOperand());
1335 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1336
1337 DEBUG(dbgs() << "\nPromoting ";
1338       N->dump(&DAG);
1339       dbgs() << "\nTo: ";
1340       Result.getNode()->dump(&DAG);
1341       dbgs() << '\n');
1342 WorklistRemover DeadNodes(*this);
1343 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1344 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1345 deleteAndRecombine(N);
1346 AddToWorklist(Result.getNode());
1347 return true;
1348 }
1349 return false;
1350}
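// Sketch of the load rewrite (hypothetical types): both results of the old
// node are replaced, value and chain, so no user is left dangling:
//   (t: i16, ch)   = load ...             // old node N
//   (t': i32, ch') = zextload ..., i16    // NewLD
//   uses of (N, 0) -> (truncate i16 t')   // Result
//   uses of (N, 1) -> ch'                 // NewLD.getValue(1)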
1351
1352/// \brief Recursively delete a node which has no uses and any operands for
1353/// which it is the only use.
1354///
1355/// Note that this both deletes the nodes and removes them from the worklist.
1356 /// It also adds any nodes that have had a user deleted to the worklist, as
1357 /// they may now have only one use and be subject to other combines.
1358bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1359 if (!N->use_empty())
1360 return false;
1361
1362 SmallSetVector<SDNode *, 16> Nodes;
1363 Nodes.insert(N);
1364 do {
1365 N = Nodes.pop_back_val();
1366 if (!N)
1367 continue;
1368
1369 if (N->use_empty()) {
1370 for (const SDValue &ChildN : N->op_values())
1371 Nodes.insert(ChildN.getNode());
1372
1373 removeFromWorklist(N);
1374 DAG.DeleteNode(N);
1375 } else {
1376 AddToWorklist(N);
1377 }
1378 } while (!Nodes.empty());
1379 return true;
1380}
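// Note on the loop above: deleting a node can make its operands dead in turn,
// so operands are queued before the node is deleted; operands that turn out to
// still have uses are instead re-added to the combine worklist.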
1381
1382//===----------------------------------------------------------------------===//
1383// Main DAG Combiner implementation
1384//===----------------------------------------------------------------------===//
1385
1386void DAGCombiner::Run(CombineLevel AtLevel) {
1387 // Set the instance variables, so that the various visit routines may use them.
1388 Level = AtLevel;
1389 LegalOperations = Level >= AfterLegalizeVectorOps;
1390 LegalTypes = Level >= AfterLegalizeTypes;
1391
1392 // Add all the dag nodes to the worklist.
1393 for (SDNode &Node : DAG.allnodes())
1394 AddToWorklist(&Node);
1395
1396 // Create a dummy node (which is not added to allnodes), that adds a reference
1397 // to the root node, preventing it from being deleted, and tracking any
1398 // changes of the root.
1399 HandleSDNode Dummy(DAG.getRoot());
1400
1401 // While the worklist isn't empty, find a node and try to combine it.
1402 while (!WorklistMap.empty()) {
1403 SDNode *N;
1404 // The Worklist holds the SDNodes in order, but it may contain null entries.
1405 do {
1406 N = Worklist.pop_back_val();
1407 } while (!N);
1408
1409 bool GoodWorklistEntry = WorklistMap.erase(N);
1410 (void)GoodWorklistEntry;
1411 assert(GoodWorklistEntry &&
1412        "Found a worklist entry without a corresponding map entry!");
1413
1414 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1415 // N is deleted from the DAG, since they too may now be dead or may have a
1416 // reduced number of uses, allowing other xforms.
1417 if (recursivelyDeleteUnusedNodes(N))
1418 continue;
1419
1420 WorklistRemover DeadNodes(*this);
1421
1422 // If this combine is running after legalizing the DAG, re-legalize any
1423 // nodes pulled off the worklist.
1424 if (Level == AfterLegalizeDAG) {
1425 SmallSetVector<SDNode *, 16> UpdatedNodes;
1426 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1427
1428 for (SDNode *LN : UpdatedNodes) {
1429 AddToWorklist(LN);
1430 AddUsersToWorklist(LN);
1431 }
1432 if (!NIsValid)
1433 continue;
1434 }
1435
1436 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1437
1438 // Add any operands of the new node which have not yet been combined to the
1439 // worklist as well. Because the worklist uniques things already, this
1440 // won't repeatedly process the same operand.
1441 CombinedNodes.insert(N);
1442 for (const SDValue &ChildN : N->op_values())
1443 if (!CombinedNodes.count(ChildN.getNode()))
1444 AddToWorklist(ChildN.getNode());
1445
1446 SDValue RV = combine(N);
1447
1448 if (!RV.getNode())
1449 continue;
1450
1451 ++NodesCombined;
1452
1453 // If we get back the same node we passed in, rather than a new node or
1454 // zero, we know that the node must have defined multiple values and
1455 // CombineTo was used. Since CombineTo takes care of the worklist
1456 // mechanics for us, we have no work to do in this case.
1457 if (RV.getNode() == N)
1458 continue;
1459
1460 assert(N->getOpcode() != ISD::DELETED_NODE &&
1461        RV.getOpcode() != ISD::DELETED_NODE &&
1462        "Node was deleted but visit returned new node!");
1463
1464 DEBUG(dbgs() << "  ... into: ";
1465       RV.getNode()->dump(&DAG));
1466
1467 if (N->getNumValues() == RV.getNode()->getNumValues())
1468 DAG.ReplaceAllUsesWith(N, RV.getNode());
1469 else {
1470 assert(N->getValueType(0) == RV.getValueType() &&
1471        N->getNumValues() == 1 && "Type mismatch");
1472 DAG.ReplaceAllUsesWith(N, &RV);
1473 }
1474
1475 // Push the new node and any users onto the worklist
1476 AddToWorklist(RV.getNode());
1477 AddUsersToWorklist(RV.getNode());
1478
1479 // Finally, if the node is now dead, remove it from the graph. The node
1480 // may not be dead if the replacement process recursively simplified to
1481 // something else needing this node. This will also take care of adding any
1482 // operands which have lost a user to the worklist.
1483 recursivelyDeleteUnusedNodes(N);
1484 }
1485
1486 // If the root changed (e.g. it was a dead load), update the root.
1487 DAG.setRoot(Dummy.getValue());
1488 DAG.RemoveDeadNodes();
1489}
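// The HandleSDNode above serves as a sentinel use of the root: a combine may
// rewrite the root node itself, and Dummy.getValue() then names whatever the
// root became once the worklist loop has drained.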
1490
1491SDValue DAGCombiner::visit(SDNode *N) {
1492 switch (N->getOpcode()) {
1493 default: break;
1494 case ISD::TokenFactor: return visitTokenFactor(N);
1495 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1496 case ISD::ADD: return visitADD(N);
1497 case ISD::SUB: return visitSUB(N);
1498 case ISD::ADDC: return visitADDC(N);
1499 case ISD::UADDO: return visitUADDO(N);
1500 case ISD::SUBC: return visitSUBC(N);
1501 case ISD::USUBO: return visitUSUBO(N);
1502 case ISD::ADDE: return visitADDE(N);
1503 case ISD::ADDCARRY: return visitADDCARRY(N);
1504 case ISD::SUBE: return visitSUBE(N);
1505 case ISD::SUBCARRY: return visitSUBCARRY(N);
1506 case ISD::MUL: return visitMUL(N);
1507 case ISD::SDIV: return visitSDIV(N);
1508 case ISD::UDIV: return visitUDIV(N);
1509 case ISD::SREM:
1510 case ISD::UREM: return visitREM(N);
1511 case ISD::MULHU: return visitMULHU(N);
1512 case ISD::MULHS: return visitMULHS(N);
1513 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1514 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1515 case ISD::SMULO: return visitSMULO(N);
1516 case ISD::UMULO: return visitUMULO(N);
1517 case ISD::SMIN:
1518 case ISD::SMAX:
1519 case ISD::UMIN:
1520 case ISD::UMAX: return visitIMINMAX(N);
1521 case ISD::AND: return visitAND(N);
1522 case ISD::OR: return visitOR(N);
1523 case ISD::XOR: return visitXOR(N);
1524 case ISD::SHL: return visitSHL(N);
1525 case ISD::SRA: return visitSRA(N);
1526 case ISD::SRL: return visitSRL(N);
1527 case ISD::ROTR:
1528 case ISD::ROTL: return visitRotate(N);
1529 case ISD::ABS: return visitABS(N);
1530 case ISD::BSWAP: return visitBSWAP(N);
1531 case ISD::BITREVERSE: return visitBITREVERSE(N);
1532 case ISD::CTLZ: return visitCTLZ(N);
1533 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1534 case ISD::CTTZ: return visitCTTZ(N);
1535 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1536 case ISD::CTPOP: return visitCTPOP(N);
1537 case ISD::SELECT: return visitSELECT(N);
1538 case ISD::VSELECT: return visitVSELECT(N);
1539 case ISD::SELECT_CC: return visitSELECT_CC(N);
1540 case ISD::SETCC: return visitSETCC(N);
1541 case ISD::SETCCE: return visitSETCCE(N);
1542 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1543 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1544 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1545 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1546 case ISD::AssertSext:
1547 case ISD::AssertZext: return visitAssertExt(N);
1548 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1549 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1550 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1551 case ISD::TRUNCATE: return visitTRUNCATE(N);
1552 case ISD::BITCAST: return visitBITCAST(N);
1553 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1554 case ISD::FADD: return visitFADD(N);
1555 case ISD::FSUB: return visitFSUB(N);
1556 case ISD::FMUL: return visitFMUL(N);
1557 case ISD::FMA: return visitFMA(N);
1558 case ISD::FDIV: return visitFDIV(N);
1559 case ISD::FREM: return visitFREM(N);
1560 case ISD::FSQRT: return visitFSQRT(N);
1561 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1562 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1563 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1564 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1565 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1566 case ISD::FP_ROUND: return visitFP_ROUND(N);
1567 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1568 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1569 case ISD::FNEG: return visitFNEG(N);
1570 case ISD::FABS: return visitFABS(N);
1571 case ISD::FFLOOR: return visitFFLOOR(N);
1572 case ISD::FMINNUM: return visitFMINNUM(N);
1573 case ISD::FMAXNUM: return visitFMAXNUM(N);
1574 case ISD::FCEIL: return visitFCEIL(N);
1575 case ISD::FTRUNC: return visitFTRUNC(N);
1576 case ISD::BRCOND: return visitBRCOND(N);
1577 case ISD::BR_CC: return visitBR_CC(N);
1578 case ISD::LOAD: return visitLOAD(N);
1579 case ISD::STORE: return visitSTORE(N);
1580 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1581 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1582 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1583 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1584 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1585 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1586 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1587 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1588 case ISD::MGATHER: return visitMGATHER(N);
1589 case ISD::MLOAD: return visitMLOAD(N);
1590 case ISD::MSCATTER: return visitMSCATTER(N);
1591 case ISD::MSTORE: return visitMSTORE(N);
1592 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1593 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1594 }
1595 return SDValue();
1596}
1597
1598SDValue DAGCombiner::combine(SDNode *N) {
1599 SDValue RV = visit(N);
1600
1601 // If nothing happened, try a target-specific DAG combine.
1602 if (!RV.getNode()) {
1603 assert(N->getOpcode() != ISD::DELETED_NODE &&
1604        "Node was deleted but visit returned NULL!");
1605
1606 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1607 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1608
1609 // Expose the DAG combiner to the target combiner impls.
1610 TargetLowering::DAGCombinerInfo
1611 DagCombineInfo(DAG, Level, false, this);
1612
1613 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1614 }
1615 }
1616
1617 // If nothing happened still, try promoting the operation.
1618 if (!RV.getNode()) {
1619 switch (N->getOpcode()) {
1620 default: break;
1621 case ISD::ADD:
1622 case ISD::SUB:
1623 case ISD::MUL:
1624 case ISD::AND:
1625 case ISD::OR:
1626 case ISD::XOR:
1627 RV = PromoteIntBinOp(SDValue(N, 0));
1628 break;
1629 case ISD::SHL:
1630 case ISD::SRA:
1631 case ISD::SRL:
1632 RV = PromoteIntShiftOp(SDValue(N, 0));
1633 break;
1634 case ISD::SIGN_EXTEND:
1635 case ISD::ZERO_EXTEND:
1636 case ISD::ANY_EXTEND:
1637 RV = PromoteExtend(SDValue(N, 0));
1638 break;
1639 case ISD::LOAD:
1640 if (PromoteLoad(SDValue(N, 0)))
1641 RV = SDValue(N, 0);
1642 break;
1643 }
1644 }
1645
1646 // If N is a commutative binary node, try to eliminate it if the commuted
1647 // version is already present in the DAG.
1648 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1649 N->getNumValues() == 1) {
1650 SDValue N0 = N->getOperand(0);
1651 SDValue N1 = N->getOperand(1);
1652
1653 // Constant operands are canonicalized to RHS.
1654 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1655 SDValue Ops[] = {N1, N0};
1656 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1657 N->getFlags());
1658 if (CSENode)
1659 return SDValue(CSENode, 0);
1660 }
1661 }
1662
1663 return RV;
1664}
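// Example of the final commuted-CSE step (hypothetical): if the DAG already
// contains (add %b, %a) when (add %a, %b) is combined, the latter is replaced
// by the existing commuted node instead of surviving as a duplicate.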
1665
1666 /// Given a node, return its input chain if it has one, otherwise return a
1667 /// null SDValue.
1668static SDValue getInputChainForNode(SDNode *N) {
1669 if (unsigned NumOps = N->getNumOperands()) {
1670 if (N->getOperand(0).getValueType() == MVT::Other)
1671 return N->getOperand(0);
1672 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1673 return N->getOperand(NumOps-1);
1674 for (unsigned i = 1; i < NumOps-1; ++i)
1675 if (N->getOperand(i).getValueType() == MVT::Other)
1676 return N->getOperand(i);
1677 }
1678 return SDValue();
1679}
1680
1681SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1682 // If N has two operands, where one has an input chain equal to the other,
1683 // the 'other' chain is redundant.
1684 if (N->getNumOperands() == 2) {
1685 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1686 return N->getOperand(0);
1687 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1688 return N->getOperand(1);
1689 }
1690
1691 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1692 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1693 SmallPtrSet<SDNode*, 16> SeenOps;
1694 bool Changed = false; // If we should replace this token factor.
1695
1696 // Start out with this token factor.
1697 TFs.push_back(N);
1698
1699 // Iterate through token factors. The TFs list grows when new token
1700 // factors are encountered.
1701 for (unsigned i = 0; i < TFs.size(); ++i) {
1702 SDNode *TF = TFs[i];
1703
1704 // Check each of the operands.
1705 for (const SDValue &Op : TF->op_values()) {
1706 switch (Op.getOpcode()) {
1707 case ISD::EntryToken:
1708 // Entry tokens don't need to be added to the list. They are
1709 // redundant.
1710 Changed = true;
1711 break;
1712
1713 case ISD::TokenFactor:
1714 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1715 // Queue up for processing.
1716 TFs.push_back(Op.getNode());
1717 // Clean up in case the token factor is removed.
1718 AddToWorklist(Op.getNode());
1719 Changed = true;
1720 break;
1721 }
1722 LLVM_FALLTHROUGH;
1723
1724 default:
1725 // Only add if it isn't already in the list.
1726 if (SeenOps.insert(Op.getNode()).second)
1727 Ops.push_back(Op);
1728 else
1729 Changed = true;
1730 break;
1731 }
1732 }
1733 }
1734
1735 // Remove Nodes that are chained to another node in the list. Do so
1736 // by walking up chains breadth-first, stopping when we've seen
1737 // another operand. In general we must climb to the EntryNode, but we can exit
1738 // early if we find all remaining work is associated with just one operand as
1739 // no further pruning is possible.
1740
1741 // List of nodes to search through and original Ops from which they originate.
1742 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1743 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1744 SmallPtrSet<SDNode *, 16> SeenChains;
1745 bool DidPruneOps = false;
1746
1747 unsigned NumLeftToConsider = 0;
1748 for (const SDValue &Op : Ops) {
1749 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1750 OpWorkCount.push_back(1);
1751 }
1752
1753 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1754 // If this is an Op, we can remove the op from the list. Re-mark any
1755 // search associated with it as coming from the current OpNumber.
1756 if (SeenOps.count(Op) != 0) {
1757 Changed = true;
1758 DidPruneOps = true;
1759 unsigned OrigOpNumber = 0;
1760 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1761 OrigOpNumber++;
1762 assert((OrigOpNumber != Ops.size()) &&
1763        "expected to find TokenFactor Operand");
1764 // Re-mark worklist from OrigOpNumber to OpNumber
1765 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1766 if (Worklist[i].second == OrigOpNumber) {
1767 Worklist[i].second = OpNumber;
1768 }
1769 }
1770 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1771 OpWorkCount[OrigOpNumber] = 0;
1772 NumLeftToConsider--;
1773 }
1774 // Add if it's a new chain
1775 if (SeenChains.insert(Op).second) {
1776 OpWorkCount[OpNumber]++;
1777 Worklist.push_back(std::make_pair(Op, OpNumber));
1778 }
1779 };
1780
1781 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1782 // We need to consider at least 2 Ops to prune.
1783 if (NumLeftToConsider <= 1)
1784 break;
1785 auto CurNode = Worklist[i].first;
1786 auto CurOpNumber = Worklist[i].second;
1787 assert((OpWorkCount[CurOpNumber] > 0) &&
1788        "Node should not appear in worklist");
1789 switch (CurNode->getOpcode()) {
1790 case ISD::EntryToken:
1791 // Hitting EntryToken is the only way for the search to terminate
1792 // without hitting another operand's search.
1793 // Prevent us from marking this operand
1794 // as considered.
1795 NumLeftToConsider++;
1796 break;
1797 case ISD::TokenFactor:
1798 for (const SDValue &Op : CurNode->op_values())
1799 AddToWorklist(i, Op.getNode(), CurOpNumber);
1800 break;
1801 case ISD::CopyFromReg:
1802 case ISD::CopyToReg:
1803 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1804 break;
1805 default:
1806 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1807 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1808 break;
1809 }
1810 OpWorkCount[CurOpNumber]--;
1811 if (OpWorkCount[CurOpNumber] == 0)
1812 NumLeftToConsider--;
1813 }
1814
1815 // If we've changed things around then replace token factor.
1816 if (Changed) {
1817 SDValue Result;
1818 if (Ops.empty()) {
1819 // The entry token is the only possible outcome.
1820 Result = DAG.getEntryNode();
1821 } else {
1822 if (DidPruneOps) {
1823 SmallVector<SDValue, 8> PrunedOps;
1824 //
1825 for (const SDValue &Op : Ops) {
1826 if (SeenChains.count(Op.getNode()) == 0)
1827 PrunedOps.push_back(Op);
1828 }
1829 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1830 } else {
1831 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1832 }
1833 }
1834 return Result;
1835 }
1836 return SDValue();
1837}
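// Illustration (hypothetical chains): TokenFactor(a, TokenFactor(a, b)) first
// has the single-use inner token factor inlined and the duplicate 'a' dropped,
// yielding TokenFactor(a, b); the breadth-first walk then prunes any operand
// that is already reachable through another operand's chain.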
1838
1839/// MERGE_VALUES can always be eliminated.
1840SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1841 WorklistRemover DeadNodes(*this);
1842 // Replacing results may cause a different MERGE_VALUES to suddenly
1843 // be CSE'd with N, and carry its uses with it. Iterate until no
1844 // uses remain, to ensure that the node can be safely deleted.
1845 // First add the users of this node to the work list so that they
1846 // can be tried again once they have new operands.
1847 AddUsersToWorklist(N);
1848 do {
1849 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1850 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1851 } while (!N->use_empty());
1852 deleteAndRecombine(N);
1853 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1854}
1855
1856 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1857 /// ConstantSDNode pointer; else return nullptr.
1858static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1859 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1860 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1861}
1862
1863SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1864 auto BinOpcode = BO->getOpcode();
1865 assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1866         BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1867         BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1868         BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1869         BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1870         BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1871         BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1872         BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1873         BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1874        "Unexpected binary operator");
1875
1876 // Bail out if any constants are opaque because we can't constant fold those.
1877 SDValue C1 = BO->getOperand(1);
1878 if (!isConstantOrConstantVector(C1, true) &&
1879 !isConstantFPBuildVectorOrConstantFP(C1))
1880 return SDValue();
1881
1882 // Don't do this unless the old select is going away. We want to eliminate the
1883 // binary operator, not replace a binop with a select.
1884 // TODO: Handle ISD::SELECT_CC.
1885 SDValue Sel = BO->getOperand(0);
1886 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1887 return SDValue();
1888
1889 SDValue CT = Sel.getOperand(1);
1890 if (!isConstantOrConstantVector(CT, true) &&
1891 !isConstantFPBuildVectorOrConstantFP(CT))
1892 return SDValue();
1893
1894 SDValue CF = Sel.getOperand(2);
1895 if (!isConstantOrConstantVector(CF, true) &&
1896 !isConstantFPBuildVectorOrConstantFP(CF))
1897 return SDValue();
1898
1899 // We have a select-of-constants followed by a binary operator with a
1900 // constant. Eliminate the binop by pulling the constant math into the select.
1901 // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1902 EVT VT = Sel.getValueType();
1903 SDLoc DL(Sel);
1904 SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1905 if (!NewCT.isUndef() &&
1906 !isConstantOrConstantVector(NewCT, true) &&
1907 !isConstantFPBuildVectorOrConstantFP(NewCT))
1908 return SDValue();
1909
1910 SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1911 if (!NewCF.isUndef() &&
1912 !isConstantOrConstantVector(NewCF, true) &&
1913 !isConstantFPBuildVectorOrConstantFP(NewCF))
1914 return SDValue();
1915
1916 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1917}
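// Concrete instance of the fold (hypothetical constants):
//   add (select %cond, 2, 7), 10 --> select %cond, 12, 17
// Both constant folds must produce a constant (or undef); otherwise the
// transform is abandoned rather than creating two live binops.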
1918
1919SDValue DAGCombiner::visitADD(SDNode *N) {
1920 SDValue N0 = N->getOperand(0);
1921 SDValue N1 = N->getOperand(1);
1922 EVT VT = N0.getValueType();
1923 SDLoc DL(N);
1924
1925 // fold vector ops
1926 if (VT.isVector()) {
1927 if (SDValue FoldedVOp = SimplifyVBinOp(N))
1928 return FoldedVOp;
1929
1930 // fold (add x, 0) -> x, vector edition
1931 if (ISD::isBuildVectorAllZeros(N1.getNode()))
1932 return N0;
1933 if (ISD::isBuildVectorAllZeros(N0.getNode()))
1934 return N1;
1935 }
1936
1937 // fold (add x, undef) -> undef
1938 if (N0.isUndef())
1939 return N0;
1940
1941 if (N1.isUndef())
1942 return N1;
1943
1944 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1945 // canonicalize constant to RHS
1946 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1947 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1948 // fold (add c1, c2) -> c1+c2
1949 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1950 N1.getNode());
1951 }
1952
1953 // fold (add x, 0) -> x
1954 if (isNullConstant(N1))
1955 return N0;
1956
1957 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1958 // fold ((c1-A)+c2) -> (c1+c2)-A
1959 if (N0.getOpcode() == ISD::SUB &&
1960 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1961 // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1962 return DAG.getNode(ISD::SUB, DL, VT,
1963 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1964 N0.getOperand(1));
1965 }
1966
1967 // add (sext i1 X), 1 -> zext (not i1 X)
1968 // We don't transform this pattern:
1969 // add (zext i1 X), -1 -> sext (not i1 X)
1970 // because most (?) targets generate better code for the zext form.
1971 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1972 isOneConstantOrOneSplatConstant(N1)) {
1973 SDValue X = N0.getOperand(0);
1974 if ((!LegalOperations ||
1975 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1976 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1977 X.getScalarValueSizeInBits() == 1) {
1978 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1979 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1980 }
1981 }
1982
1983 // Undo the add -> or combine to merge constant offsets from a frame index.
1984 if (N0.getOpcode() == ISD::OR &&
1985 isa<FrameIndexSDNode>(N0.getOperand(0)) &&
1986 isa<ConstantSDNode>(N0.getOperand(1)) &&
1987 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
1988 SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
1989 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
1990 }
1991 }
1992
1993 if (SDValue NewSel = foldBinOpIntoSelect(N))
1994 return NewSel;
1995
1996 // reassociate add
1997 if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1998 return RADD;
1999
2000 // fold ((0-A) + B) -> B-A
2001 if (N0.getOpcode() == ISD::SUB &&
2002 isNullConstantOrNullSplatConstant(N0.getOperand(0)))
2003 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2004
2005 // fold (A + (0-B)) -> A-B
2006 if (N1.getOpcode() == ISD::SUB &&
2007 isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2008 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2009
2010 // fold (A+(B-A)) -> B
2011 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2012 return N1.getOperand(0);
2013
2014 // fold ((B-A)+A) -> B
2015 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2016 return N0.getOperand(0);
2017
2018 // fold (A+(B-(A+C))) to (B-C)
2019 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2020 N0 == N1.getOperand(1).getOperand(0))
2021 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2022 N1.getOperand(1).getOperand(1));
2023
2024 // fold (A+(B-(C+A))) to (B-C)
2025 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2026 N0 == N1.getOperand(1).getOperand(1))
2027 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2028 N1.getOperand(1).getOperand(0));
2029
2030 // fold (A+((B-A)+or-C)) to (B+or-C)
2031 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2032 N1.getOperand(0).getOpcode() == ISD::SUB &&
2033 N0 == N1.getOperand(0).getOperand(1))
2034 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2035 N1.getOperand(1));
2036
2037 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2038 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2039 SDValue N00 = N0.getOperand(0);
2040 SDValue N01 = N0.getOperand(1);
2041 SDValue N10 = N1.getOperand(0);
2042 SDValue N11 = N1.getOperand(1);
2043
2044 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2045 return DAG.getNode(ISD::SUB, DL, VT,
2046 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2047 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2048 }
2049
2050 if (SimplifyDemandedBits(SDValue(N, 0)))
2051 return SDValue(N, 0);
2052
2053 // fold (a+b) -> (a|b) iff a and b share no bits.
2054 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2055 DAG.haveNoCommonBitsSet(N0, N1))
2056 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2057
2058 if (SDValue Combined = visitADDLike(N0, N1, N))
2059 return Combined;
2060
2061 if (SDValue Combined = visitADDLike(N1, N0, N))
2062 return Combined;
2063
2064 return SDValue();
2065}
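// Example of the no-common-bits fold above (hypothetical):
//   %a = and i32 %x, 0xF0
//   %b = and i32 %y, 0x0F
// haveNoCommonBitsSet(%a, %b) holds, so (add %a, %b) becomes (or %a, %b),
// which cannot carry and often exposes further bitwise combines.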
2066
2067static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2068 bool Masked = false;
2069
2070 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2071 while (true) {
2072 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2073 V = V.getOperand(0);
2074 continue;
2075 }
2076
2077 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2078 Masked = true;
2079 V = V.getOperand(0);
2080 continue;
2081 }
2082
2083 break;
2084 }
2085
2086 // If this is not a carry, return.
2087 if (V.getResNo() != 1)
2088 return SDValue();
2089
2090 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2091 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2092 return SDValue();
2093
2094 // If the result is masked, then no matter what kind of bool it is we can
2095 // return it. If it isn't, then we need to make sure the bool is known to be
2096 // either 0 or 1 and not some other value.
2097 if (Masked ||
2098 TLI.getBooleanContents(V.getValueType()) ==
2099 TargetLoweringBase::ZeroOrOneBooleanContent)
2100 return V;
2101
2102 return SDValue();
2103}
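// Sketch of what getAsCarry peels (hypothetical legalized DAG): given
//   (and (zero_extend (uaddo %x, %y):1), 1)
// the truncate/zext/and wrappers are stripped back to the carry result
// (uaddo ...):1; the explicit mask (or a ZeroOrOne boolean convention)
// guarantees the value is usable directly as a carry-in.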
2104
2105SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2106 EVT VT = N0.getValueType();
2107 SDLoc DL(LocReference);
2108
2109 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2110 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2111 isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2112 return DAG.getNode(ISD::SUB, DL, VT, N0,
2113 DAG.getNode(ISD::SHL, DL, VT,
2114 N1.getOperand(0).getOperand(1),
2115 N1.getOperand(1)));
2116
2117 if (N1.getOpcode() == ISD::AND) {
2118 SDValue AndOp0 = N1.getOperand(0);
2119 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2120 unsigned DestBits = VT.getScalarSizeInBits();
2121
2122 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2123 // and similar xforms where the inner op is either ~0 or 0.
2124 if (NumSignBits == DestBits &&
2125 isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2126 return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2127 }
2128
2129 // add (sext i1), X -> sub X, (zext i1)
2130 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2131 N0.getOperand(0).getValueType() == MVT::i1 &&
2132 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2133 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2134 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2135 }
2136
2137 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2138 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2139 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2140 if (TN->getVT() == MVT::i1) {
2141 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2142 DAG.getConstant(1, DL, VT));
2143 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2144 }
2145 }
2146
2147 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2148 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2149 N1.getResNo() == 0)
2150 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2151 N0, N1.getOperand(0), N1.getOperand(2));
2152
2153 // (add X, Carry) -> (addcarry X, 0, Carry)
2154 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2155 if (SDValue Carry = getAsCarry(TLI, N1))
2156 return DAG.getNode(ISD::ADDCARRY, DL,
2157 DAG.getVTList(VT, Carry.getValueType()), N0,
2158 DAG.getConstant(0, DL, VT), Carry);
2159
2160 return SDValue();
2161}
2162
2163SDValue DAGCombiner::visitADDC(SDNode *N) {
2164 SDValue N0 = N->getOperand(0);
2165 SDValue N1 = N->getOperand(1);
2166 EVT VT = N0.getValueType();
2167 SDLoc DL(N);
2168
2169 // If the flag result is dead, turn this into an ADD.
2170 if (!N->hasAnyUseOfValue(1))
2171 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2172 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2173
2174 // canonicalize constant to RHS.
2175 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2176 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2177 if (N0C && !N1C)
2178 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2179
2180 // fold (addc x, 0) -> x + no carry out
2181 if (isNullConstant(N1))
2182 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2183 DL, MVT::Glue));
2184
2185 // If it cannot overflow, transform into an add.
2186 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2187 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2188 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2189
2190 return SDValue();
2191}
2192
2193SDValue DAGCombiner::visitUADDO(SDNode *N) {
2194 SDValue N0 = N->getOperand(0);
2195 SDValue N1 = N->getOperand(1);
2196 EVT VT = N0.getValueType();
2197 if (VT.isVector())
2198 return SDValue();
2199
2200 EVT CarryVT = N->getValueType(1);
2201 SDLoc DL(N);
2202
2203 // If the flag result is dead, turn this into an ADD.
2204 if (!N->hasAnyUseOfValue(1))
2205 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2206 DAG.getUNDEF(CarryVT));
2207
2208 // canonicalize constant to RHS.
2209 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2210 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2211 if (N0C && !N1C)
2212 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2213
2214 // fold (uaddo x, 0) -> x + no carry out
2215 if (isNullConstant(N1))
2216 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2217
2218 // If it cannot overflow, transform into an add.
2219 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2220 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2221 DAG.getConstant(0, DL, CarryVT));
2222
2223 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2224 return Combined;
2225
2226 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2227 return Combined;
2228
2229 return SDValue();
2230}
2231
2232SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2233 auto VT = N0.getValueType();
2234
2235 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2236 // If Y + 1 cannot overflow.
2237 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2238 SDValue Y = N1.getOperand(0);
2239 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2240 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2241 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2242 N1.getOperand(2));
2243 }
2244
2245 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2246 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2247 if (SDValue Carry = getAsCarry(TLI, N1))
2248 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2249 DAG.getConstant(0, SDLoc(N), VT), Carry);
2250
2251 return SDValue();
2252}
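// Worked example for the (uaddo X, (addcarry Y, 0, Carry)) fold above, using
// i8 values: with Y = 5 and Carry = 1, (addcarry Y, 0, Carry) produces 6
// with no carry-out (Y + 1 cannot overflow), so (uaddo X, 6) computes the
// same sum and the same carry as (addcarry X, 5, 1) for every X.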
2253
2254SDValue DAGCombiner::visitADDE(SDNode *N) {
2255 SDValue N0 = N->getOperand(0);
2256 SDValue N1 = N->getOperand(1);
2257 SDValue CarryIn = N->getOperand(2);
2258
2259 // canonicalize constant to RHS
2260 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2261 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2262 if (N0C && !N1C)
2263 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2264 N1, N0, CarryIn);
2265
2266 // fold (adde x, y, false) -> (addc x, y)
2267 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2268 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2269
2270 return SDValue();
2271}
2272
2273SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2274 SDValue N0 = N->getOperand(0);
2275 SDValue N1 = N->getOperand(1);
2276 SDValue CarryIn = N->getOperand(2);
2277 SDLoc DL(N);
2278
2279 // canonicalize constant to RHS
2280 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2281 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2282 if (N0C && !N1C)
2283 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2284
2285 // fold (addcarry x, y, false) -> (uaddo x, y)
2286 if (isNullConstant(CarryIn))
2287 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2288
2289 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2290 if (isNullConstant(N0) && isNullConstant(N1)) {
2291 EVT VT = N0.getValueType();
2292 EVT CarryVT = CarryIn.getValueType();
2293 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2294 AddToWorklist(CarryExt.getNode());
2295 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2296 DAG.getConstant(1, DL, VT)),
2297 DAG.getConstant(0, DL, CarryVT));
2298 }
2299
2300 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2301 return Combined;
2302
2303 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2304 return Combined;
2305
2306 return SDValue();
2307}
2308
2309SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2310 SDNode *N) {
2311 // Iff the flag result is dead:
2312 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2313 if ((N0.getOpcode() == ISD::ADD ||
2314 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2315 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2316 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2317 N0.getOperand(0), N0.getOperand(1), CarryIn);
2318
2319 /**
2321 * When one of the addcarry arguments is itself a carry, we may be facing
2322 * a diamond carry propagation, in which case we try to transform the DAG
2323 * to ensure linear carry propagation if that is possible.
2323 *
2324 * We are trying to get:
2325 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2326 */
2327 if (auto Y = getAsCarry(TLI, N1)) {
2328 /**
2329 * (uaddo A, B)
2330 * / \
2331 * Carry Sum
2332 * | \
2333 * | (addcarry *, 0, Z)
2334 * | /
2335 * \ Carry
2336 * | /
2337 * (addcarry X, *, *)
2338 */
2339 if (Y.getOpcode() == ISD::UADDO &&
2340 CarryIn.getResNo() == 1 &&
2341 CarryIn.getOpcode() == ISD::ADDCARRY &&
2342 isNullConstant(CarryIn.getOperand(1)) &&
2343 CarryIn.getOperand(0) == Y.getValue(0)) {
2344 auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2345 Y.getOperand(0), Y.getOperand(1),
2346 CarryIn.getOperand(2));
2347 AddToWorklist(NewY.getNode());
2348 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2349 DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2350 NewY.getValue(1));
2351 }
2352 }
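  // Worked example of the diamond transform above, using i8 values: take
  // A = 200, B = 100, Z = 1, so (uaddo A, B) is 44 with carry-out 1, and
  // (addcarry 44, 0, Z) is 45 with carry-out 0. At most one of those two
  // carries can ever be set, so merging them into the single linear chain
  // (addcarry A, B, Z), which is 45 with carry-out 1, preserves the final
  // (addcarry X, 0, Carry) result.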
2353
2354 return SDValue();
2355}
2356
2357// Since it may not be valid to emit a fold to zero for vector initializers,
2358// check whether we can before folding.
2359static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2360 SelectionDAG &DAG, bool LegalOperations,
2361 bool LegalTypes) {
2362 if (!VT.isVector())
2363 return DAG.getConstant(0, DL, VT);
2364 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2365 return DAG.getConstant(0, DL, VT);
2366 return SDValue();
2367}
2368
2369SDValue DAGCombiner::visitSUB(SDNode *N) {
2370 SDValue N0 = N->getOperand(0);
2371 SDValue N1 = N->getOperand(1);
2372 EVT VT = N0.getValueType();
2373 SDLoc DL(N);
2374
2375 // fold vector ops
2376 if (VT.isVector()) {
2377 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2378 return FoldedVOp;
2379
2380 // fold (sub x, 0) -> x, vector edition
2381 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2382 return N0;
2383 }
2384
2385 // fold (sub x, x) -> 0
2386 // FIXME: Refactor this and xor and other similar operations together.
2387 if (N0 == N1)
2388 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2389 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2390 DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2391 // fold (sub c1, c2) -> c1-c2
2392 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2393 N1.getNode());
2394 }
2395
2396 if (SDValue NewSel = foldBinOpIntoSelect(N))
2397 return NewSel;
2398
2399 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2400
2401 // fold (sub x, c) -> (add x, -c)
2402 if (N1C) {
2403 return DAG.getNode(ISD::ADD, DL, VT, N0,
2404 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2405 }
2406
2407 if (isNullConstantOrNullSplatConstant(N0)) {
2408 unsigned BitWidth = VT.getScalarSizeInBits();
2409 // Right-shifting everything out but the sign bit followed by negation is
2410 // the same as flipping arithmetic/logical shift type without the negation:
2411 // -(X >>u 31) -> (X >>s 31)
2412 // -(X >>s 31) -> (X >>u 31)
2413 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2414 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2415 if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2416 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2417 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2418 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2419 }
2420 }
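    // Worked example for the shift flip above with i32 values: X >>u 31 is
    // 0 or 1 depending on the sign bit, so 0 - (X >>u 31) is 0 or -1, which
    // is exactly X >>s 31; the same argument in reverse covers -(X >>s 31).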
2421
2422 // 0 - X --> 0 if the sub is NUW.
2423 if (N->getFlags().hasNoUnsignedWrap())
2424 return N0;
2425
2426 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2427 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2428 // N1 must be 0 because negating the minimum signed value is undefined.
2429 if (N->getFlags().hasNoSignedWrap())
2430 return N0;
2431
2432 // 0 - X --> X if X is 0 or the minimum signed value.
2433 return N1;
2434 }
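    // Worked example for the masked case above with i8 values: the mask test
    // proves N1 is 0 or -128 (the minimum signed value); 0 - 0 == 0 and
    // 0 - (-128) wraps back to -128 in two's complement, so 0 - N1 == N1.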
2435 }
2436
2437 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2438 if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2439 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2440
2441 // fold A-(A-B) -> B
2442 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2443 return N1.getOperand(1);
2444
2445 // fold (A+B)-A -> B
2446 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2447 return N0.getOperand(1);
2448
2449 // fold (A+B)-B -> A
2450 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2451 return N0.getOperand(0);
2452
2453 // fold C2-(A+C1) -> (C2-C1)-A
2454 if (N1.getOpcode() == ISD::ADD) {
2455 SDValue N11 = N1.getOperand(1);
2456 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2457 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2458 SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2459 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2460 }
2461 }
2462
2463 // fold ((A+(B+or-C))-B) -> A+or-C
2464 if (N0.getOpcode() == ISD::ADD &&
2465 (N0.getOperand(1).getOpcode() == ISD::SUB ||
2466 N0.getOperand(1).getOpcode() == ISD::ADD) &&
2467 N0.getOperand(1).getOperand(0) == N1)
2468 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2469 N0.getOperand(1).getOperand(1));
2470
2471 // fold ((A+(C+B))-B) -> A+C
2472 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2473 N0.getOperand(1).getOperand(1) == N1)
2474 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2475 N0.getOperand(1).getOperand(0));
2476
2477 // fold ((A-(B-C))-C) -> A-B
2478 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2479 N0.getOperand(1).getOperand(1) == N1)
2480 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2481 N0.getOperand(1).getOperand(0));
2482
2483 // If either operand of a sub is undef, the result is undef
2484 if (N0.isUndef())
2485 return N0;
2486 if (N1.isUndef())
2487 return N1;
2488
2489 // If the relocation model supports it, consider symbol offsets.
2490 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2491 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2492 // fold (sub Sym, c) -> Sym-c
2493 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2494 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2495 GA->getOffset() -
2496 (uint64_t)N1C->getSExtValue());
2497 // fold (sub Sym+c1, Sym+c2) -> c1-c2
2498 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2499 if (GA->getGlobal() == GB->getGlobal())
2500 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2501 DL, VT);
2502 }
2503
2504 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2505 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2506 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2507 if (TN->getVT() == MVT::i1) {
2508 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2509 DAG.getConstant(1, DL, VT));
2510 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2511 }
2512 }
2513
2514 return SDValue();
2515}
2516
2517SDValue DAGCombiner::visitSUBC(SDNode *N) {
2518 SDValue N0 = N->getOperand(0);
2519 SDValue N1 = N->getOperand(1);
2520 EVT VT = N0.getValueType();
2521 SDLoc DL(N);
2522
2523  // If the flag result is dead, turn this into a SUB.
2524 if (!N->hasAnyUseOfValue(1))
2525 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2526 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2527
2528 // fold (subc x, x) -> 0 + no borrow
2529 if (N0 == N1)
2530 return CombineTo(N, DAG.getConstant(0, DL, VT),
2531 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2532
2533 // fold (subc x, 0) -> x + no borrow
2534 if (isNullConstant(N1))
2535 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2536
2537 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2538 if (isAllOnesConstant(N0))
2539 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2540 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2541
2542 return SDValue();
2543}
2544
2545SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2546 SDValue N0 = N->getOperand(0);
2547 SDValue N1 = N->getOperand(1);
2548 EVT VT = N0.getValueType();
2549 if (VT.isVector())
2550 return SDValue();
2551
2552 EVT CarryVT = N->getValueType(1);
2553 SDLoc DL(N);
2554
2555  // If the flag result is dead, turn this into a SUB.
2556 if (!N->hasAnyUseOfValue(1))
2557 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2558 DAG.getUNDEF(CarryVT));
2559
2560 // fold (usubo x, x) -> 0 + no borrow
2561 if (N0 == N1)
2562 return CombineTo(N, DAG.getConstant(0, DL, VT),
2563 DAG.getConstant(0, DL, CarryVT));
2564
2565 // fold (usubo x, 0) -> x + no borrow
2566 if (isNullConstant(N1))
2567 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2568
2569 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2570 if (isAllOnesConstant(N0))
2571 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2572 DAG.getConstant(0, DL, CarryVT));
2573
2574 return SDValue();
2575}
2576
2577SDValue DAGCombiner::visitSUBE(SDNode *N) {
2578 SDValue N0 = N->getOperand(0);
2579 SDValue N1 = N->getOperand(1);
2580 SDValue CarryIn = N->getOperand(2);
2581
2582 // fold (sube x, y, false) -> (subc x, y)
2583 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2584 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2585
2586 return SDValue();
2587}
2588
2589SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2590 SDValue N0 = N->getOperand(0);
2591 SDValue N1 = N->getOperand(1);
2592 SDValue CarryIn = N->getOperand(2);
2593
2594 // fold (subcarry x, y, false) -> (usubo x, y)
2595 if (isNullConstant(CarryIn))
2596 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2597
2598 return SDValue();
2599}
2600
2601SDValue DAGCombiner::visitMUL(SDNode *N) {
2602 SDValue N0 = N->getOperand(0);
2603 SDValue N1 = N->getOperand(1);
2604 EVT VT = N0.getValueType();
2605
2606 // fold (mul x, undef) -> 0
2607 if (N0.isUndef() || N1.isUndef())
2608 return DAG.getConstant(0, SDLoc(N), VT);
2609
2610 bool N0IsConst = false;
2611 bool N1IsConst = false;
2612 bool N1IsOpaqueConst = false;
2613 bool N0IsOpaqueConst = false;
2614 APInt ConstValue0, ConstValue1;
2615 // fold vector ops
2616 if (VT.isVector()) {
2617 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2618 return FoldedVOp;
2619
2620 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2621 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2622    assert((!N0IsConst ||
2623            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2624           "Splat APInt should be element width");
2625    assert((!N1IsConst ||
2626            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2627           "Splat APInt should be element width");
2628 } else {
2629 N0IsConst = isa<ConstantSDNode>(N0);
2630 if (N0IsConst) {
2631 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2632 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2633 }
2634 N1IsConst = isa<ConstantSDNode>(N1);
2635 if (N1IsConst) {
2636 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2637 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2638 }
2639 }
2640
2641 // fold (mul c1, c2) -> c1*c2
2642 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2643 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2644 N0.getNode(), N1.getNode());
2645
2646 // canonicalize constant to RHS (vector doesn't have to splat)
2647 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2648 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2649 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2650 // fold (mul x, 0) -> 0
2651 if (N1IsConst && ConstValue1.isNullValue())
2652 return N1;
2653 // fold (mul x, 1) -> x
2654 if (N1IsConst && ConstValue1.isOneValue())
2655 return N0;
2656
2657 if (SDValue NewSel = foldBinOpIntoSelect(N))
2658 return NewSel;
2659
2660 // fold (mul x, -1) -> 0-x
2661 if (N1IsConst && ConstValue1.isAllOnesValue()) {
2662 SDLoc DL(N);
2663 return DAG.getNode(ISD::SUB, DL, VT,
2664 DAG.getConstant(0, DL, VT), N0);
2665 }
2666 // fold (mul x, (1 << c)) -> x << c
2667 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2668 DAG.isKnownToBeAPowerOfTwo(N1) &&
2669 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2670 SDLoc DL(N);
2671 SDValue LogBase2 = BuildLogBase2(N1, DL);
2672 AddToWorklist(LogBase2.getNode());
2673
2674 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2675 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2676 AddToWorklist(Trunc.getNode());
2677 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2678 }
2679 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2680 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2681 unsigned Log2Val = (-ConstValue1).logBase2();
2682 SDLoc DL(N);
2683 // FIXME: If the input is something that is easily negated (e.g. a
2684 // single-use add), we should put the negate there.
2685 return DAG.getNode(ISD::SUB, DL, VT,
2686 DAG.getConstant(0, DL, VT),
2687 DAG.getNode(ISD::SHL, DL, VT, N0,
2688 DAG.getConstant(Log2Val, DL,
2689 getShiftAmountTy(N0.getValueType()))));
2690 }
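  // Worked examples for the two power-of-two folds above: x * 16 == x << 4,
  // and x * -8 == 0 - (x << 3); e.g. for x = 3, 3 * -8 == -24 == -(3 << 3).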
2691
2692 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2693 if (N0.getOpcode() == ISD::SHL &&
2694 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2695 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2696 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2697 if (isConstantOrConstantVector(C3))
2698 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2699 }
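  // Worked example for the fold above: (X << 2) * 5 == X * (5 << 2) == X * 20,
  // so the inner shift folds into the constant multiplier.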
2700
2701 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2702 // use.
2703 {
2704 SDValue Sh(nullptr, 0), Y(nullptr, 0);
2705
2706 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2707 if (N0.getOpcode() == ISD::SHL &&
2708 isConstantOrConstantVector(N0.getOperand(1)) &&
2709 N0.getNode()->hasOneUse()) {
2710 Sh = N0; Y = N1;
2711 } else if (N1.getOpcode() == ISD::SHL &&
2712 isConstantOrConstantVector(N1.getOperand(1)) &&
2713 N1.getNode()->hasOneUse()) {
2714 Sh = N1; Y = N0;
2715 }
2716
2717 if (Sh.getNode()) {
2718 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2719 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2720 }
2721 }
2722
2723 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2724 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2725 N0.getOpcode() == ISD::ADD &&
2726 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2727 isMulAddWithConstProfitable(N, N0, N1))
2728 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2729 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2730 N0.getOperand(0), N1),
2731 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2732 N0.getOperand(1), N1));
2733
2734 // reassociate mul
2735 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2736 return RMUL;
2737
2738 return SDValue();
2739}
2740
2741/// Return true if divmod libcall is available.
2742static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2743 const TargetLowering &TLI) {
2744 RTLIB::Libcall LC;
2745 EVT NodeType = Node->getValueType(0);
2746 if (!NodeType.isSimple())
2747 return false;
2748 switch (NodeType.getSimpleVT().SimpleTy) {
2749 default: return false; // No libcall for vector types.
2750 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2751 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2752 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2753 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2754 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2755 }
2756
2757 return TLI.getLibcallName(LC) != nullptr;
2758}
2759
2760/// Issue divrem if both quotient and remainder are needed.
2761SDValue DAGCombiner::useDivRem(SDNode *Node) {
2762 if (Node->use_empty())
2763 return SDValue(); // This is a dead node, leave it alone.
2764
2765 unsigned Opcode = Node->getOpcode();
2766 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2767 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2768
2769  // DivMod libcalls can still work on non-legal types.
2770 EVT VT = Node->getValueType(0);
2771 if (VT.isVector() || !VT.isInteger())
2772 return SDValue();
2773
2774 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2775 return SDValue();
2776
2777 // If DIVREM is going to get expanded into a libcall,
2778 // but there is no libcall available, then don't combine.
2779 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2780 !isDivRemLibcallAvailable(Node, isSigned, TLI))
2781 return SDValue();
2782
2783 // If div is legal, it's better to do the normal expansion
2784 unsigned OtherOpcode = 0;
2785 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2786 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2787 if (TLI.isOperationLegalOrCustom(Opcode, VT))
2788 return SDValue();
2789 } else {
2790 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2791 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2792 return SDValue();
2793 }
2794
2795 SDValue Op0 = Node->getOperand(0);
2796 SDValue Op1 = Node->getOperand(1);
2797 SDValue combined;
2798 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2799 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
2800 SDNode *User = *UI;
2801 if (User == Node || User->use_empty())
2802 continue;
2803 // Convert the other matching node(s), too;
2804 // otherwise, the DIVREM may get target-legalized into something
2805 // target-specific that we won't be able to recognize.
2806 unsigned UserOpc = User->getOpcode();
2807 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2808 User->getOperand(0) == Op0 &&
2809 User->getOperand(1) == Op1) {
2810 if (!combined) {
2811 if (UserOpc == OtherOpcode) {
2812 SDVTList VTs = DAG.getVTList(VT, VT);
2813 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2814 } else if (UserOpc == DivRemOpc) {
2815 combined = SDValue(User, 0);
2816 } else {
2817          assert(UserOpc == Opcode);
2818 continue;
2819 }
2820 }
2821 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2822 CombineTo(User, combined);
2823 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2824 CombineTo(User, combined.getValue(1));
2825 }
2826 }
2827 return combined;
2828}
2829
2830static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2831 SDValue N0 = N->getOperand(0);
2832 SDValue N1 = N->getOperand(1);
2833 EVT VT = N->getValueType(0);
2834 SDLoc DL(N);
2835
2836 if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2837 return DAG.getUNDEF(VT);
2838
2839 // undef / X -> 0
2840 // undef % X -> 0
2841 if (N0.isUndef())
2842 return DAG.getConstant(0, DL, VT);
2843
2844 return SDValue();
2845}
2846
2847SDValue DAGCombiner::visitSDIV(SDNode *N) {
2848 SDValue N0 = N->getOperand(0);
2849 SDValue N1 = N->getOperand(1);
2850 EVT VT = N->getValueType(0);
2851
2852 // fold vector ops
2853 if (VT.isVector())
2854 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2855 return FoldedVOp;
2856
2857 SDLoc DL(N);
2858
2859 // fold (sdiv c1, c2) -> c1/c2
2860 ConstantSDNode *N0C = isConstOrConstSplat(N0);
2861 ConstantSDNode *N1C = isConstOrConstSplat(N1);
2862 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2863 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2864 // fold (sdiv X, 1) -> X
2865 if (N1C && N1C->isOne())
2866 return N0;
2867 // fold (sdiv X, -1) -> 0-X
2868 if (N1C && N1C->isAllOnesValue())
2869 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2870
2871 if (SDValue V = simplifyDivRem(N, DAG))
2872 return V;
2873
2874 if (SDValue NewSel = foldBinOpIntoSelect(N))
2875 return NewSel;
2876
2877 // If we know the sign bits of both operands are zero, strength reduce to a
2878 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2879 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2880 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2881
2882 // fold (sdiv X, pow2) -> simple ops after legalize
2883 // FIXME: We check for the exact bit here because the generic lowering gives
2884 // better results in that case. The target-specific lowering should learn how
2885 // to handle exact sdivs efficiently.
2886 if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2887 !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2888 (-N1C->getAPIntValue()).isPowerOf2())) {
2889 // Target-specific implementation of sdiv x, pow2.
2890 if (SDValue Res = BuildSDIVPow2(N))
2891 return Res;
2892
2893 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2894
2895 // Splat the sign bit into the register
2896 SDValue SGN =
2897 DAG.getNode(ISD::SRA, DL, VT, N0,
2898 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2899 getShiftAmountTy(N0.getValueType())));
2900 AddToWorklist(SGN.getNode());
2901
2902 // Add (N0 < 0) ? abs2 - 1 : 0;
2903 SDValue SRL =
2904 DAG.getNode(ISD::SRL, DL, VT, SGN,
2905 DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2906 getShiftAmountTy(SGN.getValueType())));
2907 SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2908 AddToWorklist(SRL.getNode());
2909 AddToWorklist(ADD.getNode()); // Divide by pow2
2910 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2911 DAG.getConstant(lg2, DL,
2912 getShiftAmountTy(ADD.getValueType())));
2913
2914 // If we're dividing by a positive value, we're done. Otherwise, we must
2915 // negate the result.
2916 if (N1C->getAPIntValue().isNonNegative())
2917 return SRA;
2918
2919 AddToWorklist(SRA.getNode());
2920 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2921 }
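  // Worked example of the expansion above for i32 and N1 == 8 (lg2 == 3):
  //   SGN = X >> 31          (0 for X >= 0, -1 for X < 0)
  //   SRL = SGN >>u (32 - 3) (0 for X >= 0, 7 for X < 0)
  //   SRA = (X + SRL) >> 3
  // For X = -1 this yields (-1 + 7) >> 3 == 0, matching truncating division,
  // whereas a bare arithmetic shift would give -1.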
2922
2923 // If integer divide is expensive and we satisfy the requirements, emit an
2924 // alternate sequence. Targets may check function attributes for size/speed
2925 // trade-offs.
2926 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
2927 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2928 if (SDValue Op = BuildSDIV(N))
2929 return Op;
2930
2931 // sdiv, srem -> sdivrem
2932 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2933 // true. Otherwise, we break the simplification logic in visitREM().
2934 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2935 if (SDValue DivRem = useDivRem(N))
2936 return DivRem;
2937
2938 return SDValue();
2939}
2940
2941SDValue DAGCombiner::visitUDIV(SDNode *N) {
2942 SDValue N0 = N->getOperand(0);
2943 SDValue N1 = N->getOperand(1);
2944 EVT VT = N->getValueType(0);
2945
2946 // fold vector ops
2947 if (VT.isVector())
2948 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2949 return FoldedVOp;
2950
2951 SDLoc DL(N);
2952
2953 // fold (udiv c1, c2) -> c1/c2
2954 ConstantSDNode *N0C = isConstOrConstSplat(N0);
2955 ConstantSDNode *N1C = isConstOrConstSplat(N1);
2956 if (N0C && N1C)
2957 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2958 N0C, N1C))
2959 return Folded;
2960
2961 if (SDValue V = simplifyDivRem(N, DAG))
2962 return V;
2963
2964 if (SDValue NewSel = foldBinOpIntoSelect(N))
2965 return NewSel;
2966
2967 // fold (udiv x, (1 << c)) -> x >>u c
2968 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2969 DAG.isKnownToBeAPowerOfTwo(N1)) {
2970 SDValue LogBase2 = BuildLogBase2(N1, DL);
2971 AddToWorklist(LogBase2.getNode());
2972
2973 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2974 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2975 AddToWorklist(Trunc.getNode());
2976 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2977 }
2978
2979 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2980 if (N1.getOpcode() == ISD::SHL) {
2981 SDValue N10 = N1.getOperand(0);
2982 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2983 DAG.isKnownToBeAPowerOfTwo(N10)) {
2984 SDValue LogBase2 = BuildLogBase2(N10, DL);
2985 AddToWorklist(LogBase2.getNode());
2986
2987 EVT ADDVT = N1.getOperand(1).getValueType();
2988 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2989 AddToWorklist(Trunc.getNode());
2990 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2991 AddToWorklist(Add.getNode());
2992 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2993 }
2994 }
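  // Worked example for the fold above: x udiv (4 << y) == x >>u (2 + y);
  // e.g. x = 100, y = 1 gives 100 / 8 == 12 == 100 >> 3.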
2995
2996 // fold (udiv x, c) -> alternate
2997 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
2998 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2999 if (SDValue Op = BuildUDIV(N))
3000 return Op;
3001
3002  // udiv, urem -> udivrem
3003 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3004 // true. Otherwise, we break the simplification logic in visitREM().
3005 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3006 if (SDValue DivRem = useDivRem(N))
3007 return DivRem;
3008
3009 return SDValue();
3010}
3011
3012// handles ISD::SREM and ISD::UREM
3013SDValue DAGCombiner::visitREM(SDNode *N) {
3014 unsigned Opcode = N->getOpcode();
3015 SDValue N0 = N->getOperand(0);
3016 SDValue N1 = N->getOperand(1);
3017 EVT VT = N->getValueType(0);
3018 bool isSigned = (Opcode == ISD::SREM);
3019 SDLoc DL(N);
3020
3021 // fold (rem c1, c2) -> c1%c2
3022 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3023 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3024 if (N0C && N1C)
3025 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3026 return Folded;
3027
3028 if (SDValue V = simplifyDivRem(N, DAG))
3029 return V;
3030
3031 if (SDValue NewSel = foldBinOpIntoSelect(N))
3032 return NewSel;
3033
3034 if (isSigned) {
3035 // If we know the sign bits of both operands are zero, strength reduce to a
3036 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3037 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3038 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3039 } else {
3040 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3041 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3042 // fold (urem x, pow2) -> (and x, pow2-1)
3043 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3044 AddToWorklist(Add.getNode());
3045 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3046 }
3047 if (N1.getOpcode() == ISD::SHL &&
3048 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3049 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3050 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3051 AddToWorklist(Add.getNode());
3052 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3053 }
3054 }
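    // Worked example for the two urem folds above: x urem 8 == x & 7, since
    // 8 - 1 == 7 masks exactly the low three bits; the shl variant is the
    // same identity with the power of two computed at run time.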
3055
3056 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3057
3058 // If X/C can be simplified by the division-by-constant logic, lower
3059 // X%C to the equivalent of X-X/C*C.
3060 // To avoid mangling nodes, this simplification requires that the combine()
3061 // call for the speculative DIV must not cause a DIVREM conversion. We guard
3062 // against this by skipping the simplification if isIntDivCheap(). When
3063 // div is not cheap, combine will not return a DIVREM. Regardless,
3064 // checking cheapness here makes sense since the simplification results in
3065 // fatter code.
3066 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
3067 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3068 SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
3069 AddToWorklist(Div.getNode());
3070 SDValue OptimizedDiv = combine(Div.getNode());
3071 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
3072      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
3073             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
3074 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3075 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3076 AddToWorklist(Mul.getNode());
3077 return Sub;
3078 }
3079 }
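  // Worked example for the rewrite above: 23 % 5 == 23 - (23 / 5) * 5
  // == 23 - 20 == 3. The rewrite only pays off when the speculative divide
  // itself simplifies, which is why OptimizedDiv must differ from Div.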
3080
3081  // sdiv, srem -> sdivrem; udiv, urem -> udivrem
3082 if (SDValue DivRem = useDivRem(N))
3083 return DivRem.getValue(1);
3084
3085 return SDValue();
3086}
3087
3088SDValue DAGCombiner::visitMULHS(SDNode *N) {
3089 SDValue N0 = N->getOperand(0);
3090 SDValue N1 = N->getOperand(1);
3091 EVT VT = N->getValueType(0);
3092 SDLoc DL(N);
3093
3094 if (VT.isVector()) {
3095 // fold (mulhs x, 0) -> 0
3096 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3097 return N1;
3098 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3099 return N0;
3100 }
3101
3102 // fold (mulhs x, 0) -> 0
3103 if (isNullConstant(N1))
3104 return N1;
3105 // fold (mulhs x, 1) -> (sra x, size(x)-1)
3106 if (isOneConstant(N1))
3107 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3108 DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3109 getShiftAmountTy(N0.getValueType())));
3110
3111 // fold (mulhs x, undef) -> 0
3112 if (N0.isUndef() || N1.isUndef())
3113 return DAG.getConstant(0, DL, VT);
3114
3115 // If the type twice as wide is legal, transform the mulhs to a wider multiply
3116 // plus a shift.
3117 if (VT.isSimple() && !VT.isVector()) {
3118 MVT Simple = VT.getSimpleVT();
3119 unsigned SimpleSize = Simple.getSizeInBits();
3120 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3121 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3122 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3123 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3124 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3125 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3126 DAG.getConstant(SimpleSize, DL,
3127 getShiftAmountTy(N1.getValueType())));
3128 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3129 }
3130 }
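  // Worked example of the widening transform above for i8: mulhs(100, 100)
  // sign-extends both operands to i16, multiplies to 10000 (0x2710), shifts
  // right by 8 to get 0x27, and truncates back to i8, yielding 39.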
3131
3132 return SDValue();
3133}
3134
3135SDValue DAGCombiner::visitMULHU(SDNode *N) {
3136 SDValue N0 = N->getOperand(0);
3137 SDValue N1 = N->getOperand(1);
3138 EVT VT = N->getValueType(0);
3139 SDLoc DL(N);
3140
3141 if (VT.isVector()) {
3142 // fold (mulhu x, 0) -> 0
3143 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3144 return N1;
3145 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3146 return N0;
3147 }
3148
3149 // fold (mulhu x, 0) -> 0
3150 if (isNullConstant(N1))
3151 return N1;
3152 // fold (mulhu x, 1) -> 0
3153 if (isOneConstant(N1))
3154 return DAG.getConstant(0, DL, N0.getValueType());
3155 // fold (mulhu x, undef) -> 0
3156 if (N0.isUndef() || N1.isUndef())
3157 return DAG.getConstant(0, DL, VT);
3158
3159 // If the type twice as wide is legal, transform the mulhu to a wider multiply
3160 // plus a shift.
3161 if (VT.isSimple() && !VT.isVector()) {
3162 MVT Simple = VT.getSimpleVT();
3163 unsigned SimpleSize = Simple.getSizeInBits();
3164 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3165 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3166 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3167 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3168 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3169 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3170 DAG.getConstant(SimpleSize, DL,
3171 getShiftAmountTy(N1.getValueType())));
3172 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3173 }
3174 }
3175
3176 return SDValue();
3177}
3178
3179/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3180/// give the opcodes for the two computations that are being performed. Return
3181/// the simplified value, or a null SDValue if no simplification was made.
3182SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3183 unsigned HiOp) {
3184 // If the high half is not needed, just compute the low half.
3185 bool HiExists = N->hasAnyUseOfValue(1);
3186 if (!HiExists &&
3187 (!LegalOperations ||
3188 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3189 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3190 return CombineTo(N, Res, Res);
3191 }
3192
3193 // If the low half is not needed, just compute the high half.
3194 bool LoExists = N->hasAnyUseOfValue(0);
3195 if (!LoExists &&
3196 (!LegalOperations ||
3197 TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3198 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3199 return CombineTo(N, Res, Res);
3200 }
3201
3202  // If both halves are used, leave the node as it is.
3203 if (LoExists && HiExists)
3204 return SDValue();
3205
3206 // If the two computed results can be simplified separately, separate them.
3207 if (LoExists) {
3208 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3209 AddToWorklist(Lo.getNode());
3210 SDValue LoOpt = combine(Lo.getNode());
3211 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3212 (!LegalOperations ||
3213 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3214 return CombineTo(N, LoOpt, LoOpt);
3215 }
3216
3217 if (HiExists) {
3218 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3219 AddToWorklist(Hi.getNode());
3220 SDValue HiOpt = combine(Hi.getNode());
3221 if (HiOpt.getNode() && HiOpt != Hi &&
3222 (!LegalOperations ||
3223 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3224 return CombineTo(N, HiOpt, HiOpt);
3225 }
3226
3227 return SDValue();
3228}
3229
3230SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3231 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3232 return Res;
3233
3234 EVT VT = N->getValueType(0);
3235 SDLoc DL(N);
3236
3237  // If the type twice as wide is legal, transform the smul_lohi to a wider
3238  // multiply plus a shift.
3239 if (VT.isSimple() && !VT.isVector()) {
3240 MVT Simple = VT.getSimpleVT();
3241 unsigned SimpleSize = Simple.getSizeInBits();
3242 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3243 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3244 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3245 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3246 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3247 // Compute the high part as N1.
3248 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3249 DAG.getConstant(SimpleSize, DL,
3250 getShiftAmountTy(Lo.getValueType())));
3251 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3252 // Compute the low part as N0.
3253 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3254 return CombineTo(N, Lo, Hi);
3255 }
3256 }
3257
3258 return SDValue();
3259}
3260
3261SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3262 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3263 return Res;
3264
3265 EVT VT = N->getValueType(0);
3266 SDLoc DL(N);
3267
3268  // If the type twice as wide is legal, transform the umul_lohi to a wider
3269  // multiply plus a shift.
3270 if (VT.isSimple() && !VT.isVector()) {
3271 MVT Simple = VT.getSimpleVT();
3272 unsigned SimpleSize = Simple.getSizeInBits();
3273 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3274 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3275 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3276 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3277 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3278 // Compute the high part as N1.
3279 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3280 DAG.getConstant(SimpleSize, DL,
3281 getShiftAmountTy(Lo.getValueType())));
3282 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3283 // Compute the low part as N0.
3284 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3285 return CombineTo(N, Lo, Hi);
3286 }
3287 }
3288
3289 return SDValue();
3290}
3291
3292SDValue DAGCombiner::visitSMULO(SDNode *N) {
3293 // (smulo x, 2) -> (saddo x, x)
3294 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3295 if (C2->getAPIntValue() == 2)
3296 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3297 N->getOperand(0), N->getOperand(0));
3298
3299 return SDValue();
3300}
3301
3302SDValue DAGCombiner::visitUMULO(SDNode *N) {
3303 // (umulo x, 2) -> (uaddo x, x)
3304 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3305 if (C2->getAPIntValue() == 2)
3306 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3307 N->getOperand(0), N->getOperand(0));
3308
3309 return SDValue();
3310}
3311
3312SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3313 SDValue N0 = N->getOperand(0);
3314 SDValue N1 = N->getOperand(1);
3315 EVT VT = N0.getValueType();
3316
3317 // fold vector ops
3318 if (VT.isVector())
3319 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3320 return FoldedVOp;
3321
3322 // fold operation with constant operands.
3323 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3324 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3325 if (N0C && N1C)
3326 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3327
3328 // canonicalize constant to RHS
3329 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3330 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3331 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3332
3333  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3334 // Only do this if the current op isn't legal and the flipped is.
3335 unsigned Opcode = N->getOpcode();
3336 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3337 if (!TLI.isOperationLegal(Opcode, VT) &&
3338 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3339 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3340 unsigned AltOpcode;
3341 switch (Opcode) {
3342 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3343 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3344 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3345 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3346    default: llvm_unreachable("Unknown MINMAX opcode");
3347 }
3348 if (TLI.isOperationLegal(AltOpcode, VT))
3349 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3350 }
3351
3352 return SDValue();
3353}
3354
3355/// If this is a binary operator with two operands of the same opcode, try to
3356/// simplify it.
3357SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3358 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3359 EVT VT = N0.getValueType();
3360  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3361
3362 // Bail early if none of these transforms apply.
3363 if (N0.getNumOperands() == 0) return SDValue();
3364
3365 // For each of OP in AND/OR/XOR:
3366 // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3367 // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3368 // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3369 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3370 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3371 //
3372 // do not sink logical op inside of a vector extend, since it may combine
3373 // into a vsetcc.
3374 EVT Op0VT = N0.getOperand(0).getValueType();
3375 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3376 N0.getOpcode() == ISD::SIGN_EXTEND ||
3377 N0.getOpcode() == ISD::BSWAP ||
3378 // Avoid infinite looping with PromoteIntBinOp.
3379 (N0.getOpcode() == ISD::ANY_EXTEND &&
3380 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3381 (N0.getOpcode() == ISD::TRUNCATE &&
3382 (!TLI.isZExtFree(VT, Op0VT) ||
3383 !TLI.isTruncateFree(Op0VT, VT)) &&
3384 TLI.isTypeLegal(Op0VT))) &&
3385 !VT.isVector() &&
3386 Op0VT == N1.getOperand(0).getValueType() &&
3387 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3388 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3389 N0.getOperand(0).getValueType(),
3390 N0.getOperand(0), N1.getOperand(0));
3391 AddToWorklist(ORNode.getNode());
3392 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3393 }
3394
3395 // For each of OP in SHL/SRL/SRA/AND...
3396 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3397 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
3398 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3399 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3400 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3401 N0.getOperand(1) == N1.getOperand(1)) {
3402 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3403 N0.getOperand(0).getValueType(),
3404 N0.getOperand(0), N1.getOperand(0));
3405 AddToWorklist(ORNode.getNode());
3406 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3407 ORNode, N0.getOperand(1));
3408 }
3409
3410 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3411 // Only perform this optimization up until type legalization, before
3412  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
3413 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3414 // we don't want to undo this promotion.
3415 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3416 // on scalars.
3417 if ((N0.getOpcode() == ISD::BITCAST ||
3418 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3419 Level <= AfterLegalizeTypes) {
3420 SDValue In0 = N0.getOperand(0);
3421 SDValue In1 = N1.getOperand(0);
3422 EVT In0Ty = In0.getValueType();
3423 EVT In1Ty = In1.getValueType();
3424 SDLoc DL(N);
3425 // If both incoming values are integers, and the original types are the
3426 // same.
3427 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3428 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3429 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3430 AddToWorklist(Op.getNode());
3431 return BC;
3432 }
3433 }
3434
3435 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3436 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3437 // If both shuffles use the same mask, and both shuffle within a single
3438 // vector, then it is worthwhile to move the swizzle after the operation.
3439 // The type-legalizer generates this pattern when loading illegal
3440 // vector types from memory. In many cases this allows additional shuffle
3441 // optimizations.
3442 // There are other cases where moving the shuffle after the xor/and/or
3443 // is profitable even if shuffles don't perform a swizzle.
3444 // If both shuffles use the same mask, and both shuffles have the same first
3445 // or second operand, then it might still be profitable to move the shuffle
3446 // after the xor/and/or operation.
3447 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3448 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3449 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3450
3451    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3452           "Inputs to shuffles are not the same type");
3453
3454 // Check that both shuffles use the same mask. The masks are known to be of
3455 // the same length because the result vector type is the same.
3456 // Check also that shuffles have only one use to avoid introducing extra
3457 // instructions.
3458 if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3459 SVN0->getMask().equals(SVN1->getMask())) {
3460 SDValue ShOp = N0->getOperand(1);
3461
3462 // Don't try to fold this node if it requires introducing a
3463 // build vector of all zeros that might be illegal at this stage.
3464 if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3465 if (!LegalTypes)
3466 ShOp = DAG.getConstant(0, SDLoc(N), VT);
3467 else
3468 ShOp = SDValue();
3469 }
3470
3471      // (AND (shuf A, C), (shuf B, C)) -> (shuf (AND A, B), C)
3472      // (OR  (shuf A, C), (shuf B, C)) -> (shuf (OR  A, B), C)
3473      // (XOR (shuf A, C), (shuf B, C)) -> (shuf (XOR A, B), V_0)
3474 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3475 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3476 N0->getOperand(0), N1->getOperand(0));
3477 AddToWorklist(NewNode.getNode());
3478 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3479 SVN0->getMask());
3480 }
3481
3482 // Don't try to fold this node if it requires introducing a
3483 // build vector of all zeros that might be illegal at this stage.
3484 ShOp = N0->getOperand(0);
3485 if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3486 if (!LegalTypes)
3487 ShOp = DAG.getConstant(0, SDLoc(N), VT);
3488 else
3489 ShOp = SDValue();
3490 }
3491
3492      // (AND (shuf C, A), (shuf C, B)) -> (shuf C, (AND A, B))
3493      // (OR  (shuf C, A), (shuf C, B)) -> (shuf C, (OR  A, B))
3494      // (XOR (shuf C, A), (shuf C, B)) -> (shuf V_0, (XOR A, B))
3495 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3496 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3497 N0->getOperand(1), N1->getOperand(1));
3498 AddToWorklist(NewNode.getNode());
3499 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3500 SVN0->getMask());
3501 }
3502 }
3503 }
3504
3505 return SDValue();
3506}
3507
3508/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3509SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3510 const SDLoc &DL) {
3511 SDValue LL, LR, RL, RR, N0CC, N1CC;
3512 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3513 !isSetCCEquivalent(N1, RL, RR, N1CC))
3514 return SDValue();
3515
3516  assert(N0.getValueType() == N1.getValueType() &&
3517         "Unexpected operand types for bitwise logic op");
3518  assert(LL.getValueType() == LR.getValueType() &&
3519         RL.getValueType() == RR.getValueType() &&
3520         "Unexpected operand types for setcc");
3521
3522 // If we're here post-legalization or the logic op type is not i1, the logic
3523 // op type must match a setcc result type. Also, all folds require new
3524 // operations on the left and right operands, so those types must match.
3525 EVT VT = N0.getValueType();
3526 EVT OpVT = LL.getValueType();
3527 if (LegalOperations || VT.getScalarType() != MVT::i1)
3528 if (VT != getSetCCResultType(OpVT))
3529 return SDValue();
3530 if (OpVT != RL.getValueType())
3531 return SDValue();
3532
3533 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3534 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3535 bool IsInteger = OpVT.isInteger();
3536 if (LR == RR && CC0 == CC1 && IsInteger) {
3537 bool IsZero = isNullConstantOrNullSplatConstant(LR);
3538 bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3539
3540 // All bits clear?
3541 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3542 // All sign bits clear?
3543 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3544 // Any bits set?
3545 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3546 // Any sign bits set?
3547 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3548
3549 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
3550 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3551 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
3552 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
3553 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3554 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3555 AddToWorklist(Or.getNode());
3556 return DAG.getSetCC(DL, VT, Or, LR, CC1);
3557 }
3558
3559 // All bits set?
3560 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3561 // All sign bits set?
3562 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3563 // Any bits clear?
3564 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3565 // Any sign bits clear?
3566 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3567
3568 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3569 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
3570 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3571 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
3572 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3573 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3574 AddToWorklist(And.getNode());
3575 return DAG.getSetCC(DL, VT, And, LR, CC1);
3576 }
3577 }
3578
3579 // TODO: What is the 'or' equivalent of this fold?
3580 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3581 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3582 IsInteger && CC0 == ISD::SETNE &&
3583 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3584 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3585 SDValue One = DAG.getConstant(1, DL, OpVT);
3586 SDValue Two = DAG.getConstant(2, DL, OpVT);
3587 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3588 AddToWorklist(Add.getNode());
3589 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3590 }
3591
3592 // Try more general transforms if the predicates match and the only user of
3593 // the compares is the 'and' or 'or'.
3594 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3595 N0.hasOneUse() && N1.hasOneUse()) {
3596 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3597 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3598 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3599 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3600 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3601 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3602 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3603 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3604 }
3605 }
3606
3607 // Canonicalize equivalent operands to LL == RL.
3608 if (LL == RR && LR == RL) {
3609 CC1 = ISD::getSetCCSwappedOperands(CC1);
3610 std::swap(RL, RR);
3611 }
3612
3613 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3614 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3615 if (LL == RL && LR == RR) {
3616 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3617 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3618 if (NewCC != ISD::SETCC_INVALID &&
3619 (!LegalOperations ||
3620 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3621 TLI.isOperationLegal(ISD::SETCC, OpVT))))
3622 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3623 }
3624
3625 return SDValue();
3626}
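// Standalone illustration (not part of DAGCombiner.cpp; function names
// invented): what the first group of setcc folds above means at the
// source level.
#include <cstdint>

// Before the fold: two compares feeding one logical 'and'.
bool bothZeroBefore(uint32_t X, uint32_t Y) {
  return X == 0 && Y == 0;        // (and (seteq X, 0), (seteq Y, 0))
}

// After the fold: a single compare against the OR of the inputs.
bool bothZeroAfter(uint32_t X, uint32_t Y) {
  return (X | Y) == 0;            // (seteq (or X, Y), 0)
}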
3627
3628/// This contains all DAGCombine rules which reduce two values combined by
3629/// an And operation to a single value. This makes them reusable in the context
3630/// of visitSELECT(). Rules involving constants are not included as
3631/// visitSELECT() already handles those cases.
3632SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3633 EVT VT = N1.getValueType();
3634 SDLoc DL(N);
3635
3636 // fold (and x, undef) -> 0
3637 if (N0.isUndef() || N1.isUndef())
3638 return DAG.getConstant(0, DL, VT);
3639
3640 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3641 return V;
3642
3643 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3644 VT.getSizeInBits() <= 64) {
3645 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3646 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3647 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3648 // immediate for an add, but it is legal if its top c2 bits are set,
3649 // transform the ADD so the immediate doesn't need to be materialized
3650 // in a register.
3651 APInt ADDC = ADDI->getAPIntValue();
3652 APInt SRLC = SRLI->getAPIntValue();
3653 if (ADDC.getMinSignedBits() <= 64 &&
3654 SRLC.ult(VT.getSizeInBits()) &&
3655 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3656 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3657 SRLC.getZExtValue());
3658 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3659 ADDC |= Mask;
3660 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3661 SDLoc DL0(N0);
3662 SDValue NewAdd =
3663 DAG.getNode(ISD::ADD, DL0, VT,
3664 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3665 CombineTo(N0.getNode(), NewAdd);
3666 // Return N so it doesn't get rechecked!
3667 return SDValue(N, 0);
3668 }
3669 }
3670 }
3671 }
3672 }
3673 }
3674
3675 // Reduce bit extract of low half of an integer to the narrower type.
3676 // (and (srl i64:x, K), KMask) ->
3677 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
3678 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3679 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3680 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3681 unsigned Size = VT.getSizeInBits();
3682 const APInt &AndMask = CAnd->getAPIntValue();
3683 unsigned ShiftBits = CShift->getZExtValue();
3684
3685 // Bail out, this node will probably disappear anyway.
3686 if (ShiftBits == 0)
3687 return SDValue();
3688
3689 unsigned MaskBits = AndMask.countTrailingOnes();
3690 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3691
3692 if (AndMask.isMask() &&
3693 // Required bits must not span the two halves of the integer and
3694 // must fit in the half size type.
3695 (ShiftBits + MaskBits <= Size / 2) &&
3696 TLI.isNarrowingProfitable(VT, HalfVT) &&
3697 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3698 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3699 TLI.isTruncateFree(VT, HalfVT) &&
3700 TLI.isZExtFree(HalfVT, VT)) {
3701 // The isNarrowingProfitable check is to avoid regressions on PPC and
3702 // AArch64, which match a few 64-bit bit insert / bit extract patterns
3703 // on downstream users of this. Those patterns could probably be
3704 // extended to handle extensions mixed in.
3705
3706 SDLoc SL(N0);
3707 assert(MaskBits <= Size);
3708
3709 // Extracting the highest bit of the low half.
3710 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3711 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3712 N0.getOperand(0));
3713
3714 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3715 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3716 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3717 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3718 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3719 }
3720 }
3721 }
3722 }
3723
3724 return SDValue();
3725}
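// Illustrative sketch (standalone, assumes a 64-bit type whose "half" is
// 32 bits): the low-half bit-extract rewrite above, spelled out in C++.
#include <cstdint>

uint64_t extractWide(uint64_t x) {
  return (x >> 5) & 0xFF;                 // 64-bit srl + and
}

uint64_t extractNarrow(uint64_t x) {
  uint32_t lo = (uint32_t)x;              // i32 trunc of i64:x
  return (uint64_t)((lo >> 5) & 0xFFu);   // 32-bit srl + and, then zext
}
// Both return the same value because ShiftBits + MaskBits (5 + 8 = 13)
// does not span past the 32-bit low half.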
3726
3727bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3728 EVT LoadResultTy, EVT &ExtVT) {
3729 if (!AndC->getAPIntValue().isMask())
3730 return false;
3731
3732 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3733
3734 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3735 EVT LoadedVT = LoadN->getMemoryVT();
3736
3737 if (ExtVT == LoadedVT &&
3738 (!LegalOperations ||
3739 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3740 // ZEXTLOAD will match without needing to change the size of the value being
3741 // loaded.
3742 return true;
3743 }
3744
3745 // Do not change the width of a volatile load.
3746 if (LoadN->isVolatile())
3747 return false;
3748
3749 // Do not generate loads of non-round integer types since these can
3750 // be expensive (and would be wrong if the type is not byte sized).
3751 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3752 return false;
3753
3754 if (LegalOperations &&
3755 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3756 return false;
3757
3758 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3759 return false;
3760
3761 return true;
3762}
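// Minimal sketch (plain C++, not the APInt API) of the two mask queries
// the function above relies on: isMask() and countTrailingOnes().
#include <cstdint>

bool isLowBitMask(uint64_t C) {           // true for 0xFF, 0xFFFF, ...
  return C != 0 && ((C + 1) & C) == 0;
}

unsigned trailingOnes(uint64_t C) {       // 0xFF -> 8, so ExtVT would be i8
  unsigned N = 0;
  while (C & 1) { ++N; C >>= 1; }
  return N;
}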
3763
3764bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
3765 EVT &ExtVT, unsigned ShAmt) {
3766 // Don't transform one with multiple uses; this would require adding a new
3767 // load.
3768 if (!SDValue(LoadN, 0).hasOneUse())
3769 return false;
3770
3771 if (LegalOperations &&
3772 !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
3773 return false;
3774
3775 // Do not generate loads of non-round integer types since these can
3776 // be expensive (and would be wrong if the type is not byte sized).
3777 if (!ExtVT.isRound())
3778 return false;
3779
3780 // Don't change the width of a volatile load.
3781 if (LoadN->isVolatile())
3782 return false;
3783
3784 // Verify that we are actually reducing a load width here.
3785 if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
3786 return false;
3787
3788 // For the transform to be legal, the load must produce only two values
3789 // (the value loaded and the chain). Don't transform a pre-increment
3790 // load, for example, which produces an extra value. Otherwise the
3791 // transformation is not equivalent, and the downstream logic to replace
3792 // uses gets things wrong.
3793 if (LoadN->getNumValues() > 2)
3794 return false;
3795
3796 // If the load that we're shrinking is an extload and we're not just
3797 // discarding the extension we can't simply shrink the load. Bail.
3798 // TODO: It would be possible to merge the extensions in some cases.
3799 if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
3800 LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
3801 return false;
3802
3803 if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
3804 return false;
3805
3806 // It's not possible to generate a constant of extended or untyped type.
3807 EVT PtrType = LoadN->getOperand(1).getValueType();
3808 if (PtrType == MVT::Untyped || PtrType.isExtended())
3809 return false;
3810
3811 return true;
3812}
3813
3814bool DAGCombiner::SearchForAndLoads(SDNode *N,
3815 SmallPtrSetImpl<LoadSDNode*> &Loads,
3816 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
3817 ConstantSDNode *Mask,
3818 SDNode *&NodeToMask) {
3819 // Recursively search for the operands, looking for loads which can be
3820 // narrowed.
3821 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
3822 SDValue Op = N->getOperand(i);
3823
3824 if (Op.getValueType().isVector())
3825 return false;
3826
3827 // Some constants may need fixing up later if they are too large.
3828 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3829 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
3830 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
3831 NodesWithConsts.insert(N);
3832 continue;
3833 }
3834
3835 if (!Op.hasOneUse())
3836 return false;
3837
3838 switch(Op.getOpcode()) {
3839 case ISD::LOAD: {
3840 auto *Load = cast<LoadSDNode>(Op);
3841 EVT ExtVT;
3842 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
3843 isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
3844
3845 // ZEXTLOAD is already small enough.
3846 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
3847 ExtVT.bitsGE(Load->getMemoryVT()))
3848 continue;
3849
3850 // Use LE to convert equal sized loads to zext.
3851 if (ExtVT.bitsLE(Load->getMemoryVT()))
3852 Loads.insert(Load);
3853
3854 continue;
3855 }
3856 return false;
3857 }
3858 case ISD::ZERO_EXTEND:
3859 case ISD::AssertZext: {
3860 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
3861 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3862 EVT VT = Op.getOpcode() == ISD::AssertZext ?
3863 cast<VTSDNode>(Op.getOperand(1))->getVT() :
3864 Op.getOperand(0).getValueType();
3865
3866 // We can accept extending nodes if the mask is wider than or equal in
3867 // width to the original type.
3868 if (ExtVT.bitsGE(VT))
3869 continue;
3870 break;
3871 }
3872 case ISD::OR:
3873 case ISD::XOR:
3874 case ISD::AND:
3875 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
3876 NodeToMask))
3877 return false;
3878 continue;
3879 }
3880
3881 // Allow one node which will be masked along with any loads found.
3882 if (NodeToMask)
3883 return false;
3884 NodeToMask = Op.getNode();
3885 }
3886 return true;
3887}
3888
3889bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
3890 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
3891 if (!Mask)
3892 return false;
3893
3894 if (!Mask->getAPIntValue().isMask())
3895 return false;
3896
3897 // No need to do anything if the and directly uses a load.
3898 if (isa<LoadSDNode>(N->getOperand(0)))
3899 return false;
3900
3901 SmallPtrSet<LoadSDNode*, 8> Loads;
3902 SmallPtrSet<SDNode*, 2> NodesWithConsts;
3903 SDNode *FixupNode = nullptr;
3904 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
3905 if (Loads.size() == 0)
3906 return false;
3907
3908 DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
3909 SDValue MaskOp = N->getOperand(1);
3910
3911 // If it exists, fixup the single node we allow in the tree that needs
3912 // masking.
3913 if (FixupNode) {
3914 DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
3915 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
3916 FixupNode->getValueType(0),
3917 SDValue(FixupNode, 0), MaskOp);
3918 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
3919 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
3920 MaskOp);
3921 }
3922
3923 // Narrow any constants that need it.
3924 for (auto *LogicN : NodesWithConsts) {
3925 SDValue Op0 = LogicN->getOperand(0);
3926 SDValue Op1 = LogicN->getOperand(1);
3927
3928 if (isa<ConstantSDNode>(Op0))
3929 std::swap(Op0, Op1);
3930
3931 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
3932 Op1, MaskOp);
3933
3934 DAG.UpdateNodeOperands(LogicN, Op0, And);
3935 }
3936
3937 // Create narrow loads.
3938 for (auto *Load : Loads) {
3939 DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
3940 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
3941 SDValue(Load, 0), MaskOp);
3942 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
3943 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
3944 SDValue NewLoad = ReduceLoadWidth(And.getNode());
3945 assert(NewLoad &&
3946 "Shouldn't be masking the load if it can't be narrowed");
3947 CombineTo(Load, NewLoad, NewLoad.getValue(1));
3948 }
3949 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
3950 return true;
3951 }
3952 return false;
3953}
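// Invented example (standalone; assumes a little-endian target) of what
// backwards mask propagation buys: the AND's low-byte mask is pushed down
// to the leaf loads, which can then be narrowed, and the AND disappears.
#include <cstdint>

uint16_t before(const uint16_t *p, const uint16_t *q) {
  return (uint16_t)((*p | *q) & 0xFF);    // two i16 loads, or, and
}

uint16_t after(const uint16_t *p, const uint16_t *q) {
  uint8_t a = *(const uint8_t *)p;        // narrowed to an i8 zextload
  uint8_t b = *(const uint8_t *)q;
  return (uint16_t)(a | b);               // no masking AND left
}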
3954
3955SDValue DAGCombiner::visitAND(SDNode *N) {
3956 SDValue N0 = N->getOperand(0);
3957 SDValue N1 = N->getOperand(1);
3958 EVT VT = N1.getValueType();
3959
3960 // x & x --> x
3961 if (N0 == N1)
3962 return N0;
3963
3964 // fold vector ops
3965 if (VT.isVector()) {
3966 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3967 return FoldedVOp;
3968
3969 // fold (and x, 0) -> 0, vector edition
3970 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3971 // do not return N0, because undef node may exist in N0
3972 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3973 SDLoc(N), N0.getValueType());
3974 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3975 // do not return N1, because undef node may exist in N1
3976 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3977 SDLoc(N), N1.getValueType());
3978
3979 // fold (and x, -1) -> x, vector edition
3980 if (ISD::isBuildVectorAllOnes(N0.getNode()))
3981 return N1;
3982 if (ISD::isBuildVectorAllOnes(N1.getNode()))
3983 return N0;
3984 }
3985
3986 // fold (and c1, c2) -> c1&c2
3987 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3988 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3989 if (N0C && N1C && !N1C->isOpaque())
3990 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3991 // canonicalize constant to RHS
3992 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3993 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3994 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3995 // fold (and x, -1) -> x
3996 if (isAllOnesConstant(N1))
3997 return N0;
3998 // if (and x, c) is known to be zero, return 0
3999 unsigned BitWidth = VT.getScalarSizeInBits();
4000 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4001 APInt::getAllOnesValue(BitWidth)))
4002 return DAG.getConstant(0, SDLoc(N), VT);
4003
4004 if (SDValue NewSel = foldBinOpIntoSelect(N))
4005 return NewSel;
4006
4007 // reassociate and
4008 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
4009 return RAND;
4010
4011 // Try to convert a constant mask AND into a shuffle clear mask.
4012 if (VT.isVector())
4013 if (SDValue Shuffle = XformToShuffleWithZero(N))
4014 return Shuffle;
4015
4016 // fold (and (or x, C), D) -> D if (C & D) == D
4017 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4018 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4019 };
4020 if (N0.getOpcode() == ISD::OR &&
4021 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4022 return N1;
4023 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4024 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4025 SDValue N0Op0 = N0.getOperand(0);
4026 APInt Mask = ~N1C->getAPIntValue();
4027 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4028 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4029 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4030 N0.getValueType(), N0Op0);
4031
4032 // Replace uses of the AND with uses of the Zero extend node.
4033 CombineTo(N, Zext);
4034
4035 // We actually want to replace all uses of the any_extend with the
4036 // zero_extend, to avoid duplicating things. This will later cause this
4037 // AND to be folded.
4038 CombineTo(N0.getNode(), Zext);
4039 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4040 }
4041 }
4042 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4043 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4044 // already be zero by virtue of the width of the base type of the load.
4045 //
4046 // the 'X' node here can either be nothing or an extract_vector_elt to catch
4047 // more cases.
4048 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4049 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4050 N0.getOperand(0).getOpcode() == ISD::LOAD &&
4051 N0.getOperand(0).getResNo() == 0) ||
4052 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4053 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4054 N0 : N0.getOperand(0) );
4055
4056 // Get the constant (if applicable) the zero'th operand is being ANDed with.
4057 // This can be a pure constant or a vector splat, in which case we treat the
4058 // vector as a scalar and use the splat value.
4059 APInt Constant = APInt::getNullValue(1);
4060 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4061 Constant = C->getAPIntValue();
4062 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4063 APInt SplatValue, SplatUndef;
4064 unsigned SplatBitSize;
4065 bool HasAnyUndefs;
4066 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4067 SplatBitSize, HasAnyUndefs);
4068 if (IsSplat) {
4069 // Undef bits can contribute to a possible optimisation if set, so
4070 // set them.
4071 SplatValue |= SplatUndef;
4072
4073 // The splat value may be something like "0x00FFFFFF", which means 0 for
4074 // the first vector value and FF for the rest, repeating. We need a mask
4075 // that will apply equally to all members of the vector, so AND all the
4076 // lanes of the constant together.
4077 EVT VT = Vector->getValueType(0);
4078 unsigned BitWidth = VT.getScalarSizeInBits();
4079
4080 // If the splat value has been compressed to a bitlength lower
4081 // than the size of the vector lane, we need to re-expand it to
4082 // the lane size.
4083 if (BitWidth > SplatBitSize)
4084 for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4085 SplatBitSize < BitWidth;
4086 SplatBitSize = SplatBitSize * 2)
4087 SplatValue |= SplatValue.shl(SplatBitSize);
4088
4089 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4090 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4091 if (SplatBitSize % BitWidth == 0) {
4092 Constant = APInt::getAllOnesValue(BitWidth);
4093 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4094 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4095 }
4096 }
4097 }
4098
4099 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4100 // actually legal and isn't going to get expanded, else this is a false
4101 // optimisation.
4102 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4103 Load->getValueType(0),
4104 Load->getMemoryVT());
4105
4106 // Resize the constant to the same size as the original memory access before
4107 // extension. If it is still the AllOnesValue then this AND is completely
4108 // unneeded.
4109 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4110
4111 bool B;
4112 switch (Load->getExtensionType()) {
4113 default: B = false; break;
4114 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4115 case ISD::ZEXTLOAD:
4116 case ISD::NON_EXTLOAD: B = true; break;
4117 }
4118
4119 if (B && Constant.isAllOnesValue()) {
4120 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4121 // preserve semantics once we get rid of the AND.
4122 SDValue NewLoad(Load, 0);
4123
4124 // Fold the AND away. NewLoad may get replaced immediately.
4125 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4126
4127 if (Load->getExtensionType() == ISD::EXTLOAD) {
4128 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4129 Load->getValueType(0), SDLoc(Load),
4130 Load->getChain(), Load->getBasePtr(),
4131 Load->getOffset(), Load->getMemoryVT(),
4132 Load->getMemOperand());
4133 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4134 if (Load->getNumValues() == 3) {
4135 // PRE/POST_INC loads have 3 values.
4136 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4137 NewLoad.getValue(2) };
4138 CombineTo(Load, To, 3, true);
4139 } else {
4140 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4141 }
4142 }
4143
4144 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4145 }
4146 }
4147
4148 // fold (and (load x), 255) -> (zextload x, i8)
4149 // fold (and (extload x, i16), 255) -> (zextload x, i8)
4150 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4151 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4152 (N0.getOpcode() == ISD::ANY_EXTEND &&
4153 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4154 if (SDValue Res = ReduceLoadWidth(N)) {
4155 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4156 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4157
4158 AddToWorklist(N);
4159 CombineTo(LN0, Res, Res.getValue(1));
4160 return SDValue(N, 0);
4161 }
4162 }
4163
4164 if (Level >= AfterLegalizeTypes) {
4165 // Attempt to propagate the AND back up to the leaves which, if they're
4166 // loads, can be combined to narrow loads and the AND node can be removed.
4167 // Perform after legalization so that extend nodes will already be
4168 // combined into the loads.
4169 if (BackwardsPropagateMask(N, DAG)) {
4170 return SDValue(N, 0);
4171 }
4172 }
4173
4174 if (SDValue Combined = visitANDLike(N0, N1, N))
4175 return Combined;
4176
4177 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4178 if (N0.getOpcode() == N1.getOpcode())
4179 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4180 return Tmp;
4181
4182 // Masking the negated extension of a boolean is just the zero-extended
4183 // boolean:
4184 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4185 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4186 //
4187 // Note: the SimplifyDemandedBits fold below can make an information-losing
4188 // transform, and then we have no way to find this better fold.
4189 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4190 if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
4191 SDValue SubRHS = N0.getOperand(1);
4192 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4193 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4194 return SubRHS;
4195 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4196 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4197 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4198 }
4199 }
4200
4201 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4202 // fold (and (sra)) -> (and (srl)) when possible.
4203 if (SimplifyDemandedBits(SDValue(N, 0)))
4204 return SDValue(N, 0);
4205
4206 // fold (zext_inreg (extload x)) -> (zextload x)
4207 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4208 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4209 EVT MemVT = LN0->getMemoryVT();
4210 // If we zero all the possible extended bits, then we can turn this into
4211 // a zextload if we are running before legalize or the operation is legal.
4212 unsigned BitWidth = N1.getScalarValueSizeInBits();
4213 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4214 BitWidth - MemVT.getScalarSizeInBits())) &&
4215 ((!LegalOperations && !LN0->isVolatile()) ||
4216 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4217 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4218 LN0->getChain(), LN0->getBasePtr(),
4219 MemVT, LN0->getMemOperand());
4220 AddToWorklist(N);
4221 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4222 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4223 }
4224 }
4225 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4226 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4227 N0.hasOneUse()) {
4228 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4229 EVT MemVT = LN0->getMemoryVT();
4230 // If we zero all the possible extended bits, then we can turn this into
4231 // a zextload if we are running before legalize or the operation is legal.
4232 unsigned BitWidth = N1.getScalarValueSizeInBits();
4233 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4234 BitWidth - MemVT.getScalarSizeInBits())) &&
4235 ((!LegalOperations && !LN0->isVolatile()) ||
4236 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4237 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4238 LN0->getChain(), LN0->getBasePtr(),
4239 MemVT, LN0->getMemOperand());
4240 AddToWorklist(N);
4241 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4242 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4243 }
4244 }
4245 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4246 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4247 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4248 N0.getOperand(1), false))
4249 return BSwap;
4250 }
4251
4252 return SDValue();
4253}
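// Illustration (standalone; assumes little-endian) of the load-narrowing
// folds listed above in visitAND: masking a wider load with 255 is
// equivalent to a byte-sized zero-extending load.
#include <cstdint>

uint32_t maskedLoad(const uint16_t *p) {
  return (uint32_t)*p & 255;              // (and (extload x, i16), 255)
}

uint32_t narrowedLoad(const uint16_t *p) {
  return *(const uint8_t *)p;             // (zextload x, i8)
}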
4254
4255/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4256SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4257 bool DemandHighBits) {
4258 if (!LegalOperations)
4259 return SDValue();
4260
4261 EVT VT = N->getValueType(0);
4262 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4263 return SDValue();
4264 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4265 return SDValue();
4266
4267 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4268 bool LookPassAnd0 = false;
4269 bool LookPassAnd1 = false;
4270 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4271 std::swap(N0, N1);
4272 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4273 std::swap(N0, N1);
4274 if (N0.getOpcode() == ISD::AND) {
4275 if (!N0.getNode()->hasOneUse())
4276 return SDValue();
4277 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4278 if (!N01C || N01C->getZExtValue() != 0xFF00)
4279 return SDValue();
4280 N0 = N0.getOperand(0);
4281 LookPassAnd0 = true;
4282 }
4283
4284 if (N1.getOpcode() == ISD::AND) {
4285 if (!N1.getNode()->hasOneUse())
4286 return SDValue();
4287 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4288 if (!N11C || N11C->getZExtValue() != 0xFF)
4289 return SDValue();
4290 N1 = N1.getOperand(0);
4291 LookPassAnd1 = true;
4292 }
4293
4294 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4295 std::swap(N0, N1);
4296 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4297 return SDValue();
4298 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4299 return SDValue();
4300
4301 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4302 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4303 if (!N01C || !N11C)
4304 return SDValue();
4305 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4306 return SDValue();
4307
4308 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4309 SDValue N00 = N0->getOperand(0);
4310 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4311 if (!N00.getNode()->hasOneUse())
4312 return SDValue();
4313 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4314 if (!N001C || N001C->getZExtValue() != 0xFF)
4315 return SDValue();
4316 N00 = N00.getOperand(0);
4317 LookPassAnd0 = true;
4318 }
4319
4320 SDValue N10 = N1->getOperand(0);
4321 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4322 if (!N10.getNode()->hasOneUse())
4323 return SDValue();
4324 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4325 if (!N101C || N101C->getZExtValue() != 0xFF00)
4326 return SDValue();
4327 N10 = N10.getOperand(0);
4328 LookPassAnd1 = true;
4329 }
4330
4331 if (N00 != N10)
4332 return SDValue();
4333
4334 // Make sure everything beyond the low halfword gets set to zero since the SRL
4335 // 16 will clear the top bits.
4336 unsigned OpSizeInBits = VT.getSizeInBits();
4337 if (DemandHighBits && OpSizeInBits > 16) {
4338 // If the left-shift isn't masked out then the only way this is a bswap is
4339 // if all bits beyond the low 8 are 0. In that case the entire pattern
4340 // reduces to a left shift anyway: leave it for other parts of the combiner.
4341 if (!LookPassAnd0)
4342 return SDValue();
4343
4344 // However, if the right shift isn't masked out then it might be because
4345 // it's not needed. See if we can spot that too.
4346 if (!LookPassAnd1 &&
4347 !DAG.MaskedValueIsZero(
4348 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4349 return SDValue();
4350 }
4351
4352 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4353 if (OpSizeInBits > 16) {
4354 SDLoc DL(N);
4355 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4356 DAG.getConstant(OpSizeInBits - 16, DL,
4357 getShiftAmountTy(VT)));
4358 }
4359 return Res;
4360}
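// Standalone sketch of the idiom this routine matches, written with the
// clang builtin for bswap; both functions compute the same low-halfword
// byte swap for every input.
#include <cstdint>

uint32_t hwSwapIdiom(uint32_t a) {
  return ((a & 0xFF) << 8) | ((a >> 8) & 0xFF);  // swap the low two bytes
}

uint32_t hwSwapViaBswap(uint32_t a) {
  return __builtin_bswap32(a) >> 16;             // (srl (bswap a), 16)
}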
4361
4362/// Return true if the specified node is an element that makes up a 32-bit
4363/// packed halfword byteswap.
4364/// ((x & 0x000000ff) << 8) |
4365/// ((x & 0x0000ff00) >> 8) |
4366/// ((x & 0x00ff0000) << 8) |
4367/// ((x & 0xff000000) >> 8)
4368static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4369 if (!N.getNode()->hasOneUse())
4370 return false;
4371
4372 unsigned Opc = N.getOpcode();
4373 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4374 return false;
4375
4376 SDValue N0 = N.getOperand(0);
4377 unsigned Opc0 = N0.getOpcode();
4378 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4379 return false;
4380
4381 ConstantSDNode *N1C = nullptr;
4382 // SHL or SRL: look upstream for AND mask operand
4383 if (Opc == ISD::AND)
4384 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4385 else if (Opc0 == ISD::AND)
4386 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4387 if (!N1C)
4388 return false;
4389
4390 unsigned MaskByteOffset;
4391 switch (N1C->getZExtValue()) {
4392 default:
4393 return false;
4394 case 0xFF: MaskByteOffset = 0; break;
4395 case 0xFF00: MaskByteOffset = 1; break;
4396 case 0xFF0000: MaskByteOffset = 2; break;
4397 case 0xFF000000: MaskByteOffset = 3; break;
4398 }
4399
4400 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4401 if (Opc == ISD::AND) {
4402 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4403 // (x >> 8) & 0xff
4404 // (x >> 8) & 0xff0000
4405 if (Opc0 != ISD::SRL)
4406 return false;
4407 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4408 if (!C || C->getZExtValue() != 8)
4409 return false;
4410 } else {
4411 // (x << 8) & 0xff00
4412 // (x << 8) & 0xff000000
4413 if (Opc0 != ISD::SHL)
4414 return false;
4415 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4416 if (!C || C->getZExtValue() != 8)
4417 return false;
4418 }
4419 } else if (Opc == ISD::SHL) {
4420 // (x & 0xff) << 8
4421 // (x & 0xff0000) << 8
4422 if (MaskByteOffset != 0 && MaskByteOffset != 2)
4423 return false;
4424 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4425 if (!C || C->getZExtValue() != 8)
4426 return false;
4427 } else { // Opc == ISD::SRL
4428 // (x & 0xff00) >> 8
4429 // (x & 0xff000000) >> 8
4430 if (MaskByteOffset != 1 && MaskByteOffset != 3)
4431 return false;
4432 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4433 if (!C || C->getZExtValue() != 8)
4434 return false;
4435 }
4436
4437 if (Parts[MaskByteOffset])
4438 return false;
4439
4440 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4441 return true;
4442}
4443
4444/// Match a 32-bit packed halfword bswap. That is
4445/// ((x & 0x000000ff) << 8) |
4446/// ((x & 0x0000ff00) >> 8) |
4447/// ((x & 0x00ff0000) << 8) |
4448/// ((x & 0xff000000) >> 8)
4449/// => (rotl (bswap x), 16)
4450SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4451 if (!LegalOperations)
4452 return SDValue();
4453
4454 EVT VT = N->getValueType(0);
4455 if (VT != MVT::i32)
4456 return SDValue();
4457 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4458 return SDValue();
4459
4460 // Look for either
4461 // (or (or (and), (and)), (or (and), (and)))
4462 // (or (or (or (and), (and)), (and)), (and))
4463 if (N0.getOpcode() != ISD::OR)
4464 return SDValue();
4465 SDValue N00 = N0.getOperand(0);
4466 SDValue N01 = N0.getOperand(1);
4467 SDNode *Parts[4] = {};
4468
4469 if (N1.getOpcode() == ISD::OR &&
4470 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4471 // (or (or (and), (and)), (or (and), (and)))
4472 if (!isBSwapHWordElement(N00, Parts))
4473 return SDValue();
4474
4475 if (!isBSwapHWordElement(N01, Parts))
4476 return SDValue();
4477 SDValue N10 = N1.getOperand(0);
4478 if (!isBSwapHWordElement(N10, Parts))
4479 return SDValue();
4480 SDValue N11 = N1.getOperand(1);
4481 if (!isBSwapHWordElement(N11, Parts))
4482 return SDValue();
4483 } else {
4484 // (or (or (or (and), (and)), (and)), (and))
4485 if (!isBSwapHWordElement(N1, Parts))
4486 return SDValue();
4487 if (!isBSwapHWordElement(N01, Parts))
4488 return SDValue();
4489 if (N00.getOpcode() != ISD::OR)
4490 return SDValue();
4491 SDValue N000 = N00.getOperand(0);
4492 if (!isBSwapHWordElement(N000, Parts))
4493 return SDValue();
4494 SDValue N001 = N00.getOperand(1);
4495 if (!isBSwapHWordElement(N001, Parts))
4496 return SDValue();
4497 }
4498
4499 // Make sure the parts are all coming from the same node.
4500 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4501 return SDValue();
4502
4503 SDLoc DL(N);
4504 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4505 SDValue(Parts[0], 0));
4506
4507 // Result of the bswap should be rotated by 16. If it's not legal, then
4508 // do (x << 16) | (x >> 16).
4509 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4510 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4511 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4512 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4513 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4514 return DAG.getNode(ISD::OR, DL, VT,
4515 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4516 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4517}
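// Standalone check (invented helper names) that the packed-halfword
// pattern in the comment above equals a bswap rotated left by 16.
#include <cstdint>

uint32_t packedHwBswap(uint32_t x) {
  return ((x & 0x000000FF) << 8) | ((x & 0x0000FF00) >> 8) |
         ((x & 0x00FF0000) << 8) | ((x & 0xFF000000) >> 8);
}

uint32_t viaBswapRotl16(uint32_t x) {
  uint32_t b = __builtin_bswap32(x);
  return (b << 16) | (b >> 16);                  // rotl(b, 16)
}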
4518
4519/// This contains all DAGCombine rules which reduce two values combined by
4520/// an Or operation to a single value \see visitANDLike().
4521SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4522 EVT VT = N1.getValueType();
4523 SDLoc DL(N);
4524
4525 // fold (or x, undef) -> -1
4526 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4527 return DAG.getAllOnesConstant(DL, VT);
4528
4529 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4530 return V;
4531
4532 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
4533 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4534 // Don't increase # computations.
4535 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4536 // We can only do this xform if we know that bits from X that are set in C2
4537 // but not in C1 are already zero. Likewise for Y.
4538 if (const ConstantSDNode *N0O1C =
4539 getAsNonOpaqueConstant(N0.getOperand(1))) {
4540 if (const ConstantSDNode *N1O1C =
4541 getAsNonOpaqueConstant(N1.getOperand(1))) {
4542 // We can only do this xform if we know that bits from X that are set in
4543 // C2 but not in C1 are already zero. Likewise for Y.
4544 const APInt &LHSMask = N0O1C->getAPIntValue();
4545 const APInt &RHSMask = N1O1C->getAPIntValue();
4546
4547 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4548 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4549 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4550 N0.getOperand(0), N1.getOperand(0));
4551 return DAG.getNode(ISD::AND, DL, VT, X,
4552 DAG.getConstant(LHSMask | RHSMask, DL, VT));
4553 }
4554 }
4555 }
4556 }
4557
4558 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4559 if (N0.getOpcode() == ISD::AND &&
4560 N1.getOpcode() == ISD::AND &&
4561 N0.getOperand(0) == N1.getOperand(0) &&
4562 // Don't increase # computations.
4563 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4564 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4565 N0.getOperand(1), N1.getOperand(1));
4566 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4567 }
4568
4569 return SDValue();
4570}
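// Illustration (standalone): the last fold above is the distributive law
// of AND over OR, shown on concrete scalar types.
#include <cstdint>

uint32_t beforeFold(uint32_t X, uint32_t M, uint32_t N) {
  return (X & M) | (X & N);   // (or (and X, M), (and X, N))
}

uint32_t afterFold(uint32_t X, uint32_t M, uint32_t N) {
  return X & (M | N);         // (and X, (or M, N))
}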
4571
4572SDValue DAGCombiner::visitOR(SDNode *N) {
4573 SDValue N0 = N->getOperand(0);
4574 SDValue N1 = N->getOperand(1);
4575 EVT VT = N1.getValueType();
4576
4577 // x | x --> x
4578 if (N0 == N1)
4579 return N0;
4580
4581 // fold vector ops
4582 if (VT.isVector()) {
4583 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4584 return FoldedVOp;
4585
4586 // fold (or x, 0) -> x, vector edition
4587 if (ISD::isBuildVectorAllZeros(N0.getNode()))
4588 return N1;
4589 if (ISD::isBuildVectorAllZeros(N1.getNode()))
4590 return N0;
4591
4592 // fold (or x, -1) -> -1, vector edition
4593 if (ISD::isBuildVectorAllOnes(N0.getNode()))
4594 // do not return N0, because undef node may exist in N0
4595 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4596 if (ISD::isBuildVectorAllOnes(N1.getNode()))
4597 // do not return N1, because undef node may exist in N1
4598 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4599
4600 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4601 // Do this only if the resulting shuffle is legal.
4602 if (isa<ShuffleVectorSDNode>(N0) &&
4603 isa<ShuffleVectorSDNode>(N1) &&
4604 // Avoid folding a node with illegal type.
4605 TLI.isTypeLegal(VT)) {
4606 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4607 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4608 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4609 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4610 // Ensure both shuffles have a zero input.
4611 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4612 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4613 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4614 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4615 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4616 bool CanFold = true;
4617 int NumElts = VT.getVectorNumElements();
4618 SmallVector<int, 4> Mask(NumElts);
4619
4620 for (int i = 0; i != NumElts; ++i) {
4621 int M0 = SV0->getMaskElt(i);
4622 int M1 = SV1->getMaskElt(i);
4623
4624 // Determine if either index is pointing to a zero vector.
4625 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4626 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4627
4628 // If one element is zero and the other side is undef, keep undef.
4629 // This also handles the case that both are undef.
4630 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4631 Mask[i] = -1;
4632 continue;
4633 }
4634
4635 // Make sure only one of the elements is zero.
4636 if (M0Zero == M1Zero) {
4637 CanFold = false;
4638 break;
4639 }
4640
4641 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4642
4643 // We have a zero and non-zero element. If the non-zero came from
4644 // SV0 make the index a LHS index. If it came from SV1, make it
4645 // a RHS index. We need to mod by NumElts because we don't care
4646 // which operand it came from in the original shuffles.
4647 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4648 }
4649
4650 if (CanFold) {
4651 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4652 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4653
4654 bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4655 if (!LegalMask) {
4656 std::swap(NewLHS, NewRHS);
4657 ShuffleVectorSDNode::commuteMask(Mask);
4658 LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4659 }
4660
4661 if (LegalMask)
4662 return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4663 }
4664 }
4665 }
4666 }
4667
4668 // fold (or c1, c2) -> c1|c2
4669 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4670 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4671 if (N0C && N1C && !N1C->isOpaque())
4672 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4673 // canonicalize constant to RHS
4674 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4675 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4676 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4677 // fold (or x, 0) -> x
4678 if (isNullConstant(N1))
4679 return N0;
4680 // fold (or x, -1) -> -1
4681 if (isAllOnesConstant(N1))
4682 return N1;
4683
4684 if (SDValue NewSel = foldBinOpIntoSelect(N))
4685 return NewSel;
4686
4687 // fold (or x, c) -> c iff (x & ~c) == 0
4688 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4689 return N1;
4690
4691 if (SDValue Combined = visitORLike(N0, N1, N))
4692 return Combined;
4693
4694 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4695 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4696 return BSwap;
4697 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4698 return BSwap;
4699
4700 // reassociate or
4701 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4702 return ROR;
4703
4704 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4705 // iff (c1 & c2) != 0.
4706 auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4707 return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
4708 };
4709 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4710 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
4711 if (SDValue COR = DAG.FoldConstantArithmetic(
4712 ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
4713 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
4714 AddToWorklist(IOR.getNode());
4715 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
4716 }
4717 }
4718
4719 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
4720 if (N0.getOpcode() == N1.getOpcode())
4721 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4722 return Tmp;
4723
4724 // See if this is some rotate idiom.
4725 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4726 return SDValue(Rot, 0);
4727
4728 if (SDValue Load = MatchLoadCombine(N))
4729 return Load;
4730
4731 // Simplify the operands using demanded-bits information.
4732 if (SimplifyDemandedBits(SDValue(N, 0)))
4733 return SDValue(N, 0);
4734
4735 return SDValue();
4736}
4737
4738/// Match "(X shl/srl V1) & V2" where V2 may not be present.
4739bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4740 if (Op.getOpcode() == ISD::AND) {
4741 if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4742 Mask = Op.getOperand(1);
4743 Op = Op.getOperand(0);
4744 } else {
4745 return false;
4746 }
4747 }
4748
4749 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4750 Shift = Op;
4751 return true;
4752 }
4753
4754 return false;
4755}
4756
4757// Return true if we can prove that, whenever Neg and Pos are both in the
4758// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
4759// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4760//
4761// (or (shift1 X, Neg), (shift2 X, Pos))
4762//
4763// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4764// in direction shift1 by Neg. The range [0, EltSize) means that we only need
4765// to consider shift amounts with defined behavior.
4766static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4767 // If EltSize is a power of 2 then:
4768 //
4769 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4770 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4771 //
4772 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4773 // for the stronger condition:
4774 //
4775 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
4776 //
4777 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4778 // we can just replace Neg with Neg' for the rest of the function.
4779 //
4780 // In other cases we check for the even stronger condition:
4781 //
4782 // Neg == EltSize - Pos [B]
4783 //
4784 // for all Neg and Pos. Note that the (or ...) then invokes undefined
4785 // behavior if Pos == 0 (and consequently Neg == EltSize).
4786 //
4787 // We could actually use [A] whenever EltSize is a power of 2, but the
4788 // only extra cases that it would match are those uninteresting ones
4789 // where Neg and Pos are never in range at the same time. E.g. for
4790 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4791 // as well as (sub 32, Pos), but:
4792 //
4793 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4794 //
4795 // always invokes undefined behavior for 32-bit X.
4796 //
4797 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4798 unsigned MaskLoBits = 0;
4799 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4800 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4801 if (NegC->getAPIntValue() == EltSize - 1) {
4802 Neg = Neg.getOperand(0);
4803 MaskLoBits = Log2_64(EltSize);
4804 }
4805 }
4806 }
4807
4808 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4809 if (Neg.getOpcode() != ISD::SUB)
4810 return false;
4811 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4812 if (!NegC)
4813 return false;
4814 SDValue NegOp1 = Neg.getOperand(1);
4815
4816 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4817 // Pos'. The truncation is redundant for the purpose of the equality.
4818 if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4819 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4820 if (PosC->getAPIntValue() == EltSize - 1)
4821 Pos = Pos.getOperand(0);
4822
4823 // The condition we need is now:
4824 //
4825 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4826 //
4827 // If NegOp1 == Pos then we need:
4828 //
4829 // EltSize & Mask == NegC & Mask
4830 //
4831 // (because "x & Mask" is a truncation and distributes through subtraction).
4832 APInt Width;
4833 if (Pos == NegOp1)
4834 Width = NegC->getAPIntValue();
4835
4836 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4837 // Then the condition we want to prove becomes:
4838 //
4839 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4840 //
4841 // which, again because "x & Mask" is a truncation, becomes:
4842 //
4843 // NegC & Mask == (EltSize - PosC) & Mask
4844 // EltSize & Mask == (NegC + PosC) & Mask
4845 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4846 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4847 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4848 else
4849 return false;
4850 } else
4851 return false;
4852
4853 // Now we just need to check that EltSize & Mask == Width & Mask.
4854 if (MaskLoBits)
4855 // EltSize & Mask is 0 since Mask is EltSize - 1.
4856 return Width.getLoBits(MaskLoBits) == 0;
4857 return Width == EltSize;
4858}
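// Standalone sketch of the identity matchRotateSub establishes, for
// EltSize == 32: with Neg == (32 - Pos) & 31, the OR of two opposing
// shifts is a rotate-left by Pos for every Pos in [0, 31].
#include <cstdint>

uint32_t orOfOpposingShifts(uint32_t X, unsigned Pos) {
  unsigned Neg = (32 - Pos) & 31;   // Neg == (Pos == 0 ? 0 : 32 - Pos)
  return (X << Pos) | (X >> Neg);   // both shift amounts stay in [0, 31]
}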
4859
4860// A subroutine of MatchRotate used once we have found an OR of two opposite
4861// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
4862// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4863// former being preferred if supported. InnerPos and InnerNeg are Pos and
4864// Neg with outer conversions stripped away.
4865SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4866 SDValue Neg, SDValue InnerPos,
4867 SDValue InnerNeg, unsigned PosOpcode,
4868 unsigned NegOpcode, const SDLoc &DL) {
4869 // fold (or (shl x, (*ext y)),
4870 // (srl x, (*ext (sub 32, y)))) ->
4871 // (rotl x, y) or (rotr x, (sub 32, y))
4872 //
4873 // fold (or (shl x, (*ext (sub 32, y))),
4874 // (srl x, (*ext y))) ->
4875 // (rotr x, y) or (rotl x, (sub 32, y))
4876 EVT VT = Shifted.getValueType();
4877 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4878 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4879 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4880 HasPos ? Pos : Neg).getNode();
4881 }
4882
4883 return nullptr;
4884}
4885
4886// MatchRotate - Handle an 'or' of two operands. If this is one of the many
4887// idioms for rotate, and if the target supports rotation instructions, generate
4888// a rot[lr].
4889SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4890 // Must be a legal type. Expanded and promoted things won't work with rotates.
4891 EVT VT = LHS.getValueType();
4892 if (!TLI.isTypeLegal(VT)) return nullptr;
4893
4894 // The target must have at least one rotate flavor.
4895 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4896 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4897 if (!HasROTL && !HasROTR) return nullptr;
4898
4899 // Check for truncated rotate.
4900 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
4901 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
4902 assert(LHS.getValueType() == RHS.getValueType());
4903 if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
4904 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
4905 SDValue(Rot, 0)).getNode();
4906 }
4907 }
4908
4909 // Match "(X shl/srl V1) & V2" where V2 may not be present.
4910 SDValue LHSShift; // The shift.
4911 SDValue LHSMask; // AND value if any.
4912 if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4913 return nullptr; // Not part of a rotate.
4914
4915 SDValue RHSShift; // The shift.
4916 SDValue RHSMask; // AND value if any.
4917 if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4918 return nullptr; // Not part of a rotate.
4919
4920 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4921 return nullptr; // Not shifting the same value.
4922
4923 if (LHSShift.getOpcode() == RHSShift.getOpcode())
4924 return nullptr; // Shifts must disagree.
4925
4926 // Canonicalize shl to left side in a shl/srl pair.
4927 if (RHSShift.getOpcode() == ISD::SHL) {
4928 std::swap(LHS, RHS);
4929 std::swap(LHSShift, RHSShift);
4930 std::swap(LHSMask, RHSMask);
4931 }
4932
4933 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4934 SDValue LHSShiftArg = LHSShift.getOperand(0);
4935 SDValue LHSShiftAmt = LHSShift.getOperand(1);
4936 SDValue RHSShiftArg = RHSShift.getOperand(0);
4937 SDValue RHSShiftAmt = RHSShift.getOperand(1);
4938
4939 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4940 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4941 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
4942 ConstantSDNode *RHS) {
4943 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
4944 };
4945 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
4946 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4947 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4948
4949 // If there is an AND of either shifted operand, apply it to the result.
4950 if (LHSMask.getNode() || RHSMask.getNode()) {
4951 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4952 SDValue Mask = AllOnes;
4953
4954 if (LHSMask.getNode()) {
4955 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
4956 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4957 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
4958 }
4959 if (RHSMask.getNode()) {
4960 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
4961 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4962 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
4963 }
4964
4965 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4966 }
4967
4968 return Rot.getNode();
4969 }
4970
4971 // If there is a mask here, and we have a variable shift, we can't be sure
4972 // that we're masking out the right stuff.
4973 if (LHSMask.getNode() || RHSMask.getNode())
4974 return nullptr;
4975
4976 // If the shift amount is sign/zext/any-extended just peel it off.
4977 SDValue LExtOp0 = LHSShiftAmt;
4978 SDValue RExtOp0 = RHSShiftAmt;
4979 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4980 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4981 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4982 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4983 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4984 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4985 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4986 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4987 LExtOp0 = LHSShiftAmt.getOperand(0);
4988 RExtOp0 = RHSShiftAmt.getOperand(0);
4989 }
4990
4991 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4992 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4993 if (TryL)
4994 return TryL;
4995
4996 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4997 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4998 if (TryR)
4999 return TryR;
5000
5001 return nullptr;
5002}
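
A standalone sketch of the identity MatchRotateSum checks above: for an i32 value, (x << C1) | (x >> C2) is a rotate exactly when the two constant shift amounts sum to the element size. The rotl32 helper is hypothetical, not DAGCombiner code.

  #include <cassert>
  #include <cstdint>

  static uint32_t rotl32(uint32_t X, unsigned C) {
    C &= 31;                                  // keep the amount in range
    return C ? (X << C) | (X >> (32 - C)) : X;
  }

  int main() {
    uint32_t X = 0xDEADBEEFu;
    for (unsigned C1 = 1; C1 < 32; ++C1) {
      unsigned C2 = 32 - C1;                  // C1 + C2 == EltSizeInBits
      assert(((X << C1) | (X >> C2)) == rotl32(X, C1));
    }
    return 0;
  }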
5003
5004namespace {
5005
5006/// Represents known origin of an individual byte in load combine pattern. The
5007/// value of the byte is either constant zero or comes from memory.
5008struct ByteProvider {
5009 // For constant zero providers Load is set to nullptr. For memory providers
5010 // Load represents the node which loads the byte from memory.
5011 // ByteOffset is the offset of the byte in the value produced by the load.
5012 LoadSDNode *Load = nullptr;
5013 unsigned ByteOffset = 0;
5014
5015 ByteProvider() = default;
5016
5017 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5018 return ByteProvider(Load, ByteOffset);
5019 }
5020
5021 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5022
5023 bool isConstantZero() const { return !Load; }
5024 bool isMemory() const { return Load; }
5025
5026 bool operator==(const ByteProvider &Other) const {
5027 return Other.Load == Load && Other.ByteOffset == ByteOffset;
5028 }
5029
5030private:
5031 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5032 : Load(Load), ByteOffset(ByteOffset) {}
5033};
5034
5035} // end anonymous namespace
5036
5037/// Recursively traverses the expression calculating the origin of the requested
5038/// byte of the given value. Returns None if the provider can't be calculated.
5039///
5040/// For every value except the root of the expression, verifies that the value
5041/// has exactly one use; if that does not hold, returns None. This way, if the
5042/// origin of the byte is returned, it is guaranteed that the values which
5043/// contribute to the byte are not used outside of this expression.
5044///
5045/// Because the parts of the expression are not allowed to have more than one
5046/// use this function iterates over trees, not DAGs. So it never visits the same
5047/// node more than once.
5048static const Optional<ByteProvider>
5049calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5050 bool Root = false) {
5051 // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
5052 if (Depth == 10)
5053 return None;
5054
5055 if (!Root && !Op.hasOneUse())
5056 return None;
5057
5058 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5059 unsigned BitWidth = Op.getValueSizeInBits();
5060 if (BitWidth % 8 != 0)
5061 return None;
5062 unsigned ByteWidth = BitWidth / 8;
5063 assert(Index < ByteWidth && "invalid index requested");
5064 (void) ByteWidth;
5065
5066 switch (Op.getOpcode()) {
5067 case ISD::OR: {
5068 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5069 if (!LHS)
5070 return None;
5071 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5072 if (!RHS)
5073 return None;
5074
5075 if (LHS->isConstantZero())
5076 return RHS;
5077 if (RHS->isConstantZero())
5078 return LHS;
5079 return None;
5080 }
5081 case ISD::SHL: {
5082 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5083 if (!ShiftOp)
5084 return None;
5085
5086 uint64_t BitShift = ShiftOp->getZExtValue();
5087 if (BitShift % 8 != 0)
5088 return None;
5089 uint64_t ByteShift = BitShift / 8;
5090
5091 return Index < ByteShift
5092 ? ByteProvider::getConstantZero()
5093 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5094 Depth + 1);
5095 }
5096 case ISD::ANY_EXTEND:
5097 case ISD::SIGN_EXTEND:
5098 case ISD::ZERO_EXTEND: {
5099 SDValue NarrowOp = Op->getOperand(0);
5100 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5101 if (NarrowBitWidth % 8 != 0)
5102 return None;
5103 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5104
5105 if (Index >= NarrowByteWidth)
5106 return Op.getOpcode() == ISD::ZERO_EXTEND
5107 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5108 : None;
5109 return calculateByteProvider(NarrowOp, Index, Depth + 1);
5110 }
5111 case ISD::BSWAP:
5112 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5113 Depth + 1);
5114 case ISD::LOAD: {
5115 auto L = cast<LoadSDNode>(Op.getNode());
5116 if (L->isVolatile() || L->isIndexed())
5117 return None;
5118
5119 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5120 if (NarrowBitWidth % 8 != 0)
5121 return None;
5122 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5123
5124 if (Index >= NarrowByteWidth)
5125 return L->getExtensionType() == ISD::ZEXTLOAD
5126 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5127 : None;
5128 return ByteProvider::getMemory(L, Index);
5129 }
5130 }
5131
5132 return None;
5133}
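
The ISD::SHL case above is easiest to see at the value level: byte I of (x << 8*K) is constant zero for I < K and is byte I-K of x otherwise. A minimal sketch with plain host integers standing in for SelectionDAG nodes (byteAt is a hypothetical helper):

  #include <cassert>
  #include <cstdint>

  static uint8_t byteAt(uint64_t V, unsigned I) { return (V >> (8 * I)) & 0xFF; }

  int main() {
    uint64_t X = 0x1122334455667788ull;
    unsigned K = 3;                           // ByteShift in the code above
    uint64_t Shifted = X << (8 * K);
    for (unsigned I = 0; I < 8; ++I)
      assert(byteAt(Shifted, I) == (I < K ? 0 : byteAt(X, I - K)));
    return 0;
  }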
5134
5135/// Match a pattern where a wide type scalar value is loaded by several narrow
5136/// loads and combined by shifts and ors. Fold it into a single load or a load
5137/// and a BSWAP if the target supports it.
5138///
5139/// Assuming little endian target:
5140/// i8 *a = ...
5141/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5142/// =>
5143/// i32 val = *((i32)a)
5144///
5145/// i8 *a = ...
5146/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5147/// =>
5148/// i32 val = BSWAP(*((i32)a))
5149///
5150/// TODO: This rule matches complex patterns with OR node roots and doesn't
5151/// interact well with the worklist mechanism. When a part of the pattern is
5152/// updated (e.g. one of the loads) its direct users are put into the worklist,
5153/// but the root node of the pattern which triggers the load combine is not
5154/// necessarily a direct user of the changed node. For example, once the address
5155/// of the t28 load is reassociated, load combine won't be triggered:
5156/// t25: i32 = add t4, Constant:i32<2>
5157/// t26: i64 = sign_extend t25
5158/// t27: i64 = add t2, t26
5159/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5160/// t29: i32 = zero_extend t28
5161/// t32: i32 = shl t29, Constant:i8<8>
5162/// t33: i32 = or t23, t32
5163/// As a possible fix, visitLoad can check if the load can be a part of a load
5164/// combine pattern and add corresponding OR roots to the worklist.
5165SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5166 assert(N->getOpcode() == ISD::OR &&
5167 "Can only match load combining against OR nodes");
5168
5169 // Handles simple types only
5170 EVT VT = N->getValueType(0);
5171 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5172 return SDValue();
5173 unsigned ByteWidth = VT.getSizeInBits() / 8;
5174
5175 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5176 // Before legalization we can introduce too-wide illegal loads, which will
5177 // later be split into legal-sized loads. This enables us to combine i64-by-i8
5178 // load patterns into a couple of i32 loads on 32-bit targets.
5179 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5180 return SDValue();
5181
5182 std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5183 unsigned BW, unsigned i) { return i; };
5184 std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5185 unsigned BW, unsigned i) { return BW - i - 1; };
5186
5187 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5188 auto MemoryByteOffset = [&] (ByteProvider P) {
5189 assert(P.isMemory() && "Must be a memory byte provider");
5190 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5191 assert(LoadBitWidth % 8 == 0 &&
5192 "can only analyze providers for individual bytes not bit");
5193 unsigned LoadByteWidth = LoadBitWidth / 8;
5194 return IsBigEndianTarget
5195 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5196 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5197 };
5198
5199 Optional<BaseIndexOffset> Base;
5200 SDValue Chain;
5201
5202 SmallSet<LoadSDNode *, 8> Loads;
5203 Optional<ByteProvider> FirstByteProvider;
5204 int64_t FirstOffset = INT64_MAX;
5205
5206 // Check if all the bytes of the OR we are looking at are loaded from the same
5207 // base address. Collect byte offsets from the Base address in ByteOffsets.
5208 SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5209 for (unsigned i = 0; i < ByteWidth; i++) {
5210 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5211 if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5212 return SDValue();
5213
5214 LoadSDNode *L = P->Load;
5215 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5216 "Must be enforced by calculateByteProvider");
5217 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5218
5219 // All loads must share the same chain
5220 SDValue LChain = L->getChain();
5221 if (!Chain)
5222 Chain = LChain;
5223 else if (Chain != LChain)
5224 return SDValue();
5225
5226 // Loads must share the same base address
5227 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5228 int64_t ByteOffsetFromBase = 0;
5229 if (!Base)
5230 Base = Ptr;
5231 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5232 return SDValue();
5233
5234 // Calculate the offset of the current byte from the base address
5235 ByteOffsetFromBase += MemoryByteOffset(*P);
5236 ByteOffsets[i] = ByteOffsetFromBase;
5237
5238 // Remember the first byte load
5239 if (ByteOffsetFromBase < FirstOffset) {
5240 FirstByteProvider = P;
5241 FirstOffset = ByteOffsetFromBase;
5242 }
5243
5244 Loads.insert(L);
5245 }
5246 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5247 "memory, so there must be at least one load which produces the value");
5248 assert(Base && "Base address of the accessed memory location must be set");
5249 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5250
5251 // Check if the bytes of the OR we are looking at match with either big or
5252 // little endian value load
5253 bool BigEndian = true, LittleEndian = true;
5254 for (unsigned i = 0; i < ByteWidth; i++) {
5255 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5256 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5257 BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5258 if (!BigEndian && !LittleEndian)
5259 return SDValue();
5260 }
5261 assert((BigEndian != LittleEndian) && "should be either or");
5262 assert(FirstByteProvider && "must be set");
5263
5264 // Ensure that the first byte is loaded from zero offset of the first load.
5265 // So the combined value can be loaded from the first load address.
5266 if (MemoryByteOffset(*FirstByteProvider) != 0)
5267 return SDValue();
5268 LoadSDNode *FirstLoad = FirstByteProvider->Load;
5269
5270 // The node we are looking at matches with the pattern, check if we can
5271 // replace it with a single load and bswap if needed.
5272
5273 // If the load needs byte swap check if the target supports it
5274 bool NeedsBswap = IsBigEndianTarget != BigEndian;
5275
5276 // Before legalization we can introduce illegal bswaps, which will later be
5277 // converted to an explicit bswap sequence. This way we end up with a single
5278 // load and byte shuffling instead of several loads and byte shuffling.
5279 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5280 return SDValue();
5281
5282 // Check that a load of the wide type is both allowed and fast on the target
5283 bool Fast = false;
5284 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5285 VT, FirstLoad->getAddressSpace(),
5286 FirstLoad->getAlignment(), &Fast);
5287 if (!Allowed || !Fast)
5288 return SDValue();
5289
5290 SDValue NewLoad =
5291 DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5292 FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5293
5294 // Transfer chain users from old loads to the new load.
5295 for (LoadSDNode *L : Loads)
5296 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5297
5298 return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5299}
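
A value-level sketch of the pattern this function folds, assuming a GCC/Clang host that provides __builtin_bswap32: assembling an i32 from four byte loads matches the single wide load on a little-endian host and its byte-swapped form on a big-endian one, which is exactly when the fold emits the extra BSWAP.

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    uint8_t A[4] = {0x11, 0x22, 0x33, 0x44};
    uint32_t ByByte = (uint32_t)A[0] | ((uint32_t)A[1] << 8) |
                      ((uint32_t)A[2] << 16) | ((uint32_t)A[3] << 24);
    uint32_t Wide;
    std::memcpy(&Wide, A, sizeof(Wide));   // the single load the fold emits
    // Little-endian hosts take the first branch, big-endian the second.
    assert(ByByte == Wide || ByByte == __builtin_bswap32(Wide));
    return 0;
  }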
5300
5301SDValue DAGCombiner::visitXOR(SDNode *N) {
5302 SDValue N0 = N->getOperand(0);
5303 SDValue N1 = N->getOperand(1);
5304 EVT VT = N0.getValueType();
5305
5306 // fold vector ops
5307 if (VT.isVector()) {
5308 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5309 return FoldedVOp;
5310
5311 // fold (xor x, 0) -> x, vector edition
5312 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5313 return N1;
5314 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5315 return N0;
5316 }
5317
5318 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5319 if (N0.isUndef() && N1.isUndef())
5320 return DAG.getConstant(0, SDLoc(N), VT);
5321 // fold (xor x, undef) -> undef
5322 if (N0.isUndef())
5323 return N0;
5324 if (N1.isUndef())
5325 return N1;
5326 // fold (xor c1, c2) -> c1^c2
5327 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5328 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5329 if (N0C && N1C)
5330 return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5331 // canonicalize constant to RHS
5332 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5333 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5334 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5335 // fold (xor x, 0) -> x
5336 if (isNullConstant(N1))
5337 return N0;
5338
5339 if (SDValue NewSel = foldBinOpIntoSelect(N))
5340 return NewSel;
5341
5342 // reassociate xor
5343 if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
5344 return RXOR;
5345
5346 // fold !(x cc y) -> (x !cc y)
5347 SDValue LHS, RHS, CC;
5348 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
5349 bool isInt = LHS.getValueType().isInteger();
5350 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
5351 isInt);
5352
5353 if (!LegalOperations ||
5354 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
5355 switch (N0.getOpcode()) {
5356 default:
5357 llvm_unreachable("Unhandled SetCC Equivalent!")::llvm::llvm_unreachable_internal("Unhandled SetCC Equivalent!"
, "/build/llvm-toolchain-snapshot-7~svn326246/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 5357)
;
5358 case ISD::SETCC:
5359 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
5360 case ISD::SELECT_CC:
5361 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
5362 N0.getOperand(3), NotCC);
5363 }
5364 }
5365 }
5366
5367 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
5368 if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
5369 N0.getNode()->hasOneUse() &&
5370 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
5371 SDValue V = N0.getOperand(0);
5372 SDLoc DL(N0);
5373 V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
5374 DAG.getConstant(1, DL, V.getValueType()));
5375 AddToWorklist(V.getNode());
5376 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
5377 }
5378
5379 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
5380 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
5381 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5382 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5383 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
5384 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5385 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5386 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5387 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5388 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5389 }
5390 }
5391 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
5392 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
5393 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5394 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5395 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
5396 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5397 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5398 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5399 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5400 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5401 }
5402 }
5403 // fold (xor (and x, y), y) -> (and (not x), y)
5404 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5405 N0->getOperand(1) == N1) {
5406 SDValue X = N0->getOperand(0);
5407 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
5408 AddToWorklist(NotX.getNode());
5409 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
5410 }
5411
5412 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5413 unsigned OpSizeInBits = VT.getScalarSizeInBits();
5414 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5415 N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5416 TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5417 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5418 if (C->getAPIntValue() == (OpSizeInBits - 1))
5419 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5420 }
5421
5422 // fold (xor x, x) -> 0
5423 if (N0 == N1)
5424 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5425
5426 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5427 // Here is a concrete example of this equivalence:
5428 // i16 x == 14
5429 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
5430 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5431 //
5432 // =>
5433 //
5434 // i16 ~1 == 0b1111111111111110
5435 // i16 rol(~1, 14) == 0b1011111111111111
5436 //
5437 // Some additional tips to help conceptualize this transform:
5438 // - Try to see the operation as placing a single zero in a value of all ones.
5439 // - There exists no value for x which would allow the result to contain zero.
5440 // - Values of x larger than the bitwidth are undefined and do not require a
5441 // consistent result.
5442 // - Pushing the zero left requires shifting one-bits in from the right.
5443 // A rotate left of ~1 is a nice way of achieving the desired result.
5444 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5445 && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5446 SDLoc DL(N);
5447 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5448 N0.getOperand(1));
5449 }
5450
5451 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
5452 if (N0.getOpcode() == N1.getOpcode())
5453 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5454 return Tmp;
5455
5456 // Simplify the expression using non-local knowledge.
5457 if (SimplifyDemandedBits(SDValue(N, 0)))
5458 return SDValue(N, 0);
5459
5460 return SDValue();
5461}
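
The (xor (shl 1, x), -1) -> (rotl ~1, x) equivalence documented above can be checked exhaustively for i16. rotl16 is a hypothetical stand-in for ISD::ROTL:

  #include <cassert>
  #include <cstdint>

  static uint16_t rotl16(uint16_t V, unsigned C) {
    C &= 15;
    return C ? (uint16_t)((V << C) | (V >> (16 - C))) : V;
  }

  int main() {
    for (unsigned X = 0; X < 16; ++X)
      assert((uint16_t)~(1u << X) == rotl16((uint16_t)~1u, X));
    return 0;
  }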
5462
5463/// Handle transforms common to the three shifts, when the shift amount is a
5464/// constant.
5465SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5466 SDNode *LHS = N->getOperand(0).getNode();
5467 if (!LHS->hasOneUse()) return SDValue();
5468
5469 // We want to pull some binops through shifts, so that we have (and (shift))
5470 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
5471 // thing happens with address calculations, so it's important to canonicalize
5472 // it.
5473 bool HighBitSet = false; // Can we transform this if the high bit is set?
5474
5475 switch (LHS->getOpcode()) {
5476 default: return SDValue();
5477 case ISD::OR:
5478 case ISD::XOR:
5479 HighBitSet = false; // We can only transform sra if the high bit is clear.
5480 break;
5481 case ISD::AND:
5482 HighBitSet = true; // We can only transform sra if the high bit is set.
5483 break;
5484 case ISD::ADD:
5485 if (N->getOpcode() != ISD::SHL)
5486 return SDValue(); // only shl(add) not sr[al](add).
5487 HighBitSet = false; // We can only transform sra if the high bit is clear.
5488 break;
5489 }
5490
5491 // We require the RHS of the binop to be a constant and not opaque as well.
5492 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5493 if (!BinOpCst) return SDValue();
5494
5495 // FIXME: disable this unless the input to the binop is a shift by a constant
5496 // or is a copy/select. Enable this in other cases once we figure out when it is exactly profitable.
5497 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5498 bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5499 BinOpLHSVal->getOpcode() == ISD::SRA ||
5500 BinOpLHSVal->getOpcode() == ISD::SRL;
5501 bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5502 BinOpLHSVal->getOpcode() == ISD::SELECT;
5503
5504 if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5505 !isCopyOrSelect)
5506 return SDValue();
5507
5508 if (isCopyOrSelect && N->hasOneUse())
5509 return SDValue();
5510
5511 EVT VT = N->getValueType(0);
5512
5513 // If this is a signed shift right, and the high bit is modified by the
5514 // logical operation, do not perform the transformation. The HighBitSet
5515 // boolean indicates the value of the high bit of the constant which would
5516 // cause it to be modified for this operation.
5517 if (N->getOpcode() == ISD::SRA) {
5518 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5519 if (BinOpRHSSignSet != HighBitSet)
5520 return SDValue();
5521 }
5522
5523 if (!TLI.isDesirableToCommuteWithShift(LHS))
5524 return SDValue();
5525
5526 // Fold the constants, shifting the binop RHS by the shift amount.
5527 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5528 N->getValueType(0),
5529 LHS->getOperand(1), N->getOperand(1));
5530 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5531
5532 // Create the new shift.
5533 SDValue NewShift = DAG.getNode(N->getOpcode(),
5534 SDLoc(LHS->getOperand(0)),
5535 VT, LHS->getOperand(0), N->getOperand(1));
5536
5537 // Create the new binop.
5538 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5539}
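
The commute performed here relies on left shifts distributing over the supported binops; for SHL the identity holds unconditionally for and/or/xor and, thanks to modular arithmetic, for add as well (the SRA case additionally needs the sign-bit check above). A value-level sketch for i32:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t X = 0x12345678u, C = 0x0F0F0F0Fu;
    unsigned S = 5;
    assert(((X | C) << S) == ((X << S) | (C << S)));
    assert(((X ^ C) << S) == ((X << S) ^ (C << S)));
    assert(((X & C) << S) == ((X << S) & (C << S)));
    assert((uint32_t)((X + C) << S) == (uint32_t)((X << S) + (C << S)));
    return 0;
  }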
5540
5541SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5542 assert(N->getOpcode() == ISD::TRUNCATE);
5543 assert(N->getOperand(0).getOpcode() == ISD::AND);
5544
5545 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5546 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5547 SDValue N01 = N->getOperand(0).getOperand(1);
5548 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5549 SDLoc DL(N);
5550 EVT TruncVT = N->getValueType(0);
5551 SDValue N00 = N->getOperand(0).getOperand(0);
5552 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5553 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5554 AddToWorklist(Trunc00.getNode());
5555 AddToWorklist(Trunc01.getNode());
5556 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5557 }
5558 }
5559
5560 return SDValue();
5561}
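
The distribution is sound because truncation keeps exactly the low bits, so truncating an AND equals ANDing the truncations. A one-case sketch for an i32 to i16 truncate:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t N00 = 0xCAFEBABEu;
    uint32_t N01C = 0x00FF00FFu;               // the constant mask
    uint16_t TruncOfAnd = (uint16_t)(N00 & N01C);
    uint16_t AndOfTrunc = (uint16_t)((uint16_t)N00 & (uint16_t)N01C);
    assert(TruncOfAnd == AndOfTrunc);
    return 0;
  }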
5562
5563SDValue DAGCombiner::visitRotate(SDNode *N) {
5564 SDLoc dl(N);
5565 SDValue N0 = N->getOperand(0);
5566 SDValue N1 = N->getOperand(1);
5567 EVT VT = N->getValueType(0);
5568 unsigned Bitsize = VT.getScalarSizeInBits();
5569
5570 // fold (rot x, 0) -> x
5571 if (isNullConstantOrNullSplatConstant(N1))
5572 return N0;
5573
5574 // fold (rot x, c) -> (rot x, c % BitSize)
5575 if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
5576 if (Cst->getAPIntValue().uge(Bitsize)) {
5577 uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
5578 return DAG.getNode(N->getOpcode(), dl, VT, N0,
5579 DAG.getConstant(RotAmt, dl, N1.getValueType()));
5580 }
5581 }
5582
5583 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5584 if (N1.getOpcode() == ISD::TRUNCATE &&
5585 N1.getOperand(0).getOpcode() == ISD::AND) {
5586 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5587 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
5588 }
5589
5590 unsigned NextOp = N0.getOpcode();
5591 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
5592 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
5593 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
5594 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
5595 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
5596 EVT ShiftVT = C1->getValueType(0);
5597 bool SameSide = (N->getOpcode() == NextOp);
5598 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
5599 if (SDValue CombinedShift =
5600 DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
5601 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
5602 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
5603 ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
5604 BitsizeC.getNode());
5605 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
5606 CombinedShiftNorm);
5607 }
5608 }
5609 }
5610 return SDValue();
5611}
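
The (rot* (rot* x, c2), c1) fold above composes rotate amounts modulo the bit size. An exhaustive check of the same-side case for i32, again using a hypothetical rotl32 helper:

  #include <cassert>
  #include <cstdint>

  static uint32_t rotl32(uint32_t X, unsigned C) {
    C &= 31;
    return C ? (X << C) | (X >> (32 - C)) : X;
  }

  int main() {
    uint32_t X = 0x0F1E2D3Cu;
    for (unsigned C1 = 0; C1 < 32; ++C1)
      for (unsigned C2 = 0; C2 < 32; ++C2)
        assert(rotl32(rotl32(X, C2), C1) == rotl32(X, (C1 + C2) % 32));
    return 0;
  }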
5612
5613SDValue DAGCombiner::visitSHL(SDNode *N) {
5614 SDValue N0 = N->getOperand(0);
5615 SDValue N1 = N->getOperand(1);
5616 EVT VT = N0.getValueType();
5617 unsigned OpSizeInBits = VT.getScalarSizeInBits();
5618
5619 // fold vector ops
5620 if (VT.isVector()) {
5621 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5622 return FoldedVOp;
5623
5624 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5625 // If setcc produces an all-ones true value then:
5626 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5627 if (N1CV && N1CV->isConstant()) {
5628 if (N0.getOpcode() == ISD::AND) {
5629 SDValue N00 = N0->getOperand(0);
5630 SDValue N01 = N0->getOperand(1);
5631 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5632
5633 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5634 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5635 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5636 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5637 N01CV, N1CV))
5638 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5639 }
5640 }
5641 }
5642 }
5643
5644 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5645
5646 // fold (shl c1, c2) -> c1<<c2
5647 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5648 if (N0C && N1C && !N1C->isOpaque())
5649 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5650 // fold (shl 0, x) -> 0
5651 if (isNullConstantOrNullSplatConstant(N0))
5652 return N0;
5653 // fold (shl x, c >= size(x)) -> undef
5654 // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5655 auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5656 return Val->getAPIntValue().uge(OpSizeInBits);
5657 };
5658 if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
5659 return DAG.getUNDEF(VT);
5660 // fold (shl x, 0) -> x
5661 if (N1C && N1C->isNullValue())
5662 return N0;
5663 // fold (shl undef, x) -> 0
5664 if (N0.isUndef())
5665 return DAG.getConstant(0, SDLoc(N), VT);
5666
5667 if (SDValue NewSel = foldBinOpIntoSelect(N))
5668 return NewSel;
5669
5670 // if (shl x, c) is known to be zero, return 0
5671 if (DAG.MaskedValueIsZero(SDValue(N, 0),
5672 APInt::getAllOnesValue(OpSizeInBits)))
5673 return DAG.getConstant(0, SDLoc(N), VT);
5674 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5675 if (N1.getOpcode() == ISD::TRUNCATE &&
5676 N1.getOperand(0).getOpcode() == ISD::AND) {
5677 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5678 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5679 }
5680
5681 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5682 return SDValue(N, 0);
5683
5684 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5685 if (N0.getOpcode() == ISD::SHL) {
5686 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5687 ConstantSDNode *RHS) {
5688 APInt c1 = LHS->getAPIntValue();
5689 APInt c2 = RHS->getAPIntValue();
5690 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5691 return (c1 + c2).uge(OpSizeInBits);
5692 };
5693 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5694 return DAG.getConstant(0, SDLoc(N), VT);
5695
5696 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5697 ConstantSDNode *RHS) {
5698 APInt c1 = LHS->getAPIntValue();
5699 APInt c2 = RHS->getAPIntValue();
5700 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5701 return (c1 + c2).ult(OpSizeInBits);
5702 };
5703 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5704 SDLoc DL(N);
5705 EVT ShiftVT = N1.getValueType();
5706 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5707 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
5708 }
5709 }
5710
5711 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5712 // For this to be valid, the second form must not preserve any of the bits
5713 // that are shifted out by the inner shift in the first form. This means
5714 // the outer shift size must be >= the number of bits added by the ext.
5715 // As a corollary, we don't care what kind of ext it is.
5716 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5717 N0.getOpcode() == ISD::ANY_EXTEND ||
5718 N0.getOpcode() == ISD::SIGN_EXTEND) &&
5719 N0.getOperand(0).getOpcode() == ISD::SHL) {
5720 SDValue N0Op0 = N0.getOperand(0);
5721 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5722 APInt c1 = N0Op0C1->getAPIntValue();
5723 APInt c2 = N1C->getAPIntValue();
5724 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5725
5726 EVT InnerShiftVT = N0Op0.getValueType();
5727 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5728 if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5729 SDLoc DL(N0);
5730 APInt Sum = c1 + c2;
5731 if (Sum.uge(OpSizeInBits))
5732 return DAG.getConstant(0, DL, VT);
5733
5734 return DAG.getNode(
5735 ISD::SHL, DL, VT,
5736 DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5737 DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5738 }
5739 }
5740 }
5741
5742 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5743 // Only fold this if the inner zext has no other uses to avoid increasing
5744 // the total number of instructions.
5745 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5746 N0.getOperand(0).getOpcode() == ISD::SRL) {
5747 SDValue N0Op0 = N0.getOperand(0);
5748 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5749 if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5750 uint64_t c1 = N0Op0C1->getZExtValue();
5751 uint64_t c2 = N1C->getZExtValue();
5752 if (c1 == c2) {
5753 SDValue NewOp0 = N0.getOperand(0);
5754 EVT CountVT = NewOp0.getOperand(1).getValueType();
5755 SDLoc DL(N);
5756 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5757 NewOp0,
5758 DAG.getConstant(c2, DL, CountVT));
5759 AddToWorklist(NewSHL.getNode());
5760 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5761 }
5762 }
5763 }
5764 }
5765
5766 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
5767 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
5768 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5769 N0->getFlags().hasExact()) {
5770 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5771 uint64_t C1 = N0C1->getZExtValue();
5772 uint64_t C2 = N1C->getZExtValue();
5773 SDLoc DL(N);
5774 if (C1 <= C2)
5775 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5776 DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5777 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5778 DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5779 }
5780 }
5781
5782 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
5783 // (and (srl x, (sub c1, c2)), MASK)
5784 // Only fold this if the inner shift has no other uses -- if it does, folding
5785 // this will increase the total number of instructions.
5786 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5787 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5788 uint64_t c1 = N0C1->getZExtValue();
5789 if (c1 < OpSizeInBits) {
5790 uint64_t c2 = N1C->getZExtValue();
5791 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5792 SDValue Shift;
5793 if (c2 > c1) {
5794 Mask <<= c2 - c1;
5795 SDLoc DL(N);
5796 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5797 DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5798 } else {
5799 Mask.lshrInPlace(c1 - c2);
5800 SDLoc DL(N);
5801 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5802 DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5803 }
5804 SDLoc DL(N0);
5805 return DAG.getNode(ISD::AND, DL, VT, Shift,
5806 DAG.getConstant(Mask, DL, VT));
5807 }
5808 }
5809 }
5810
5811 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5812 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5813 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5814 SDLoc DL(N);
5815 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5816 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5817 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5818 }
5819
5820 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5821 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
5822 // Variant of version done on multiply, except mul by a power of 2 is turned
5823 // into a shift.
5824 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
5825 N0.getNode()->hasOneUse() &&
5826 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5827 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5828 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5829 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5830 AddToWorklist(Shl0.getNode());
5831 AddToWorklist(Shl1.getNode());
5832 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
5833 }
5834
5835 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5836 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5837 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5838 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5839 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5840 if (isConstantOrConstantVector(Shl))
5841 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5842 }
5843
5844 if (N1C && !N1C->isOpaque())
5845 if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5846 return NewSHL;
5847
5848 return SDValue();
5849}
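
The (shl (shl x, c1), c2) folds near the top of visitSHL rest on a simple scalar fact: the composition equals x << (c1 + c2) while the sum stays in range, and collapses to 0 once the sum reaches the bit width. A value-level check for i32:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t X = 0xA5A5A5A5u;
    for (unsigned C1 = 0; C1 < 32; ++C1)
      for (unsigned C2 = 0; C2 < 32; ++C2) {
        uint32_t Composed = (X << C1) << C2; // each single shift is in range
        uint32_t Folded = (C1 + C2 < 32) ? X << (C1 + C2) : 0;
        assert(Composed == Folded);
      }
    return 0;
  }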
5850
5851SDValue DAGCombiner::visitSRA(SDNode *N) {
5852 SDValue N0 = N->getOperand(0);
5853 SDValue N1 = N->getOperand(1);
5854 EVT VT = N0.getValueType();
5855 unsigned OpSizeInBits = VT.getScalarSizeInBits();
5856
5857 // Arithmetic shifting an all-sign-bit value is a no-op.
5858 // fold (sra 0, x) -> 0
5859 // fold (sra -1, x) -> -1
5860 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5861 return N0;
5862
5863 // fold vector ops
5864 if (VT.isVector())
5865 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5866 return FoldedVOp;
5867
5868 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5869
5870 // fold (sra c1, c2) -> c1 >>s c2
5871 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5872 if (N0C && N1C && !N1C->isOpaque())
5873 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5874 // fold (sra x, c >= size(x)) -> undef
5875 // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5876 auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5877 return Val->getAPIntValue().uge(OpSizeInBits);
5878 };
5879 if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
5880 return DAG.getUNDEF(VT);
5881 // fold (sra x, 0) -> x
5882 if (N1C && N1C->isNullValue())
5883 return N0;
5884
5885 if (SDValue NewSel = foldBinOpIntoSelect(N))
5886 return NewSel;
5887
5888 // fold (sra (shl x, c1), c1) -> sext_inreg, for some c1, when the target
5889 // supports sext_inreg.
5890 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5891 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5892 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5893 if (VT.isVector())
5894 ExtVT = EVT::getVectorVT(*DAG.getContext(),
5895 ExtVT, VT.getVectorNumElements());
5896 if ((!LegalOperations ||
5897 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5898 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5899 N0.getOperand(0), DAG.getValueType(ExtVT));
5900 }
5901
5902 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5903 if (N0.getOpcode() == ISD::SRA) {
5904 SDLoc DL(N);
5905 EVT ShiftVT = N1.getValueType();
5906
5907 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5908 ConstantSDNode *RHS) {
5909 APInt c1 = LHS->getAPIntValue();
5910 APInt c2 = RHS->getAPIntValue();
5911 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5912 return (c1 + c2).uge(OpSizeInBits);
5913 };
5914 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5915 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
5916 DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
5917
5918 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5919 ConstantSDNode *RHS) {
5920 APInt c1 = LHS->getAPIntValue();
5921 APInt c2 = RHS->getAPIntValue();
5922 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5923 return (c1 + c2).ult(OpSizeInBits);
5924 };
5925 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5926 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5927 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
5928 }
5929 }
5930
5931 // fold (sra (shl X, m), (sub result_size, n))
5932 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5933 // result_size - n != m.
5934 // If truncate is free for the target, sext(shl) is likely to result in better
5935 // code.
5936 if (N0.getOpcode() == ISD::SHL && N1C) {
5937 // Get the two constants of the shifts, CN0 = m, CN = n.
5938 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5939 if (N01C) {
5940 LLVMContext &Ctx = *DAG.getContext();
5941 // Determine what the truncate's result bitsize and type would be.
5942 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5943
5944 if (VT.isVector())
5945 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5946
5947 // Determine the residual right-shift amount.
5948 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5949
5950 // If the shift is not a no-op (in which case this should be just a sign
5951 // extend already), the truncated-to type is legal, sign_extend is legal
5952 // on that type, and the truncate to that type is both legal and free,
5953 // perform the transform.
5954 if ((ShiftAmt > 0) &&
5955 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5956 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5957 TLI.isTruncateFree(VT, TruncVT)) {
5958 SDLoc DL(N);
5959 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5960 getShiftAmountTy(N0.getOperand(0).getValueType()));
5961 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5962 N0.getOperand(0), Amt);
5963 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5964 Shift);
5965 return DAG.getNode(ISD::SIGN_EXTEND, DL,
5966 N->getValueType(0), Trunc);
5967 }
5968 }
5969 }
5970
5971 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5972 if (N1.getOpcode() == ISD::TRUNCATE &&
5973 N1.getOperand(0).getOpcode() == ISD::AND) {
5974 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5975 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5976 }
5977
5978 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5979 // if c1 is equal to the number of bits the trunc removes
5980 if (N0.getOpcode() == ISD::TRUNCATE &&
5981 (N0.getOperand(0).getOpcode() == ISD::SRL ||
5982 N0.getOperand(0).getOpcode() == ISD::SRA) &&
5983 N0.getOperand(0).hasOneUse() &&
5984 N0.getOperand(0).getOperand(1).hasOneUse() &&
5985 N1C) {
5986 SDValue N0Op0 = N0.getOperand(0);
5987 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5988 unsigned LargeShiftVal = LargeShift->getZExtValue();
5989 EVT LargeVT = N0Op0.getValueType();
5990
5991 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5992 SDLoc DL(N);
5993 SDValue Amt =
5994 DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5995 getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5996 SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5997 N0Op0.getOperand(0), Amt);
5998 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5999 }
6000 }
6001 }
6002
6003 // Simplify, based on bits shifted out of the LHS.
6004 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6005 return SDValue(N, 0);
6006
6007 // If the sign bit is known to be zero, switch this to a SRL.
6008 if (DAG.SignBitIsZero(N0))
6009 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6010
6011 if (N1C && !N1C->isOpaque())
6012 if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6013 return NewSRA;
6014
6015 return SDValue();
6016}
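
The (sra (shl x, c1), c1) -> sext_inreg fold above can be checked at the value level. signExtendInReg is a hypothetical helper mirroring SIGN_EXTEND_INREG semantics; the check assumes two's-complement wrap on the shl and an arithmetic >> on signed values, as on typical hosts:

  #include <cassert>
  #include <cstdint>

  static int32_t signExtendInReg(int32_t X, unsigned FromBits) {
    int32_t M = (int32_t)1 << (FromBits - 1);     // sign bit of the region
    int32_t V = (int32_t)(X & (int32_t)(((int64_t)1 << FromBits) - 1));
    return (V ^ M) - M;                           // classic sign-extend trick
  }

  int main() {
    unsigned C = 8;                               // the shared shift amount
    for (int32_t X : {0x00123456, (int32_t)0x00FF7788, -5, 0x7FFFFFFF}) {
      int32_t ViaShifts = (int32_t)((uint32_t)X << C) >> C;
      assert(ViaShifts == signExtendInReg(X, 32 - C));
    }
    return 0;
  }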
6017
6018SDValue DAGCombiner::visitSRL(SDNode *N) {
6019 SDValue N0 = N->getOperand(0);
6020 SDValue N1 = N->getOperand(1);
6021 EVT VT = N0.getValueType();
6022 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6023
6024 // fold vector ops
6025 if (VT.isVector())
6026 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6027 return FoldedVOp;
6028
6029 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6030
6031 // fold (srl c1, c2) -> c1 >>u c2
6032 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6033 if (N0C && N1C && !N1C->isOpaque())
6034 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6035 // fold (srl 0, x) -> 0
6036 if (isNullConstantOrNullSplatConstant(N0))
6037 return N0;
6038 // fold (srl x, c >= size(x)) -> undef
6039 // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6040 auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6041 return Val->getAPIntValue().uge(OpSizeInBits);
6042 };
6043 if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6044 return DAG.getUNDEF(VT);
6045 // fold (srl x, 0) -> x
6046 if (N1C && N1C->isNullValue())
6047 return N0;
6048
6049 if (SDValue NewSel = foldBinOpIntoSelect(N))
6050 return NewSel;
6051
6052 // if (srl x, c) is known to be zero, return 0
6053 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6054 APInt::getAllOnesValue(OpSizeInBits)))
6055 return DAG.getConstant(0, SDLoc(N), VT);
6056
6057 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6058 if (N0.getOpcode() == ISD::SRL) {
6059 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6060 ConstantSDNode *RHS) {
6061 APInt c1 = LHS->getAPIntValue();
6062 APInt c2 = RHS->getAPIntValue();
6063 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6064 return (c1 + c2).uge(OpSizeInBits);
6065 };
6066 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6067 return DAG.getConstant(0, SDLoc(N), VT);
6068
6069 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6070 ConstantSDNode *RHS) {
6071 APInt c1 = LHS->getAPIntValue();
6072 APInt c2 = RHS->getAPIntValue();
6073 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6074 return (c1 + c2).ult(OpSizeInBits);
6075 };
6076 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6077 SDLoc DL(N);
6078 EVT ShiftVT = N1.getValueType();
6079 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6080 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
6081 }
6082 }
6083
6084 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
6085 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
6086 N0.getOperand(0).getOpcode() == ISD::SRL) {
6087 if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
6088 uint64_t c1 = N001C->getZExtValue();
6089 uint64_t c2 = N1C->getZExtValue();
6090 EVT InnerShiftVT = N0.getOperand(0).getValueType();
6091 EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
6092 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6093 // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
6094 if (c1 + OpSizeInBits == InnerShiftSize) {
6095 SDLoc DL(N0);
6096 if (c1 + c2 >= InnerShiftSize)
6097 return DAG.getConstant(0, DL, VT);
6098 return DAG.getNode(ISD::TRUNCATE, DL, VT,
6099 DAG.getNode(ISD::SRL, DL, InnerShiftVT,
6100 N0.getOperand(0).getOperand(0),
6101 DAG.getConstant(c1 + c2, DL,
6102 ShiftCountVT)));
6103 }
6104 }
6105 }
6106
6107 // fold (srl (shl x, c), c) -> (and x, cst2)
6108 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
6109 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
6110 SDLoc DL(N);
6111 SDValue Mask =
6112 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
6113 AddToWorklist(Mask.getNode());
6114 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
6115 }
6116
6117 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
6118 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6119 // Shifting in all undef bits?
6120 EVT SmallVT = N0.getOperand(0).getValueType();
6121 unsigned BitSize = SmallVT.getScalarSizeInBits();
6122 if (N1C->getZExtValue() >= BitSize)
6123 return DAG.getUNDEF(VT);
6124
6125 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
6126 uint64_t ShiftAmt = N1C->getZExtValue();
6127 SDLoc DL0(N0);
6128 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
6129 N0.getOperand(0),
6130 DAG.getConstant(ShiftAmt, DL0,
6131 getShiftAmountTy(SmallVT)));
6132 AddToWorklist(SmallShift.getNode());
6133 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
6134 SDLoc DL(N);
6135 return DAG.getNode(ISD::AND, DL, VT,
6136 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
6137 DAG.getConstant(Mask, DL, VT));
6138 }
6139 }
6140
6141 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
6142 // bit, which is unmodified by sra.
6143 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
6144 if (N0.getOpcode() == ISD::SRA)
6145 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
6146 }
6147
6148 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
6149 if (N1C && N0.getOpcode() == ISD::CTLZ &&
6150 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
6151 KnownBits Known;
6152 DAG.computeKnownBits(N0.getOperand(0), Known);
6153
6154 // If any of the input bits are KnownOne, then the input couldn't be all
6155 // zeros, thus the result of the srl will always be zero.
6156 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
6157
6158 // If all of the bits input to the ctlz node are known to be zero, then
6159 // the result of the ctlz is "32" and the result of the shift is one.
6160 APInt UnknownBits = ~Known.Zero;
6161 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
6162
6163 // Otherwise, check to see if there is exactly one bit input to the ctlz.
6164 if (UnknownBits.isPowerOf2()) {
6165 // Okay, we know that only the single bit specified by UnknownBits
6166 // could be set on input to the CTLZ node. If this bit is set, the SRL
6167 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
6168 // to an SRL/XOR pair, which is likely to simplify more.
6169 unsigned ShAmt = UnknownBits.countTrailingZeros();
6170 SDValue Op = N0.getOperand(0);
6171
6172 if (ShAmt) {
6173 SDLoc DL(N0);
6174 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
6175 DAG.getConstant(ShAmt, DL,
6176 getShiftAmountTy(Op.getValueType())));
6177 AddToWorklist(Op.getNode());
6178 }
6179
6180 SDLoc DL(N);
6181 return DAG.getNode(ISD::XOR, DL, VT,
6182 Op, DAG.getConstant(1, DL, VT));
6183 }
6184 }
6185
6186 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
6187 if (N1.getOpcode() == ISD::TRUNCATE &&
6188 N1.getOperand(0).getOpcode() == ISD::AND) {
6189 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6190 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
6191 }
6192
6193 // fold operands of srl based on knowledge that the low bits are not
6194 // demanded.
6195 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6196 return SDValue(N, 0);
6197
6198 if (N1C && !N1C->isOpaque())
6199 if (SDValue NewSRL = visitShiftByConstant(N, N1C))
6200 return NewSRL;
6201
6202 // Attempt to convert a srl of a load into a narrower zero-extending load.
6203 if (SDValue NarrowLoad = ReduceLoadWidth(N))
6204 return NarrowLoad;
6205
6206 // Here is a common situation. We want to optimize:
6207 //
6208 // %a = ...
6209 // %b = and i32 %a, 2
6210 // %c = srl i32 %b, 1
6211 // brcond i32 %c ...
6212 //
6213 // into
6214 //
6215 // %a = ...
6216 // %b = and %a, 2
6217 // %c = setcc eq %b, 0
6218 // brcond %c ...
6219 //
6220 // However, after the source operand of the SRL is optimized into AND, the SRL
6221 // itself may not be optimized further. Look for it and add the BRCOND into
6222 // the worklist.
6223 if (N->hasOneUse()) {
6224 SDNode *Use = *N->use_begin();
6225 if (Use->getOpcode() == ISD::BRCOND)
6226 AddToWorklist(Use);
6227 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
6228 // Also look past the truncate.
6229 Use = *Use->use_begin();
6230 if (Use->getOpcode() == ISD::BRCOND)
6231 AddToWorklist(Use);
6232 }
6233 }
6234
6235 return SDValue();
6236}
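
The CTLZ reasoning above reduces to a simple scalar fact for i32: ctlz(x) >> 5 is 1 exactly when x == 0. Since __builtin_clz is undefined at zero, this sketch wraps it in a hypothetical ctlz32 helper that defines ctlz(0) == 32:

  #include <cassert>
  #include <cstdint>

  static unsigned ctlz32(uint32_t X) { return X ? __builtin_clz(X) : 32; }

  int main() {
    assert((ctlz32(0) >> 5) == 1);              // all-zero input shifts to 1
    for (uint32_t X : {1u, 2u, 0x80000000u, 0xDEADBEEFu})
      assert((ctlz32(X) >> 5) == 0);            // any set bit shifts to 0
    return 0;
  }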
6237
6238SDValue DAGCombiner::visitABS(SDNode *N) {
6239 SDValue N0 = N->getOperand(0);
6240 EVT VT = N->getValueType(0);
6241
6242 // fold (abs c1) -> c2
6243 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6244 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6245 // fold (abs (abs x)) -> (abs x)
6246 if (N0.getOpcode() == ISD::ABS)
6247 return N0;
6248 // fold (abs x) -> x iff not-negative
6249 if (DAG.SignBitIsZero(N0))
6250 return N0;
6251 return SDValue();
6252}
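
The ISD::ABS node produced by the sra/add/xor fold in visitXOR earlier corresponds to the classic branchless absolute value. A value-level check, assuming 32-bit int and arithmetic >> on signed values, as on typical hosts:

  #include <cassert>
  #include <cstdlib>

  int main() {
    for (int X : {5, -5, 0, 123456789, -123456789}) {
      int Y = X >> 31;                          // sra(X, size(X)-1): 0 or -1
      assert(((X + Y) ^ Y) == std::abs(X));
    }
    return 0;
  }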
6253
6254SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6255 SDValue N0 = N->getOperand(0);
6256 EVT VT = N->getValueType(0);
6257
6258 // fold (bswap c1) -> c2
6259 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6260 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6261 // fold (bswap (bswap x)) -> x
6262 if (N0.getOpcode() == ISD::BSWAP)
6263 return N0->getOperand(0);
6264 return SDValue();
6265}
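
The bswap double-application fold above relies on byte swapping being an involution; a minimal standalone check (assuming the GCC/Clang builtin):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u;
      // fold (bswap (bswap x)) -> x: swapping byte order twice is the identity.
      assert(__builtin_bswap32(__builtin_bswap32(X)) == X);
    }
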
6266
6267SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6268 SDValue N0 = N->getOperand(0);
6269 EVT VT = N->getValueType(0);
6270
6271 // fold (bitreverse c1) -> c2
6272 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6273 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6274 // fold (bitreverse (bitreverse x)) -> x
6275 if (N0.getOpcode() == ISD::BITREVERSE)
6276 return N0.getOperand(0);
6277 return SDValue();
6278}
6279
6280SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6281 SDValue N0 = N->getOperand(0);
6282 EVT VT = N->getValueType(0);
6283
6284 // fold (ctlz c1) -> c2
6285 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6286 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6287
6288 // If the value is known never to be zero, switch to the undef version.
6289 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6290 if (DAG.isKnownNeverZero(N0))
6291 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6292 }
6293
6294 return SDValue();
6295}
6296
6297SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6298 SDValue N0 = N->getOperand(0);
6299 EVT VT = N->getValueType(0);
6300
6301 // fold (ctlz_zero_undef c1) -> c2
6302 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6303 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6304 return SDValue();
6305}
6306
6307SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6308 SDValue N0 = N->getOperand(0);
6309 EVT VT = N->getValueType(0);
6310
6311 // fold (cttz c1) -> c2
6312 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6313 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6314
6315 // If the value is known never to be zero, switch to the undef version.
6316 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
6317 if (DAG.isKnownNeverZero(N0))
6318 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6319 }
6320
6321 return SDValue();
6322}
6323
6324SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6325 SDValue N0 = N->getOperand(0);
6326 EVT VT = N->getValueType(0);
6327
6328 // fold (cttz_zero_undef c1) -> c2
6329 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6330 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6331 return SDValue();
6332}
6333
6334SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6335 SDValue N0 = N->getOperand(0);
6336 EVT VT = N->getValueType(0);
6337
6338 // fold (ctpop c1) -> c2
6339 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6340 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6341 return SDValue();
6342}
6343
6344/// \brief Generate Min/Max node
6345static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6346 SDValue RHS, SDValue True, SDValue False,
6347 ISD::CondCode CC, const TargetLowering &TLI,
6348 SelectionDAG &DAG) {
6349 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6350 return SDValue();
6351
6352 switch (CC) {
6353 case ISD::SETOLT:
6354 case ISD::SETOLE:
6355 case ISD::SETLT:
6356 case ISD::SETLE:
6357 case ISD::SETULT:
6358 case ISD::SETULE: {
6359 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6360 if (TLI.isOperationLegal(Opcode, VT))
6361 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6362 return SDValue();
6363 }
6364 case ISD::SETOGT:
6365 case ISD::SETOGE:
6366 case ISD::SETGT:
6367 case ISD::SETGE:
6368 case ISD::SETUGT:
6369 case ISD::SETUGE: {
6370 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6371 if (TLI.isOperationLegal(Opcode, VT))
6372 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6373 return SDValue();
6374 }
6375 default:
6376 return SDValue();
6377 }
6378}
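
A scalar model of the mapping above, e.g. select (setolt x, y), x, y --> fminnum x, y. std::fmin agrees with the select form only when NaNs are excluded, which is why the caller checks isKnownNeverNaN; this is a sketch of the equivalence, not the lowering itself:

    #include <cassert>
    #include <cmath>

    double selectForm(double X, double Y) { return X < Y ? X : Y; }  // select (setolt X, Y), X, Y
    double minnumForm(double X, double Y) { return std::fmin(X, Y); } // fminnum X, Y

    int main() {
      // Agrees whenever neither operand is a NaN.
      assert(selectForm(1.0, 2.0) == minnumForm(1.0, 2.0));
      assert(selectForm(-0.5, -3.0) == minnumForm(-0.5, -3.0));
    }
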
6379
6380SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
6381 SDValue Cond = N->getOperand(0);
6382 SDValue N1 = N->getOperand(1);
6383 SDValue N2 = N->getOperand(2);
6384 EVT VT = N->getValueType(0);
6385 EVT CondVT = Cond.getValueType();
6386 SDLoc DL(N);
6387
6388 if (!VT.isInteger())
6389 return SDValue();
6390
6391 auto *C1 = dyn_cast<ConstantSDNode>(N1);
6392 auto *C2 = dyn_cast<ConstantSDNode>(N2);
6393 if (!C1 || !C2)
6394 return SDValue();
6395
6396 // Only do this before legalization to avoid conflicting with target-specific
6397 // transforms in the other direction (create a select from a zext/sext). There
6398 // is also a target-independent combine here in DAGCombiner in the other
6399 // direction for (select Cond, -1, 0) when the condition is not i1.
6400 if (CondVT == MVT::i1 && !LegalOperations) {
6401 if (C1->isNullValue() && C2->isOne()) {
6402 // select Cond, 0, 1 --> zext (!Cond)
6403 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6404 if (VT != MVT::i1)
6405 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
6406 return NotCond;
6407 }
6408 if (C1->isNullValue() && C2->isAllOnesValue()) {
6409 // select Cond, 0, -1 --> sext (!Cond)
6410 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6411 if (VT != MVT::i1)
6412 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
6413 return NotCond;
6414 }
6415 if (C1->isOne() && C2->isNullValue()) {
6416 // select Cond, 1, 0 --> zext (Cond)
6417 if (VT != MVT::i1)
6418 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6419 return Cond;
6420 }
6421 if (C1->isAllOnesValue() && C2->isNullValue()) {
6422 // select Cond, -1, 0 --> sext (Cond)
6423 if (VT != MVT::i1)
6424 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6425 return Cond;
6426 }
6427
6428 // For any constants that differ by 1, we can transform the select into an
6429 // extend and add. Use a target hook because some targets may prefer to
6430 // transform in the other direction.
6431 if (TLI.convertSelectOfConstantsToMath(VT)) {
6432 if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
6433 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6434 if (VT != MVT::i1)
6435 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6436 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6437 }
6438 if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
6439 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6440 if (VT != MVT::i1)
6441 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6442 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6443 }
6444 }
6445
6446 return SDValue();
6447 }
6448
6449 // fold (select Cond, 0, 1) -> (xor Cond, 1)
6450 // We can't do this reliably if integer based booleans have different contents
6451 // to floating point based booleans. This is because we can't tell whether we
6452 // have an integer-based boolean or a floating-point-based boolean unless we
6453 // can find the SETCC that produced it and inspect its operands. This is
6454 // fairly easy if C is the SETCC node, but it can potentially be
6455 // undiscoverable (or not reasonably discoverable). For example, it could be
6456 // in another basic block or it could require searching a complicated
6457 // expression.
6458 if (CondVT.isInteger() &&
6459 TLI.getBooleanContents(false, true) ==
6460 TargetLowering::ZeroOrOneBooleanContent &&
6461 TLI.getBooleanContents(false, false) ==
6462 TargetLowering::ZeroOrOneBooleanContent &&
6463 C1->isNullValue() && C2->isOne()) {
6464 SDValue NotCond =
6465 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
6466 if (VT.bitsEq(CondVT))
6467 return NotCond;
6468 return DAG.getZExtOrTrunc(NotCond, DL, VT);
6469 }
6470
6471 return SDValue();
6472}
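
Scalar models of two of the folds above (a sketch, assuming an i1 condition): select Cond, 0, 1 becomes a zext/xor of the inverted condition, and constants that differ by one become an extend plus add:

    #include <cassert>
    #include <cstdint>

    // select Cond, 0, 1 --> zext (!Cond), i.e. an xor with 1 at the bit level.
    uint32_t selZeroOne(bool Cond) { return Cond ? 0u : 1u; }
    uint32_t xorForm(bool Cond) { return (uint32_t)Cond ^ 1u; }

    // select Cond, C1, C1-1 --> add (zext Cond), C1-1 (here C1 = 5).
    uint32_t selFiveFour(bool Cond) { return Cond ? 5u : 4u; }
    uint32_t addForm(bool Cond) { return (uint32_t)Cond + 4u; }

    int main() {
      for (bool C : {false, true}) {
        assert(selZeroOne(C) == xorForm(C));
        assert(selFiveFour(C) == addForm(C));
      }
    }
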
6473
6474SDValue DAGCombiner::visitSELECT(SDNode *N) {
6475 SDValue N0 = N->getOperand(0);
6476 SDValue N1 = N->getOperand(1);
6477 SDValue N2 = N->getOperand(2);
6478 EVT VT = N->getValueType(0);
6479 EVT VT0 = N0.getValueType();
6480 SDLoc DL(N);
6481
6482 // fold (select C, X, X) -> X
6483 if (N1 == N2)
6484 return N1;
6485
6486 if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
6487 // fold (select true, X, Y) -> X
6488 // fold (select false, X, Y) -> Y
6489 return !N0C->isNullValue() ? N1 : N2;
6490 }
6491
6492 // fold (select X, X, Y) -> (or X, Y)
6493 // fold (select X, 1, Y) -> (or X, Y)
6494 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
6495 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
6496
6497 if (SDValue V = foldSelectOfConstants(N))
6498 return V;
6499
6500 // fold (select C, 0, X) -> (and (not C), X)
6501 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
6502 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6503 AddToWorklist(NOTNode.getNode());
6504 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
6505 }
6506 // fold (select C, X, 1) -> (or (not C), X)
6507 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
6508 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6509 AddToWorklist(NOTNode.getNode());
6510 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
6511 }
6512 // fold (select X, Y, X) -> (and X, Y)
6513 // fold (select X, Y, 0) -> (and X, Y)
6514 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
6515 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
6516
6517 // If we can fold this based on the true/false value, do so.
6518 if (SimplifySelectOps(N, N1, N2))
6519 return SDValue(N, 0); // Don't revisit N.
6520
6521 if (VT0 == MVT::i1) {
6522 // The code in this block deals with the following 2 equivalences:
6523 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
6524 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
6525 // The target can specify its preferred form with the
6526 // shouldNormalizeToSelectSequence() callback. However, we always transform
6527 // to the right-hand form if the inner select already exists in the DAG, and
6528 // we always transform to the left-hand form if we know that we can further
6529 // optimize the combination of the conditions.
6530 bool normalizeToSequence =
6531 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
6532 // select (and Cond0, Cond1), X, Y
6533 // -> select Cond0, (select Cond1, X, Y), Y
6534 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
6535 SDValue Cond0 = N0->getOperand(0);
6536 SDValue Cond1 = N0->getOperand(1);
6537 SDValue InnerSelect =
6538 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
6539 if (normalizeToSequence || !InnerSelect.use_empty())
6540 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
6541 InnerSelect, N2);
6542 }
6543 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
6544 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
6545 SDValue Cond0 = N0->getOperand(0);
6546 SDValue Cond1 = N0->getOperand(1);
6547 SDValue InnerSelect =
6548 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
6549 if (normalizeToSequence || !InnerSelect.use_empty())
6550 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
6551 InnerSelect);
6552 }
6553
6554 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
6555 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
6556 SDValue N1_0 = N1->getOperand(0);
6557 SDValue N1_1 = N1->getOperand(1);
6558 SDValue N1_2 = N1->getOperand(2);
6559 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
6560 // Create the actual and node if we can generate good code for it.
6561 if (!normalizeToSequence) {
6562 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
6563 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
6564 }
6565 // Otherwise see if we can optimize the "and" to a better pattern.
6566 if (SDValue Combined = visitANDLike(N0, N1_0, N))
6567 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
6568 N2);
6569 }
6570 }
6571 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
6572 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
6573 SDValue N2_0 = N2->getOperand(0);
6574 SDValue N2_1 = N2->getOperand(1);
6575 SDValue N2_2 = N2->getOperand(2);
6576 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
6577 // Create the actual or node if we can generate good code for it.
6578 if (!normalizeToSequence) {
6579 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
6580 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
6581 }
6582 // Otherwise see if we can optimize to a better pattern.
6583 if (SDValue Combined = visitORLike(N0, N2_0, N))
6584 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
6585 N2_2);
6586 }
6587 }
6588 }
6589
6590 // select (xor Cond, 1), X, Y -> select Cond, Y, X
6591 if (VT0 == MVT::i1) {
6592 if (N0->getOpcode() == ISD::XOR) {
6593 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
6594 SDValue Cond0 = N0->getOperand(0);
6595 if (C->isOne())
6596 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
6597 }
6598 }
6599 }
6600
6601 // fold selects based on a setcc into other things, such as min/max/abs
6602 if (N0.getOpcode() == ISD::SETCC) {
6603 // select x, y (fcmp lt x, y) -> fminnum x, y
6604 // select x, y (fcmp gt x, y) -> fmaxnum x, y
6605 //
6606 // This is OK if we don't care about what happens if either operand is a
6607 // NaN.
6608 //
6609
6610 // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
6611 // no signed zeros as well as no nans.
6612 const TargetOptions &Options = DAG.getTarget().Options;
6613 if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
6614 DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
6615 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6616
6617 if (SDValue FMinMax = combineMinNumMaxNum(
6618 DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
6619 return FMinMax;
6620 }
6621
6622 if ((!LegalOperations &&
6623 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
6624 TLI.isOperationLegal(ISD::SELECT_CC, VT))
6625 return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
6626 N0.getOperand(1), N1, N2, N0.getOperand(2));
6627 return SimplifySelect(DL, N0, N1, N2);
6628 }
6629
6630 return SDValue();
6631}
6632
6633static
6634std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6635 SDLoc DL(N);
6636 EVT LoVT, HiVT;
6637 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6638
6639 // Split the inputs.
6640 SDValue Lo, Hi, LL, LH, RL, RH;
6641 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6642 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6643
6644 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6645 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6646
6647 return std::make_pair(Lo, Hi);
6648}
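
A standalone model of what the split produces, assuming an 8-lane compare handled as two 4-lane halves (plain loops stand in for the Lo and Hi SETCC nodes):

    #include <array>
    #include <cassert>

    std::array<bool, 8> setccSplit(const std::array<int, 8> &L,
                                   const std::array<int, 8> &R) {
      std::array<bool, 8> Out{};
      for (int i = 0; i < 4; ++i) Out[i] = L[i] < R[i]; // Lo = setcc(LL, RL)
      for (int i = 4; i < 8; ++i) Out[i] = L[i] < R[i]; // Hi = setcc(LH, RH)
      return Out;
    }

    int main() {
      std::array<int, 8> L{0, 1, 2, 3, 4, 5, 6, 7}, R{7, 6, 5, 4, 3, 2, 1, 0};
      auto Out = setccSplit(L, R);
      assert(Out[0] && !Out[7]); // lanes compare independently in each half
    }
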
6649
6650 // This function assumes all the vselect's arguments are CONCAT_VECTORS
6651// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6652static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6653 SDLoc DL(N);
6654 SDValue Cond = N->getOperand(0);
6655 SDValue LHS = N->getOperand(1);
6656 SDValue RHS = N->getOperand(2);
6657 EVT VT = N->getValueType(0);
6658 int NumElems = VT.getVectorNumElements();
6659 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6660 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6661 Cond.getOpcode() == ISD::BUILD_VECTOR);
6662
6663 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6664 // binary ones here.
6665 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6666 return SDValue();
6667
6668 // We're sure we have an even number of elements due to the
6669 // concat_vectors we have as arguments to vselect.
6670 // Skip BV elements until we find one that's not an UNDEF.
6671 // After we find a non-UNDEF element, keep looping until we get to half the
6672 // length of the BV and see if all the non-undef nodes are the same.
6673 ConstantSDNode *BottomHalf = nullptr;
6674 for (int i = 0; i < NumElems / 2; ++i) {
6675 if (Cond->getOperand(i)->isUndef())
6676 continue;
6677
6678 if (BottomHalf == nullptr)
6679 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6680 else if (Cond->getOperand(i).getNode() != BottomHalf)
6681 return SDValue();
6682 }
6683
6684 // Do the same for the second half of the BuildVector
6685 ConstantSDNode *TopHalf = nullptr;
6686 for (int i = NumElems / 2; i < NumElems; ++i) {
6687 if (Cond->getOperand(i)->isUndef())
6688 continue;
6689
6690 if (TopHalf == nullptr)
6691 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6692 else if (Cond->getOperand(i).getNode() != TopHalf)
6693 return SDValue();
6694 }
6695
6696 assert(TopHalf && BottomHalf &&
6697 "One half of the selector was all UNDEFs and the other was all the "
6698 "same value. This should have been addressed before this function.");
6699 return DAG.getNode(
6700 ISD::CONCAT_VECTORS, DL, VT,
6701 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6702 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6703}
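
A standalone model of the result, assuming a 4-lane vselect whose operands are two-part concats and whose condition is constant within each half: each half of the output is taken wholesale from LHS or RHS:

    #include <array>
    #include <cassert>

    // vselect <1,1,0,0>, concat(L0,L1), concat(R0,R1) --> concat(L0, R1)
    std::array<int, 4> model(const std::array<int, 2> &L0, const std::array<int, 2> &L1,
                             const std::array<int, 2> &R0, const std::array<int, 2> &R1,
                             bool BottomHalf, bool TopHalf) {
      const auto &Lo = BottomHalf ? L0 : R0; // BottomHalf == 0 picks the RHS half
      const auto &Hi = TopHalf ? L1 : R1;
      return {Lo[0], Lo[1], Hi[0], Hi[1]};
    }

    int main() {
      auto Out = model({1, 2}, {3, 4}, {5, 6}, {7, 8}, true, false);
      assert(Out[0] == 1 && Out[3] == 8);
    }
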
6704
6705SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6706 if (Level >= AfterLegalizeTypes)
6707 return SDValue();
6708
6709 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6710 SDValue Mask = MSC->getMask();
6711 SDValue Data = MSC->getValue();
6712 SDLoc DL(N);
6713
6714 // If the MSCATTER data type requires splitting and the mask is provided by a
6715 // SETCC, then split both nodes and their operands before legalization. This
6716 // prevents the type legalizer from unrolling SETCC into scalar comparisons
6717 // and enables future optimizations (e.g. min/max pattern matching on X86).
6718 if (Mask.getOpcode() != ISD::SETCC)
6719 return SDValue();
6720
6721 // Check if any splitting is required.
6722 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6723 TargetLowering::TypeSplitVector)
6724 return SDValue();
6725 SDValue MaskLo, MaskHi, Lo, Hi;
6726 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6727
6728 EVT LoVT, HiVT;
6729 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6730
6731 SDValue Chain = MSC->getChain();
6732
6733 EVT MemoryVT = MSC->getMemoryVT();
6734 unsigned Alignment = MSC->getOriginalAlignment();
6735
6736 EVT LoMemVT, HiMemVT;
6737 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6738
6739 SDValue DataLo, DataHi;
6740 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6741
6742 SDValue Scale = MSC->getScale();
6743 SDValue BasePtr = MSC->getBasePtr();
6744 SDValue IndexLo, IndexHi;
6745 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6746
6747 MachineMemOperand *MMO = DAG.getMachineFunction().
6748 getMachineMemOperand(MSC->getPointerInfo(),
6749 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
6750 Alignment, MSC->getAAInfo(), MSC->getRanges());
6751
6752 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
6753 Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6754 DL, OpsLo, MMO);
6755
6756 SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
6757 Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6758 DL, OpsHi, MMO);
6759
6760 AddToWorklist(Lo.getNode());
6761 AddToWorklist(Hi.getNode());
6762
6763 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6764}
6765
6766SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6767 if (Level >= AfterLegalizeTypes)
6768 return SDValue();
6769
6770 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
6771 SDValue Mask = MST->getMask();
6772 SDValue Data = MST->getValue();
6773 EVT VT = Data.getValueType();
6774 SDLoc DL(N);
6775
6776 // If the MSTORE data type requires splitting and the mask is provided by a
6777 // SETCC, then split both nodes and their operands before legalization. This
6778 // prevents the type legalizer from unrolling SETCC into scalar comparisons
6779 // and enables future optimizations (e.g. min/max pattern matching on X86).
6780 if (Mask.getOpcode() == ISD::SETCC) {
6781 // Check if any splitting is required.
6782 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6783 TargetLowering::TypeSplitVector)
6784 return SDValue();
6785
6786 SDValue MaskLo, MaskHi, Lo, Hi;
6787 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6788
6789 SDValue Chain = MST->getChain();
6790 SDValue Ptr = MST->getBasePtr();
6791
6792 EVT MemoryVT = MST->getMemoryVT();
6793 unsigned Alignment = MST->getOriginalAlignment();
6794
6795 // If the alignment equals the vector size, use half of it for the
6796 // second half.
6797 unsigned SecondHalfAlignment =
6798 (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6799
6800 EVT LoMemVT, HiMemVT;
6801 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6802
6803 SDValue DataLo, DataHi;
6804 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6805
6806 MachineMemOperand *MMO = DAG.getMachineFunction().
6807 getMachineMemOperand(MST->getPointerInfo(),
6808 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
6809 Alignment, MST->getAAInfo(), MST->getRanges());
6810
6811 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6812 MST->isTruncatingStore(),
6813 MST->isCompressingStore());
6814
6815 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6816 MST->isCompressingStore());
6817 unsigned HiOffset = LoMemVT.getStoreSize();
6818
6819 MMO = DAG.getMachineFunction().getMachineMemOperand(
6820 MST->getPointerInfo().getWithOffset(HiOffset),
6821 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
6822 MST->getAAInfo(), MST->getRanges());
6823
6824 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6825 MST->isTruncatingStore(),
6826 MST->isCompressingStore());
6827
6828 AddToWorklist(Lo.getNode());
6829 AddToWorklist(Hi.getNode());
6830
6831 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6832 }
6833 return SDValue();
6834}
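
The effect of the split on memory, modeled as scalar loops (a sketch; the real node writes only lanes whose mask bit is set, and the Hi half starts HiOffset = LoMemVT.getStoreSize() bytes past the base pointer):

    #include <array>
    #include <cassert>

    // Masked store of 8 lanes done as two masked stores of 4 lanes each.
    void mstoreSplit(int *P, const std::array<int, 8> &Data,
                     const std::array<bool, 8> &Mask) {
      for (int i = 0; i < 4; ++i)  // Lo store at the base pointer
        if (Mask[i]) P[i] = Data[i];
      for (int i = 4; i < 8; ++i)  // Hi store at base + HiOffset
        if (Mask[i]) P[i] = Data[i];
    }

    int main() {
      int Mem[8] = {0};
      mstoreSplit(Mem, {1, 2, 3, 4, 5, 6, 7, 8},
                  {true, false, true, false, true, false, true, false});
      assert(Mem[0] == 1 && Mem[1] == 0); // masked-off lanes are left untouched
    }
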
6835
6836SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6837 if (Level >= AfterLegalizeTypes)
6838 return SDValue();
6839
6840 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
6841 SDValue Mask = MGT->getMask();
6842 SDLoc DL(N);
6843
6844 // If the MGATHER result requires splitting and the mask is provided by a
6845 // SETCC, then split both nodes and their operands before legalization. This
6846 // prevents the type legalizer from unrolling SETCC into scalar comparisons
6847 // and enables future optimizations (e.g. min/max pattern matching on X86).
6848
6849 if (Mask.getOpcode() != ISD::SETCC)
6850 return SDValue();
6851
6852 EVT VT = N->getValueType(0);
6853
6854 // Check if any splitting is required.
6855 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6856 TargetLowering::TypeSplitVector)
6857 return SDValue();
6858
6859 SDValue MaskLo, MaskHi, Lo, Hi;
6860 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6861
6862 SDValue Src0 = MGT->getValue();
6863 SDValue Src0Lo, Src0Hi;
6864 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6865
6866 EVT LoVT, HiVT;
6867 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6868
6869 SDValue Chain = MGT->getChain();
6870 EVT MemoryVT = MGT->getMemoryVT();
6871 unsigned Alignment = MGT->getOriginalAlignment();
6872
6873 EVT LoMemVT, HiMemVT;
6874 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6875
6876 SDValue Scale = MGT->getScale();
6877 SDValue BasePtr = MGT->getBasePtr();
6878 SDValue Index = MGT->getIndex();
6879 SDValue IndexLo, IndexHi;
6880 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6881
6882 MachineMemOperand *MMO = DAG.getMachineFunction().
6883 getMachineMemOperand(MGT->getPointerInfo(),
6884 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
6885 Alignment, MGT->getAAInfo(), MGT->getRanges());
6886
6887 SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
6888 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6889 MMO);
6890
6891 SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
6892 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6893 MMO);
6894
6895 AddToWorklist(Lo.getNode());
6896 AddToWorklist(Hi.getNode());
6897
6898 // Build a factor node to remember that this load is independent of the
6899 // other one.
6900 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6901 Hi.getValue(1));
6902
6903 // Legalized the chain result - switch anything that used the old chain to
6904 // use the new one.
6905 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6906
6907 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6908
6909 SDValue RetOps[] = { GatherRes, Chain };
6910 return DAG.getMergeValues(RetOps, DL);
6911}
6912
6913SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6914 if (Level >= AfterLegalizeTypes)
6915 return SDValue();
6916
6917 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
6918 SDValue Mask = MLD->getMask();
6919 SDLoc DL(N);
6920
6921 // If the MLOAD result requires splitting and the mask is provided by a
6922 // SETCC, then split both nodes and their operands before legalization. This
6923 // prevents the type legalizer from unrolling SETCC into scalar comparisons
6924 // and enables future optimizations (e.g. min/max pattern matching on X86).
6925 if (Mask.getOpcode() == ISD::SETCC) {
6926 EVT VT = N->getValueType(0);
6927
6928 // Check if any splitting is required.
6929 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6930 TargetLowering::TypeSplitVector)
6931 return SDValue();
6932
6933 SDValue MaskLo, MaskHi, Lo, Hi;
6934 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6935
6936 SDValue Src0 = MLD->getSrc0();
6937 SDValue Src0Lo, Src0Hi;
6938 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6939
6940 EVT LoVT, HiVT;
6941 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6942
6943 SDValue Chain = MLD->getChain();
6944 SDValue Ptr = MLD->getBasePtr();
6945 EVT MemoryVT = MLD->getMemoryVT();
6946 unsigned Alignment = MLD->getOriginalAlignment();
6947
6948 // If the alignment equals the vector size, use half of it for the
6949 // second half.
6950 unsigned SecondHalfAlignment =
6951 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6952 Alignment/2 : Alignment;
6953
6954 EVT LoMemVT, HiMemVT;
6955 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6956
6957 MachineMemOperand *MMO = DAG.getMachineFunction().
6958 getMachineMemOperand(MLD->getPointerInfo(),
6959 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
6960 Alignment, MLD->getAAInfo(), MLD->getRanges());
6961
6962 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6963 ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6964
6965 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6966 MLD->isExpandingLoad());
6967 unsigned HiOffset = LoMemVT.getStoreSize();
6968
6969 MMO = DAG.getMachineFunction().getMachineMemOperand(
6970 MLD->getPointerInfo().getWithOffset(HiOffset),
6971 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
6972 MLD->getAAInfo(), MLD->getRanges());
6973
6974 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6975 ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6976
6977 AddToWorklist(Lo.getNode());
6978 AddToWorklist(Hi.getNode());
6979
6980 // Build a factor node to remember that this load is independent of the
6981 // other one.
6982 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6983 Hi.getValue(1));
6984
6985 // Legalized the chain result - switch anything that used the old chain to
6986 // use the new one.
6987 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6988
6989 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6990
6991 SDValue RetOps[] = { LoadRes, Chain };
6992 return DAG.getMergeValues(RetOps, DL);
6993 }
6994 return SDValue();
6995}
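
And the dual for the masked load above, including the pass-through (Src0) lanes that the real node preserves where the mask is clear (a scalar sketch):

    #include <array>
    #include <cassert>

    // Masked load of 8 lanes: masked-off lanes take the Src0 pass-through value.
    std::array<int, 8> mloadModel(const int *P, const std::array<bool, 8> &Mask,
                                  const std::array<int, 8> &Src0) {
      std::array<int, 8> Out;
      for (int i = 0; i < 8; ++i)
        Out[i] = Mask[i] ? P[i] : Src0[i];
      return Out;
    }

    int main() {
      int Mem[8] = {10, 11, 12, 13, 14, 15, 16, 17};
      auto Out = mloadModel(Mem, {true, false, true, false, true, false, true, false},
                            {0, 0, 0, 0, 0, 0, 0, 0});
      assert(Out[0] == 10 && Out[1] == 0); // clear lanes come from Src0
    }
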
6996
6997/// A vector select of 2 constant vectors can be simplified to math/logic to
6998/// avoid a variable select instruction and possibly avoid constant loads.
6999SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7000 SDValue Cond = N->getOperand(0);
7001 SDValue N1 = N->getOperand(1);
7002 SDValue N2 = N->getOperand(2);
7003 EVT VT = N->getValueType(0);
7004 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7005 !TLI.convertSelectOfConstantsToMath(VT) ||
7006 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7007 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7008 return SDValue();
7009
7010 // Check if we can use the condition value to increment/decrement a single
7011 // constant value. This simplifies a select to an add and removes a constant
7012 // load/materialization from the general case.
7013 bool AllAddOne = true;
7014 bool AllSubOne = true;
7015 unsigned Elts = VT.getVectorNumElements();
7016 for (unsigned i = 0; i != Elts; ++i) {
7017 SDValue N1Elt = N1.getOperand(i);
7018 SDValue N2Elt = N2.getOperand(i);
7019 if (N1Elt.isUndef() || N2Elt.isUndef())
7020 continue;
7021
7022 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7023 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7024 if (C1 != C2 + 1)
7025 AllAddOne = false;
7026 if (C1 != C2 - 1)
7027 AllSubOne = false;
7028 }
7029
7030 // Further simplifications for the extra-special cases where the constants are
7031 // all 0 or all -1 should be implemented as folds of these patterns.
7032 SDLoc DL(N);
7033 if (AllAddOne || AllSubOne) {
7034 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7035 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7036 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7037 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7038 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7039 }
7040
7041 // The general case for select-of-constants:
7042 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7043 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7044 // leave that to a machine-specific pass.
7045 return SDValue();
7046}
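
A scalar model of the add-one case per lane, assuming constants C+1 and C (here 3 and 2): the zero-extended condition contributes exactly the +1:

    #include <cassert>
    #include <cstdint>

    // vselect Cond, C+1, C --> add (zext Cond), C, lane by lane (C = 2).
    uint32_t selForm(bool Cond) { return Cond ? 3u : 2u; }
    uint32_t addForm(bool Cond) { return (uint32_t)Cond + 2u; }

    int main() {
      assert(selForm(true) == addForm(true) && selForm(false) == addForm(false));
    }
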
7047
7048SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7049 SDValue N0 = N->getOperand(0);
7050 SDValue N1 = N->getOperand(1);
7051 SDValue N2 = N->getOperand(2);
7052 SDLoc DL(N);
7053
7054 // fold (vselect C, X, X) -> X
7055 if (N1 == N2)
7056 return N1;
7057
7058 // Canonicalize integer abs.
7059 // vselect (setg[te] X, 0), X, -X ->
7060 // vselect (setgt X, -1), X, -X ->
7061 // vselect (setl[te] X, 0), -X, X ->
7062 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7063 if (N0.getOpcode() == ISD::SETCC) {
7064 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7065 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7066 bool isAbs = false;
7067 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7068
7069 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7070 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7071 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7072 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7073 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7074 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7075 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7076
7077 if (isAbs) {
7078 EVT VT = LHS.getValueType();
7079 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7080 return DAG.getNode(ISD::ABS, DL, VT, LHS);
7081
7082 SDValue Shift = DAG.getNode(
7083 ISD::SRA, DL, VT, LHS,
7084 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7085 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7086 AddToWorklist(Shift.getNode());
7087 AddToWorklist(Add.getNode());
7088 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7089 }
7090 }
7091
7092 if (SimplifySelectOps(N, N1, N2))
7093 return SDValue(N, 0); // Don't revisit N.
7094
7095 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
7096 if (ISD::isBuildVectorAllOnes(N0.getNode()))
7097 return N1;
7098 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
7099 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7100 return N2;
7101
7102 // The ConvertSelectToConcatVector function assumes both the above
7103 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
7104 // and addressed.
7105 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
7106 N2.getOpcode() == ISD::CONCAT_VECTORS &&
7107 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
7108 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
7109 return CV;
7110 }
7111
7112 if (SDValue V = foldVSelectOfConstants(N))
7113 return V;
7114
7115 return SDValue();
7116}
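
A standalone scalar model of the abs canonicalization used above, Y = sra(X, size(X)-1); (X + Y) ^ Y, computed in unsigned arithmetic to match the wrapping DAG semantics (including X == INT32_MIN):

    #include <cassert>
    #include <cstdint>

    int32_t absModel(int32_t X) {
      uint32_t Y = (uint32_t)(X >> 31);        // sra(X, size(X)-1): 0 or 0xFFFFFFFF
      return (int32_t)(((uint32_t)X + Y) ^ Y); // xor (add (X, Y), Y)
    }

    int main() {
      assert(absModel(5) == 5 && absModel(-5) == 5 && absModel(0) == 0);
    }
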
7117
7118SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7119 SDValue N0 = N->getOperand(0);
7120 SDValue N1 = N->getOperand(1);
7121 SDValue N2 = N->getOperand(2);
7122 SDValue N3 = N->getOperand(3);
7123 SDValue N4 = N->getOperand(4);
7124 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7125
7126 // fold select_cc lhs, rhs, x, x, cc -> x
7127 if (N2 == N3)
7128 return N2;
7129
7130 // Determine if the condition we're dealing with is constant
7131 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7132 CC, SDLoc(N), false)) {
7133 AddToWorklist(SCC.getNode());
7134
7135 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7136 if (!SCCC->isNullValue())
7137 return N2; // cond always true -> true val
7138 else
7139 return N3; // cond always false -> false val
7140 } else if (SCC->isUndef()) {
7141 // When the condition is UNDEF, just return the first operand. This is
7142 // coherent with DAG creation; no setcc node is created in this case.
7143 return N2;
7144 } else if (SCC.getOpcode() == ISD::SETCC) {
7145 // Fold to a simpler select_cc
7146 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7147 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7148 SCC.getOperand(2));
7149 }
7150 }
7151
7152 // If we can fold this based on the true/false value, do so.
7153 if (SimplifySelectOps(N, N2, N3))
7154 return SDValue(N, 0); // Don't revisit N.
7155
7156 // fold select_cc into other things, such as min/max/abs
7157 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7158}
7159
7160SDValue DAGCombiner::visitSETCC(SDNode *N) {
7161 // setcc is very commonly used as an argument to brcond. This pattern
7162 // also lends itself to numerous combines and, as a result, it is desirable
7163 // to keep the argument to a brcond as a setcc as much as possible.
7164 bool PreferSetCC =
7165 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7166
7167 SDValue Combined = SimplifySetCC(
7168 N->getValueType(0), N->getOperand(0), N->getOperand(1),
7169 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7170
7171 if (!Combined)
7172 return SDValue();
7173
7174 // If we prefer to have a setcc, and we don't, we'll try our best to
7175 // recreate one using rebuildSetCC.
7176 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7177 SDValue NewSetCC = rebuildSetCC(Combined);
7178
7179 // We don't have anything interesting to combine to.
7180 if (NewSetCC.getNode() == N)
7181 return SDValue();
7182
7183 if (NewSetCC)
7184 return NewSetCC;
7185 }
7186
7187 return Combined;
7188}
7189
7190SDValue DAGCombiner::visitSETCCE(SDNode *N) {
7191 SDValue LHS = N->getOperand(0);
7192 SDValue RHS = N->getOperand(1);
7193 SDValue Carry = N->getOperand(2);
7194 SDValue Cond = N->getOperand(3);
7195
7196 // If Carry is false, fold to a regular SETCC.
7197 if (Carry.getOpcode() == ISD::CARRY_FALSE)
7198 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7199
7200 return SDValue();
7201}
7202
7203SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7204 SDValue LHS = N->getOperand(0);
7205 SDValue RHS = N->getOperand(1);
7206 SDValue Carry = N->getOperand(2);
7207 SDValue Cond = N->getOperand(3);
7208
7209 // If Carry is false, fold to a regular SETCC.
7210 if (isNullConstant(Carry))
7211 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7212
7213 return SDValue();
7214}
7215
7216/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
7217/// a build_vector of constants.
7218/// This function is called by the DAGCombiner when visiting sext/zext/aext
7219/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
7220/// Vector extends are not folded if operations are legal; this is to
7221/// avoid introducing illegal build_vector dag nodes.
7222static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
7223 SelectionDAG &DAG, bool LegalTypes,
7224 bool LegalOperations) {
7225 unsigned Opcode = N->getOpcode();
7226 SDValue N0 = N->getOperand(0);
7227 EVT VT = N->getValueType(0);
7228
7229 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
7230 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7231 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
7232 && "Expected EXTEND dag node in input!");
7233
7234 // fold (sext c1) -> c1
7235 // fold (zext c1) -> c1
7236 // fold (aext c1) -> c1
7237 if (isa<ConstantSDNode>(N0))
7238 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
7239
7240 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
7241 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
7242 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
7243 EVT SVT = VT.getScalarType();
7244 if (!(VT.isVector() &&
7245 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
7246 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
7247 return nullptr;
7248
7249 // We can fold this node into a build_vector.
7250 unsigned VTBits = SVT.getSizeInBits();
7251 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
7252 SmallVector<SDValue, 8> Elts;
7253 unsigned NumElts = VT.getVectorNumElements();
7254 SDLoc DL(N);
7255
7256 for (unsigned i=0; i != NumElts; ++i) {
7257 SDValue Op = N0->getOperand(i);
7258 if (Op->isUndef()) {
7259 Elts.push_back(DAG.getUNDEF(SVT));
7260 continue;
7261 }
7262
7263 SDLoc DL(Op);
7264 // Get the constant value and if needed trunc it to the size of the type.
7265 // Nodes like build_vector might have constants wider than the scalar type.
7266 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
7267 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
7268 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
7269 else
7270 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
7271 }
7272
7273 return DAG.getBuildVector(VT, DL, Elts).getNode();
7274}
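
The per-element arithmetic above, modeled on scalars: the constant is first truncated to the source scalar width (EVTBits), then sign- or zero-extended to the destination width (a sketch with i8 -> i32):

    #include <cassert>
    #include <cstdint>

    int32_t sextElt(uint32_t C) { return (int32_t)(int8_t)C; }  // trunc to i8, then sext
    int32_t zextElt(uint32_t C) { return (int32_t)(uint8_t)C; } // trunc to i8, then zext

    int main() {
      assert(sextElt(0xFF) == -1);  // the i8 element's sign bit propagates
      assert(zextElt(0xFF) == 255); // zext keeps the raw 8 bits
    }
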
7275
7276// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7277// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7278 // transformation. Returns true if extensions are possible and the
7279 // above-mentioned transformation is profitable.
7280static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
7281 unsigned ExtOpc,
7282 SmallVectorImpl<SDNode *> &ExtendNodes,
7283 const TargetLowering &TLI) {
7284 bool HasCopyToRegUses = false;
7285 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
7286 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7287 UE = N0.getNode()->use_end();
7288 UI != UE; ++UI) {
7289 SDNode *User = *UI;
7290 if (User == N)
7291 continue;
7292 if (UI.getUse().getResNo() != N0.getResNo())
7293 continue;
7294 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7295 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7296 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7297 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7298 // Sign bits will be lost after a zext.
7299 return false;
7300 bool Add = false;
7301 for (unsigned i = 0; i != 2; ++i) {
7302 SDValue UseOp = User->getOperand(i);
7303 if (UseOp == N0)
7304 continue;
7305 if (!isa<ConstantSDNode>(UseOp))
7306 return false;
7307 Add = true;
7308 }
7309 if (Add)
7310 ExtendNodes.push_back(User);
7311 continue;
7312 }
7313 // If truncates aren't free and there are users we can't
7314 // extend, it isn't worthwhile.
7315 if (!isTruncFree)
7316 return false;
7317 // Remember if this value is live-out.
7318 if (User->getOpcode() == ISD::CopyToReg)
7319 HasCopyToRegUses = true;
7320 }
7321
7322 if (HasCopyToRegUses) {
7323 bool BothLiveOut = false;
7324 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7325 UI != UE; ++UI) {
7326 SDUse &Use = UI.getUse();
7327 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
7328 BothLiveOut = true;
7329 break;
7330 }
7331 }
7332 if (BothLiveOut)
7333 // Both unextended and extended values are live out. There had better be
7334 // a good reason for the transformation.
7335 return ExtendNodes.size();
7336 }
7337 return true;
7338}
7339
7340void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7341 SDValue OrigLoad, SDValue ExtLoad,
7342 const SDLoc &DL, ISD::NodeType ExtType) {
7343 // Extend SetCC uses if necessary.
7344 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
7345 SDNode *SetCC = SetCCs[i];
7346 SmallVector<SDValue, 4> Ops;
7347
7348 for (unsigned j = 0; j != 2; ++j) {
7349 SDValue SOp = SetCC->getOperand(j);
7350 if (SOp == OrigLoad)
7351 Ops.push_back(ExtLoad);
7352 else
7353 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7354 }
7355
7356 Ops.push_back(SetCC->getOperand(2));
7357 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7358 }
7359}
7360
7361// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
7362SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
7363 SDValue N0 = N->getOperand(0);
7364 EVT DstVT = N->getValueType(0);
7365 EVT SrcVT = N0.getValueType();
7366
7367 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
7368 N->getOpcode() == ISD::ZERO_EXTEND) &&
7369 "Unexpected node type (not an extend)!");
7370
7371 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
7372 // For example, on a target with legal v4i32, but illegal v8i32, turn:
7373 // (v8i32 (sext (v8i16 (load x))))
7374 // into:
7375 // (v8i32 (concat_vectors (v4i32 (sextload x)),
7376 // (v4i32 (sextload (x + 16)))))
7377 // Where uses of the original load, i.e.:
7378 // (v8i16 (load x))
7379 // are replaced with:
7380 // (v8i16 (truncate
7381 // (v8i32 (concat_vectors (v4i32 (sextload x)),
7382 // (v4i32 (sextload (x + 16)))))))
7383 //
7384 // This combine is only applicable to illegal, but splittable, vectors.
7385 // All legal types, and illegal non-vector types, are handled elsewhere.
7386 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
7387 //
7388 if (N0->getOpcode() != ISD::LOAD)
7389 return SDValue();
7390
7391 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7392
7393 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
7394 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
7395 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
7396 return SDValue();
7397
7398 SmallVector<SDNode *, 4> SetCCs;
7399 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
7400 return SDValue();
7401
7402 ISD::LoadExtType ExtType =
7403 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7404
7405 // Try to split the vector types to get down to legal types.
7406 EVT SplitSrcVT = SrcVT;
7407 EVT SplitDstVT = DstVT;
7408 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
7409 SplitSrcVT.getVectorNumElements() > 1) {
7410 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
7411 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
7412 }
7413
7414 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
7415 return SDValue();
7416
7417 SDLoc DL(N);
7418 const unsigned NumSplits =
7419 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
7420 const unsigned Stride = SplitSrcVT.getStoreSize();
7421 SmallVector<SDValue, 4> Loads;
7422 SmallVector<SDValue, 4> Chains;
7423
7424 SDValue BasePtr = LN0->getBasePtr();
7425 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
7426 const unsigned Offset = Idx * Stride;
7427 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
7428
7429 SDValue SplitLoad = DAG.getExtLoad(
7430 ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
7431 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
7432 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7433
7434 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
7435 DAG.getConstant(Stride, DL, BasePtr.getValueType()));
7436
7437 Loads.push_back(SplitLoad.getValue(0));
7438 Chains.push_back(SplitLoad.getValue(1));
7439 }
7440
7441 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
7442 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
7443
7444 // Simplify TF.
7445 AddToWorklist(NewChain.getNode());
7446
7447 CombineTo(N, NewValue);
7448
7449 // Replace uses of the original load (before extension)
7450 // with a truncate of the concatenated sextloaded vectors.
7451 SDValue Trunc =
7452 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
7453 ExtendSetCCUses(SetCCs, N0, NewValue, DL,
7454 (ISD::NodeType)N->getOpcode());
7455 CombineTo(N0.getNode(), Trunc, NewChain);
7456 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7457}
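
A standalone model of the split described in the block comment above: a sign-extending load of eight i16 elements performed as two four-element sextloads, with the second half read Stride bytes past the base (a sketch):

    #include <array>
    #include <cassert>
    #include <cstdint>

    std::array<int32_t, 8> splitSextLoad(const int16_t *Base) {
      std::array<int32_t, 8> Out;
      for (int i = 0; i < 4; ++i) Out[i] = Base[i];         // first sextload at Base
      for (int i = 0; i < 4; ++i) Out[4 + i] = Base[4 + i]; // second sextload at Base + Stride
      return Out;
    }

    int main() {
      int16_t Mem[8] = {-1, 2, -3, 4, -5, 6, -7, 8};
      auto Out = splitSextLoad(Mem);
      assert(Out[0] == -1 && Out[7] == 8); // each i16 is sign-extended to i32
    }
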
7458
7459/// If we're narrowing or widening the result of a vector select and the final
7460/// size is the same size as a setcc (compare) feeding the select, then try to
7461/// apply the cast operation to the select's operands because matching vector
7462/// sizes for a select condition and other operands should be more efficient.
7463SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7464 unsigned CastOpcode = Cast->getOpcode();
7465 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7466 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7467 CastOpcode == ISD::FP_ROUND) &&
7468 "Unexpected opcode for vector select narrowing/widening");
7469
7470 // We only do this transform before legal ops because the pattern may be
7471 // obfuscated by target-specific operations after legalization. Do not create
7472 // an illegal select op, however, because that may be difficult to lower.
7473 EVT VT = Cast->getValueType(0);
7474 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7475 return SDValue();
7476
7477 SDValue VSel = Cast->getOperand(0);
7478 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7479 VSel.getOperand(0).getOpcode() != ISD::SETCC)
7480 return SDValue();
7481
7482 // Does the setcc have the same vector size as the casted select?
7483 SDValue SetCC = VSel.getOperand(0);
7484 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7485 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7486 return SDValue();
7487
7488 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7489 SDValue A = VSel.getOperand(1);
7490 SDValue B = VSel.getOperand(2);
7491 SDValue CastA, CastB;
7492 SDLoc DL(Cast);
7493 if (CastOpcode == ISD::FP_ROUND) {
7494 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7495 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7496 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7497 } else {
7498 CastA = DAG.getNode(CastOpcode, DL, VT, A);
7499 CastB = DAG.getNode(CastOpcode, DL, VT, B);
7500 }
7501 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7502}
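
The scalar shape of the rewrite, pushing the cast into both select arms; the two forms compute the same value, the payoff in the vector case being matching operand sizes for the select (a sketch):

    #include <cassert>

    // cast (vsel C, A, B) --> vsel C, (cast A), (cast B), shown on scalars.
    float castOfSelect(bool C, double A, double B) { return (float)(C ? A : B); }
    float selectOfCasts(bool C, double A, double B) { return C ? (float)A : (float)B; }

    int main() {
      assert(castOfSelect(true, 1.5, 2.5) == selectOfCasts(true, 1.5, 2.5));
      assert(castOfSelect(false, 1.5, 2.5) == selectOfCasts(false, 1.5, 2.5));
    }
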
7503
7504SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7505 SDValue N0 = N->getOperand(0);
7506 EVT VT = N->getValueType(0);
7507 SDLoc DL(N);
7508
7509 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7510 LegalOperations))
7511 return SDValue(Res, 0);
7512
7513 // fold (sext (sext x)) -> (sext x)
7514 // fold (sext (aext x)) -> (sext x)
7515 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7516 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7517
7518 if (N0.getOpcode() == ISD::TRUNCATE) {
7519 // fold (sext (truncate (load x))) -> (sext (smaller load x))
7520 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7521 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7522 SDNode *oye = N0.getOperand(0).getNode();
7523 if (NarrowLoad.getNode() != N0.getNode()) {
7524 CombineTo(N0.getNode(), NarrowLoad);
7525 // CombineTo deleted the truncate, if needed, but not what's under it.
7526 AddToWorklist(oye);
7527 }
7528 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7529 }
7530
7531 // See if the value being truncated is already sign extended. If so, just
7532 // eliminate the trunc/sext pair.
7533 SDValue Op = N0.getOperand(0);
7534 unsigned OpBits = Op.getScalarValueSizeInBits();
7535 unsigned MidBits = N0.getScalarValueSizeInBits();
7536 unsigned DestBits = VT.getScalarSizeInBits();
7537 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7538
7539 if (OpBits == DestBits) {
7540 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
7541 // bits, the value is already sign extended and can be used directly.
7542 if (NumSignBits > DestBits-MidBits)
7543 return Op;
7544 } else if (OpBits < DestBits) {
7545 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
7546 // bits, just sext from i32.
7547 if (NumSignBits > OpBits-MidBits)
7548 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7549 } else {
7550 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
7551 // bits, just truncate to i32.
7552 if (NumSignBits > OpBits-MidBits)
7553 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7554 }
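// Worked example for the three cases above (a sketch; X is hypothetical):
// for (sext (trunc i32 X to i8) to i64), OpBits = 32, MidBits = 8 and
// DestBits = 64, so the middle case applies: if ComputeNumSignBits(X) > 24,
// every bit the truncate discards is a copy of the sign bit and the pair
// collapses to (sext i32 X to i64).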
7555
7556 // fold (sext (truncate x)) -> (sextinreg x).
7557 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7558 N0.getValueType())) {
7559 if (OpBits < DestBits)
7560 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7561 else if (OpBits > DestBits)
7562 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7563 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7564 DAG.getValueType(N0.getValueType()));
7565 }
7566 }
7567
7568 // fold (sext (load x)) -> (sext (truncate (sextload x)))
7569 // Only generate vector extloads when 1) they're legal, and 2) they are
7570 // deemed desirable by the target.
7571 if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7572 ((!LegalOperations && !VT.isVector() &&
7573 !cast<LoadSDNode>(N0)->isVolatile()) ||
7574 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
7575 bool DoXform = true;
7576 SmallVector<SDNode*, 4> SetCCs;
7577 if (!N0.hasOneUse())
7578 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::SIGN_EXTEND, SetCCs,
7579 TLI);
7580 if (VT.isVector())
7581 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7582 if (DoXform) {
7583 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7584 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7585 LN0->getBasePtr(), N0.getValueType(),
7586 LN0->getMemOperand());
7587 ExtendSetCCUses(SetCCs, N0, ExtLoad, DL, ISD::SIGN_EXTEND);
7588 // If the load value is used only by N, replace it via CombineTo N.
7589 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7590 CombineTo(N, ExtLoad);
7591 if (NoReplaceTrunc) {
7592 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7593 } else {
7594 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7595 N0.getValueType(), ExtLoad);
7596 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7597 }
7598 return SDValue(N, 0);
7599 }
7600 }
7601
7602 // fold (sext (load x)) to multiple smaller sextloads.
7603 // Only on illegal but splittable vectors.
7604 if (SDValue ExtLoad = CombineExtLoad(N))
7605 return ExtLoad;
7606
7607 // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7608 // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7609 if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7610 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7611 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7612 EVT MemVT = LN0->getMemoryVT();
7613 if ((!LegalOperations && !LN0->isVolatile()) ||
7614 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
7615 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7616 LN0->getBasePtr(), MemVT,
7617 LN0->getMemOperand());
7618 CombineTo(N, ExtLoad);
7619 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7620 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7621 }
7622 }
7623
7624 // fold (sext (and/or/xor (load x), cst)) ->
7625 // (and/or/xor (sextload x), (sext cst))
7626 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7627 N0.getOpcode() == ISD::XOR) &&
7628 isa<LoadSDNode>(N0.getOperand(0)) &&
7629 N0.getOperand(1).getOpcode() == ISD::Constant &&
7630 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7631 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
7632 EVT MemVT = LN00->getMemoryVT();
7633 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
7634 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
7635 SmallVector<SDNode*, 4> SetCCs;
7636 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
7637 ISD::SIGN_EXTEND, SetCCs, TLI);
7638 if (DoXform) {
7639 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
7640 LN00->getChain(), LN00->getBasePtr(),
7641 LN00->getMemoryVT(),
7642 LN00->getMemOperand());
7643 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7644 Mask = Mask.sext(VT.getSizeInBits());
7645 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7646 ExtLoad, DAG.getConstant(Mask, DL, VT));
7647 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, DL,
7648 ISD::SIGN_EXTEND);
7649 bool NoReplaceTruncAnd = !N0.hasOneUse();
7650 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
7651 CombineTo(N, And);
7652 // If N0 has multiple uses, change other uses as well.
7653 if (NoReplaceTruncAnd) {
7654 SDValue TruncAnd =
7655 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7656 CombineTo(N0.getNode(), TruncAnd);
7657 }
7658 if (NoReplaceTrunc) {
7659 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
7660 } else {
7661 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
7662 LN00->getValueType(0), ExtLoad);
7663 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
7664 }
7665 return SDValue(N,0); // Return N so it doesn't get rechecked!
7666 }
7667 }
7668 }
7669
7670 if (N0.getOpcode() == ISD::SETCC) {
7671 SDValue N00 = N0.getOperand(0);
7672 SDValue N01 = N0.getOperand(1);
7673 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7674 EVT N00VT = N0.getOperand(0).getValueType();
7675
7676 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7677 // Only do this before legalize for now.
7678 if (VT.isVector() && !LegalOperations &&
7679 TLI.getBooleanContents(N00VT) ==
7680 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7681 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7682 // of the same size as the compared operands. Only optimize sext(setcc())
7683 // if this is the case.
7684 EVT SVT = getSetCCResultType(N00VT);
7685
7686 // We know that the # elements of the results is the same as the
7687 // # elements of the compare (and the # elements of the compare result
7688 // for that matter). Check to see that they are the same size. If so,
7689 // we know that the element size of the sext'd result matches the
7690 // element size of the compare operands.
7691 if (VT.getSizeInBits() == SVT.getSizeInBits())
7692 return DAG.getSetCC(DL, VT, N00, N01, CC);
7693
7694 // If the desired elements are smaller or larger than the source
7695 // elements, we can use a matching integer vector type and then
7696 // truncate/sign extend.
7697 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7698 if (SVT == MatchingVecType) {
7699 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7700 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7701 }
7702 }
7703
7704 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7705 // Here, T can be 1 or -1, depending on the type of the setcc and
7706 // getBooleanContents().
7707 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7708
7709 // To determine the "true" side of the select, we need to know the high bit
7710 // of the value returned by the setcc if it evaluates to true.
7711 // If the type of the setcc is i1, then the true case of the select is just
7712 // sext(i1 1), that is, -1.
7713 // If the type of the setcc is larger (say, i8) then the value of the high
7714 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7715 // of the appropriate width.
7716 SDValue ExtTrueVal = (SetCCWidth == 1)
7717 ? DAG.getAllOnesConstant(DL, VT)
7718 : DAG.getBoolConstant(true, DL, VT, N00VT);
7719 SDValue Zero = DAG.getConstant(0, DL, VT);
7720 if (SDValue SCC =
7721 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7722 return SCC;
7723
7724 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
7725 EVT SetCCVT = getSetCCResultType(N00VT);
7726 // Don't do this transform for i1 because there's a select transform
7727 // that would reverse it.
7728 // TODO: We should not do this transform at all without a target hook
7729 // because a sext is likely cheaper than a select?
7730 if (SetCCVT.getScalarSizeInBits() != 1 &&
7731 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7732 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7733 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7734 }
7735 }
7736 }
7737
7738 // fold (sext x) -> (zext x) if the sign bit is known zero.
7739 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7740 DAG.SignBitIsZero(N0))
7741 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7742
7743 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7744 return NewVSel;
7745
7746 return SDValue();
7747}
7748
7749// isTruncateOf - If N is a truncate of some other value, return true and
7750// record the value being truncated in Op and which of Op's bits are zero/one in Known.
7751// This function computes KnownBits to avoid a duplicated call to
7752// computeKnownBits in the caller.
7753static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7754 KnownBits &Known) {
7755 if (N->getOpcode() == ISD::TRUNCATE) {
7756 Op = N->getOperand(0);
7757 DAG.computeKnownBits(Op, Known);
7758 return true;
7759 }
7760
7761 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7762 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7763 return false;
7764
7765 SDValue Op0 = N->getOperand(0);
7766 SDValue Op1 = N->getOperand(1);
7767 assert(Op0.getValueType() == Op1.getValueType());
7768
7769 if (isNullConstant(Op0))
7770 Op = Op1;
7771 else if (isNullConstant(Op1))
7772 Op = Op0;
7773 else
7774 return false;
7775
7776 DAG.computeKnownBits(Op, Known);
7777
7778 if (!(Known.Zero | 1).isAllOnesValue())
7779 return false;
7780
7781 return true;
7782}
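// Intuition for the setcc case above (a sketch): given (setcc X, 0, setne)
// with an i1 result, if every bit of X above bit 0 is known zero, then
// X != 0 holds exactly when bit 0 of X is set, so the node is equivalent to
// (trunc X to i1); (Known.Zero | 1).isAllOnesValue() checks exactly that.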
7783
7784SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7785 SDValue N0 = N->getOperand(0);
7786 EVT VT = N->getValueType(0);
7787
7788 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7789 LegalOperations))
7790 return SDValue(Res, 0);
7791
7792 // fold (zext (zext x)) -> (zext x)
7793 // fold (zext (aext x)) -> (zext x)
7794 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7795 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7796 N0.getOperand(0));
7797
7798 // fold (zext (truncate x)) -> (zext x) or
7799 // (zext (truncate x)) -> (truncate x)
7800 // This is valid when the truncated bits of x are already zero.
7801 // FIXME: We should extend this to work for vectors too.
7802 SDValue Op;
7803 KnownBits Known;
7804 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
7805 APInt TruncatedBits =
7806 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7807 APInt(Op.getValueSizeInBits(), 0) :
7808 APInt::getBitsSet(Op.getValueSizeInBits(),
7809 N0.getValueSizeInBits(),
7810 std::min(Op.getValueSizeInBits(),
7811 VT.getSizeInBits()));
7812 if (TruncatedBits.isSubsetOf(Known.Zero))
7813 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7814 }
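// Worked example for the fold above (a sketch; X is hypothetical): for
// (zext (trunc i64 X to i32) to i64), TruncatedBits covers bits [32, 64);
// if those bits of X are already known zero, the zext/trunc pair is a no-op
// and getZExtOrTrunc returns X itself.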
7815
7816 // fold (zext (truncate x)) -> (and x, mask)
7817 if (N0.getOpcode() == ISD::TRUNCATE) {
7818 // fold (zext (truncate (load x))) -> (zext (smaller load x))
7819 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7820 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7821 SDNode *oye = N0.getOperand(0).getNode();
7822 if (NarrowLoad.getNode() != N0.getNode()) {
7823 CombineTo(N0.getNode(), NarrowLoad);
7824 // CombineTo deleted the truncate, if needed, but not what's under it.
7825 AddToWorklist(oye);
7826 }
7827 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7828 }
7829
7830 EVT SrcVT = N0.getOperand(0).getValueType();
7831 EVT MinVT = N0.getValueType();
7832
7833 // Try to mask before the extension to avoid having to generate a larger mask,
7834 // possibly over several sub-vectors.
7835 if (SrcVT.bitsLT(VT)) {
7836 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7837 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7838 SDValue Op = N0.getOperand(0);
7839 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7840 AddToWorklist(Op.getNode());
7841 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7842 // Transfer the debug info; the new node is equivalent to N0.
7843 DAG.transferDbgValues(N0, ZExtOrTrunc);
7844 return ZExtOrTrunc;
7845 }
7846 }
7847
7848 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7849 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7850 AddToWorklist(Op.getNode());
7851 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7852 // We may safely transfer the debug info describing the truncate node over
7853 // to the equivalent and operation.
7854 DAG.transferDbgValues(N0, And);
7855 return And;
7856 }
7857 }
7858
7859 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7860 // if either of the casts is not free.
7861 if (N0.getOpcode() == ISD::AND &&
7862 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7863 N0.getOperand(1).getOpcode() == ISD::Constant &&
7864 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7865 N0.getValueType()) ||
7866 !TLI.isZExtFree(N0.getValueType(), VT))) {
7867 SDValue X = N0.getOperand(0).getOperand(0);
7868 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7869 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7870 Mask = Mask.zext(VT.getSizeInBits());
7871 SDLoc DL(N);
7872 return DAG.getNode(ISD::AND, DL, VT,
7873 X, DAG.getConstant(Mask, DL, VT));
7874 }
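// Worked example for the fold above (a sketch; X is hypothetical):
//   (zext (and (trunc i64 X to i32), 0xFF) to i64) --> (and i64 X, 0xFF)
// The mask constant is zero-extended to the wide type and both casts
// disappear; the fold is gated on one of the casts not being free, since
// otherwise the original form already costs nothing.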
7875
7876 // fold (zext (load x)) -> (zext (truncate (zextload x)))
7877 // Only generate vector extloads when 1) they're legal, and 2) they are
7878 // deemed desirable by the target.
7879 if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7880 ((!LegalOperations && !VT.isVector() &&
7881 !cast<LoadSDNode>(N0)->isVolatile()) ||
7882 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7883 bool DoXform = true;
7884 SmallVector<SDNode*, 4> SetCCs;
7885 if (!N0.hasOneUse())
7886 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ZERO_EXTEND, SetCCs,
7887 TLI);
7888 if (VT.isVector())
7889 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7890 if (DoXform) {
7891 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7892 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7893 LN0->getChain(),
7894 LN0->getBasePtr(), N0.getValueType(),
7895 LN0->getMemOperand());
7896
7897 ExtendSetCCUses(SetCCs, N0, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7898 // If the load value is used only by N, replace it via CombineTo N.
7899 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7900 CombineTo(N, ExtLoad);
7901 if (NoReplaceTrunc) {
7902 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7903 } else {
7904 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7905 N0.getValueType(), ExtLoad);
7906 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7907 }
7908 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7909 }
7910 }
7911
7912 // fold (zext (load x)) to multiple smaller zextloads.
7913 // Only on illegal but splittable vectors.
7914 if (SDValue ExtLoad = CombineExtLoad(N))
7915 return ExtLoad;
7916
7917 // fold (zext (and/or/xor (load x), cst)) ->
7918 // (and/or/xor (zextload x), (zext cst))
7919 // Unless (and (load x) cst) will match as a zextload already and has
7920 // additional users.
7921 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7922 N0.getOpcode() == ISD::XOR) &&
7923 isa<LoadSDNode>(N0.getOperand(0)) &&
7924 N0.getOperand(1).getOpcode() == ISD::Constant &&
7925 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7926 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
7927 EVT MemVT = LN00->getMemoryVT();
7928 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
7929 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
7930 bool DoXform = true;
7931 SmallVector<SDNode*, 4> SetCCs;
7932 if (!N0.hasOneUse()) {
7933 if (N0.getOpcode() == ISD::AND) {
7934 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7935 EVT LoadResultTy = AndC->getValueType(0);
7936 EVT ExtVT;
7937 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
7938 DoXform = false;
7939 }
7940 }
7941 if (DoXform)
7942 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
7943 ISD::ZERO_EXTEND, SetCCs, TLI);
7944 if (DoXform) {
7945 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
7946 LN00->getChain(), LN00->getBasePtr(),
7947 LN00->getMemoryVT(),
7948 LN00->getMemOperand());
7949 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7950 Mask = Mask.zext(VT.getSizeInBits());
7951 SDLoc DL(N);
7952 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7953 ExtLoad, DAG.getConstant(Mask, DL, VT));
7954 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, DL,
7955 ISD::ZERO_EXTEND);
7956 bool NoReplaceTruncAnd = !N0.hasOneUse();
7957 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
7958 CombineTo(N, And);
7959 // If N0 has multiple uses, change other uses as well.
7960 if (NoReplaceTruncAnd) {
7961 SDValue TruncAnd =
7962 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7963 CombineTo(N0.getNode(), TruncAnd);
7964 }
7965 if (NoReplaceTrunc) {
7966 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
7967 } else {
7968 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
7969 LN00->getValueType(0), ExtLoad);
7970 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
7971 }
7972 return SDValue(N,0); // Return N so it doesn't get rechecked!
7973 }
7974 }
7975 }
7976
7977 // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7978 // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7979 if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7980 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7981 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7982 EVT MemVT = LN0->getMemoryVT();
7983 if ((!LegalOperations && !LN0->isVolatile()) ||
7984 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7985 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7986 LN0->getChain(),
7987 LN0->getBasePtr(), MemVT,
7988 LN0->getMemOperand());
7989 CombineTo(N, ExtLoad);
7990 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7991 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7992 }
7993 }
7994
7995 if (N0.getOpcode() == ISD::SETCC) {
7996 // Only do this before legalize for now.
7997 if (!LegalOperations && VT.isVector() &&
7998 N0.getValueType().getVectorElementType() == MVT::i1) {
7999 EVT N00VT = N0.getOperand(0).getValueType();
8000 if (getSetCCResultType(N00VT) == N0.getValueType())
8001 return SDValue();
8002
8003 // We know that the # elements of the results is the same as the #
8004 // elements of the compare (and the # elements of the compare result for
8005 // that matter). Check to see that they are the same size. If so, we know
8006 // that the element size of the sext'd result matches the element size of
8007 // the compare operands.
8008 SDLoc DL(N);
8009 SDValue VecOnes = DAG.getConstant(1, DL, VT);
8010 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
8011 // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
8012 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
8013 N0.getOperand(1), N0.getOperand(2));
8014 return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
8015 }
8016
8017 // If the desired elements are smaller or larger than the source
8018 // elements we can use a matching integer vector type and then
8019 // truncate/sign extend.
8020 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8021 SDValue VsetCC =
8022 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
8023 N0.getOperand(1), N0.getOperand(2));
8024 return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
8025 VecOnes);
8026 }
8027
8028 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8029 SDLoc DL(N);
8030 if (SDValue SCC = SimplifySelectCC(
8031 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8032 DAG.getConstant(0, DL, VT),
8033 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8034 return SCC;
8035 }
8036
8037 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
8038 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
8039 isa<ConstantSDNode>(N0.getOperand(1)) &&
8040 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
8041 N0.hasOneUse()) {
8042 SDValue ShAmt = N0.getOperand(1);
8043 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8044 if (N0.getOpcode() == ISD::SHL) {
8045 SDValue InnerZExt = N0.getOperand(0);
8046 // If the original shl may be shifting out bits, do not perform this
8047 // transformation.
8048 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
8049 InnerZExt.getOperand(0).getValueSizeInBits();
8050 if (ShAmtVal > KnownZeroBits)
8051 return SDValue();
8052 }
8053
8054 SDLoc DL(N);
8055
8056 // Ensure that the shift amount is wide enough for the shifted value.
8057 if (VT.getSizeInBits() >= 256)
8058 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
8059
8060 return DAG.getNode(N0.getOpcode(), DL, VT,
8061 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
8062 ShAmt);
8063 }
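// Worked example for the shl case above (a sketch; X is hypothetical): for
// (zext (shl (zext i8 X to i16), 7) to i32), KnownZeroBits = 16 - 8 = 8, so
// a shift by 7 cannot discard set bits and the result is
// (shl (zext i8 X to i32), 7); a shift amount of 9 would be rejected.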
8064
8065 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8066 return NewVSel;
8067
8068 return SDValue();
8069}
8070
8071SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
8072 SDValue N0 = N->getOperand(0);
8073 EVT VT = N->getValueType(0);
8074
8075 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8076 LegalOperations))
8077 return SDValue(Res, 0);
8078
8079 // fold (aext (aext x)) -> (aext x)
8080 // fold (aext (zext x)) -> (zext x)
8081 // fold (aext (sext x)) -> (sext x)
8082 if (N0.getOpcode() == ISD::ANY_EXTEND ||
8083 N0.getOpcode() == ISD::ZERO_EXTEND ||
8084 N0.getOpcode() == ISD::SIGN_EXTEND)
8085 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8086
8087 // fold (aext (truncate (load x))) -> (aext (smaller load x))
8088 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
8089 if (N0.getOpcode() == ISD::TRUNCATE) {
8090 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8091 SDNode *oye = N0.getOperand(0).getNode();
8092 if (NarrowLoad.getNode() != N0.getNode()) {
8093 CombineTo(N0.getNode(), NarrowLoad);
8094 // CombineTo deleted the truncate, if needed, but not what's under it.
8095 AddToWorklist(oye);
8096 }
8097 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8098 }
8099 }
8100
8101 // fold (aext (truncate x))
8102 if (N0.getOpcode() == ISD::TRUNCATE)
8103 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8104
8105 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
8106 // if the trunc is not free.
8107 if (N0.getOpcode() == ISD::AND &&
8108 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8109 N0.getOperand(1).getOpcode() == ISD::Constant &&
8110 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8111 N0.getValueType())) {
8112 SDLoc DL(N);
8113 SDValue X = N0.getOperand(0).getOperand(0);
8114 X = DAG.getAnyExtOrTrunc(X, DL, VT);
8115 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8116 Mask = Mask.zext(VT.getSizeInBits());
8117 return DAG.getNode(ISD::AND, DL, VT,
8118 X, DAG.getConstant(Mask, DL, VT));
8119 }
8120
8121 // fold (aext (load x)) -> (aext (truncate (extload x)))
8122 // None of the supported targets knows how to perform load and any_ext
8123 // on vectors in one instruction. We only perform this transformation on
8124 // scalars.
8125 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
8126 ISD::isUNINDEXEDLoad(N0.getNode()) &&
8127 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
8128 bool DoXform = true;
8129 SmallVector<SDNode*, 4> SetCCs;
8130 if (!N0.hasOneUse())
8131 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
8132 TLI);
8133 if (DoXform) {
8134 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8135 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
8136 LN0->getChain(),
8137 LN0->getBasePtr(), N0.getValueType(),
8138 LN0->getMemOperand());
8139 ExtendSetCCUses(SetCCs, N0, ExtLoad, SDLoc(N),
8140 ISD::ANY_EXTEND);
8141 // If the load value is used only by N, replace it via CombineTo N.
8142 bool NoReplaceTrunc = N0.hasOneUse();
8143 CombineTo(N, ExtLoad);
8144 if (NoReplaceTrunc) {
8145 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8146 } else {
8147 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
8148 N0.getValueType(), ExtLoad);
8149 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8150 }
8151 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8152 }
8153 }
8154
8155 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
8156 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
8157 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
8158 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
8159 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
8160 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8161 ISD::LoadExtType ExtType = LN0->getExtensionType();
8162 EVT MemVT = LN0->getMemoryVT();
8163 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
8164 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
8165 VT, LN0->getChain(), LN0->getBasePtr(),
8166 MemVT, LN0->getMemOperand());
8167 CombineTo(N, ExtLoad);
8168 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8169 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8170 }
8171 }
8172
8173 if (N0.getOpcode() == ISD::SETCC) {
8174 // For vectors:
8175 // aext(setcc) -> vsetcc
8176 // aext(setcc) -> truncate(vsetcc)
8177 // aext(setcc) -> aext(vsetcc)
8178 // Only do this before legalize for now.
8179 if (VT.isVector() && !LegalOperations) {
8180 EVT N00VT = N0.getOperand(0).getValueType();
8181 if (getSetCCResultType(N00VT) == N0.getValueType())
8182 return SDValue();
8183
8184 // We know that the # elements of the results is the same as the
8185 // # elements of the compare (and the # elements of the compare result
8186 // for that matter). Check to see that they are the same size. If so,
8187 // we know that the element size of the sext'd result matches the
8188 // element size of the compare operands.
8189 if (VT.getSizeInBits() == N00VT.getSizeInBits())
8190 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
8191 N0.getOperand(1),
8192 cast<CondCodeSDNode>(N0.getOperand(2))->get());
8193 // If the desired elements are smaller or larger than the source
8194 // elements we can use a matching integer vector type and then
8195 // truncate/any extend
8196 else {
8197 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8198 SDValue VsetCC =
8199 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
8200 N0.getOperand(1),
8201 cast<CondCodeSDNode>(N0.getOperand(2))->get());
8202 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
8203 }
8204 }
8205
8206 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8207 SDLoc DL(N);
8208 if (SDValue SCC = SimplifySelectCC(
8209 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8210 DAG.getConstant(0, DL, VT),
8211 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8212 return SCC;
8213 }
8214
8215 return SDValue();
8216}
8217
8218SDValue DAGCombiner::visitAssertExt(SDNode *N) {
8219 unsigned Opcode = N->getOpcode();
8220 SDValue N0 = N->getOperand(0);
8221 SDValue N1 = N->getOperand(1);
8222 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
8223
8224 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
8225 if (N0.getOpcode() == Opcode &&
8226 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
8227 return N0;
8228
8229 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
8230 N0.getOperand(0).getOpcode() == Opcode) {
8231 // We have an assert, truncate, assert sandwich. Make one stronger assert
8232 // by asserting on the smallest asserted type to the larger source type.
8233 // This eliminates the later assert:
8234 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
8235 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
8236 SDValue BigA = N0.getOperand(0);
8237 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
8238 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
8239 "Asserting zero/sign-extended bits to a type larger than the "
8240 "truncated destination does not provide information");
8241
8242 SDLoc DL(N);
8243 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
8244 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
8245 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
8246 BigA.getOperand(0), MinAssertVTVal);
8247 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
8248 }
8249
8250 return SDValue();
8251}
8252
8253/// If the result of a wider load is shifted right by N bits and then
8254/// truncated to a narrower type, where N is a multiple of the number of bits
8255/// in the narrower type, transform it to a narrower load from address + N /
8256/// (num bits of new type). Also narrow the load if the result is masked with
8257/// an AND to effectively produce a smaller type. If the result is to be
8258/// extended, also fold the extension to form an extending load.
8259SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
8260 unsigned Opc = N->getOpcode();
8261
8262 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
8263 SDValue N0 = N->getOperand(0);
8264 EVT VT = N->getValueType(0);
8265 EVT ExtVT = VT;
8266
8267 // This transformation isn't valid for vector loads.
8268 if (VT.isVector())
8269 return SDValue();
8270
8271 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
8272 // sign-extending to VT.
8273 if (Opc == ISD::SIGN_EXTEND_INREG) {
8274 ExtType = ISD::SEXTLOAD;
8275 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8276 } else if (Opc == ISD::SRL) {
8277 // Another special-case: SRL is basically zero-extending a narrower value,
8278 // or it may be shifting a higher subword, half or byte into the lowest
8279 // bits.
8280 ExtType = ISD::ZEXTLOAD;
8281 N0 = SDValue(N, 0);
8282
8283 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
8284 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8285 if (!N01 || !LN0)
8286 return SDValue();
8287
8288 uint64_t ShiftAmt = N01->getZExtValue();
8289 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
8290 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
8291 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
8292 else
8293 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
8294 VT.getSizeInBits() - ShiftAmt);
8295 } else if (Opc == ISD::AND) {
8296 // An AND with a constant mask is the same as a truncate + zero-extend.
8297 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
8298 if (!AndC || !AndC->getAPIntValue().isMask())
8299 return SDValue();
8300
8301 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
8302 ExtType = ISD::ZEXTLOAD;
8303 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
8304 }
8305
8306 unsigned ShAmt = 0;
8307 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
8308 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8309 ShAmt = N01->getZExtValue();
8310 unsigned EVTBits = ExtVT.getSizeInBits();
8311 // Is the shift amount a multiple of size of VT?
8312 if ((ShAmt & (EVTBits-1)) == 0) {
8313 N0 = N0.getOperand(0);
8314 // Is the load width a multiple of size of VT?
8315 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
8316 return SDValue();
8317 }
8318
8319 // At this point, we must have a load or else we can't do the transform.
8320 if (!isa<LoadSDNode>(N0)) return SDValue();
8321
8322 // Because a SRL must be assumed to *need* to zero-extend the high bits
8323 // (as opposed to anyext the high bits), we can't combine the zextload
8324 // lowering of SRL and an sextload.
8325 if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
8326 return SDValue();
8327
8328 // If the shift amount is larger than the input type then we're not
8329 // accessing any of the loaded bytes. If the load was a zextload/extload
8330 // then the result of the shift+trunc is zero/undef (handled elsewhere).
8331 if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
8332 return SDValue();
8333 }
8334 }
8335
8336 // If the load is shifted left (and the result isn't shifted back right),
8337 // we can fold the truncate through the shift.
8338 unsigned ShLeftAmt = 0;
8339 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8340 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
8341 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8342 ShLeftAmt = N01->getZExtValue();
8343 N0 = N0.getOperand(0);
8344 }
8345 }
8346
8347 // If we haven't found a load, we can't narrow it.
8348 if (!isa<LoadSDNode>(N0))
8349 return SDValue();
8350
8351 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8352 if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
8353 return SDValue();
8354
8355 // For big endian targets, we need to adjust the offset to the pointer to
8356 // load the correct bytes.
8357 if (DAG.getDataLayout().isBigEndian()) {
8358 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8359 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8360 ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8361 }
8362
8363 EVT PtrType = N0.getOperand(1).getValueType();
8364 uint64_t PtrOff = ShAmt / 8;
8365 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8366 SDLoc DL(LN0);
8367 // The original load itself didn't wrap, so an offset within it doesn't.
8368 SDNodeFlags Flags;
8369 Flags.setNoUnsignedWrap(true);
8370 SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8371 PtrType, LN0->getBasePtr(),
8372 DAG.getConstant(PtrOff, DL, PtrType),
8373 Flags);
8374 AddToWorklist(NewPtr.getNode());
8375
8376 SDValue Load;
8377 if (ExtType == ISD::NON_EXTLOAD)
8378 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8379 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8380 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8381 else
8382 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8383 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8384 NewAlign, LN0->getMemOperand()->getFlags(),
8385 LN0->getAAInfo());
8386
8387 // Replace the old load's chain with the new load's chain.
8388 WorklistRemover DeadNodes(*this);
8389 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8390
8391 // Shift the result left, if we've swallowed a left shift.
8392 SDValue Result = Load;
8393 if (ShLeftAmt != 0) {
8394 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8395 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8396 ShImmTy = VT;
8397 // If the shift amount is as large as the result size (but, presumably,
8398 // no larger than the source) then the useful bits of the result are
8399 // zero; we can't simply return the shortened shift, because the result
8400 // of that operation is undefined.
8401 SDLoc DL(N0);
8402 if (ShLeftAmt >= VT.getSizeInBits())
8403 Result = DAG.getConstant(0, DL, VT);
8404 else
8405 Result = DAG.getNode(ISD::SHL, DL, VT,
8406 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8407 }
8408
8409 // Return the new loaded value.
8410 return Result;
8411}
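// Worked example for ReduceLoadWidth (a sketch, assuming a little-endian
// target and a hypothetical pointer p): (trunc (srl (i32 load p), 16) to i16)
// has ShAmt = 16, a multiple of the 16-bit result width, so it narrows to
// (i16 load p+2) with PtrOff = 16 / 8 = 2; on big-endian targets the offset
// is recomputed from the other end of the word.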
8412
8413SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8414 SDValue N0 = N->getOperand(0);
8415 SDValue N1 = N->getOperand(1);
8416 EVT VT = N->getValueType(0);
8417 EVT EVT = cast<VTSDNode>(N1)->getVT();
8418 unsigned VTBits = VT.getScalarSizeInBits();
8419 unsigned EVTBits = EVT.getScalarSizeInBits();
8420
8421 if (N0.isUndef())
8422 return DAG.getUNDEF(VT);
8423
8424 // fold (sext_in_reg c1) -> c1
8425 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8426 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8427
8428 // If the input is already sign extended, just drop the extension.
8429 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8430 return N0;
8431
8432 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8433 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8434 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8435 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8436 N0.getOperand(0), N1);
8437
8438 // fold (sext_in_reg (sext x)) -> (sext x)
8439 // fold (sext_in_reg (aext x)) -> (sext x)
8440 // if x is small enough.
8441 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8442 SDValue N00 = N0.getOperand(0);
8443 if (N00.getScalarValueSizeInBits() <= EVTBits &&
8444 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8445 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8446 }
8447
8448 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8449 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8450 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8451 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8452 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8453 if (!LegalOperations ||
8454 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8455 return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8456 }
8457
8458 // fold (sext_in_reg (zext x)) -> (sext x)
8459 // iff we are extending the source sign bit.
8460 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8461 SDValue N00 = N0.getOperand(0);
8462 if (N00.getScalarValueSizeInBits() == EVTBits &&
8463 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8464 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8465 }
8466
8467 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8468 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8469 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8470
8471 // fold operands of sext_in_reg based on knowledge that the top bits are not
8472 // demanded.
8473 if (SimplifyDemandedBits(SDValue(N, 0)))
8474 return SDValue(N, 0);
8475
8476 // fold (sext_in_reg (load x)) -> (smaller sextload x)
8477 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8478 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8479 return NarrowLoad;
8480
8481 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8482 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8483 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8484 if (N0.getOpcode() == ISD::SRL) {
8485 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8486 if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8487 // We can turn this into an SRA iff the input to the SRL is already sign
8488 // extended enough.
8489 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8490 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8491 return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8492 N0.getOperand(0), N0.getOperand(1));
8493 }
8494 }
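// Worked example for the srl fold above (a sketch): with i32 X,
// (sext_in_reg (srl X, 24), i8) --> (sra X, 24), since both leave bits
// 31..24 of X in the low byte and fill the rest with copies of X's sign
// bit; for (srl X, 23) the fold additionally needs at least two sign bits
// in X.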
8495
8496 // fold (sext_inreg (extload x)) -> (sextload x)
8497 // If sextload is not supported by target, we can only do the combine when
8498 // load has one use. Doing otherwise can block folding the extload with other
8499 // extends that the target does support.
8500 if (ISD::isEXTLoad(N0.getNode()) &&
8501 ISD::isUNINDEXEDLoad(N0.getNode()) &&
8502 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8503 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
8504 N0.hasOneUse()) ||
8505 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8506 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8507 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8508 LN0->getChain(),
8509 LN0->getBasePtr(), EVT,
8510 LN0->getMemOperand());
8511 CombineTo(N, ExtLoad);
8512 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8513 AddToWorklist(ExtLoad.getNode());
8514 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8515 }
8516 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8517 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8518 N0.hasOneUse() &&
8519 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8520 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8521 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8522 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8523 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8524 LN0->getChain(),
8525 LN0->getBasePtr(), EVT,
8526 LN0->getMemOperand());
8527 CombineTo(N, ExtLoad);
8528 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8529 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8530 }
8531
8532 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8533 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8534 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8535 N0.getOperand(1), false))
8536 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8537 BSwap, N1);
8538 }
8539
8540 return SDValue();
8541}
8542
8543SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8544 SDValue N0 = N->getOperand(0);
8545 EVT VT = N->getValueType(0);
8546
8547 if (N0.isUndef())
8548 return DAG.getUNDEF(VT);
8549
8550 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8551 LegalOperations))
8552 return SDValue(Res, 0);
8553
8554 return SDValue();
8555}
8556
8557SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8558 SDValue N0 = N->getOperand(0);
8559 EVT VT = N->getValueType(0);
8560
8561 if (N0.isUndef())
8562 return DAG.getUNDEF(VT);
8563
8564 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8565 LegalOperations))
8566 return SDValue(Res, 0);
8567
8568 return SDValue();
8569}
8570
8571SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
8572 SDValue N0 = N->getOperand(0);
8573 EVT VT = N->getValueType(0);
8574 bool isLE = DAG.getDataLayout().isLittleEndian();
8575
8576 // noop truncate
8577 if (N0.getValueType() == N->getValueType(0))
8578 return N0;
8579
8580 // fold (truncate (truncate x)) -> (truncate x)
8581 if (N0.getOpcode() == ISD::TRUNCATE)
8582 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8583
8584 // fold (truncate c1) -> c1
8585 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
8586 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8587 if (C.getNode() != N)
8588 return C;
8589 }
8590
8591 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8592 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
8593 N0.getOpcode() == ISD::SIGN_EXTEND ||
8594 N0.getOpcode() == ISD::ANY_EXTEND) {
8595 // if the source is smaller than the dest, we still need an extend.
8596 if (N0.getOperand(0).getValueType().bitsLT(VT))
8597 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8599 // if the source is larger than the dest, then we just need the truncate.
8599 if (N0.getOperand(0).getValueType().bitsGT(VT))
8600 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8601 // if the source and dest are the same type, we can drop both the extend
8602 // and the truncate.
8603 return N0.getOperand(0);
8604 }
8605
8606 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8607 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8608 return SDValue();
8609
8610 // Fold extract-and-trunc into a narrow extract. For example:
8611 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8612 // i32 y = TRUNCATE(i64 x)
8613 // -- becomes --
8614 // v16i8 b = BITCAST (v2i64 val)
8615 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8616 //
8617 // Note: We only run this optimization after type legalization (which often
8618 // creates this pattern) and before operation legalization after which
8619 // we need to be more careful about the vector instructions that we generate.
8620 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8621 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8622 EVT VecTy = N0.getOperand(0).getValueType();
8623 EVT ExTy = N0.getValueType();
8624 EVT TrTy = N->getValueType(0);
8625
8626 unsigned NumElem = VecTy.getVectorNumElements();
8627 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8628
8629 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8630 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8631
8632 SDValue EltNo = N0->getOperand(1);
8633 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8634 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8635 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8636 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8637
8638 SDLoc DL(N);
8639 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8640 DAG.getBitcast(NVT, N0.getOperand(0)),
8641 DAG.getConstant(Index, DL, IndexTy));
8642 }
8643 }
8644
8645 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8646 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8647 EVT SrcVT = N0.getValueType();
8648 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8649 TLI.isTruncateFree(SrcVT, VT)) {
8650 SDLoc SL(N0);
8651 SDValue Cond = N0.getOperand(0);
8652 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8653 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8654 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8655 }
8656 }
8657
8658 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8659 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8660 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8661 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8662 SDValue Amt = N0.getOperand(1);
8663 KnownBits Known;
8664 DAG.computeKnownBits(Amt, Known);
8665 unsigned Size = VT.getScalarSizeInBits();
8666 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
8667 SDLoc SL(N);
8668 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8669
8670 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8671 if (AmtVT != Amt.getValueType()) {
8672 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
8673 AddToWorklist(Amt.getNode());
8674 }
8675 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
8676 }
8677 }
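// Worked example for the fold above (a sketch; X and K are hypothetical):
// for (trunc (shl i64 X, K) to i32), the known-bits test requires K to fit
// in Log2_32(32) = 5 bits, i.e. K <= 31, so no meaningful bit can be
// shifted past the narrow width and the shift can be done as
// (shl (trunc X to i32), K).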
8678
8679 // Fold a series of buildvector, bitcast, and truncate if possible.
8680 // For example fold
8681 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8682 // (2xi32 (buildvector x, y)).
8683 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8684 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8685 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8686 N0.getOperand(0).hasOneUse()) {
8687 SDValue BuildVect = N0.getOperand(0);
8688 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8689 EVT TruncVecEltTy = VT.getVectorElementType();
8690
8691 // Check that the element types match.
8692 if (BuildVectEltTy == TruncVecEltTy) {
8693 // Now we only need to compute the offset of the truncated elements.
8694 unsigned BuildVecNumElts = BuildVect.getNumOperands();
8695 unsigned TruncVecNumElts = VT.getVectorNumElements();
8696 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8697
8698 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8699 "Invalid number of elements");
8700
8701 SmallVector<SDValue, 8> Opnds;
8702 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8703 Opnds.push_back(BuildVect.getOperand(i));
8704
8705 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8706 }
8707 }
8708
8709 // See if we can simplify the input to this truncate through knowledge that
8710 // only the low bits are being used.
8711 // For example "trunc (or (shl x, 8), y)" // -> trunc y
8712 // Currently we only perform this optimization on scalars because vectors
8713 // may have different active low bits.
8714 if (!VT.isVector()) {
8715 APInt Mask =
8716 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
8717 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
8718 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8719 }
8720
8721 // fold (truncate (load x)) -> (smaller load x)
8722 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8723 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8724 if (SDValue Reduced = ReduceLoadWidth(N))
8725 return Reduced;
8726
8727 // Handle the case where the load remains an extending load even
8728 // after truncation.
8729 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8730 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8731 if (!LN0->isVolatile() &&
8732 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8733 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8734 VT, LN0->getChain(), LN0->getBasePtr(),
8735 LN0->getMemoryVT(),
8736 LN0->getMemOperand());
8737 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8738 return NewLoad;
8739 }
8740 }
8741 }
8742
8743 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8744 // where ... are all 'undef'.
8745 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8746 SmallVector<EVT, 8> VTs;
8747 SDValue V;
8748 unsigned Idx = 0;
8749 unsigned NumDefs = 0;
8750
8751 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8752 SDValue X = N0.getOperand(i);
8753 if (!X.isUndef()) {
8754 V = X;
8755 Idx = i;
8756 NumDefs++;
8757 }
8758 // Stop if more than one member is non-undef.
8759 if (NumDefs > 1)
8760 break;
8761 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8762 VT.getVectorElementType(),
8763 X.getValueType().getVectorNumElements()));
8764 }
8765
8766 if (NumDefs == 0)
8767 return DAG.getUNDEF(VT);
8768
8769 if (NumDefs == 1) {
8770 assert(V.getNode() && "The single defined operand is empty!");
8771 SmallVector<SDValue, 8> Opnds;
8772 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8773 if (i != Idx) {
8774 Opnds.push_back(DAG.getUNDEF(VTs[i]));
8775 continue;
8776 }
8777 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8778 AddToWorklist(NV.getNode());
8779 Opnds.push_back(NV);
8780 }
8781 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8782 }
8783 }
8784
8785 // Fold truncate of a bitcast of a vector to an extract of the low vector
8786 // element.
8787 //
8788 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
8789 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8790 SDValue VecSrc = N0.getOperand(0);
8791 EVT SrcVT = VecSrc.getValueType();
8792 if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8793 (!LegalOperations ||
8794 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8795 SDLoc SL(N);
8796
8797 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8798 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
8799 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8800 VecSrc, DAG.getConstant(Idx, SL, IdxVT));
8801 }
8802 }
8803
8804 // Simplify the operands using demanded-bits information.
8805 if (!VT.isVector() &&
8806 SimplifyDemandedBits(SDValue(N, 0)))
8807 return SDValue(N, 0);
8808
8809 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8810 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
8811 // When the adde's carry is not used.
8812 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
8813 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
8814 (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
8815 SDLoc SL(N);
8816 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8817 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8818 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
8819 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
8820 }
8821
8822 // fold (truncate (extract_subvector(ext x))) ->
8823 // (extract_subvector x)
8824 // TODO: This can be generalized to cover cases where the truncate and extract
8825 // do not fully cancel each other out.
8826 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
8827 SDValue N00 = N0.getOperand(0);
8828 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
8829 N00.getOpcode() == ISD::ZERO_EXTEND ||
8830 N00.getOpcode() == ISD::ANY_EXTEND) {
8831 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
8832 VT.getVectorElementType())
8833 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
8834 N00.getOperand(0), N0.getOperand(1));
8835 }
8836 }
8837
8838 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8839 return NewVSel;
8840
8841 return SDValue();
8842}
8843
8844static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8845 SDValue Elt = N->getOperand(i);
8846 if (Elt.getOpcode() != ISD::MERGE_VALUES)
8847 return Elt.getNode();
8848 return Elt.getOperand(Elt.getResNo()).getNode();
8849}
8850
8851/// build_pair (load, load) -> load
8852/// if load locations are consecutive.
8853SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8854 assert(N->getOpcode() == ISD::BUILD_PAIR);
8855
8856 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8857 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8858
8859 // A BUILD_PAIR always has the least significant part in elt 0 and the
8860 // most significant part in elt 1, so when combining into one large load we
8861 // need to consider the endianness.
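// e.g. combining two i32 loads at addresses A and A+4 into one i64 load at A:
// on little-endian, elt 0 (the LSB half) must be the load at A; on big-endian,
// elt 1 (the MSB half) must be, hence the swap below.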
8862 if (DAG.getDataLayout().isBigEndian())
8863 std::swap(LD1, LD2);
8864
8865 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8866 LD1->getAddressSpace() != LD2->getAddressSpace())
8867 return SDValue();
8868 EVT LD1VT = LD1->getValueType(0);
8869 unsigned LD1Bytes = LD1VT.getStoreSize();
8870 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8871 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8872 unsigned Align = LD1->getAlignment();
8873 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8874 VT.getTypeForEVT(*DAG.getContext()));
8875
8876 if (NewAlign <= Align &&
8877 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8878 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8879 LD1->getPointerInfo(), Align);
8880 }
8881
8882 return SDValue();
8883}
8884
8885static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8886 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8887 // and Lo parts; on big-endian machines it doesn't.
8888 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8889}
8890
8891static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8892 const TargetLowering &TLI) {
8893 // If this is not a bitcast to an FP type or if the target doesn't have
8894 // IEEE754-compliant FP logic, we're done.
8895 EVT VT = N->getValueType(0);
8896 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8897 return SDValue();
8898
8899 // TODO: Use splat values for the constant-checking below and remove this
8900 // restriction.
8901 SDValue N0 = N->getOperand(0);
8902 EVT SourceVT = N0.getValueType();
8903 if (SourceVT.isVector())
8904 return SDValue();
8905
8906 unsigned FPOpcode;
8907 APInt SignMask;
8908 switch (N0.getOpcode()) {
8909 case ISD::AND:
8910 FPOpcode = ISD::FABS;
8911 SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8912 break;
8913 case ISD::XOR:
8914 FPOpcode = ISD::FNEG;
8915 SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8916 break;
8917 // TODO: ISD::OR --> ISD::FNABS?
8918 default:
8919 return SDValue();
8920 }
8921
8922 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8923 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
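// e.g. for f32 the AND mask must be 0x7fffffff (clears the sign bit, giving
// fabs) and the XOR mask must be 0x80000000 (flips the sign bit, giving fneg).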
8924 SDValue LogicOp0 = N0.getOperand(0);
8925 ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8926 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8927 LogicOp0.getOpcode() == ISD::BITCAST &&
8928 LogicOp0->getOperand(0).getValueType() == VT)
8929 return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8930
8931 return SDValue();
8932}
8933
8934SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8935 SDValue N0 = N->getOperand(0);
8936 EVT VT = N->getValueType(0);
8937
8938 if (N0.isUndef())
8939 return DAG.getUNDEF(VT);
8940
8941 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8942 // Only do this before legalize, since afterward the target may be depending
8943 // on the bitconvert.
8944 // First check to see if this is all constant.
8945 if (!LegalTypes &&
8946 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8947 VT.isVector()) {
8948 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8949
8950 EVT DestEltVT = N->getValueType(0).getVectorElementType();
8951 assert(!DestEltVT.isVector() &&
8952 "Element type of vector ValueType must not be vector!");
8953 if (isSimple)
8954 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8955 }
8956
8957 // If the input is a constant, let getNode fold it.
8958 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8959 // If we can't allow illegal operations, we need to check that this is just
8960 // an fp -> int or int -> fp conversion and that the resulting operation will
8961 // be legal.
8962 if (!LegalOperations ||
8963 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8964 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8965 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8966 TLI.isOperationLegal(ISD::Constant, VT)))
8967 return DAG.getBitcast(VT, N0);
8968 }
8969
8970 // (conv (conv x, t1), t2) -> (conv x, t2)
8971 if (N0.getOpcode() == ISD::BITCAST)
8972 return DAG.getBitcast(VT, N0.getOperand(0));
8973
8974 // fold (conv (load x)) -> (load (conv*)x)
8975 // If the resultant load doesn't need a higher alignment than the original!
8976 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8977 // Do not change the width of a volatile load.
8978 !cast<LoadSDNode>(N0)->isVolatile() &&
8979 // Do not remove the cast if the types differ in endian layout.
8980 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8981 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8982 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8983 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8984 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8985 unsigned OrigAlign = LN0->getAlignment();
8986
8987 bool Fast = false;
8988 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8989 LN0->getAddressSpace(), OrigAlign, &Fast) &&
8990 Fast) {
8991 SDValue Load =
8992 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8993 LN0->getPointerInfo(), OrigAlign,
8994 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8995 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8996 return Load;
8997 }
8998 }
8999
9000 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
9001 return V;
9002
9003 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9004 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9005 //
9006 // For ppc_fp128:
9007 // fold (bitcast (fneg x)) ->
9008 // flipbit = signbit
9009 // (xor (bitcast x) (build_pair flipbit, flipbit))
9010 //
9011 // fold (bitcast (fabs x)) ->
9012 // flipbit = (and (extract_element (bitcast x), 0), signbit)
9013 // (xor (bitcast x) (build_pair flipbit, flipbit))
9014 // This often reduces constant pool loads.
9015 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
9016 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
9017 N0.getNode()->hasOneUse() && VT.isInteger() &&
9018 !VT.isVector() && !N0.getValueType().isVector()) {
9019 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
9020 AddToWorklist(NewConv.getNode());
9021
9022 SDLoc DL(N);
9023 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9024 assert(VT.getSizeInBits() == 128);
9025 SDValue SignBit = DAG.getConstant(
9026 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
9027 SDValue FlipBit;
9028 if (N0.getOpcode() == ISD::FNEG) {
9029 FlipBit = SignBit;
9030 AddToWorklist(FlipBit.getNode());
9031 } else {
9032 assert(N0.getOpcode() == ISD::FABS);
9033 SDValue Hi =
9034 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
9035 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9036 SDLoc(NewConv)));
9037 AddToWorklist(Hi.getNode());
9038 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
9039 AddToWorklist(FlipBit.getNode());
9040 }
9041 SDValue FlipBits =
9042 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9043 AddToWorklist(FlipBits.getNode());
9044 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
9045 }
9046 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9047 if (N0.getOpcode() == ISD::FNEG)
9048 return DAG.getNode(ISD::XOR, DL, VT,
9049 NewConv, DAG.getConstant(SignBit, DL, VT));
9050 assert(N0.getOpcode() == ISD::FABS);
9051 return DAG.getNode(ISD::AND, DL, VT,
9052 NewConv, DAG.getConstant(~SignBit, DL, VT));
9053 }
9054
9055 // fold (bitconvert (fcopysign cst, x)) ->
9056 // (or (and (bitconvert x), sign), (and cst, (not sign)))
9057 // Note that we don't handle (copysign x, cst) because this can always be
9058 // folded to an fneg or fabs.
9059 //
9060 // For ppc_fp128:
9061 // fold (bitcast (fcopysign cst, x)) ->
9062 // flipbit = (and (extract_element
9063 // (xor (bitcast cst), (bitcast x)), 0),
9064 // signbit)
9065 // (xor (bitcast cst) (build_pair flipbit, flipbit))
9066 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
9067 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
9068 VT.isInteger() && !VT.isVector()) {
9069 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
9070 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
9071 if (isTypeLegal(IntXVT)) {
9072 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
9073 AddToWorklist(X.getNode());
9074
9075 // If X has a different width than the result/lhs, sext it or truncate it.
9076 unsigned VTWidth = VT.getSizeInBits();
9077 if (OrigXWidth < VTWidth) {
9078 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
9079 AddToWorklist(X.getNode());
9080 } else if (OrigXWidth > VTWidth) {
9081 // To get the sign bit in the right place, we have to shift it right
9082 // before truncating.
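// e.g. for (fcopysign f32-cst, f64-x): x's sign bit is bit 63, so shift the
// i64 right by 64 - 32 = 32 to move it to bit 31 before truncating to i32.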
9083 SDLoc DL(X);
9084 X = DAG.getNode(ISD::SRL, DL,
9085 X.getValueType(), X,
9086 DAG.getConstant(OrigXWidth-VTWidth, DL,
9087 X.getValueType()));
9088 AddToWorklist(X.getNode());
9089 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
9090 AddToWorklist(X.getNode());
9091 }
9092
9093 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9094 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
9095 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9096 AddToWorklist(Cst.getNode());
9097 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
9098 AddToWorklist(X.getNode());
9099 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
9100 AddToWorklist(XorResult.getNode());
9101 SDValue XorResult64 = DAG.getNode(
9102 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
9103 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9104 SDLoc(XorResult)));
9105 AddToWorklist(XorResult64.getNode());
9106 SDValue FlipBit =
9107 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
9108 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
9109 AddToWorklist(FlipBit.getNode());
9110 SDValue FlipBits =
9111 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9112 AddToWorklist(FlipBits.getNode());
9113 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
9114 }
9115 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9116 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
9117 X, DAG.getConstant(SignBit, SDLoc(X), VT));
9118 AddToWorklist(X.getNode());
9119
9120 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9121 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
9122 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
9123 AddToWorklist(Cst.getNode());
9124
9125 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
9126 }
9127 }
9128
9129 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
9130 if (N0.getOpcode() == ISD::BUILD_PAIR)
9131 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
9132 return CombineLD;
9133
9134 // Remove double bitcasts from shuffles - this is often a legacy of
9135 // XformToShuffleWithZero being used to combine bitmaskings (of
9136 // float vectors bitcast to integer vectors) into shuffles.
9137 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
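// e.g. viewing a v2i64 shuffle with mask <1,0> as v4i32 scales each mask
// element by 2, producing the new mask <2,3,0,1>.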
9138 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
9139 N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
9140 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
9141 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
9142 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
9143
9144 // If an operand is a bitcast from the original VT, peek through it.
9145 // If an operand is a constant, just bitcast it back to the original VT.
9146 auto PeekThroughBitcast = [&](SDValue Op) {
9147 if (Op.getOpcode() == ISD::BITCAST &&
9148 Op.getOperand(0).getValueType() == VT)
9149 return SDValue(Op.getOperand(0));
9150 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
9151 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
9152 return DAG.getBitcast(VT, Op);
9153 return SDValue();
9154 };
9155
9156 // FIXME: If either input vector is bitcast, try to convert the shuffle to
9157 // the result type of this bitcast. This would eliminate at least one
9158 // bitcast. See the transform in InstCombine.
9159 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
9160 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
9161 if (!(SV0 && SV1))
9162 return SDValue();
9163
9164 int MaskScale =
9165 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
9166 SmallVector<int, 8> NewMask;
9167 for (int M : SVN->getMask())
9168 for (int i = 0; i != MaskScale; ++i)
9169 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
9170
9171 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9172 if (!LegalMask) {
9173 std::swap(SV0, SV1);
9174 ShuffleVectorSDNode::commuteMask(NewMask);
9175 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9176 }
9177
9178 if (LegalMask)
9179 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
9180 }
9181
9182 return SDValue();
9183}
9184
9185SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
9186 EVT VT = N->getValueType(0);
9187 return CombineConsecutiveLoads(N, VT);
9188}
9189
9190/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
9191/// operands. DstEltVT indicates the destination element value type.
9192SDValue DAGCombiner::
9193ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
9194 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
9195
9196 // If this is already the right type, we're done.
9197 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
9198
9199 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
9200 unsigned DstBitSize = DstEltVT.getSizeInBits();
9201
9202 // If this is a conversion of N elements of one type to N elements of another
9203 // type, convert each element. This handles FP<->INT cases.
9204 if (SrcBitSize == DstBitSize) {
9205 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9206 BV->getValueType(0).getVectorNumElements());
9207
9208 // Because the FP element handling below calls this routine recursively,
9209 // we can end up with a scalar-to-vector node here.
9210 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
9211 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
9212 DAG.getBitcast(DstEltVT, BV->getOperand(0)));
9213
9214 SmallVector<SDValue, 8> Ops;
9215 for (SDValue Op : BV->op_values()) {
9216 // If the vector element type is not legal, the BUILD_VECTOR operands
9217 // are promoted and implicitly truncated. Make that explicit here.
9218 if (Op.getValueType() != SrcEltVT)
9219 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
9220 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
9221 AddToWorklist(Ops.back().getNode());
9222 }
9223 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
9224 }
9225
9226 // Otherwise, we're growing or shrinking the elements. To avoid having to
9227 // handle annoying details of growing/shrinking FP values, we convert them to
9228 // int first.
9229 if (SrcEltVT.isFloatingPoint()) {
9230 // Convert the input float vector to an int vector whose elements are the
9231 // same size.
9232 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
9233 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
9234 SrcEltVT = IntVT;
9235 }
9236
9237 // Now we know the input is an integer vector. If the output is an FP type,
9238 // convert to integers of the destination size first, then bitcast to FP.
9239 if (DstEltVT.isFloatingPoint()) {
9240 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
9241 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
9242
9243 // Next, convert to FP elements of the same size.
9244 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
9245 }
9246
9247 SDLoc DL(BV);
9248
9249 // Okay, we know the src/dst types are both integers of differing types.
9250 // Handle growing first.
9251 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
9252 if (SrcBitSize < DstBitSize) {
9253 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
9254
9255 SmallVector<SDValue, 8> Ops;
9256 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
9257 i += NumInputsPerOutput) {
9258 bool isLE = DAG.getDataLayout().isLittleEndian();
9259 APInt NewBits = APInt(DstBitSize, 0);
9260 bool EltIsUndef = true;
9261 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
9262 // Shift the previously computed bits over.
9263 NewBits <<= SrcBitSize;
9264 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
9265 if (Op.isUndef()) continue;
9266 EltIsUndef = false;
9267
9268 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
9269 zextOrTrunc(SrcBitSize).zext(DstBitSize);
9270 }
9271
9272 if (EltIsUndef)
9273 Ops.push_back(DAG.getUNDEF(DstEltVT));
9274 else
9275 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
9276 }
9277
9278 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
9279 return DAG.getBuildVector(VT, DL, Ops);
9280 }
9281
9282 // Finally, this must be the case where we are shrinking elements: each input
9283 // turns into multiple outputs.
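// e.g. shrinking v1i32 <0x0D0C0B0A> into v4i8: the pieces come out
// low-to-high as <0x0A,0x0B,0x0C,0x0D>, which is the little-endian layout;
// big-endian targets reverse each group of pieces below.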
9284 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
9285 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9286 NumOutputsPerInput*BV->getNumOperands());
9287 SmallVector<SDValue, 8> Ops;
9288
9289 for (const SDValue &Op : BV->op_values()) {
9290 if (Op.isUndef()) {
9291 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
9292 continue;
9293 }
9294
9295 APInt OpVal = cast<ConstantSDNode>(Op)->
9296 getAPIntValue().zextOrTrunc(SrcBitSize);
9297
9298 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
9299 APInt ThisVal = OpVal.trunc(DstBitSize);
9300 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
9301 OpVal.lshrInPlace(DstBitSize);
9302 }
9303
9304 // For big endian targets, swap the order of the pieces of each element.
9305 if (DAG.getDataLayout().isBigEndian())
9306 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
9307 }
9308
9309 return DAG.getBuildVector(VT, DL, Ops);
9310}
9311
9312static bool isContractable(SDNode *N) {
9313 SDNodeFlags F = N->getFlags();
9314 return F.hasAllowContract() || F.hasUnsafeAlgebra();
9315}
9316
9317/// Try to perform FMA combining on a given FADD node.
9318SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
9319 SDValue N0 = N->getOperand(0);
9320 SDValue N1 = N->getOperand(1);
9321 EVT VT = N->getValueType(0);
9322 SDLoc SL(N);
9323
9324 const TargetOptions &Options = DAG.getTarget().Options;
9325
9326 // Floating-point multiply-add with intermediate rounding.
9327 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9328
9329 // Floating-point multiply-add without intermediate rounding.
9330 bool HasFMA =
9331 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9332 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9333
9334 // No valid opcode, do not combine.
9335 if (!HasFMAD && !HasFMA)
9336 return SDValue();
9337
9338 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9339 Options.UnsafeFPMath || HasFMAD);
9340 // If the addition is not contractable, do not combine.
9341 if (!AllowFusionGlobally && !isContractable(N))
9342 return SDValue();
9343
9344 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9345 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9346 return SDValue();
9347
9348 // Always prefer FMAD to FMA for precision.
9349 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9350 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9351
9352 // Is the node an FMUL and contractable either due to global flags or
9353 // SDNodeFlags.
9354 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9355 if (N.getOpcode() != ISD::FMUL)
9356 return false;
9357 return AllowFusionGlobally || isContractable(N.getNode());
9358 };
9359 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
9360 // prefer to fold the multiply with fewer uses.
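// Folding the multiply that has fewer other uses maximizes the chance that
// the original fmul becomes dead and can be deleted.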
9361 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
9362 if (N0.getNode()->use_size() > N1.getNode()->use_size())
9363 std::swap(N0, N1);
9364 }
9365
9366 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
9367 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9368 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9369 N0.getOperand(0), N0.getOperand(1), N1);
9370 }
9371
9372 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
9373 // Note: Commutes FADD operands.
9374 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
9375 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9376 N1.getOperand(0), N1.getOperand(1), N0);
9377 }
9378
9379 // Look through FP_EXTEND nodes to do more combining.
9380
9381 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
9382 if (N0.getOpcode() == ISD::FP_EXTEND) {
9383 SDValue N00 = N0.getOperand(0);
9384 if (isContractableFMUL(N00) &&
9385 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9386 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9387 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9388 N00.getOperand(0)),
9389 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9390 N00.getOperand(1)), N1);
9391 }
9392 }
9393
9394 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
9395 // Note: Commutes FADD operands.
9396 if (N1.getOpcode() == ISD::FP_EXTEND) {
9397 SDValue N10 = N1.getOperand(0);
9398 if (isContractableFMUL(N10) &&
9399 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
9400 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9401 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9402 N10.getOperand(0)),
9403 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9404 N10.getOperand(1)), N0);
9405 }
9406 }
9407
9408 // More folding opportunities when target permits.
9409 if (Aggressive) {
9410 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
9411 // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9412 // are currently only supported on binary nodes.
9413 if (Options.UnsafeFPMath &&
9414 N0.getOpcode() == PreferredFusedOpcode &&
9415 N0.getOperand(2).getOpcode() == ISD::FMUL &&
9416 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
9417 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9418 N0.getOperand(0), N0.getOperand(1),
9419 DAG.getNode(PreferredFusedOpcode, SL, VT,
9420 N0.getOperand(2).getOperand(0),
9421 N0.getOperand(2).getOperand(1),
9422 N1));
9423 }
9424
9425 // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
9426 // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9427 // are currently only supported on binary nodes.
9428 if (Options.UnsafeFPMath &&
9429 N1->getOpcode() == PreferredFusedOpcode &&
9430 N1.getOperand(2).getOpcode() == ISD::FMUL &&
9431 N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
9432 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9433 N1.getOperand(0), N1.getOperand(1),
9434 DAG.getNode(PreferredFusedOpcode, SL, VT,
9435 N1.getOperand(2).getOperand(0),
9436 N1.getOperand(2).getOperand(1),
9437 N0));
9438 }
9439
9440
9441 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
9442 // -> (fma x, y, (fma (fpext u), (fpext v), z))
9443 auto FoldFAddFMAFPExtFMul = [&] (
9444 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9445 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
9446 DAG.getNode(PreferredFusedOpcode, SL, VT,
9447 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9448 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9449 Z));
9450 };
9451 if (N0.getOpcode() == PreferredFusedOpcode) {
9452 SDValue N02 = N0.getOperand(2);
9453 if (N02.getOpcode() == ISD::FP_EXTEND) {
9454 SDValue N020 = N02.getOperand(0);
9455 if (isContractableFMUL(N020) &&
9456 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
9457 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
9458 N020.getOperand(0), N020.getOperand(1),
9459 N1);
9460 }
9461 }
9462 }
9463
9464 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
9465 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
9466 // FIXME: This turns two single-precision and one double-precision
9467 // operation into two double-precision operations, which might not be
9468 // interesting for all targets, especially GPUs.
9469 auto FoldFAddFPExtFMAFMul = [&] (
9470 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9471 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9472 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
9473 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
9474 DAG.getNode(PreferredFusedOpcode, SL, VT,
9475 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9476 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9477 Z));
9478 };
9479 if (N0.getOpcode() == ISD::FP_EXTEND) {
9480 SDValue N00 = N0.getOperand(0);
9481 if (N00.getOpcode() == PreferredFusedOpcode) {
9482 SDValue N002 = N00.getOperand(2);
9483 if (isContractableFMUL(N002) &&
9484 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9485 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
9486 N002.getOperand(0), N002.getOperand(1),
9487 N1);
9488 }
9489 }
9490 }
9491
9492 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
9493 // -> (fma y, z, (fma (fpext u), (fpext v), x))
9494 if (N1.getOpcode() == PreferredFusedOpcode) {
9495 SDValue N12 = N1.getOperand(2);
9496 if (N12.getOpcode() == ISD::FP_EXTEND) {
9497 SDValue N120 = N12.getOperand(0);
9498 if (isContractableFMUL(N120) &&
9499 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
9500 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
9501 N120.getOperand(0), N120.getOperand(1),
9502 N0);
9503 }
9504 }
9505 }
9506
9507 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
9508 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
9509 // FIXME: This turns two single-precision and one double-precision
9510 // operation into two double-precision operations, which might not be
9511 // interesting for all targets, especially GPUs.
9512 if (N1.getOpcode() == ISD::FP_EXTEND) {
9513 SDValue N10 = N1.getOperand(0);
9514 if (N10.getOpcode() == PreferredFusedOpcode) {
9515 SDValue N102 = N10.getOperand(2);
9516 if (isContractableFMUL(N102) &&
9517 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
9518 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
9519 N102.getOperand(0), N102.getOperand(1),
9520 N0);
9521 }
9522 }
9523 }
9524 }
9525
9526 return SDValue();
9527}
9528
9529/// Try to perform FMA combining on a given FSUB node.
9530SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9531 SDValue N0 = N->getOperand(0);
9532 SDValue N1 = N->getOperand(1);
9533 EVT VT = N->getValueType(0);
9534 SDLoc SL(N);
9535
9536 const TargetOptions &Options = DAG.getTarget().Options;
9537 // Floating-point multiply-add with intermediate rounding.
9538 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9539
9540 // Floating-point multiply-add without intermediate rounding.
9541 bool HasFMA =
9542 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9543 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9544
9545 // No valid opcode, do not combine.
9546 if (!HasFMAD && !HasFMA)
9547 return SDValue();
9548
9549 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9550 Options.UnsafeFPMath || HasFMAD);
9551 // If the subtraction is not contractable, do not combine.
9552 if (!AllowFusionGlobally && !isContractable(N))
9553 return SDValue();
9554
9555 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9556 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9557 return SDValue();
9558
9559 // Always prefer FMAD to FMA for precision.
9560 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9561 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9562
9563 // Is the node an FMUL and contractable either due to global flags or
9564 // SDNodeFlags.
9565 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9566 if (N.getOpcode() != ISD::FMUL)
9567 return false;
9568 return AllowFusionGlobally || isContractable(N.getNode());
9569 };
9570
9571 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9572 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9573 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9574 N0.getOperand(0), N0.getOperand(1),
9575 DAG.getNode(ISD::FNEG, SL, VT, N1));
9576 }
9577
9578 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9579 // Note: Commutes FSUB operands.
9580 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9581 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9582 DAG.getNode(ISD::FNEG, SL, VT,
9583 N1.getOperand(0)),
9584 N1.getOperand(1), N0);
9585
9586 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
9587 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9588 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9589 SDValue N00 = N0.getOperand(0).getOperand(0);
9590 SDValue N01 = N0.getOperand(0).getOperand(1);
9591 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9592 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9593 DAG.getNode(ISD::FNEG, SL, VT, N1));
9594 }
9595
9596 // Look through FP_EXTEND nodes to do more combining.
9597
9598 // fold (fsub (fpext (fmul x, y)), z)
9599 // -> (fma (fpext x), (fpext y), (fneg z))
9600 if (N0.getOpcode() == ISD::FP_EXTEND) {
9601 SDValue N00 = N0.getOperand(0);
9602 if (isContractableFMUL(N00) &&
9603 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9604 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9605 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9606 N00.getOperand(0)),
9607 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9608 N00.getOperand(1)),
9609 DAG.getNode(ISD::FNEG, SL, VT, N1));
9610 }
9611 }
9612
9613 // fold (fsub x, (fpext (fmul y, z)))
9614 // -> (fma (fneg (fpext y)), (fpext z), x)
9615 // Note: Commutes FSUB operands.
9616 if (N1.getOpcode() == ISD::FP_EXTEND) {
9617 SDValue N10 = N1.getOperand(0);
9618 if (isContractableFMUL(N10) &&
9619 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
9620 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9621 DAG.getNode(ISD::FNEG, SL, VT,
9622 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9623 N10.getOperand(0))),
9624 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9625 N10.getOperand(1)),
9626 N0);
9627 }
9628 }
9629
9630 // fold (fsub (fpext (fneg (fmul x, y))), z)
9631 // -> (fneg (fma (fpext x), (fpext y), z))
9632 // Note: This could be removed with appropriate canonicalization of the
9633 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
9634 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9635 // us from implementing the canonicalization in visitFSUB.
9636 if (N0.getOpcode() == ISD::FP_EXTEND) {
9637 SDValue N00 = N0.getOperand(0);
9638 if (N00.getOpcode() == ISD::FNEG) {
9639 SDValue N000 = N00.getOperand(0);
9640 if (isContractableFMUL(N000) &&
9641 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9642 return DAG.getNode(ISD::FNEG, SL, VT,
9643 DAG.getNode(PreferredFusedOpcode, SL, VT,
9644 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9645 N000.getOperand(0)),
9646 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9647 N000.getOperand(1)),
9648 N1));
9649 }
9650 }
9651 }
9652
9653 // fold (fsub (fneg (fpext (fmul x, y))), z)
9654 // -> (fneg (fma (fpext x), (fpext y), z))
9655 // Note: This could be removed with appropriate canonicalization of the
9656 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
9657 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9658 // us from implementing the canonicalization in visitFSUB.
9659 if (N0.getOpcode() == ISD::FNEG) {
9660 SDValue N00 = N0.getOperand(0);
9661 if (N00.getOpcode() == ISD::FP_EXTEND) {
9662 SDValue N000 = N00.getOperand(0);
9663 if (isContractableFMUL(N000) &&
9664 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
9665 return DAG.getNode(ISD::FNEG, SL, VT,
9666 DAG.getNode(PreferredFusedOpcode, SL, VT,
9667 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9668 N000.getOperand(0)),
9669 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9670 N000.getOperand(1)),
9671 N1));
9672 }
9673 }
9674 }
9675
9676 // More folding opportunities when target permits.
9677 if (Aggressive) {
9678 // fold (fsub (fma x, y, (fmul u, v)), z)
9679 // -> (fma x, y, (fma u, v, (fneg z)))
9680 // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9681 // are currently only supported on binary nodes.
9682 if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9683 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9684 N0.getOperand(2)->hasOneUse()) {
9685 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9686 N0.getOperand(0), N0.getOperand(1),
9687 DAG.getNode(PreferredFusedOpcode, SL, VT,
9688 N0.getOperand(2).getOperand(0),
9689 N0.getOperand(2).getOperand(1),
9690 DAG.getNode(ISD::FNEG, SL, VT,
9691 N1)));
9692 }
9693
9694 // fold (fsub x, (fma y, z, (fmul u, v)))
9695 // -> (fma (fneg y), z, (fma (fneg u), v, x))
9696 // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9697 // are currently only supported on binary nodes.
9698 if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9699 isContractableFMUL(N1.getOperand(2))) {
9700 SDValue N20 = N1.getOperand(2).getOperand(0);
9701 SDValue N21 = N1.getOperand(2).getOperand(1);
9702 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9703 DAG.getNode(ISD::FNEG, SL, VT,
9704 N1.getOperand(0)),
9705 N1.getOperand(1),
9706 DAG.getNode(PreferredFusedOpcode, SL, VT,
9707 DAG.getNode(ISD::FNEG, SL, VT, N20),
9708
9709 N21, N0));
9710 }
9711
9712
9713 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9714 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
9715 if (N0.getOpcode() == PreferredFusedOpcode) {
9716 SDValue N02 = N0.getOperand(2);
9717 if (N02.getOpcode() == ISD::FP_EXTEND) {
9718 SDValue N020 = N02.getOperand(0);
9719 if (isContractableFMUL(N020) &&
9720 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
9721 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9722 N0.getOperand(0), N0.getOperand(1),
9723 DAG.getNode(PreferredFusedOpcode, SL, VT,
9724 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9725 N020.getOperand(0)),
9726 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9727 N020.getOperand(1)),
9728 DAG.getNode(ISD::FNEG, SL, VT,
9729 N1)));
9730 }
9731 }
9732 }
9733
9734 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9735 // -> (fma (fpext x), (fpext y),
9736 // (fma (fpext u), (fpext v), (fneg z)))
9737 // FIXME: This turns two single-precision and one double-precision
9738 // operation into two double-precision operations, which might not be
9739 // interesting for all targets, especially GPUs.
9740 if (N0.getOpcode() == ISD::FP_EXTEND) {
9741 SDValue N00 = N0.getOperand(0);
9742 if (N00.getOpcode() == PreferredFusedOpcode) {
9743 SDValue N002 = N00.getOperand(2);
9744 if (isContractableFMUL(N002) &&
9745 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9746 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9747 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9748 N00.getOperand(0)),
9749 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9750 N00.getOperand(1)),
9751 DAG.getNode(PreferredFusedOpcode, SL, VT,
9752 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9753 N002.getOperand(0)),
9754 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9755 N002.getOperand(1)),
9756 DAG.getNode(ISD::FNEG, SL, VT,
9757 N1)));
9758 }
9759 }
9760 }
9761
9762 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9763 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9764 if (N1.getOpcode() == PreferredFusedOpcode &&
9765 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9766 SDValue N120 = N1.getOperand(2).getOperand(0);
9767 if (isContractableFMUL(N120) &&
9768 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
9769 SDValue N1200 = N120.getOperand(0);
9770 SDValue N1201 = N120.getOperand(1);
9771 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9772 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9773 N1.getOperand(1),
9774 DAG.getNode(PreferredFusedOpcode, SL, VT,
9775 DAG.getNode(ISD::FNEG, SL, VT,
9776 DAG.getNode(ISD::FP_EXTEND, SL,
9777 VT, N1200)),
9778 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9779 N1201),
9780 N0));
9781 }
9782 }
9783
9784 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9785 // -> (fma (fneg (fpext y)), (fpext z),
9786 // (fma (fneg (fpext u)), (fpext v), x))
9787 // FIXME: This turns two single-precision and one double-precision
9788 // operation into two double-precision operations, which might not be
9789 // interesting for all targets, especially GPUs.
9790 if (N1.getOpcode() == ISD::FP_EXTEND &&
9791 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9792 SDValue CvtSrc = N1.getOperand(0);
9793 SDValue N100 = CvtSrc.getOperand(0);
9794 SDValue N101 = CvtSrc.getOperand(1);
9795 SDValue N102 = CvtSrc.getOperand(2);
9796 if (isContractableFMUL(N102) &&
9797 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
9798 SDValue N1020 = N102.getOperand(0);
9799 SDValue N1021 = N102.getOperand(1);
9800 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9801 DAG.getNode(ISD::FNEG, SL, VT,
9802 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9803 N100)),
9804 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9805 DAG.getNode(PreferredFusedOpcode, SL, VT,
9806 DAG.getNode(ISD::FNEG, SL, VT,
9807 DAG.getNode(ISD::FP_EXTEND, SL,
9808 VT, N1020)),
9809 DAG.getNode(ISD::FP_EXTEND, SL, VT,
9810 N1021),
9811 N0));
9812 }
9813 }
9814 }
9815
9816 return SDValue();
9817}
9818
9819/// Try to perform FMA combining on a given FMUL node based on the distributive
9820/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9821/// subtraction instead of addition).
9822SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9823 SDValue N0 = N->getOperand(0);
9824 SDValue N1 = N->getOperand(1);
9825 EVT VT = N->getValueType(0);
9826 SDLoc SL(N);
9827
9828 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9829
9830 const TargetOptions &Options = DAG.getTarget().Options;
9831
9832 // The transforms below are incorrect when x == 0 and y == inf, because the
9833 // intermediate multiplication produces a NaN.
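// e.g. with x = 0.0 and y = inf: (x + 1.0) * y is inf, but the fused form
// fma(x, y, y) computes 0.0 * inf + inf = NaN.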
9834 if (!Options.NoInfsFPMath)
9835 return SDValue();
9836
9837 // Floating-point multiply-add without intermediate rounding.
9838 bool HasFMA =
9839 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9840 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9841 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9842
9843 // Floating-point multiply-add with intermediate rounding. This can produce
9844 // a less precise result due to the changed rounding order.
9845 bool HasFMAD = Options.UnsafeFPMath &&
9846 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9847
9848 // No valid opcode, do not combine.
9849 if (!HasFMAD && !HasFMA)
9850 return SDValue();
9851
9852 // Always prefer FMAD to FMA for precision.
9853 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9854 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9855
9856 // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9857 // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9858 auto FuseFADD = [&](SDValue X, SDValue Y) {
9859 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9860 auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9861 if (XC1 && XC1->isExactlyValue(+1.0))
9862 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9863 if (XC1 && XC1->isExactlyValue(-1.0))
9864 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9865 DAG.getNode(ISD::FNEG, SL, VT, Y));
9866 }
9867 return SDValue();
9868 };
9869
9870 if (SDValue FMA = FuseFADD(N0, N1))
9871 return FMA;
9872 if (SDValue FMA = FuseFADD(N1, N0))
9873 return FMA;
9874
9875 // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9876 // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9877 // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9878 // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9879 auto FuseFSUB = [&](SDValue X, SDValue Y) {
9880 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9881 auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9882 if (XC0 && XC0->isExactlyValue(+1.0))
9883 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9884 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9885 Y);
9886 if (XC0 && XC0->isExactlyValue(-1.0))
9887 return DAG.getNode(PreferredFusedOpcode, SL, VT,
9888 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9889 DAG.getNode(ISD::FNEG, SL, VT, Y));
9890
9891 auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9892 if (XC1 && XC1->isExactlyValue(+1.0))
9893 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9894 DAG.getNode(ISD::FNEG, SL, VT, Y));
9895 if (XC1 && XC1->isExactlyValue(-1.0))
9896 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9897 }
9898 return SDValue();
9899 };
9900
9901 if (SDValue FMA = FuseFSUB(N0, N1))
9902 return FMA;
9903 if (SDValue FMA = FuseFSUB(N1, N0))
9904 return FMA;
9905
9906 return SDValue();
9907}
9908
9909static bool isFMulNegTwo(SDValue &N) {
9910 if (N.getOpcode() != ISD::FMUL)
9911 return false;
9912 if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9913 return CFP->isExactlyValue(-2.0);
9914 return false;
9915}
9916
9917SDValue DAGCombiner::visitFADD(SDNode *N) {
9918 SDValue N0 = N->getOperand(0);
9919 SDValue N1 = N->getOperand(1);
9920 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9921 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9922 EVT VT = N->getValueType(0);
9923 SDLoc DL(N);
9924 const TargetOptions &Options = DAG.getTarget().Options;
9925 const SDNodeFlags Flags = N->getFlags();
9926
9927 // fold vector ops
9928 if (VT.isVector())
9929 if (SDValue FoldedVOp = SimplifyVBinOp(N))
9930 return FoldedVOp;
9931
9932 // fold (fadd c1, c2) -> c1 + c2
9933 if (N0CFP && N1CFP)
9934 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9935
9936 // canonicalize constant to RHS
9937 if (N0CFP && !N1CFP)
9938 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9939
9940 if (SDValue NewSel = foldBinOpIntoSelect(N))
9941 return NewSel;
9942
9943 // fold (fadd A, (fneg B)) -> (fsub A, B)
9944 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9945 isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9946 return DAG.getNode(ISD::FSUB, DL, VT, N0,
9947 GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9948
9949 // fold (fadd (fneg A), B) -> (fsub B, A)
9950 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9951 isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9952 return DAG.getNode(ISD::FSUB, DL, VT, N1,
9953 GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9954
9955 // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9956 // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
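// B * -2.0 == -(B + B), so the multiply can be rewritten as a subtraction
// of a self-add, avoiding the -2.0 constant.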
9957 if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9958 (isFMulNegTwo(N1) && N1.hasOneUse())) {
9959 bool N1IsFMul = isFMulNegTwo(N1);
9960 SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9961 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9962 return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9963 }
9964
9965 // FIXME: Auto-upgrade the target/function-level option.
9966 if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9967 // fold (fadd A, 0) -> A
9968 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9969 if (N1C->isZero())
9970 return N0;
9971 }
9972
9973 // If 'unsafe math' is enabled, fold lots of things.
9974 if (Options.UnsafeFPMath) {
9975 // No FP constant should be created after legalization, as the instruction
9976 // selection pass has a hard time dealing with FP constants.
9977 bool AllowNewConst = (Level < AfterLegalizeDAG);
9978
9979 // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9980 if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9981 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9982 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9983 DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9984 Flags),
9985 Flags);
9986
9987 // If allowed, fold (fadd (fneg x), x) -> 0.0
9988 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9989 return DAG.getConstantFP(0.0, DL, VT);
9990
9991 // If allowed, fold (fadd x, (fneg x)) -> 0.0
9992 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9993 return DAG.getConstantFP(0.0, DL, VT);
9994
9995 // We can fold chains of FADD's of the same value into multiplications.
9996 // This transform is not safe in general because we are reducing the number
9997 // of rounding steps.
9998 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9999 if (N0.getOpcode() == ISD::FMUL) {
10000 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10001 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
10002
10003 // (fadd (fmul x, c), x) -> (fmul x, c+1)
10004 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
10005 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10006 DAG.getConstantFP(1.0, DL, VT), Flags);
10007 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
10008 }
10009
10010 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
10011 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
10012 N1.getOperand(0) == N1.getOperand(1) &&
10013 N0.getOperand(0) == N1.getOperand(0)) {
10014 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10015 DAG.getConstantFP(2.0, DL, VT), Flags);
10016 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
10017 }
10018 }
10019
10020 if (N1.getOpcode() == ISD::FMUL) {
10021 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10022 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
10023
10024 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
10025 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
10026 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10027 DAG.getConstantFP(1.0, DL, VT), Flags);
10028 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
10029 }
10030
10031 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
10032 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
10033 N0.getOperand(0) == N0.getOperand(1) &&
10034 N1.getOperand(0) == N0.getOperand(0)) {
10035 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10036 DAG.getConstantFP(2.0, DL, VT), Flags);
10037 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
10038 }
10039 }
10040
10041 if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
10042 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10043 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
10044 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
10045 (N0.getOperand(0) == N1)) {
10046 return DAG.getNode(ISD::FMUL, DL, VT,
10047 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
10048 }
10049 }
10050
10051 if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
10052 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10053 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
10054 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
10055 N1.getOperand(0) == N0) {
10056 return DAG.getNode(ISD::FMUL, DL, VT,
10057 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
10058 }
10059 }
10060
10061 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
10062 if (AllowNewConst &&
10063 N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
10064 N0.getOperand(0) == N0.getOperand(1) &&
10065 N1.getOperand(0) == N1.getOperand(1) &&
10066 N0.getOperand(0) == N1.getOperand(0)) {
10067 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
10068 DAG.getConstantFP(4.0, DL, VT), Flags);
10069 }
10070 }
10071 } // enable-unsafe-fp-math
10072
10073 // FADD -> FMA combines:
10074 if (SDValue Fused = visitFADDForFMACombine(N)) {
10075 AddToWorklist(Fused.getNode());
10076 return Fused;
10077 }
10078 return SDValue();
10079}
10080
10081SDValue DAGCombiner::visitFSUB(SDNode *N) {
10082 SDValue N0 = N->getOperand(0);
10083 SDValue N1 = N->getOperand(1);
10084 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10085 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10086 EVT VT = N->getValueType(0);
10087 SDLoc DL(N);
10088 const TargetOptions &Options = DAG.getTarget().Options;
10089 const SDNodeFlags Flags = N->getFlags();
10090
10091 // fold vector ops
10092 if (VT.isVector())
10093 if (SDValue FoldedVOp = SimplifyVBinOp(N))
10094 return FoldedVOp;
10095
10096 // fold (fsub c1, c2) -> c1-c2
10097 if (N0CFP && N1CFP)
10098 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
10099
10100 if (SDValue NewSel = foldBinOpIntoSelect(N))
10101 return NewSel;
10102
10103 // fold (fsub A, (fneg B)) -> (fadd A, B)
10104 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10105 return DAG.getNode(ISD::FADD, DL, VT, N0,
10106 GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10107
10108 // FIXME: Auto-upgrade the target/function-level option.
10109 if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
10110 // (fsub 0, B) -> -B
10111 if (N0CFP && N0CFP->isZero()) {
10112 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10113 return GetNegatedExpression(N1, DAG, LegalOperations);
10114 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10115 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
10116 }
10117 }
10118
10119 // If 'unsafe math' is enabled, fold lots of things.
10120 if (Options.UnsafeFPMath) {
10121 // (fsub A, 0) -> A
10122 if (N1CFP && N1CFP->isZero())
10123 return N0;
10124
10125 // (fsub x, x) -> 0.0
10126 if (N0 == N1)
10127 return DAG.getConstantFP(0.0f, DL, VT);
10128
10129 // (fsub x, (fadd x, y)) -> (fneg y)
10130 // (fsub x, (fadd y, x)) -> (fneg y)
10131 if (N1.getOpcode() == ISD::FADD) {
10132 SDValue N10 = N1->getOperand(0);
10133 SDValue N11 = N1->getOperand(1);
10134
10135 if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
10136 return GetNegatedExpression(N11, DAG, LegalOperations);
10137
10138 if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
10139 return GetNegatedExpression(N10, DAG, LegalOperations);
10140 }
10141 }
10142
10143 // FSUB -> FMA combines:
10144 if (SDValue Fused = visitFSUBForFMACombine(N)) {
10145 AddToWorklist(Fused.getNode());
10146 return Fused;
10147 }
10148
10149 return SDValue();
10150}
10151
10152SDValue DAGCombiner::visitFMUL(SDNode *N) {
10153 SDValue N0 = N->getOperand(0);
10154 SDValue N1 = N->getOperand(1);
10155 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10156 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10157 EVT VT = N->getValueType(0);
10158 SDLoc DL(N);
10159 const TargetOptions &Options = DAG.getTarget().Options;
10160 const SDNodeFlags Flags = N->getFlags();
10161
10162 // fold vector ops
10163 if (VT.isVector()) {
10164 // This just handles C1 * C2 for vectors. Other vector folds are below.
10165 if (SDValue FoldedVOp = SimplifyVBinOp(N))
10166 return FoldedVOp;
10167 }
10168
10169 // fold (fmul c1, c2) -> c1*c2
10170 if (N0CFP && N1CFP)
10171 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
10172
10173 // canonicalize constant to RHS
10174 if (isConstantFPBuildVectorOrConstantFP(N0) &&
10175 !isConstantFPBuildVectorOrConstantFP(N1))
10176 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
10177
10178 // fold (fmul A, 1.0) -> A
10179 if (N1CFP && N1CFP->isExactlyValue(1.0))
10180 return N0;
10181
10182 if (SDValue NewSel = foldBinOpIntoSelect(N))
10183 return NewSel;
10184
10185 if (Options.UnsafeFPMath) {
10186 // fold (fmul A, 0) -> 0
10187 if (N1CFP && N1CFP->isZero())
10188 return N1;
10189
10190 // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
10191 if (N0.getOpcode() == ISD::FMUL) {
10192 // Fold scalars or any vector constants (not just splats).
10193 // This fold is done in general by InstCombine, but extra fmul insts
10194 // may have been generated during lowering.
10195 SDValue N00 = N0.getOperand(0);
10196 SDValue N01 = N0.getOperand(1);
10197 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
10198 auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
10199 auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
10200
10201 // Check 1: Make sure that the first operand of the inner multiply is NOT
10202 // a constant. Otherwise, we may induce infinite looping.
10203 if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
10204 // Check 2: Make sure that the second operand of the inner multiply and
10205 // the second operand of the outer multiply are constants.
10206 if ((N1CFP && isConstOrConstSplatFP(N01)) ||
10207 (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
10208 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
10209 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
10210 }
10211 }
10212 }
10213
10214 // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
10215 // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
10216 // during an early run of DAGCombiner it can prevent folding with fmuls
10217 // inserted during lowering.
10218 if (N0.getOpcode() == ISD::FADD &&
10219 (N0.getOperand(0) == N0.getOperand(1)) &&
10220 N0.hasOneUse()) {
10221 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
10222 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
10223 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
10224 }
10225 }
10226
10227 // fold (fmul X, 2.0) -> (fadd X, X)
10228 if (N1CFP && N1CFP->isExactlyValue(+2.0))
10229 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
10230
10231 // fold (fmul X, -1.0) -> (fneg X)
10232 if (N1CFP && N1CFP->isExactlyValue(-1.0))
10233 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10234 return DAG.getNode(ISD::FNEG, DL, VT, N0);
10235
10236 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
10237 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10238 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10239 // Both can be negated for free; check whether at least one is cheaper
10240 // when negated.
10241 if (LHSNeg == 2 || RHSNeg == 2)
10242 return DAG.getNode(ISD::FMUL, DL, VT,
10243 GetNegatedExpression(N0, DAG, LegalOperations),
10244 GetNegatedExpression(N1, DAG, LegalOperations),
10245 Flags);
10246 }
10247 }
10248
10249 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
10250 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
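// A worked case showing why the flags are needed: with X = +0.0, the
// compare X > 0.0 is false, the select yields -1.0, and X * -1.0 = -0.0,
// whereas fabs(X) = +0.0. The no-signed-zeros flag licenses treating those
// results as equal; no-NaNs similarly covers X = NaN.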
10251 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
10252 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
10253 TLI.isOperationLegal(ISD::FABS, VT)) {
10254 SDValue Select = N0, X = N1;
10255 if (Select.getOpcode() != ISD::SELECT)
10256 std::swap(Select, X);
10257
10258 SDValue Cond = Select.getOperand(0);
10259 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
10260 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
10261
10262 if (TrueOpnd && FalseOpnd &&
10263 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
10264 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
10265 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
10266 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10267 switch (CC) {
10268 default: break;
10269 case ISD::SETOLT:
10270 case ISD::SETULT:
10271 case ISD::SETOLE:
10272 case ISD::SETULE:
10273 case ISD::SETLT:
10274 case ISD::SETLE:
10275 std::swap(TrueOpnd, FalseOpnd);
10276 LLVM_FALLTHROUGH;
10277 case ISD::SETOGT:
10278 case ISD::SETUGT:
10279 case ISD::SETOGE:
10280 case ISD::SETUGE:
10281 case ISD::SETGT:
10282 case ISD::SETGE:
10283 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
10284 TLI.isOperationLegal(ISD::FNEG, VT))
10285 return DAG.getNode(ISD::FNEG, DL, VT,
10286 DAG.getNode(ISD::FABS, DL, VT, X));
10287 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
10288 return DAG.getNode(ISD::FABS, DL, VT, X);
10289
10290 break;
10291 }
10292 }
10293 }
10294
10295 // FMUL -> FMA combines:
10296 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
10297 AddToWorklist(Fused.getNode());
10298 return Fused;
10299 }
10300
10301 return SDValue();
10302}
10303
10304SDValue DAGCombiner::visitFMA(SDNode *N) {
10305 SDValue N0 = N->getOperand(0);
10306 SDValue N1 = N->getOperand(1);
10307 SDValue N2 = N->getOperand(2);
10308 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10309 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10310 EVT VT = N->getValueType(0);
10311 SDLoc DL(N);
10312 const TargetOptions &Options = DAG.getTarget().Options;
10313
10314 // Constant fold FMA.
10315 if (isa<ConstantFPSDNode>(N0) &&
10316 isa<ConstantFPSDNode>(N1) &&
10317 isa<ConstantFPSDNode>(N2)) {
10318 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
10319 }
10320
10321 if (Options.UnsafeFPMath) {
10322 if (N0CFP && N0CFP->isZero())
10323 return N2;
10324 if (N1CFP && N1CFP->isZero())
10325 return N2;
10326 }
10327 // TODO: The FMA node should have flags that propagate to these nodes.
10328 if (N0CFP && N0CFP->isExactlyValue(1.0))
10329 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
10330 if (N1CFP && N1CFP->isExactlyValue(1.0))
10331 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
10332
10333 // Canonicalize (fma c, x, y) -> (fma x, c, y)
10334 if (isConstantFPBuildVectorOrConstantFP(N0) &&
10335 !isConstantFPBuildVectorOrConstantFP(N1))
10336 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
10337
10338 // TODO: FMA nodes should have flags that propagate to the created nodes.
10339 // For now, create a Flags object for use with all unsafe math transforms.
10340 SDNodeFlags Flags;
10341 Flags.setUnsafeAlgebra(true);
10342
10343 if (Options.UnsafeFPMath) {
10344 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
10345 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
10346 isConstantFPBuildVectorOrConstantFP(N1) &&
10347 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
10348 return DAG.getNode(ISD::FMUL, DL, VT, N0,
10349 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
10350 Flags), Flags);
10351 }
10352
10353 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
10354 if (N0.getOpcode() == ISD::FMUL &&
10355 isConstantFPBuildVectorOrConstantFP(N1) &&
10356 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10357 return DAG.getNode(ISD::FMA, DL, VT,
10358 N0.getOperand(0),
10359 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
10360 Flags),
10361 N2);
10362 }
10363 }
10364
10365 // (fma x, 1, y) -> (fadd x, y)
10366 // (fma x, -1, y) -> (fadd (fneg x), y)
10367 if (N1CFP) {
10368 if (N1CFP->isExactlyValue(1.0))
10369 // TODO: The FMA node should have flags that propagate to this node.
10370 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
10371
10372 if (N1CFP->isExactlyValue(-1.0) &&
10373 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
10374 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
10375 AddToWorklist(RHSNeg.getNode());
10376 // TODO: The FMA node should have flags that propagate to this node.
10377 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
10378 }
10379
10380 // fma (fneg x), K, y -> fma x, -K, y
10381 if (N0.getOpcode() == ISD::FNEG &&
10382 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10383 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
10384 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
10385 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
10386 }
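// For example, (fma (fneg x), 2.0, y) becomes (fma x, -2.0, y), deleting
// the fneg. The guard above ensures materializing -K costs no more than K:
// either ConstantFP is legal, or K has a single use and is not a legal FP
// immediate anyway.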
10387 }
10388
10389 if (Options.UnsafeFPMath) {
10390 // (fma x, c, x) -> (fmul x, (c+1))
10391 if (N1CFP && N0 == N2) {
10392 return DAG.getNode(ISD::FMUL, DL, VT, N0,
10393 DAG.getNode(ISD::FADD, DL, VT, N1,
10394 DAG.getConstantFP(1.0, DL, VT), Flags),
10395 Flags);
10396 }
10397
10398 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
10399 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
10400 return DAG.getNode(ISD::FMUL, DL, VT, N0,
10401 DAG.getNode(ISD::FADD, DL, VT, N1,
10402 DAG.getConstantFP(-1.0, DL, VT), Flags),
10403 Flags);
10404 }
10405 }
10406
10407 return SDValue();
10408}
10409
10410// Combine multiple FDIVs with the same divisor into multiple FMULs by the
10411// reciprocal.
10412// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
10413 // Notice that this is not always beneficial. One reason is that different
10414 // targets may have different costs for FDIV and FMUL, so sometimes the cost
10415 // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
10416 // reason is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
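// A minimal sketch of the intended effect in C-like form (names are
// hypothetical), assuming the target's minimum-use threshold is met:
//   float r = 1.0f / d; // one real division
//   float x = a * r;    // formerly a / d
//   float y = b * r;    // formerly b / d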
10417SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
10418 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
10419 const SDNodeFlags Flags = N->getFlags();
10420 if (!UnsafeMath && !Flags.hasAllowReciprocal())
10421 return SDValue();
10422
10423 // Skip if current node is a reciprocal.
10424 SDValue N0 = N->getOperand(0);
10425 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10426 if (N0CFP && N0CFP->isExactlyValue(1.0))
10427 return SDValue();
10428
10429 // Exit early if the target does not want this transform or if there can't
10430 // possibly be enough uses of the divisor to make the transform worthwhile.
10431 SDValue N1 = N->getOperand(1);
10432 unsigned MinUses = TLI.combineRepeatedFPDivisors();
10433 if (!MinUses || N1->use_size() < MinUses)
10434 return SDValue();
10435
10436 // Find all FDIV users of the same divisor.
10437 // Use a set because duplicates may be present in the user list.
10438 SetVector<SDNode *> Users;
10439 for (auto *U : N1->uses()) {
10440 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
10441 // This division is eligible for optimization only if global unsafe math
10442 // is enabled or if this division allows reciprocal formation.
10443 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
10444 Users.insert(U);
10445 }
10446 }
10447
10448 // Now that we have the actual number of divisor uses, make sure it meets
10449 // the minimum threshold specified by the target.
10450 if (Users.size() < MinUses)
10451 return SDValue();
10452
10453 EVT VT = N->getValueType(0);
10454 SDLoc DL(N);
10455 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
10456 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
10457
10458 // Dividend / Divisor -> Dividend * Reciprocal
10459 for (auto *U : Users) {
10460 SDValue Dividend = U->getOperand(0);
10461 if (Dividend != FPOne) {
10462 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
10463 Reciprocal, Flags);
10464 CombineTo(U, NewNode);
10465 } else if (U != Reciprocal.getNode()) {
10466 // In the absence of fast-math-flags, this user node is always the
10467 // same node as Reciprocal, but with FMF they may be different nodes.
10468 CombineTo(U, Reciprocal);
10469 }
10470 }
10471 return SDValue(N, 0); // N was replaced.
10472}
10473
10474SDValue DAGCombiner::visitFDIV(SDNode *N) {
10475 SDValue N0 = N->getOperand(0);
10476 SDValue N1 = N->getOperand(1);
10477 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10478 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10479 EVT VT = N->getValueType(0);
10480 SDLoc DL(N);
10481 const TargetOptions &Options = DAG.getTarget().Options;
10482 SDNodeFlags Flags = N->getFlags();
10483
10484 // fold vector ops
10485 if (VT.isVector())
10486 if (SDValue FoldedVOp = SimplifyVBinOp(N))
10487 return FoldedVOp;
10488
10489 // fold (fdiv c1, c2) -> c1/c2
10490 if (N0CFP && N1CFP)
10491 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
10492
10493 if (SDValue NewSel = foldBinOpIntoSelect(N))
10494 return NewSel;
10495
10496 if (Options.UnsafeFPMath) {
10497 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
10498 if (N1CFP) {
10499 // Compute the reciprocal 1.0 / c2.
10500 const APFloat &N1APF = N1CFP->getValueAPF();
10501 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
10502 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
10503 // Only do the transform if the reciprocal is a legal fp immediate that
10504 // isn't too nasty (e.g. NaN, denormal, ...).
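// For example, (fdiv x, 4.0) -> (fmul x, 0.25) since 1.0/4.0 is exact
// (opOK); 1.0/3.0 is merely inexact (opInexact) and still allowed, while
// statuses such as opDivByZero or opUnderflow reject the transform.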
10505 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
10506 (!LegalOperations ||
10507 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
10508 // backend)... we should handle this gracefully after Legalize.
10509 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
10510 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10511 TLI.isFPImmLegal(Recip, VT)))
10512 return DAG.getNode(ISD::FMUL, DL, VT, N0,
10513 DAG.getConstantFP(Recip, DL, VT), Flags);
10514 }
10515
10516 // If this FDIV is part of a reciprocal square root, it may be folded
10517 // into a target-specific square root estimate instruction.
10518 if (N1.getOpcode() == ISD::FSQRT) {
10519 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
10520 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10521 }
10522 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
10523 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10524 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10525 Flags)) {
10526 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
10527 AddToWorklist(RV.getNode());
10528 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10529 }
10530 } else if (N1.getOpcode() == ISD::FP_ROUND &&
10531 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10532 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10533 Flags)) {
10534 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
10535 AddToWorklist(RV.getNode());
10536 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10537 }
10538 } else if (N1.getOpcode() == ISD::FMUL) {
10539 // Look through an FMUL. Even though this won't remove the FDIV directly,
10540 // it's still worthwhile to get rid of the FSQRT if possible.
10541 SDValue SqrtOp;
10542 SDValue OtherOp;
10543 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10544 SqrtOp = N1.getOperand(0);
10545 OtherOp = N1.getOperand(1);
10546 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
10547 SqrtOp = N1.getOperand(1);
10548 OtherOp = N1.getOperand(0);
10549 }
10550 if (SqrtOp.getNode()) {
10551 // We found a FSQRT, so try to make this fold:
10552 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
10553 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
10554 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
10555 AddToWorklist(RV.getNode());
10556 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10557 }
10558 }
10559 }
10560
10561 // Fold into a reciprocal estimate and multiply instead of a real divide.
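// On targets that provide one, this typically becomes a hardware
// reciprocal estimate (e.g. x86's RCPPS) refined by Newton-Raphson
// iterations, rather than a full-precision divide.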
10562 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
10563 AddToWorklist(RV.getNode());
10564 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10565 }
10566 }
10567
10568 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
10569 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10570 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10571 // Both can be negated for free; check whether at least one is cheaper
10572 // when negated.
10573 if (LHSNeg == 2 || RHSNeg == 2)
10574 return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
10575 GetNegatedExpression(N0, DAG, LegalOperations),
10576 GetNegatedExpression(N1, DAG, LegalOperations),
10577 Flags);
10578 }
10579 }
10580
10581 if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
10582 return CombineRepeatedDivisors;
10583
10584 return SDValue();
10585}
10586
10587SDValue DAGCombiner::visitFREM(SDNode *N) {
10588 SDValue N0 = N->getOperand(0);
10589 SDValue N1 = N->getOperand(1);
10590 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10591 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10592 EVT VT = N->getValueType(0);
10593
10594 // fold (frem c1, c2) -> fmod(c1,c2)
10595 if (N0CFP && N1CFP)
10596 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10597
10598 if (SDValue NewSel = foldBinOpIntoSelect(N))
10599 return NewSel;
10600
10601 return SDValue();
10602}
10603
10604SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10605 if (!DAG.getTarget().Options.UnsafeFPMath)
10606 return SDValue();
10607
10608 SDValue N0 = N->getOperand(0);
10609 if (TLI.isFsqrtCheap(N0, DAG))
10610 return SDValue();
10611
10612 // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10613 // For now, create a Flags object for use with all unsafe math transforms.
10614 SDNodeFlags Flags;
10615 Flags.setUnsafeAlgebra(true);
10616 return buildSqrtEstimate(N0, Flags);
10617}
10618
10619/// copysign(x, fp_extend(y)) -> copysign(x, y)
10620/// copysign(x, fp_round(y)) -> copysign(x, y)
10621static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10622 SDValue N1 = N->getOperand(1);
10623 if ((N1.getOpcode() == ISD::FP_EXTEND ||
10624 N1.getOpcode() == ISD::FP_ROUND)) {
10625 // Do not optimize out type conversion of f128 type yet.
10626 // For some targets like x86_64, configuration is changed to keep one f128
10627 // value in one SSE register, but instruction selection cannot handle
10628 // FCOPYSIGN on SSE registers yet.
10629 EVT N1VT = N1->getValueType(0);
10630 EVT N1Op0VT = N1->getOperand(0).getValueType();
10631 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10632 }
10633 return false;
10634}
10635
10636SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10637 SDValue N0 = N->getOperand(0);
10638 SDValue N1 = N->getOperand(1);
10639 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10640 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10641 EVT VT = N->getValueType(0);
10642
10643 if (N0CFP && N1CFP) // Constant fold
10644 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10645
10646 if (N1CFP) {
10647 const APFloat &V = N1CFP->getValueAPF();
10648 // copysign(x, c1) -> fabs(x) iff ispos(c1)
10649 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10650 if (!V.isNegative()) {
10651 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10652 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10653 } else {
10654 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10655 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10656 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10657 }
10658 }
10659
10660 // copysign(fabs(x), y) -> copysign(x, y)
10661 // copysign(fneg(x), y) -> copysign(x, y)
10662 // copysign(copysign(x,z), y) -> copysign(x, y)
10663 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10664 N0.getOpcode() == ISD::FCOPYSIGN)
10665 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10666
10667 // copysign(x, abs(y)) -> abs(x)
10668 if (N1.getOpcode() == ISD::FABS)
10669 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10670
10671 // copysign(x, copysign(y,z)) -> copysign(x, z)
10672 if (N1.getOpcode() == ISD::FCOPYSIGN)
10673 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10674
10675 // copysign(x, fp_extend(y)) -> copysign(x, y)
10676 // copysign(x, fp_round(y)) -> copysign(x, y)
10677 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10678 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10679
10680 return SDValue();
10681}
10682
10683SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10684 SDValue N0 = N->getOperand(0);
10685 EVT VT = N->getValueType(0);
10686 EVT OpVT = N0.getValueType();
10687
10688 // fold (sint_to_fp c1) -> c1fp
10689 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10690 // ...but only if the target supports immediate floating-point values
10691 (!LegalOperations ||
10692 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10693 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10694
10695 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10696 // but UINT_TO_FP is legal on this target, try to convert.
10697 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10698 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10699 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10700 if (DAG.SignBitIsZero(N0))
10701 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10702 }
10703
10704 // The next optimizations are desirable only if SELECT_CC can be lowered.
10705 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10706 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
10707 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10708 !VT.isVector() &&
10709 (!LegalOperations ||
10710 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10711 SDLoc DL(N);
10712 SDValue Ops[] =
10713 { N0.getOperand(0), N0.getOperand(1),
10714 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10715 N0.getOperand(2) };
10716 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10717 }
10718
10719 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10720 // (select_cc x, y, 1.0, 0.0, cc)
10721 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10722 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10723 (!LegalOperations ||
10724 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10725 SDLoc DL(N);
10726 SDValue Ops[] =
10727 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10728 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10729 N0.getOperand(0).getOperand(2) };
10730 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10731 }
10732 }
10733
10734 return SDValue();
10735}
10736
10737SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10738 SDValue N0 = N->getOperand(0);
10739 EVT VT = N->getValueType(0);
10740 EVT OpVT = N0.getValueType();
10741
10742 // fold (uint_to_fp c1) -> c1fp
10743 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10744 // ...but only if the target supports immediate floating-point values
10745 (!LegalOperations ||
10746 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10747 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10748
10749 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10750 // but SINT_TO_FP is legal on this target, try to convert.
10751 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10752 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10753 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10754 if (DAG.SignBitIsZero(N0))
10755 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10756 }
10757
10758 // The next optimizations are desirable only if SELECT_CC can be lowered.
10759 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10760 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
10761 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10762 (!LegalOperations ||
10763 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10764 SDLoc DL(N);
10765 SDValue Ops[] =
10766 { N0.getOperand(0), N0.getOperand(1),
10767 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10768 N0.getOperand(2) };
10769 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10770 }
10771 }
10772
10773 return SDValue();
10774}
10775
10776 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
10777static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10778 SDValue N0 = N->getOperand(0);
10779 EVT VT = N->getValueType(0);
10780
10781 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10782 return SDValue();
10783
10784 SDValue Src = N0.getOperand(0);
10785 EVT SrcVT = Src.getValueType();
10786 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10787 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10788
10789 // We can safely assume the conversion won't overflow the output range,
10790 // because (for example) (uint8_t)18293.f is undefined behavior.
10791
10792 // Since we can assume the conversion won't overflow, our decision as to
10793 // whether the input will fit in the float should depend on the minimum
10794 // of the input range and output range.
10795
10796 // This means this is also safe for a signed input and unsigned output, since
10797 // a negative input would lead to undefined behavior.
10798 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10799 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10800 unsigned ActualSize = std::min(InputSize, OutputSize);
10801 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10802
10803 // We can only fold away the float conversion if the input range can be
10804 // represented exactly in the float range.
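// Worked examples of the check (f32 carries 24 bits of precision):
// fp_to_sint(f32 (sint_to_fp i16 x)) has ActualSize = min(15, 31) = 15
// <= 24, so the fold fires and, with an i32 result, emits sign_extend(x).
// fp_to_sint(f32 (sint_to_fp i32 x)) has ActualSize = 31 > 24; an i32 may
// not round-trip exactly through f32, so no fold is performed.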
10805 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10806 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10807 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10808 : ISD::ZERO_EXTEND;
10809 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10810 }
10811 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10812 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10813 return DAG.getBitcast(VT, Src);
10814 }
10815 return SDValue();
10816}
10817
10818SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10819 SDValue N0 = N->getOperand(0);
10820 EVT VT = N->getValueType(0);
10821
10822 // fold (fp_to_sint c1fp) -> c1
10823 if (isConstantFPBuildVectorOrConstantFP(N0))
10824 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10825
10826 return FoldIntToFPToInt(N, DAG);
10827}
10828
10829SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10830 SDValue N0 = N->getOperand(0);
10831 EVT VT = N->getValueType(0);
10832
10833 // fold (fp_to_uint c1fp) -> c1
10834 if (isConstantFPBuildVectorOrConstantFP(N0))
10835 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10836
10837 return FoldIntToFPToInt(N, DAG);
10838}
10839
10840SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10841 SDValue N0 = N->getOperand(0);
10842 SDValue N1 = N->getOperand(1);
10843 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10844 EVT VT = N->getValueType(0);
10845
10846 // fold (fp_round c1fp) -> c1fp
10847 if (N0CFP)
10848 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10849
10850 // fold (fp_round (fp_extend x)) -> x
10851 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10852 return N0.getOperand(0);
10853
10854 // fold (fp_round (fp_round x)) -> (fp_round x)
10855 if (N0.getOpcode() == ISD::FP_ROUND) {
10856 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10857 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10858
10859 // Skip this folding if it results in an fp_round from f80 to f16.
10860 //
10861 // f80 to f16 always generates an expensive (and as yet, unimplemented)
10862 // libcall to __truncxfhf2 instead of selecting native f16 conversion
10863 // instructions from f32 or f64. Moreover, the first (value-preserving)
10864 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
10865 // x86.
10866 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10867 return SDValue();
10868
10869 // If the first fp_round isn't a value-preserving truncation, it might
10870 // introduce a tie in the second fp_round that wouldn't occur in the
10871 // single-step fp_round we want to fold to.
10872 // In other words, double rounding isn't the same as rounding once.
10873 // Also, this is a value-preserving truncation iff both fp_round's are.
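// A decimal analogy of the hazard: rounding 0.149 straight to one digit
// gives 0.1, but rounding first to two digits gives 0.15, and that tie
// then rounds to 0.2. Hence the fold is only done when the first round
// was value-preserving (or under UnsafeFPMath).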
10874 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10875 SDLoc DL(N);
10876 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10877 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10878 }
10879 }
10880
10881 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10882 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10883 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10884 N0.getOperand(0), N1);
10885 AddToWorklist(Tmp.getNode());
10886 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10887 Tmp, N0.getOperand(1));
10888 }
10889
10890 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10891 return NewVSel;
10892
10893 return SDValue();
10894}
10895
10896SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10897 SDValue N0 = N->getOperand(0);
10898 EVT VT = N->getValueType(0);
10899 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10900 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10901
10902 // fold (fp_round_inreg c1fp) -> c1fp
10903 if (N0CFP && isTypeLegal(EVT)) {
10904 SDLoc DL(N);
10905 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10906 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10907 }
10908
10909 return SDValue();
10910}
10911
10912SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10913 SDValue N0 = N->getOperand(0);
10914 EVT VT = N->getValueType(0);
10915
10916 // If this is fp_round(fp_extend(x)), don't fold it; allow ourselves to be folded.
10917 if (N->hasOneUse() &&
10918 N->use_begin()->getOpcode() == ISD::FP_ROUND)
10919 return SDValue();
10920
10921 // fold (fp_extend c1fp) -> c1fp
10922 if (isConstantFPBuildVectorOrConstantFP(N0))
10923 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10924
10925 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10926 if (N0.getOpcode() == ISD::FP16_TO_FP &&
10927 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10928 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10929
10930 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10931 // value of X.
10932 if (N0.getOpcode() == ISD::FP_ROUND
10933 && N0.getConstantOperandVal(1) == 1) {
10934 SDValue In = N0.getOperand(0);
10935 if (In.getValueType() == VT) return In;
10936 if (VT.bitsLT(In.getValueType()))
10937 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10938 In, N0.getOperand(1));
10939 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10940 }
10941
10942 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10943 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10944 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10945 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10946 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10947 LN0->getChain(),
10948 LN0->getBasePtr(), N0.getValueType(),
10949 LN0->getMemOperand());
10950 CombineTo(N, ExtLoad);
10951 CombineTo(N0.getNode(),
10952 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10953 N0.getValueType(), ExtLoad,
10954 DAG.getIntPtrConstant(1, SDLoc(N0))),
10955 ExtLoad.getValue(1));
10956 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10957 }
10958
10959 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10960 return NewVSel;
10961
10962 return SDValue();
10963}
10964
10965SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10966 SDValue N0 = N->getOperand(0);
10967 EVT VT = N->getValueType(0);
10968
10969 // fold (fceil c1) -> fceil(c1)
10970 if (isConstantFPBuildVectorOrConstantFP(N0))
10971 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10972
10973 return SDValue();
10974}
10975
10976SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10977 SDValue N0 = N->getOperand(0);
10978 EVT VT = N->getValueType(0);
10979
10980 // fold (ftrunc c1) -> ftrunc(c1)
10981 if (isConstantFPBuildVectorOrConstantFP(N0))
10982 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10983
10984 // fold ftrunc (known rounded int x) -> x
10985 // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
10986 // likely to be generated to extract an integer from a rounded floating-point value.
10987 switch (N0.getOpcode()) {
10988 default: break;
10989 case ISD::FRINT:
10990 case ISD::FTRUNC:
10991 case ISD::FNEARBYINT:
10992 case ISD::FFLOOR:
10993 case ISD::FCEIL:
10994 return N0;
10995 }
10996
10997 return SDValue();
10998}
10999
11000SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11001 SDValue N0 = N->getOperand(0);
11002 EVT VT = N->getValueType(0);
11003
11004 // fold (ffloor c1) -> ffloor(c1)
11005 if (isConstantFPBuildVectorOrConstantFP(N0))
11006 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
11007
11008 return SDValue();
11009}
11010
11011// FIXME: FNEG and FABS have a lot in common; refactor.
11012SDValue DAGCombiner::visitFNEG(SDNode *N) {
11013 SDValue N0 = N->getOperand(0);
11014 EVT VT = N->getValueType(0);
11015
11016 // Constant fold FNEG.
11017 if (isConstantFPBuildVectorOrConstantFP(N0))
11018 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
11019
11020 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
11021 &DAG.getTarget().Options))
11022 return GetNegatedExpression(N0, DAG, LegalOperations);
11023
11024 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
11025 // constant pool values.
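// For f32 the sign mask is 0x80000000; e.g. bitcast(1.0f) = 0x3F800000,
// and XOR with the mask gives 0xBF800000 = bitcast(-1.0f), flipping the
// sign without a constant-pool load.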
11026 if (!TLI.isFNegFree(VT) &&
11027 N0.getOpcode() == ISD::BITCAST &&
11028 N0.getNode()->hasOneUse()) {
11029 SDValue Int = N0.getOperand(0);
11030 EVT IntVT = Int.getValueType();
11031 if (IntVT.isInteger() && !IntVT.isVector()) {
11032 APInt SignMask;
11033 if (N0.getValueType().isVector()) {
11034 // For a vector, get a mask such as 0x80... per scalar element
11035 // and splat it.
11036 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
11037 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11038 } else {
11039 // For a scalar, just generate 0x80...
11040 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
11041 }
11042 SDLoc DL0(N0);
11043 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
11044 DAG.getConstant(SignMask, DL0, IntVT));
11045 AddToWorklist(Int.getNode());
11046 return DAG.getBitcast(VT, Int);
11047 }
11048 }
11049
11050 // (fneg (fmul c, x)) -> (fmul -c, x)
11051 if (N0.getOpcode() == ISD::FMUL &&
11052 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
11053 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
11054 if (CFP1) {
11055 APFloat CVal = CFP1->getValueAPF();
11056 CVal.changeSign();
11057 if (Level >= AfterLegalizeDAG &&
11058 (TLI.isFPImmLegal(CVal, VT) ||
11059 TLI.isOperationLegal(ISD::ConstantFP, VT)))
11060 return DAG.getNode(
11061 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
11062 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
11063 N0->getFlags());
11064 }
11065 }
11066
11067 return SDValue();
11068}
11069
11070SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
11071 SDValue N0 = N->getOperand(0);
11072 SDValue N1 = N->getOperand(1);
11073 EVT VT = N->getValueType(0);
11074 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11075 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11076
11077 if (N0CFP && N1CFP) {
11078 const APFloat &C0 = N0CFP->getValueAPF();
11079 const APFloat &C1 = N1CFP->getValueAPF();
11080 return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
11081 }
11082
11083 // Canonicalize to constant on RHS.
11084 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11085 !isConstantFPBuildVectorOrConstantFP(N1))
11086 return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11087
11088 return SDValue();
11089}
11090
11091SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11092 SDValue N0 = N->getOperand(0);
11093 SDValue N1 = N->getOperand(1);
11094 EVT VT = N->getValueType(0);
11095 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11096 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11097
11098 if (N0CFP && N1CFP) {
11099 const APFloat &C0 = N0CFP->getValueAPF();
11100 const APFloat &C1 = N1CFP->getValueAPF();
11101 return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11102 }
11103
11104 // Canonicalize to constant on RHS.
11105 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11106 !isConstantFPBuildVectorOrConstantFP(N1))
11107 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
11108
11109 return SDValue();
11110}
11111
11112SDValue DAGCombiner::visitFABS(SDNode *N) {
11113 SDValue N0 = N->getOperand(0);
11114 EVT VT = N->getValueType(0);
11115
11116 // fold (fabs c1) -> fabs(c1)
11117 if (isConstantFPBuildVectorOrConstantFP(N0))
11118 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11119
11120 // fold (fabs (fabs x)) -> (fabs x)
11121 if (N0.getOpcode() == ISD::FABS)
11122 return N->getOperand(0);
11123
11124 // fold (fabs (fneg x)) -> (fabs x)
11125 // fold (fabs (fcopysign x, y)) -> (fabs x)
11126 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
11127 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
11128
11129 // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
11130 // constant pool values.
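// For f32 the mask is ~0x80000000 = 0x7FFFFFFF; e.g. bitcast(-1.0f) =
// 0xBF800000, and AND with the mask gives 0x3F800000 = bitcast(1.0f).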
11131 if (!TLI.isFAbsFree(VT) &&
11132 N0.getOpcode() == ISD::BITCAST &&
11133 N0.getNode()->hasOneUse()) {
11134 SDValue Int = N0.getOperand(0);
11135 EVT IntVT = Int.getValueType();
11136 if (IntVT.isInteger() && !IntVT.isVector()) {
11137 APInt SignMask;
11138 if (N0.getValueType().isVector()) {
11139 // For a vector, get a mask such as 0x7f... per scalar element
11140 // and splat it.
11141 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
11142 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11143 } else {
11144 // For a scalar, just generate 0x7f...
11145 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
11146 }
11147 SDLoc DL(N0);
11148 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
11149 DAG.getConstant(SignMask, DL, IntVT));
11150 AddToWorklist(Int.getNode());
11151 return DAG.getBitcast(N->getValueType(0), Int);
11152 }
11153 }
11154
11155 return SDValue();
11156}
11157
11158SDValue DAGCombiner::visitBRCOND(SDNode *N) {
11159 SDValue Chain = N->getOperand(0);
11160 SDValue N1 = N->getOperand(1);
11161 SDValue N2 = N->getOperand(2);
11162
11163 // If N is a constant we could fold this into a fallthrough or unconditional
11164 // branch. However that doesn't happen very often in normal code, because
11165 // Instcombine/SimplifyCFG should have handled the available opportunities.
11166 // If we did this folding here, it would be necessary to update the
11167 // MachineBasicBlock CFG, which is awkward.
11168
11169 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
11170 // on the target.
11171 if (N1.getOpcode() == ISD::SETCC &&
11172 TLI.isOperationLegalOrCustom(ISD::BR_CC,
11173 N1.getOperand(0).getValueType())) {
11174 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11175 Chain, N1.getOperand(2),
11176 N1.getOperand(0), N1.getOperand(1), N2);
11177 }
11178
11179 if (N1.hasOneUse()) {
11180 if (SDValue NewN1 = rebuildSetCC(N1))
11181 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
11182 }
11183
11184 return SDValue();
11185}
11186
11187SDValue DAGCombiner::rebuildSetCC(SDValue N) {
11188 if (N.getOpcode() == ISD::SRL ||
11189 (N.getOpcode() == ISD::TRUNCATE &&
11190 (N.getOperand(0).hasOneUse() &&
11191 N.getOperand(0).getOpcode() == ISD::SRL))) {
11192 // Look past the truncate.
11193 if (N.getOpcode() == ISD::TRUNCATE)
11194 N = N.getOperand(0);
11195
11196 // Match this pattern so that we can generate simpler code:
11197 //
11198 // %a = ...
11199 // %b = and i32 %a, 2
11200 // %c = srl i32 %b, 1
11201 // brcond i32 %c ...
11202 //
11203 // into
11204 //
11205 // %a = ...
11206 // %b = and i32 %a, 2
11207 // %c = setcc eq %b, 0
11208 // brcond %c ...
11209 //
11210 // This applies only when the AND constant value has one bit set and the
11211 // SRL constant is equal to the log2 of the AND constant. The back-end is
11212 // smart enough to convert the result into a TEST/JMP sequence.
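// A quick numeric check of the equivalence: with %a = 6, %b = 6 & 2 = 2
// and %c = 2 >> 1 = 1, while setcc ne (2, 0) is also 1; with %a = 5,
// %b = 0 and %c = 0, matching setcc ne (0, 0) = 0. The SRL only moves the
// isolated bit down to bit 0, which is exactly a compare against zero.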
11213 SDValue Op0 = N.getOperand(0);
11214 SDValue Op1 = N.getOperand(1);
11215
11216 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
11217 SDValue AndOp1 = Op0.getOperand(1);
11218
11219 if (AndOp1.getOpcode() == ISD::Constant) {
11220 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
11221
11222 if (AndConst.isPowerOf2() &&
11223 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
11224 SDLoc DL(N);
11225 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
11226 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
11227 ISD::SETNE);
11228 }
11229 }
11230 }
11231 }
11232
11233 // Transform br(xor(x, y)) -> br(x != y)
11234 // Transform br(xor(xor(x,y), 1)) -> br (x == y)
11235 if (N.getOpcode() == ISD::XOR) {
11236 SDNode *TheXor = N.getNode();
11237
11238 // Avoid missing important xor optimizations.
11239 while (SDValue Tmp = visitXOR(TheXor)) {
11240 // We don't have an XOR anymore, bail.
11241 if (Tmp.getOpcode() != ISD::XOR)
11242 return Tmp;
11243
11244 TheXor = Tmp.getNode();
11245 }
11246
11247 SDValue Op0 = TheXor->getOperand(0);
11248 SDValue Op1 = TheXor->getOperand(1);
11249
11250 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
11251 bool Equal = false;
11252 if (isOneConstant(Op0) && Op0.hasOneUse() &&
11253 Op0.getOpcode() == ISD::XOR) {
11254 TheXor = Op0.getNode();
11255 Equal = true;
11256 }
11257
11258 EVT SetCCVT = N.getValueType();
11259 if (LegalTypes)
11260 SetCCVT = getSetCCResultType(SetCCVT);
11261 // Replace the uses of XOR with SETCC
11262 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
11263 Equal ? ISD::SETEQ : ISD::SETNE);
11264 }
11265 }
11266
11267 return SDValue();
11268}
11269
11270// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
11271//
11272SDValue DAGCombiner::visitBR_CC(SDNode *N) {
11273 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
11274 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
11275
11276 // If N is a constant we could fold this into a fallthrough or unconditional
11277 // branch. However that doesn't happen very often in normal code, because
11278 // Instcombine/SimplifyCFG should have handled the available opportunities.
11279 // If we did this folding here, it would be necessary to update the
11280 // MachineBasicBlock CFG, which is awkward.
11281
11282 // Use SimplifySetCC to simplify SETCC's.
11283 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
11284 CondLHS, CondRHS, CC->get(), SDLoc(N),
11285 false);
11286 if (Simp.getNode()) AddToWorklist(Simp.getNode());
11287
11288 // fold to a simpler setcc
11289 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11290 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11291 N->getOperand(0), Simp.getOperand(2),
11292 Simp.getOperand(0), Simp.getOperand(1),
11293 N->getOperand(4));
11294
11295 return SDValue();
11296}
11297
11298/// Return true if 'Use' is a load or a store that uses N as its base pointer
11299/// and that N may be folded in the load / store addressing mode.
11300static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11301 SelectionDAG &DAG,
11302 const TargetLowering &TLI) {
11303 EVT VT;
11304 unsigned AS;
11305
11306 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
11307 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11308 return false;
11309 VT = LD->getMemoryVT();
11310 AS = LD->getAddressSpace();
11311 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
11312 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11313 return false;
11314 VT = ST->getMemoryVT();
11315 AS = ST->getAddressSpace();
11316 } else
11317 return false;
11318
11319 TargetLowering::AddrMode AM;
11320 if (N->getOpcode() == ISD::ADD) {
11321 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11322 if (Offset)
11323 // [reg +/- imm]
11324 AM.BaseOffs = Offset->getSExtValue();
11325 else
11326 // [reg +/- reg]
11327 AM.Scale = 1;
11328 } else if (N->getOpcode() == ISD::SUB) {
11329 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11330 if (Offset)
11331 // [reg +/- imm]
11332 AM.BaseOffs = -Offset->getSExtValue();
11333 else
11334 // [reg +/- reg]
11335 AM.Scale = 1;
11336 } else
11337 return false;
11338
11339 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11340 VT.getTypeForEVT(*DAG.getContext()), AS);
11341}
11342
11343/// Try turning a load/store into a pre-indexed load/store when the base
11344/// pointer is an add or subtract and it has other uses besides the load/store.
11345/// After the transformation, the new indexed load/store has effectively folded
11346/// the add/subtract in and all of its other uses are redirected to the
11347/// new load/store.
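// An illustrative shape of the rewrite (syntax is hypothetical; real
// targets use their own indexed forms):
//   add = base + 16
//   val = load add          ; plus other users of add
// becomes a pre-indexed load yielding both values at once:
//   (val, add) = load_pre base, 16
// with the remaining users of the add redirected to the second result.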
11348bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
11349 if (Level < AfterLegalizeDAG)
11350 return false;
11351
11352 bool isLoad = true;
11353 SDValue Ptr;
11354 EVT VT;
11355 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
11356 if (LD->isIndexed())
11357 return false;
11358 VT = LD->getMemoryVT();
11359 if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
11360 !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
11361 return false;
11362 Ptr = LD->getBasePtr();
11363 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11364 if (ST->isIndexed())
11365 return false;
11366 VT = ST->getMemoryVT();
11367 if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
11368 !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
11369 return false;
11370 Ptr = ST->getBasePtr();
11371 isLoad = false;
11372 } else {
11373 return false;
11374 }
11375
11376 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
11377 // out. There is no reason to make this a preinc/predec.
11378 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
11379 Ptr.getNode()->hasOneUse())
11380 return false;
11381
11382 // Ask the target to do addressing mode selection.
11383 SDValue BasePtr;
11384 SDValue Offset;
11385 ISD::MemIndexedMode AM = ISD::UNINDEXED;
11386 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
11387 return false;
11388
11389 // Backends without true r+i pre-indexed forms may need to pass a
11390 // constant base with a variable offset so that constant coercion
11391 // will work with the patterns in canonical form.
11392 bool Swapped = false;
11393 if (isa<ConstantSDNode>(BasePtr)) {
11394 std::swap(BasePtr, Offset);
11395 Swapped = true;
11396 }
11397
11398 // Don't create an indexed load / store with zero offset.
11399 if (isNullConstant(Offset))
11400 return false;
11401
11402 // Try turning it into a pre-indexed load / store except when:
11403 // 1) The new base ptr is a frame index.
11404 // 2) If N is a store and the new base ptr is either the same as or is a
11405 // predecessor of the value being stored.
11406 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
11407 // that would create a cycle.
11408 // 4) All uses are load / store ops that use it as old base ptr.
11409
11410 // Check #1. Preinc'ing a frame index would require copying the stack pointer
11411 // (plus the implicit offset) to a register to preinc anyway.
11412 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
11413 return false;
11414
11415 // Check #2.
11416 if (!isLoad) {
11417 SDValue Val = cast<StoreSDNode>(N)->getValue();
11418 if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
11419 return false;
11420 }
11421
11422 // Caches for hasPredecessorHelper.
11423 SmallPtrSet<const SDNode *, 32> Visited;
11424 SmallVector<const SDNode *, 16> Worklist;
11425 Worklist.push_back(N);
11426
11427 // If the offset is a constant, there may be other adds of constants that
11428 // can be folded with this one. We should do this to avoid having to keep
11429 // a copy of the original base pointer.
11430 SmallVector<SDNode *, 16> OtherUses;
11431 if (isa<ConstantSDNode>(Offset))
11432 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
11433 UE = BasePtr.getNode()->use_end();
11434 UI != UE; ++UI) {
11435 SDUse &Use = UI.getUse();
11436 // Skip the use that is Ptr and uses of other results from BasePtr's
11437 // node (important for nodes that return multiple results).
11438 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
11439 continue;
11440
11441 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
11442 continue;
11443
11444 if (Use.getUser()->getOpcode() != ISD::ADD &&
11445 Use.getUser()->getOpcode() != ISD::SUB) {
11446 OtherUses.clear();
11447 break;
11448 }
11449
11450 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
11451 if (!isa<ConstantSDNode>(Op1)) {
11452 OtherUses.clear();
11453 break;
11454 }
11455
11456 // FIXME: In some cases, we can be smarter about this.
11457 if (Op1.getValueType() != Offset.getValueType()) {
11458 OtherUses.clear();
11459 break;
11460 }
11461
11462 OtherUses.push_back(Use.getUser());
11463 }
11464
11465 if (Swapped)
11466 std::swap(BasePtr, Offset);
11467
11468 // Now check for #3 and #4.
11469 bool RealUse = false;
11470
11471 for (SDNode *Use : Ptr.getNode()->uses()) {
11472 if (Use == N)
11473 continue;
11474 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
11475 return false;
11476
11477 // If Ptr may be folded into the addressing mode of some other use, then
11478 // it's not profitable to do this transformation.
11479 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
11480 RealUse = true;
11481 }
11482
11483 if (!RealUse)
11484 return false;
11485
11486 SDValue Result;
11487 if (isLoad)
11488 Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11489 BasePtr, Offset, AM);
11490 else
11491 Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11492 BasePtr, Offset, AM);
11493 ++PreIndexedNodes;
11494 ++NodesCombined;
11495 DEBUG(dbgs() << "\nReplacing.4 ";
11496       N->dump(&DAG);
11497       dbgs() << "\nWith: ";
11498       Result.getNode()->dump(&DAG);
11499       dbgs() << '\n');
11500 WorklistRemover DeadNodes(*this);
11501 if (isLoad) {
11502 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11503 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11504 } else {
11505 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11506 }
11507
11508 // Finally, since the node is now dead, remove it from the graph.
11509 deleteAndRecombine(N);
11510
11511 if (Swapped)
11512 std::swap(BasePtr, Offset);
11513
11514 // Replace other uses of BasePtr that can be updated to use Ptr
11515 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
11516 unsigned OffsetIdx = 1;
11517 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
11518 OffsetIdx = 0;
11519 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
11520        BasePtr.getNode() && "Expected BasePtr operand");
11521
11522 // We need to replace ptr0 in the following expression:
11523 // x0 * offset0 + y0 * ptr0 = t0
11524 // knowing that
11525 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
11526 //
11527 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
11528 // indexed load/store and the expression that needs to be re-written.
11529 //
11530 // Therefore, we have:
11531 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
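// A worked instance: let the other use be t0 = ptr0 + 12 (x0 = 1, y0 = 1)
// and the pre-decrement access be t1 = ptr0 - 8 (x1 = -1, y1 = 1). Then
// t0 = (1*12 - (-1)*1*1*8) + (1*1)*t1 = t1 + 20, and indeed
// (ptr0 - 8) + 20 = ptr0 + 12.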
11532
11533 ConstantSDNode *CN =
11534 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
11535 int X0, X1, Y0, Y1;
11536 const APInt &Offset0 = CN->getAPIntValue();
11537 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
11538
11539 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
11540 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
11541 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
11542 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
11543
11544 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
11545
11546 APInt CNV = Offset0;
11547 if (X0 < 0) CNV = -CNV;
11548 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
11549 else CNV = CNV - Offset1;
11550
11551 SDLoc DL(OtherUses[i]);
11552
11553 // We can now generate the new expression.
11554 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
11555 SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
11556
11557 SDValue NewUse = DAG.getNode(Opcode,
11558 DL,
11559 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
11560 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
11561 deleteAndRecombine(OtherUses[i]);
11562 }
11563
11564 // Replace the uses of Ptr with uses of the updated base value.
11565 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
11566 deleteAndRecombine(Ptr.getNode());
11567 AddToWorklist(Result.getNode());
11568
11569 return true;
11570}
11571
11572 /// Try to combine a load/store with an add/sub of the base pointer node into
11573 /// a post-indexed load/store. The transformation effectively folds the
11574 /// add/subtract into the new indexed load/store, and all other uses of the
11575 /// base pointer are redirected to the new load/store.
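// An illustrative shape of the rewrite (syntax is hypothetical):
//   val = load ptr
//   inc = ptr + 4           ; used by later iterations
// becomes a post-indexed load producing both results:
//   (val, inc) = load_post ptr, 4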
11576bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
11577 if (Level < AfterLegalizeDAG)
11578 return false;
11579
11580 bool isLoad = true;
11581 SDValue Ptr;
11582 EVT VT;
11583 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
11584 if (LD->isIndexed())
11585 return false;
11586 VT = LD->getMemoryVT();
11587 if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
11588 !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
11589 return false;
11590 Ptr = LD->getBasePtr();
11591 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11592 if (ST->isIndexed())
11593 return false;
11594 VT = ST->getMemoryVT();
11595 if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
11596 !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
11597 return false;
11598 Ptr = ST->getBasePtr();
11599 isLoad = false;
11600 } else {
11601 return false;
11602 }
11603
11604 if (Ptr.getNode()->hasOneUse())
11605 return false;
11606
11607 for (SDNode *Op : Ptr.getNode()->uses()) {
11608 if (Op == N ||
11609 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
11610 continue;
11611
11612 SDValue BasePtr;
11613 SDValue Offset;
11614 ISD::MemIndexedMode AM = ISD::UNINDEXED;
11615 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
11616 // Don't create an indexed load / store with zero offset.
11617 if (isNullConstant(Offset))
11618 continue;
11619
11620 // Try turning it into a post-indexed load / store except when
11621 // 1) All uses are load / store ops that use it as base ptr (and
11622 // it may be folded as addressing mode).
11623 // 2) Op must be independent of N, i.e. Op is neither a predecessor
11624 // nor a successor of N. Otherwise, if Op is folded that would
11625 // create a cycle.
11626
11627 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
11628 continue;
11629
11630 // Check for #1.
11631 bool TryNext = false;
11632 for (SDNode *Use : BasePtr.getNode()->uses()) {
11633 if (Use == Ptr.getNode())
11634 continue;
11635
11636 // If all the uses are load / store addresses, then don't do the
11637 // transformation.
11638 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
11639 bool RealUse = false;
11640 for (SDNode *UseUse : Use->uses()) {
11641 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
11642 RealUse = true;
11643 }
11644
11645 if (!RealUse) {
11646 TryNext = true;
11647 break;
11648 }
11649 }
11650 }
11651
11652 if (TryNext)
11653 continue;
11654
11655 // Check for #2
11656 if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
11657 SDValue Result = isLoad
11658 ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11659 BasePtr, Offset, AM)
11660 : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11661 BasePtr, Offset, AM);
11662 ++PostIndexedNodes;
11663 ++NodesCombined;
11664 DEBUG(dbgs() << "\nReplacing.5 ";
11665       N->dump(&DAG);
11666       dbgs() << "\nWith: ";
11667       Result.getNode()->dump(&DAG);
11668       dbgs() << '\n');
11669 WorklistRemover DeadNodes(*this);
11670 if (isLoad) {
11671 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11672 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11673 } else {
11674 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11675 }
11676
11677 // Finally, since the node is now dead, remove it from the graph.
11678 deleteAndRecombine(N);
11679
11680 // Replace the uses of Use with uses of the updated base value.
11681 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
11682 Result.getValue(isLoad ? 1 : 0));
11683 deleteAndRecombine(Op);
11684 return true;
11685 }
11686 }
11687 }
11688
11689 return false;
11690}
11691
11692/// \brief Return the base-pointer arithmetic from an indexed \p LD.
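/// An illustrative sketch (not from the source): given a post-incremented
/// load such as
///   v, newptr, ch = load<post_inc> chain, BP, Inc
/// this returns the node (add BP, Inc), or (sub BP, Inc) for the *_DEC
/// modes, converting a non-opaque TargetConstant Inc to a plain constant
/// first, as asserted below.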
11693SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11694 ISD::MemIndexedMode AM = LD->getAddressingMode();
11695 assert(AM != ISD::UNINDEXED);
11696 SDValue BP = LD->getOperand(1);
11697 SDValue Inc = LD->getOperand(2);
11698
11699 // Some backends use TargetConstants for load offsets, but don't expect
11700 // TargetConstants in general ADD nodes. We can convert these constants into
11701 // regular Constants (if the constant is not opaque).
11702 assert((Inc.getOpcode() != ISD::TargetConstant ||
11703 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11704 "Cannot split out indexing using opaque target constants");
11705 if (Inc.getOpcode() == ISD::TargetConstant) {
11706 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11707 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11708 ConstInc->getValueType(0));
11709 }
11710
11711 unsigned Opc =
11712 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11713 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11714}
11715
11716SDValue DAGCombiner::visitLOAD(SDNode *N) {
11717 LoadSDNode *LD = cast<LoadSDNode>(N);
11718 SDValue Chain = LD->getChain();
11719 SDValue Ptr = LD->getBasePtr();
11720
11721 // If load is not volatile and there are no uses of the loaded value (and
11722 // the updated indexed value in case of indexed loads), change uses of the
11723 // chain value into uses of the chain input (i.e. delete the dead load).
11724 if (!LD->isVolatile()) {
11725 if (N->getValueType(1) == MVT::Other) {
11726 // Unindexed loads.
11727 if (!N->hasAnyUseOfValue(0)) {
11728 // It's not safe to use the two value CombineTo variant here. e.g.
11729 // v1, chain2 = load chain1, loc
11730 // v2, chain3 = load chain2, loc
11731 // v3 = add v2, c
11732 // Now we replace use of chain2 with chain1. This makes the second load
11733 // isomorphic to the one we are deleting, and thus makes this load live.
11734 DEBUG(dbgs() << "\nReplacing.6 ";
11735 N->dump(&DAG);
11736 dbgs() << "\nWith chain: ";
11737 Chain.getNode()->dump(&DAG);
11738 dbgs() << "\n");
11739 WorklistRemover DeadNodes(*this);
11740 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11741 AddUsersToWorklist(Chain.getNode());
11742 if (N->use_empty())
11743 deleteAndRecombine(N);
11744
11745 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11746 }
11747 } else {
11748 // Indexed loads.
11749 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11750
11751 // If this load has an opaque TargetConstant offset, then we cannot split
11752 // the indexing into an add/sub directly (that TargetConstant may not be
11753 // valid for a different type of node, and we cannot convert an opaque
11754 // target constant into a regular constant).
11755 bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11756 cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11757
11758 if (!N->hasAnyUseOfValue(0) &&
11759 ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
11760 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11761 SDValue Index;
11762 if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11763 Index = SplitIndexingFromLoad(LD);
11764 // Try to fold the base pointer arithmetic into subsequent loads and
11765 // stores.
11766 AddUsersToWorklist(N);
11767 } else
11768 Index = DAG.getUNDEF(N->getValueType(1));
11769 DEBUG(dbgs() << "\nReplacing.7 ";
11770 N->dump(&DAG);
11771 dbgs() << "\nWith: ";
11772 Undef.getNode()->dump(&DAG);
11773 dbgs() << " and 2 other values\n");
11774 WorklistRemover DeadNodes(*this);
11775 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11776 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11777 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11778 deleteAndRecombine(N);
11779 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11780 }
11781 }
11782 }
11783
11784 // If this load is directly stored, replace the load value with the stored
11785 // value.
11786 // TODO: Handle store large -> read small portion.
11787 // TODO: Handle TRUNCSTORE/LOADEXT
11788 if (OptLevel != CodeGenOpt::None &&
11789 ISD::isNormalLoad(N) && !LD->isVolatile()) {
11790 if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11791 StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11792 if (PrevST->getBasePtr() == Ptr &&
11793 PrevST->getValue().getValueType() == N->getValueType(0))
11794 return CombineTo(N, PrevST->getOperand(1), Chain);
11795 }
11796 }
11797
11798 // Try to infer better alignment information than the load already has.
11799 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
11800 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11801 if (Align > LD->getMemOperand()->getBaseAlignment()) {
11802 SDValue NewLoad = DAG.getExtLoad(
11803 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11804 LD->getPointerInfo(), LD->getMemoryVT(), Align,
11805 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11806 if (NewLoad.getNode() != N)
11807 return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11808 }
11809 }
11810 }
11811
11812 if (LD->isUnindexed()) {
11813 // Walk up chain skipping non-aliasing memory nodes.
11814 SDValue BetterChain = FindBetterChain(N, Chain);
11815
11816 // If there is a better chain.
11817 if (Chain != BetterChain) {
11818 SDValue ReplLoad;
11819
11820 // Replace the chain to avoid the dependency.
11821 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11822 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11823 BetterChain, Ptr, LD->getMemOperand());
11824 } else {
11825 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11826 LD->getValueType(0),
11827 BetterChain, Ptr, LD->getMemoryVT(),
11828 LD->getMemOperand());
11829 }
11830
11831 // Create token factor to keep old chain connected.
11832 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11833 MVT::Other, Chain, ReplLoad.getValue(1));
11834
11835 // Replace uses with load result and token factor
11836 return CombineTo(N, ReplLoad.getValue(0), Token);
11837 }
11838 }
11839
11840 // Try transforming N to an indexed load.
11841 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11842 return SDValue(N, 0);
11843
11844 // Try to slice up N to more direct loads if the slices are mapped to
11845 // different register banks or pairing can take place.
11846 if (SliceUpLoad(N))
11847 return SDValue(N, 0);
11848
11849 return SDValue();
11850}
11851
11852namespace {
11853
11854/// \brief Helper structure used to slice a load into smaller loads.
11855/// Basically a slice is obtained from the following sequence:
11856/// Origin = load Ty1, Base
11857/// Shift = srl Ty1 Origin, CstTy Amount
11858/// Inst = trunc Shift to Ty2
11859///
11860/// Then, it will be rewritten into:
11861/// Slice = load SliceTy, Base + SliceOffset
11862/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11863///
11864/// SliceTy is deduced from the number of bits that are actually used to
11865/// build Inst.
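///
/// For example (an illustrative case, not taken from the source): on a
/// little-endian target, the sequence
///   Origin = load i64, Base
///   Shift  = srl i64 Origin, 32
///   Inst   = trunc i64 Shift to i32
/// becomes
///   Slice  = load i32, Base + 4
/// and no zext is needed because SliceTy and Ty2 are both i32.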
11866struct LoadedSlice {
11867 /// \brief Helper structure used to compute the cost of a slice.
11868 struct Cost {
11869 /// Are we optimizing for code size.
11870 bool ForCodeSize;
11871
11872 /// Various costs.
11873 unsigned Loads = 0;
11874 unsigned Truncates = 0;
11875 unsigned CrossRegisterBanksCopies = 0;
11876 unsigned ZExts = 0;
11877 unsigned Shift = 0;
11878
11879 Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
11880
11881 /// \brief Get the cost of one isolated slice.
11882 Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11883 : ForCodeSize(ForCodeSize), Loads(1) {
11884 EVT TruncType = LS.Inst->getValueType(0);
11885 EVT LoadedType = LS.getLoadedType();
11886 if (TruncType != LoadedType &&
11887 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11888 ZExts = 1;
11889 }
11890
11891 /// \brief Account for slicing gain in the current cost.
11892 /// Slicing provides a few gains, like removing a shift or a
11893 /// truncate. This method allows growing the cost of the original
11894 /// load with the gain from this slice.
11895 void addSliceGain(const LoadedSlice &LS) {
11896 // Each slice saves a truncate.
11897 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11898 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11899 LS.Inst->getValueType(0)))
11900 ++Truncates;
11901 // If there is a shift amount, this slice gets rid of it.
11902 if (LS.Shift)
11903 ++Shift;
11904 // If this slice can merge a cross register bank copy, account for it.
11905 if (LS.canMergeExpensiveCrossRegisterBankCopy())
11906 ++CrossRegisterBanksCopies;
11907 }
11908
11909 Cost &operator+=(const Cost &RHS) {
11910 Loads += RHS.Loads;
11911 Truncates += RHS.Truncates;
11912 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11913 ZExts += RHS.ZExts;
11914 Shift += RHS.Shift;
11915 return *this;
11916 }
11917
11918 bool operator==(const Cost &RHS) const {
11919 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11920 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11921 ZExts == RHS.ZExts && Shift == RHS.Shift;
11922 }
11923
11924 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11925
11926 bool operator<(const Cost &RHS) const {
11927 // Assume cross register banks copies are as expensive as loads.
11928 // FIXME: Do we want some more target hooks?
11929 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11930 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11931 // Unless we are optimizing for code size, consider the
11932 // expensive operation first.
11933 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11934 return ExpensiveOpsLHS < ExpensiveOpsRHS;
11935 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11936 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11937 }
11938
11939 bool operator>(const Cost &RHS) const { return RHS < *this; }
11940
11941 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11942
11943 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11944 };
11945
11946 // The last instruction that represents the slice. This should be a
11947 // truncate instruction.
11948 SDNode *Inst;
11949
11950 // The original load instruction.
11951 LoadSDNode *Origin;
11952
11953 // The right shift amount in bits from the original load.
11954 unsigned Shift;
11955
11956 // The DAG from which Origin comes.
11957 // This is used to get some contextual information about legal types, etc.
11958 SelectionDAG *DAG;
11959
11960 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11961 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11962 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11963
11964 /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11965 /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
11966 /// unused bits set to 0.
11967 APInt getUsedBits() const {
11968 // Reproduce the trunc(lshr) sequence:
11969 // - Start from the truncated value.
11970 // - Zero extend to the desired bit width.
11971 // - Shift left.
11972 assert(Origin && "No original load to compare against.");
11973 unsigned BitWidth = Origin->getValueSizeInBits(0);
11974 assert(Inst && "This slice is not bound to an instruction");
11975 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11976 "Extracted slice is bigger than the whole type!");
11977 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11978 UsedBits.setAllBits();
11979 UsedBits = UsedBits.zext(BitWidth);
11980 UsedBits <<= Shift;
11981 return UsedBits;
11982 }
11983
11984 /// \brief Get the size of the slice to be loaded in bytes.
11985 unsigned getLoadedSize() const {
11986 unsigned SliceSize = getUsedBits().countPopulation();
11987 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11988 return SliceSize / 8;
11989 }
11990
11991 /// \brief Get the type that will be loaded for this slice.
11992 /// Note: This may not be the final type for the slice.
11993 EVT getLoadedType() const {
11994 assert(DAG && "Missing context");
11995 LLVMContext &Ctxt = *DAG->getContext();
11996 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11997 }
11998
11999 /// \brief Get the alignment of the load used for this slice.
12000 unsigned getAlignment() const {
12001 unsigned Alignment = Origin->getAlignment();
12002 unsigned Offset = getOffsetFromBase();
12003 if (Offset != 0)
12004 Alignment = MinAlign(Alignment, Alignment + Offset);
12005 return Alignment;
12006 }
12007
12008 /// \brief Check if this slice can be rewritten with legal operations.
12009 bool isLegal() const {
12010 // An invalid slice is not legal.
12011 if (!Origin || !Inst || !DAG)
12012 return false;
12013
12014 // Offsets are for indexed loads only; we do not handle that.
12015 if (!Origin->getOffset().isUndef())
12016 return false;
12017
12018 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12019
12020 // Check that the type is legal.
12021 EVT SliceType = getLoadedType();
12022 if (!TLI.isTypeLegal(SliceType))
12023 return false;
12024
12025 // Check that the load is legal for this type.
12026 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
12027 return false;
12028
12029 // Check that the offset can be computed.
12030 // 1. Check its type.
12031 EVT PtrType = Origin->getBasePtr().getValueType();
12032 if (PtrType == MVT::Untyped || PtrType.isExtended())
12033 return false;
12034
12035 // 2. Check that it fits in the immediate.
12036 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
12037 return false;
12038
12039 // 3. Check that the computation is legal.
12040 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
12041 return false;
12042
12043 // Check that the zext is legal if it needs one.
12044 EVT TruncateType = Inst->getValueType(0);
12045 if (TruncateType != SliceType &&
12046 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
12047 return false;
12048
12049 return true;
12050 }
12051
12052 /// \brief Get the offset in bytes of this slice in the original chunk of
12053 /// bits.
12054 /// \pre DAG != nullptr.
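/// Illustrative values (hypothetical): an i64 Origin with Shift == 32 and
/// a 4-byte slice gives Offset 4 on little-endian, but 8 - 4 - 4 == 0 on
/// big-endian, where the high bits live at the lower address.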
12055 uint64_t getOffsetFromBase() const {
12056 assert(DAG && "Missing context.");
12057 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
12058 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
12059 uint64_t Offset = Shift / 8;
12060 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
12061 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
12062 "The size of the original loaded type is not a multiple of a"
12063 " byte.");
12064 // If Offset is bigger than TySizeInBytes, it means we are loading all
12065 // zeros. This should have been optimized before in the process.
12066 assert(TySizeInBytes > Offset &&
12067 "Invalid shift amount for given loaded size");
12068 if (IsBigEndian)
12069 Offset = TySizeInBytes - Offset - getLoadedSize();
12070 return Offset;
12071 }
12072
12073 /// \brief Generate the sequence of instructions to load the slice
12074 /// represented by this object and redirect the uses of this slice to
12075 /// this new sequence of instructions.
12076 /// \pre this->Inst && this->Origin are valid Instructions and this
12077 /// object passed the legal check: LoadedSlice::isLegal returned true.
12078 /// \return The last instruction of the sequence used to load the slice.
12079 SDValue loadSlice() const {
12080 assert(Inst && Origin && "Unable to replace a non-existing slice.");
12081 const SDValue &OldBaseAddr = Origin->getBasePtr();
12082 SDValue BaseAddr = OldBaseAddr;
12083 // Get the offset in that chunk of bytes w.r.t. the endianness.
12084 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
12085 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
12086 if (Offset) {
12087 // BaseAddr = BaseAddr + Offset.
12088 EVT ArithType = BaseAddr.getValueType();
12089 SDLoc DL(Origin);
12090 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
12091 DAG->getConstant(Offset, DL, ArithType));
12092 }
12093
12094 // Create the type of the loaded slice according to its size.
12095 EVT SliceType = getLoadedType();
12096
12097 // Create the load for the slice.
12098 SDValue LastInst =
12099 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
12100 Origin->getPointerInfo().getWithOffset(Offset),
12101 getAlignment(), Origin->getMemOperand()->getFlags());
12102 // If the final type is not the same as the loaded type, this means that
12103 // we have to pad with zero. Create a zero extend for that.
12104 EVT FinalType = Inst->getValueType(0);
12105 if (SliceType != FinalType)
12106 LastInst =
12107 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
12108 return LastInst;
12109 }
12110
12111 /// \brief Check if this slice can be merged with an expensive cross register
12112 /// bank copy. E.g.,
12113 /// i = load i32
12114 /// f = bitcast i32 i to float
12115 bool canMergeExpensiveCrossRegisterBankCopy() const {
12116 if (!Inst || !Inst->hasOneUse())
12117 return false;
12118 SDNode *Use = *Inst->use_begin();
12119 if (Use->getOpcode() != ISD::BITCAST)
12120 return false;
12121 assert(DAG && "Missing context");
12122 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12123 EVT ResVT = Use->getValueType(0);
12124 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
12125 const TargetRegisterClass *ArgRC =
12126 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
12127 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
12128 return false;
12129
12130 // At this point, we know that we perform a cross-register-bank copy.
12131 // Check if it is expensive.
12132 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
12133 // Assume bitcasts are cheap, unless the two register classes do not
12134 // explicitly share a common subclass.
12135 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
12136 return false;
12137
12138 // Check if it will be merged with the load.
12139 // 1. Check the alignment constraint.
12140 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
12141 ResVT.getTypeForEVT(*DAG->getContext()));
12142
12143 if (RequiredAlignment > getAlignment())
12144 return false;
12145
12146 // 2. Check that the load is a legal operation for that type.
12147 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
12148 return false;
12149
12150 // 3. Check that we do not have a zext in the way.
12151 if (Inst->getValueType(0) != getLoadedType())
12152 return false;
12153
12154 return true;
12155 }
12156};
12157
12158} // end anonymous namespace
12159
12160/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
12161/// \p UsedBits looks like 0..0 1..1 0..0.
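/// For example (illustrative masks): 0xFF and 0x0FF0 are dense, while
/// 0x0F0F is not, because of the hole between its two runs of ones.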
12162static bool areUsedBitsDense(const APInt &UsedBits) {
12163 // If all the bits are one, this is dense!
12164 if (UsedBits.isAllOnesValue())
12165 return true;
12166
12167 // Get rid of the unused bits on the right.
12168 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
12169 // Get rid of the unused bits on the left.
12170 if (NarrowedUsedBits.countLeadingZeros())
12171 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
12172 // Check that the chunk of bits is completely used.
12173 return NarrowedUsedBits.isAllOnesValue();
12174}
12175
12176/// \brief Check whether or not \p First and \p Second are next to each other
12177/// in memory. This means that there is no hole between the bits loaded
12178/// by \p First and the bits loaded by \p Second.
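/// E.g. (illustrative), slices covering bits [0,16) and [16,32) of an
/// i32 load are adjacent; slices covering [0,8) and [16,24) leave a hole.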
12179static bool areSlicesNextToEachOther(const LoadedSlice &First,
12180 const LoadedSlice &Second) {
12181 assert(First.Origin == Second.Origin && First.Origin &&
12182 "Unable to match different memory origins.");
12183 APInt UsedBits = First.getUsedBits();
12184 assert((UsedBits & Second.getUsedBits()) == 0 &&
12185 "Slices are not supposed to overlap.");
12186 UsedBits |= Second.getUsedBits();
12187 return areUsedBitsDense(UsedBits);
12188}
12189
12190/// \brief Adjust the \p GlobalLSCost according to the target
12191/// pairing capabilities and the layout of the slices.
12192/// \pre \p GlobalLSCost should account for at least as many loads as
12193/// there are slices in \p LoadedSlices.
12194static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12195 LoadedSlice::Cost &GlobalLSCost) {
12196 unsigned NumberOfSlices = LoadedSlices.size();
12197 // If there are fewer than 2 elements, no pairing is possible.
12198 if (NumberOfSlices < 2)
12199 return;
12200
12201 // Sort the slices so that elements that are likely to be next to each
12202 // other in memory are next to each other in the list.
12203 std::sort(LoadedSlices.begin(), LoadedSlices.end(),
12204 [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
12205 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
12206 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
12207 });
12208 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
12209 // First (resp. Second) is the first (resp. second) potential candidate
12210 // to be placed in a paired load.
12211 const LoadedSlice *First = nullptr;
12212 const LoadedSlice *Second = nullptr;
12213 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
12214 // Set the beginning of the pair.
12215 First = Second) {
12216 Second = &LoadedSlices[CurrSlice];
12217
12218 // If First is NULL, it means we start a new pair.
12219 // Get to the next slice.
12220 if (!First)
12221 continue;
12222
12223 EVT LoadedType = First->getLoadedType();
12224
12225 // If the types of the slices are different, we cannot pair them.
12226 if (LoadedType != Second->getLoadedType())
12227 continue;
12228
12229 // Check if the target supplies paired loads for this type.
12230 unsigned RequiredAlignment = 0;
12231 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
12232 // Move to the next pair; this type is hopeless.
12233 Second = nullptr;
12234 continue;
12235 }
12236 // Check if we meet the alignment requirement.
12237 if (RequiredAlignment > First->getAlignment())
12238 continue;
12239
12240 // Check that both loads are next to each other in memory.
12241 if (!areSlicesNextToEachOther(*First, *Second))
12242 continue;
12243
12244 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
12245 --GlobalLSCost.Loads;
12246 // Move to the next pair.
12247 Second = nullptr;
12248 }
12249}
12250
12251/// \brief Check the profitability of all involved LoadedSlice.
12252/// Currently, it is considered profitable if there are exactly two
12253/// involved slices (1) which are (2) next to each other in memory, and
12254/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
12255///
12256/// Note: The order of the elements in \p LoadedSlices may be modified, but not
12257/// the elements themselves.
12258///
12259/// FIXME: When the cost model is mature enough, we can relax
12260/// constraints (1) and (2).
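///
/// A made-up cost comparison to illustrate: slicing one i64 load used as
/// two i32 pieces trades {1 load + 1 shift + 2 truncates} for {2 loads},
/// or effectively {1 load} when the target pairs them, which is what the
/// comparison below decides.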
12261static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12262 const APInt &UsedBits, bool ForCodeSize) {
12263 unsigned NumberOfSlices = LoadedSlices.size();
12264 if (StressLoadSlicing)
12265 return NumberOfSlices > 1;
12266
12267 // Check (1).
12268 if (NumberOfSlices != 2)
12269 return false;
12270
12271 // Check (2).
12272 if (!areUsedBitsDense(UsedBits))
12273 return false;
12274
12275 // Check (3).
12276 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
12277 // The original code has one big load.
12278 OrigCost.Loads = 1;
12279 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
12280 const LoadedSlice &LS = LoadedSlices[CurrSlice];
12281 // Accumulate the cost of all the slices.
12282 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
12283 GlobalSlicingCost += SliceCost;
12284
12285 // Account as cost in the original configuration the gain obtained
12286 // with the current slices.
12287 OrigCost.addSliceGain(LS);
12288 }
12289
12290 // If the target supports paired load, adjust the cost accordingly.
12291 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
12292 return OrigCost > GlobalSlicingCost;
12293}
12294
12295/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
12296/// operations, split it in the various pieces being extracted.
12297///
12298/// This sort of thing is introduced by SROA.
12299/// This slicing takes care not to insert overlapping loads.
12300/// \pre LI is a simple load (i.e., not an atomic or volatile load).
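///
/// Illustrative outcome (hypothetical IR): a load whose only uses are
///   trunc(load i64 p) to i32  and  trunc(lshr(load i64 p, 32)) to i32
/// is replaced by two independent i32 loads at p and p + 4 (little
/// endian), joined by a TokenFactor on the chain.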
12301bool DAGCombiner::SliceUpLoad(SDNode *N) {
12302 if (Level < AfterLegalizeDAG)
12303 return false;
12304
12305 LoadSDNode *LD = cast<LoadSDNode>(N);
12306 if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
12307 !LD->getValueType(0).isInteger())
12308 return false;
12309
12310 // Keep track of already used bits to detect overlapping values.
12311 // In that case, we will just abort the transformation.
12312 APInt UsedBits(LD->getValueSizeInBits(0), 0);
12313
12314 SmallVector<LoadedSlice, 4> LoadedSlices;
12315
12316 // Check if this load is used as several smaller chunks of bits.
12317 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
12318 // of computation for each trunc.
12319 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
12320 UI != UIEnd; ++UI) {
12321 // Skip the uses of the chain.
12322 if (UI.getUse().getResNo() != 0)
12323 continue;
12324
12325 SDNode *User = *UI;
12326 unsigned Shift = 0;
12327
12328 // Check if this is a trunc(lshr).
12329 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
12330 isa<ConstantSDNode>(User->getOperand(1))) {
12331 Shift = User->getConstantOperandVal(1);
12332 User = *User->use_begin();
12333 }
12334
12335 // At this point, User is a truncate iff we encountered trunc or
12336 // trunc(lshr).
12337 if (User->getOpcode() != ISD::TRUNCATE)
12338 return false;
12339
12340 // The width of the type must be a power of 2 and at least 8 bits.
12341 // Otherwise the load cannot be represented in LLVM IR.
12342 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
12343 // the slice would span several bytes. We do not support that.
12344 unsigned Width = User->getValueSizeInBits(0);
12345 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
12346 return false;
12347
12348 // Build the slice for this chain of computations.
12349 LoadedSlice LS(User, LD, Shift, &DAG);
12350 APInt CurrentUsedBits = LS.getUsedBits();
12351
12352 // Check if this slice overlaps with another.
12353 if ((CurrentUsedBits & UsedBits) != 0)
12354 return false;
12355 // Update the bits used globally.
12356 UsedBits |= CurrentUsedBits;
12357
12358 // Check if the new slice would be legal.
12359 if (!LS.isLegal())
12360 return false;
12361
12362 // Record the slice.
12363 LoadedSlices.push_back(LS);
12364 }
12365
12366 // Abort slicing if it does not seem to be profitable.
12367 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
12368 return false;
12369
12370 ++SlicedLoads;
12371
12372 // Rewrite each chain to use an independent load.
12373 // By construction, each chain can be represented by a unique load.
12374
12375 // Prepare the argument for the new token factor for all the slices.
12376 SmallVector<SDValue, 8> ArgChains;
12377 for (SmallVectorImpl<LoadedSlice>::const_iterator
12378 LSIt = LoadedSlices.begin(),
12379 LSItEnd = LoadedSlices.end();
12380 LSIt != LSItEnd; ++LSIt) {
12381 SDValue SliceInst = LSIt->loadSlice();
12382 CombineTo(LSIt->Inst, SliceInst, true);
12383 if (SliceInst.getOpcode() != ISD::LOAD)
12384 SliceInst = SliceInst.getOperand(0);
12385 assert(SliceInst->getOpcode() == ISD::LOAD &&
12386 "It takes more than a zext to get to the loaded slice!!");
12387 ArgChains.push_back(SliceInst.getValue(1));
12388 }
12389
12390 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
12391 ArgChains);
12392 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12393 AddToWorklist(Chain.getNode());
12394 return true;
12395}
12396
12397/// Check to see if V is (and load (ptr), imm), where the load has
12398/// specific bytes cleared out. If so, return the byte size being masked out
12399/// and the shift amount.
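/// For example (an illustrative mask, not from the source): for an i32
/// load masked with 0xFFFF00FF, only byte 1 is cleared, so this returns
/// {1, 1}: one byte masked out, starting one byte above the base.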
12400static std::pair<unsigned, unsigned>
12401CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
12402 std::pair<unsigned, unsigned> Result(0, 0);
12403
12404 // Check for the structure we're looking for.
12405 if (V->getOpcode() != ISD::AND ||
12406 !isa<ConstantSDNode>(V->getOperand(1)) ||
12407 !ISD::isNormalLoad(V->getOperand(0).getNode()))
12408 return Result;
12409
12410 // Check the chain and pointer.
12411 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
12412 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
12413
12414 // The store should be chained directly to the load or be an operand of a
12415 // tokenfactor.
12416 if (LD == Chain.getNode())
12417 ; // ok.
12418 else if (Chain->getOpcode() != ISD::TokenFactor)
12419 return Result; // Fail.
12420 else {
12421 bool isOk = false;
12422 for (const SDValue &ChainOp : Chain->op_values())
12423 if (ChainOp.getNode() == LD) {
12424 isOk = true;
12425 break;
12426 }
12427 if (!isOk) return Result;
12428 }
12429
12430 // This only handles simple types.
12431 if (V.getValueType() != MVT::i16 &&
12432 V.getValueType() != MVT::i32 &&
12433 V.getValueType() != MVT::i64)
12434 return Result;
12435
12436 // Check the constant mask. Invert it so that the bits being masked out are
12437 // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
12438 // follow the sign bit for uniformity.
12439 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
12440 unsigned NotMaskLZ = countLeadingZeros(NotMask);
12441 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
12442 unsigned NotMaskTZ = countTrailingZeros(NotMask);
12443 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
12444 if (NotMaskLZ == 64) return Result; // All zero mask.
12445
12446 // See if we have a contiguous run of bits. If so, we have 0*1+0*
12447 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
12448 return Result;
12449
12450 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
12451 if (V.getValueType() != MVT::i64 && NotMaskLZ)
12452 NotMaskLZ -= 64-V.getValueSizeInBits();
12453
12454 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
12455 switch (MaskedBytes) {
12456 case 1:
12457 case 2:
12458 case 4: break;
12459 default: return Result; // All one mask, or 5-byte mask.
12460 }
12461
12462 // Verify that the masked-out region starts at a multiple of its width so
12463 // that the access is aligned the same as the access width.
12464 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12465
12466 Result.first = MaskedBytes;
12467 Result.second = NotMaskTZ/8;
12468 return Result;
12469}
12470
12471/// Check to see if IVal is something that provides a value as specified by
12472/// MaskInfo. If so, replace the specified store with a narrower store of
12473/// truncated IVal.
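/// Continuing the illustrative mask above: with MaskInfo {1, 1} and an
/// IVal whose other bytes are known zero, the wide store shrinks to an i8
/// store of (trunc (srl IVal, 8)) at Ptr + 1 on a little-endian target.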
12474static SDNode *
12475ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12476 SDValue IVal, StoreSDNode *St,
12477 DAGCombiner *DC) {
12478 unsigned NumBytes = MaskInfo.first;
12479 unsigned ByteShift = MaskInfo.second;
12480 SelectionDAG &DAG = DC->getDAG();
12481
12482 // Check to see if IVal is all zeros in the part being masked in by the 'or'
12483 // that uses this. If not, this is not a replacement.
12484 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12485 ByteShift*8, (ByteShift+NumBytes)*8);
12486 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12487
12488 // Check that it is legal on the target to do this. It is legal if the new
12489 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12490 // legalization.
12491 MVT VT = MVT::getIntegerVT(NumBytes*8);
12492 if (!DC->isTypeLegal(VT))
12493 return nullptr;
12494
12495 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
12496 // shifted by ByteShift and truncated down to NumBytes.
12497 if (ByteShift) {
12498 SDLoc DL(IVal);
12499 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12500 DAG.getConstant(ByteShift*8, DL,
12501 DC->getShiftAmountTy(IVal.getValueType())));
12502 }
12503
12504 // Figure out the offset for the store and the alignment of the access.
12505 unsigned StOffset;
12506 unsigned NewAlign = St->getAlignment();
12507
12508 if (DAG.getDataLayout().isLittleEndian())
12509 StOffset = ByteShift;
12510 else
12511 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12512
12513 SDValue Ptr = St->getBasePtr();
12514 if (StOffset) {
12515 SDLoc DL(IVal);
12516 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12517 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12518 NewAlign = MinAlign(NewAlign, StOffset);
12519 }
12520
12521 // Truncate down to the new size.
12522 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12523
12524 ++OpsNarrowed;
12525 return DAG
12526 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12527 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12528 .getNode();
12529}
12530
12531/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12532/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12533/// narrowing the load and store if it would end up being a win for performance
12534/// or code size.
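/// A sketch of the effect (hypothetical values): for
///   store (xor (load i32, p), 0x00FF0000), p
/// only byte 2 changes, so this can become an i8 load / xor 0xFF / store
/// at p + 2 on a little-endian target, when i8 xor is legal and aligned.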
12535SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
12536 StoreSDNode *ST = cast<StoreSDNode>(N);
12537 if (ST->isVolatile())
12538 return SDValue();
12539
12540 SDValue Chain = ST->getChain();
12541 SDValue Value = ST->getValue();
12542 SDValue Ptr = ST->getBasePtr();
12543 EVT VT = Value.getValueType();
12544
12545 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
12546 return SDValue();
12547
12548 unsigned Opc = Value.getOpcode();
12549
12550 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
12551 // is a byte mask indicating a consecutive number of bytes, check to see if
12552 // Y is known to provide just those bytes. If so, we try to replace the
12553 // load + replace + store sequence with a single (narrower) store, which makes
12554 // the load dead.
12555 if (Opc == ISD::OR) {
12556 std::pair<unsigned, unsigned> MaskedLoad;
12557 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
12558 if (MaskedLoad.first)
12559 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12560 Value.getOperand(1), ST,this))
12561 return SDValue(NewST, 0);
12562
12563 // Or is commutative, so try swapping X and Y.
12564 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
12565 if (MaskedLoad.first)
12566 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12567 Value.getOperand(0), ST,this))
12568 return SDValue(NewST, 0);
12569 }
12570
12571 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
12572 Value.getOperand(1).getOpcode() != ISD::Constant)
12573 return SDValue();
12574
12575 SDValue N0 = Value.getOperand(0);
12576 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12577 Chain == SDValue(N0.getNode(), 1)) {
12578 LoadSDNode *LD = cast<LoadSDNode>(N0);
12579 if (LD->getBasePtr() != Ptr ||
12580 LD->getPointerInfo().getAddrSpace() !=
12581 ST->getPointerInfo().getAddrSpace())
12582 return SDValue();
12583
12584 // Find the type to which to narrow the load / op / store.
12585 SDValue N1 = Value.getOperand(1);
12586 unsigned BitWidth = N1.getValueSizeInBits();
12587 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
12588 if (Opc == ISD::AND)
12589 Imm ^= APInt::getAllOnesValue(BitWidth);
12590 if (Imm == 0 || Imm.isAllOnesValue())
12591 return SDValue();
12592 unsigned ShAmt = Imm.countTrailingZeros();
12593 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
12594 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
12595 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12596 // The narrowing should be profitable, the load/store operation should be
12597 // legal (or custom) and the store size should be equal to the NewVT width.
12598 while (NewBW < BitWidth &&
12599 (NewVT.getStoreSizeInBits() != NewBW ||
12600 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
12601 !TLI.isNarrowingProfitable(VT, NewVT))) {
12602 NewBW = NextPowerOf2(NewBW);
12603 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12604 }
12605 if (NewBW >= BitWidth)
12606 return SDValue();
12607
12608 // If the changed lsb does not start at a boundary of the new type's
12609 // bitwidth, start at the previous boundary.
12610 if (ShAmt % NewBW)
12611 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
12612 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
12613 std::min(BitWidth, ShAmt + NewBW));
12614 if ((Imm & Mask) == Imm) {
12615 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
12616 if (Opc == ISD::AND)
12617 NewImm ^= APInt::getAllOnesValue(NewBW);
12618 uint64_t PtrOff = ShAmt / 8;
12619 // For big endian targets, we need to adjust the offset to the pointer to
12620 // load the correct bytes.
12621 if (DAG.getDataLayout().isBigEndian())
12622 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
12623
12624 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
12625 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
12626 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
12627 return SDValue();
12628
12629 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
12630 Ptr.getValueType(), Ptr,
12631 DAG.getConstant(PtrOff, SDLoc(LD),
12632 Ptr.getValueType()));
12633 SDValue NewLD =
12634 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
12635 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12636 LD->getMemOperand()->getFlags(), LD->getAAInfo());
12637 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
12638 DAG.getConstant(NewImm, SDLoc(Value),
12639 NewVT));
12640 SDValue NewST =
12641 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
12642 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
12643
12644 AddToWorklist(NewPtr.getNode());
12645 AddToWorklist(NewLD.getNode());
12646 AddToWorklist(NewVal.getNode());
12647 WorklistRemover DeadNodes(*this);
12648 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
12649 ++OpsNarrowed;
12650 return NewST;
12651 }
12652 }
12653
12654 return SDValue();
12655}
12656
12657/// For a given floating point load / store pair, if the load value isn't used
12658/// by any other operations, then consider transforming the pair to integer
12659/// load / store operations if the target deems the transformation profitable.
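/// E.g. (illustrative), a copy written as
///   f = load float, p1 ; store float f, p2
/// may become an i32 load / store pair when the target finds integer ops
/// preferable at this width and both accesses meet the ABI alignment.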
12660SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12661 StoreSDNode *ST = cast<StoreSDNode>(N);
12662 SDValue Chain = ST->getChain();
12663 SDValue Value = ST->getValue();
12664 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12665 Value.hasOneUse() &&
12666 Chain == SDValue(Value.getNode(), 1)) {
12667 LoadSDNode *LD = cast<LoadSDNode>(Value);
12668 EVT VT = LD->getMemoryVT();
12669 if (!VT.isFloatingPoint() ||
12670 VT != ST->getMemoryVT() ||
12671 LD->isNonTemporal() ||
12672 ST->isNonTemporal() ||
12673 LD->getPointerInfo().getAddrSpace() != 0 ||
12674 ST->getPointerInfo().getAddrSpace() != 0)
12675 return SDValue();
12676
12677 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12678 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12679 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12680 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12681 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12682 return SDValue();
12683
12684 unsigned LDAlign = LD->getAlignment();
12685 unsigned STAlign = ST->getAlignment();
12686 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12687 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12688 if (LDAlign < ABIAlign || STAlign < ABIAlign)
12689 return SDValue();
12690
12691 SDValue NewLD =
12692 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12693 LD->getPointerInfo(), LDAlign);
12694
12695 SDValue NewST =
12696 DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12697 ST->getPointerInfo(), STAlign);
12698
12699 AddToWorklist(NewLD.getNode());
12700 AddToWorklist(NewST.getNode());
12701 WorklistRemover DeadNodes(*this);
12702 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12703 ++LdStFP2Int;
12704 return NewST;
12705 }
12706
12707 return SDValue();
12708}
12709
12710// This is a helper function for visitMUL to check the profitability
12711// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12712// MulNode is the original multiply, AddNode is (add x, c1),
12713// and ConstNode is c2.
12714//
12715// If the (add x, c1) has multiple uses, we could increase
12716// the number of adds if we make this transformation.
12717// It would only be worth doing this if we can remove a
12718// multiply in the process. Check for that here.
12719// To illustrate:
12720// (A + c1) * c3
12721// (A + c2) * c3
12722// We're checking for cases where we have common "c3 * A" expressions.
12723bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12724 SDValue &AddNode,
12725 SDValue &ConstNode) {
12726 APInt Val;
12727
12728 // If the add only has one use, this would be OK to do.
12729 if (AddNode.getNode()->hasOneUse())
12730 return true;
12731
12732 // Walk all the users of the constant with which we're multiplying.
12733 for (SDNode *Use : ConstNode->uses()) {
12734 if (Use == MulNode) // This use is the one we're on right now. Skip it.
12735 continue;
12736
12737 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12738 SDNode *OtherOp;
12739 SDNode *MulVar = AddNode.getOperand(0).getNode();
12740
12741 // OtherOp is what we're multiplying against the constant.
12742 if (Use->getOperand(0) == ConstNode)
12743 OtherOp = Use->getOperand(1).getNode();
12744 else
12745 OtherOp = Use->getOperand(0).getNode();
12746
12747 // Check to see if multiply is with the same operand of our "add".
12748 //
12749 // ConstNode = CONST
12750 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
12751 // ...
12752 // AddNode = (A + c1) <-- MulVar is A.
12753 // = AddNode * ConstNode <-- current visiting instruction.
12754 //
12755 // If we make this transformation, we will have a common
12756 // multiply (ConstNode * A) that we can save.
12757 if (OtherOp == MulVar)
12758 return true;
12759
12760 // Now check to see if a future expansion will give us a common
12761 // multiply.
12762 //
12763 // ConstNode = CONST
12764 // AddNode = (A + c1)
12765 // ... = AddNode * ConstNode <-- current visiting instruction.
12766 // ...
12767 // OtherOp = (A + c2)
12768 // Use = OtherOp * ConstNode <-- visiting Use.
12769 //
12770 // If we make this transformation, we will have a common
12771 // multiply (CONST * A) after we also do the same transformation
12772 // to the "t2" instruction.
12773 if (OtherOp->getOpcode() == ISD::ADD &&
12774 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12775 OtherOp->getOperand(0).getNode() == MulVar)
12776 return true;
12777 }
12778 }
12779
12780 // Didn't find a case where this would be profitable.
12781 return false;
12782}
12783
12784static SDValue peekThroughBitcast(SDValue V) {
12785 while (V.getOpcode() == ISD::BITCAST)
12786 V = V.getOperand(0);
12787 return V;
12788}
12789
12790SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12791 unsigned NumStores) {
12792 SmallVector<SDValue, 8> Chains;
12793 SmallPtrSet<const SDNode *, 8> Visited;
12794 SDLoc StoreDL(StoreNodes[0].MemNode);
12795
12796 for (unsigned i = 0; i < NumStores; ++i) {
12797 Visited.insert(StoreNodes[i].MemNode);
12798 }
12799
12800 // Don't include chain operands that are themselves candidate stores.
12801 for (unsigned i = 0; i < NumStores; ++i) {
12802 if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12803 Chains.push_back(StoreNodes[i].MemNode->getChain());
12804 }
12805
12806 assert(Chains.size() > 0 && "Chain should have generated a chain");
12807 return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12808}
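// For illustration, assuming two hypothetical candidate stores where St1
// is chained directly to St0: St0 is in Visited, so St1's chain operand
// (St0) is skipped and only St0's own incoming chain is collected; the
// TokenFactor is then built from the surviving chains.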
12809
12810bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12811 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
12812 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
12813 // Make sure we have something to merge.
12814 if (NumStores < 2)
12815 return false;
12816
12817 // The latest Node in the DAG.
12818 SDLoc DL(StoreNodes[0].MemNode);
12819
12820 int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
12821 unsigned SizeInBits = NumStores * ElementSizeBits;
12822 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12823
12824 EVT StoreTy;
12825 if (UseVector) {
12826 unsigned Elts = NumStores * NumMemElts;
12827 // Get the type for the merged vector store.
12828 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12829 } else
12830 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12831
12832 SDValue StoredVal;
12833 if (UseVector) {
12834 if (IsConstantSrc) {
12835 SmallVector<SDValue, 8> BuildVector;
12836 for (unsigned I = 0; I != NumStores; ++I) {
12837 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12838 SDValue Val = St->getValue();
12839 // If the constant is of the wrong type, convert it now.
12840 if (MemVT != Val.getValueType()) {
12841 Val = peekThroughBitcast(Val);
12842 // Deal with constants of wrong size.
12843 if (ElementSizeBits != Val.getValueSizeInBits()) {
12844 EVT IntMemVT =
12845 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
12846 if (isa<ConstantFPSDNode>(Val)) {
12847 // Not clear how to truncate FP values.
12848 return false;
12849 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
12850 Val = DAG.getConstant(C->getAPIntValue()
12851 .zextOrTrunc(Val.getValueSizeInBits())
12852 .zextOrTrunc(ElementSizeBits),
12853 SDLoc(C), IntMemVT);
12854 }
12855 // Bitcast back to ensure the value has the correct memory type.
12856 Val = DAG.getBitcast(MemVT, Val);
12857 }
12858 BuildVector.push_back(Val);
12859 }
12860 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12861 : ISD::BUILD_VECTOR,
12862 DL, StoreTy, BuildVector);
12863 } else {
12864 SmallVector<SDValue, 8> Ops;
12865 for (unsigned i = 0; i < NumStores; ++i) {
12866 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12867 SDValue Val = peekThroughBitcast(St->getValue());
12868 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
12869 // type MemVT. If the underlying value is not the correct
12870 // type, but it is an extraction of an appropriate vector we
12871 // can recast Val to be of the correct type. This may require
12872 // converting between EXTRACT_VECTOR_ELT and
12873 // EXTRACT_SUBVECTOR.
12874 if ((MemVT != Val.getValueType()) &&
12875 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12876 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
12877 SDValue Vec = Val.getOperand(0);
12878 EVT MemVTScalarTy = MemVT.getScalarType();
12879 // We may need to add a bitcast here to get types to line up.
12880 if (MemVTScalarTy != Vec.getValueType()) {
12881 unsigned Elts = Vec.getValueType().getSizeInBits() /
12882 MemVTScalarTy.getSizeInBits();
12883 EVT NewVecTy =
12884 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
12885 Vec = DAG.getBitcast(NewVecTy, Vec);
12886 }
12887 auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
12888 : ISD::EXTRACT_VECTOR_ELT;
12889 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
12890 }
12891 Ops.push_back(Val);
12892 }
12893
12894 // Build the extracted vector elements back into a vector.
12895 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12896 : ISD::BUILD_VECTOR,
12897 DL, StoreTy, Ops);
12898 }
12899 } else {
12900 // We should always use a vector store when merging extracted vector
12901 // elements, so this path implies a store of constants.
12902 assert(IsConstantSrc && "Merged vector elements should use vector store");
12903
12904 APInt StoreInt(SizeInBits, 0);
12905
12906 // Construct a single integer constant which is made of the smaller
12907 // constant inputs.
12908 bool IsLE = DAG.getDataLayout().isLittleEndian();
12909 for (unsigned i = 0; i < NumStores; ++i) {
12910 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12911 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12912
12913 SDValue Val = St->getValue();
12914 StoreInt <<= ElementSizeBits;
12915 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12916 StoreInt |= C->getAPIntValue()
12917 .zextOrTrunc(ElementSizeBits)
12918 .zextOrTrunc(SizeInBits);
12919 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12920 StoreInt |= C->getValueAPF()
12921 .bitcastToAPInt()
12922 .zextOrTrunc(ElementSizeBits)
12923 .zextOrTrunc(SizeInBits);
12924 // If FP truncation is necessary, give up for now.
12925 if (MemVT.getSizeInBits() != ElementSizeBits)
12926 return false;
12927 } else {
12928 llvm_unreachable("Invalid constant element type")::llvm::llvm_unreachable_internal("Invalid constant element type"
, "/build/llvm-toolchain-snapshot-7~svn326246/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 12928)
;
12929 }
12930 }
12931
12932 // Create the new Load and Store operations.
12933 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12934 }
12935
12936 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12937 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
12938
12939 // Make sure we use a trunc store if it's necessary for legality.
12940 SDValue NewStore;
12941 if (!UseTrunc) {
12942 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
12943 FirstInChain->getPointerInfo(),
12944 FirstInChain->getAlignment());
12945 } else { // Must be realized as a trunc store
12946 EVT LegalizedStoredValueTy =
12947 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
12948 unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
12949 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
12950 SDValue ExtendedStoreVal =
12951 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
12952 LegalizedStoredValueTy);
12953 NewStore = DAG.getTruncStore(
12954 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
12955 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
12956 FirstInChain->getAlignment(),
12957 FirstInChain->getMemOperand()->getFlags());
12958 }
12959
12960 // Replace all merged stores with the new store.
12961 for (unsigned i = 0; i < NumStores; ++i)
12962 CombineTo(StoreNodes[i].MemNode, NewStore);
12963
12964 AddToWorklist(NewChain.getNode());
12965 return true;
12966}
12967
12968void DAGCombiner::getStoreMergeCandidates(
12969 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12970 // This holds the base pointer, index, and the offset in bytes from the base
12971 // pointer.
12972 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
12973 EVT MemVT = St->getMemoryVT();
12974
12975 SDValue Val = peekThroughBitcast(St->getValue());
12976 // We must have a base and an offset.
12977 if (!BasePtr.getBase().getNode())
12978 return;
12979
12980 // Do not handle stores to undef base pointers.
12981 if (BasePtr.getBase().isUndef())
12982 return;
12983
12984 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
12985 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12986 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12987 bool IsLoadSrc = isa<LoadSDNode>(Val);
12988 BaseIndexOffset LBasePtr;
12989 // Match on the load's base pointer if relevant.
12990 EVT LoadVT;
12991 if (IsLoadSrc) {
12992 auto *Ld = cast<LoadSDNode>(Val);
12993 LBasePtr = BaseIndexOffset::match(Ld, DAG);
12994 LoadVT = Ld->getMemoryVT();
12995 // Load and store should be the same type.
12996 if (MemVT != LoadVT)
12997 return;
12998 }
12999 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
13000 int64_t &Offset) -> bool {
13001 if (Other->isVolatile() || Other->isIndexed())
13002 return false;
13003 SDValue Val = peekThroughBitcast(Other->getValue());
13004 // Allow merging constants of different types as integers.
13005 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
13006 : Other->getMemoryVT() != MemVT;
13007 if (IsLoadSrc) {
13008 if (NoTypeMatch)
13009 return false;
13010 // The Load's Base Ptr must also match
13011 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
13012 auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
13013 if (LoadVT != OtherLd->getMemoryVT())
13014 return false;
13015 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
13016 return false;
13017 } else
13018 return false;
13019 }
13020 if (IsConstantSrc) {
13021 if (NoTypeMatch)
13022 return false;
13023 if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
13024 return false;
13025 }
13026 if (IsExtractVecSrc) {
13027 // Do not merge truncated stores here.
13028 if (Other->isTruncatingStore())
13029 return false;
13030 if (!MemVT.bitsEq(Val.getValueType()))
13031 return false;
13032 if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13033 Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13034 return false;
13035 }
13036 Ptr = BaseIndexOffset::match(Other, DAG);
13037 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
13038 };
13039
13040 // We are looking for a root node which is an ancestor of all mergeable
13041 // stores. We search up through a load, to our root and then down
13042 // through all children. For instance we will find Store{1,2,3} if
13043 // St is Store1, Store2, or Store3 where the root is not a load,
13044 // which is always true for nonvolatile ops. TODO: Expand
13045 // the search to find all valid candidates through multiple layers of loads.
13046 //
13047 // Root
13048 // |-------|-------|
13049 // Load Load Store3
13050 // | |
13051 // Store1 Store2
13052 //
13053 // FIXME: We should be able to climb and
13054 // descend TokenFactors to find candidates as well.
13055
13056 SDNode *RootNode = (St->getChain()).getNode();
13057
13058 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
13059 RootNode = Ldn->getChain().getNode();
13060 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13061 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
13062 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
13063 if (I2.getOperandNo() == 0)
13064 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
13065 BaseIndexOffset Ptr;
13066 int64_t PtrDiff;
13067 if (CandidateMatch(OtherST, Ptr, PtrDiff))
13068 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13069 }
13070 } else
13071 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13072 if (I.getOperandNo() == 0)
13073 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
13074 BaseIndexOffset Ptr;
13075 int64_t PtrDiff;
13076 if (CandidateMatch(OtherST, Ptr, PtrDiff))
13077 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13078 }
13079}
13080
13081// We need to check that merging these stores does not cause a loop in
13082// the DAG. Any store candidate may depend on another candidate
13083// indirectly through its operand (we already consider dependencies
13084// through the chain). Check in parallel by searching up from
13085// non-chain operands of candidates.
13086bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13087 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
13088 // FIXME: We should be able to truncate a full search of
13089 // predecessors by doing a BFS and keeping tabs on the originating
13090 // stores from which worklist nodes come, in a similar way to
13091 // TokenFactor simplification.
13092
13093 SmallPtrSet<const SDNode *, 16> Visited;
13094 SmallVector<const SDNode *, 8> Worklist;
13095 unsigned int Max = 8192;
13096 // Search Ops of store candidates.
13097 for (unsigned i = 0; i < NumStores; ++i) {
13098 SDNode *n = StoreNodes[i].MemNode;
13099 // Potential loops may happen only through non-chain operands
13100 for (unsigned j = 1; j < n->getNumOperands(); ++j)
13101 Worklist.push_back(n->getOperand(j).getNode());
13102 }
13103 // Search through DAG. We can stop early if we find a store node.
13104 for (unsigned i = 0; i < NumStores; ++i) {
13105 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
13106 Max))
13107 return false;
13108 // Check if we ended early, failing conservatively if so.
13109 if (Visited.size() >= Max)
13110 return false;
13111 }
13112 return true;
13113}
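// For illustration, a hypothetical dependency this rejects: if St0's
// stored value is computed from a load whose chain passes through St1,
// then St1 is a non-chain predecessor of St0, and merging the two stores
// into one node would create a cycle in the DAG.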
13114
13115bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13116 if (OptLevel == CodeGenOpt::None)
13117 return false;
13118
13119 EVT MemVT = St->getMemoryVT();
13120 int64_t ElementSizeBytes = MemVT.getStoreSize();
13121 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13122
13123 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
13124 return false;
13125
13126 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
13127 Attribute::NoImplicitFloat);
13128
13129 // This function cannot currently deal with non-byte-sized memory sizes.
13130 if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
13131 return false;
13132
13133 if (!MemVT.isSimple())
13134 return false;
13135
13136 // Perform an early exit check. Do not bother looking at stored values that
13137 // are not constants, loads, or extracted vector elements.
13138 SDValue StoredVal = peekThroughBitcast(St->getValue());
13139 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
13140 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
13141 isa<ConstantFPSDNode>(StoredVal);
13142 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13143 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
13144
13145 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
13146 return false;
13147
13148 SmallVector<MemOpLink, 8> StoreNodes;
13149 // Find potential store merge candidates by searching through chain sub-DAG
13150 getStoreMergeCandidates(St, StoreNodes);
13151
13152 // Check if there is anything to merge.
13153 if (StoreNodes.size() < 2)
13154 return false;
13155
13156 // Sort the memory operands according to their distance from the
13157 // base pointer.
13158 std::sort(StoreNodes.begin(), StoreNodes.end(),
13159 [](MemOpLink LHS, MemOpLink RHS) {
13160 return LHS.OffsetFromBase < RHS.OffsetFromBase;
13161 });
13162
13163 // Store Merge attempts to merge the lowest stores. This generally
13164 // works out well: if the merge succeeds, the remaining stores are
13165 // checked after the first collection of stores is merged. However, in
13166 // the case that a non-mergeable store is found first, e.g., {p[-2],
13167 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
13168 // mergeable cases. To prevent this, we prune such stores from the
13169 // front of StoreNodes here.
13170
13171 bool RV = false;
13172 while (StoreNodes.size() > 1) {
13173 unsigned StartIdx = 0;
13174 while ((StartIdx + 1 < StoreNodes.size()) &&
13175 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
13176 StoreNodes[StartIdx + 1].OffsetFromBase)
13177 ++StartIdx;
13178
13179 // Bail if we don't have enough candidates to merge.
13180 if (StartIdx + 1 >= StoreNodes.size())
13181 return RV;
13182
13183 if (StartIdx)
13184 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
13185
13186 // Scan the memory operations on the chain and find the first
13187 // non-consecutive store memory address.
13188 unsigned NumConsecutiveStores = 1;
13189 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
13190 // Check that the addresses are consecutive starting from the second
13191 // element in the list of stores.
13192 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
13193 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
13194 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13195 break;
13196 NumConsecutiveStores = i + 1;
13197 }
13198
13199 if (NumConsecutiveStores < 2) {
13200 StoreNodes.erase(StoreNodes.begin(),
13201 StoreNodes.begin() + NumConsecutiveStores);
13202 continue;
13203 }
13204
13205 // Check that we can merge these candidates without causing a cycle
13206 if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
13207 NumConsecutiveStores)) {
13208 StoreNodes.erase(StoreNodes.begin(),
13209 StoreNodes.begin() + NumConsecutiveStores);
13210 continue;
13211 }
13212
13213 // The node with the lowest store address.
13214 LLVMContext &Context = *DAG.getContext();
13215 const DataLayout &DL = DAG.getDataLayout();
13216
13217 // Store the constants into memory as one consecutive store.
13218 if (IsConstantSrc) {
13219 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13220 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13221 unsigned FirstStoreAlign = FirstInChain->getAlignment();
13222 unsigned LastLegalType = 1;
13223 unsigned LastLegalVectorType = 1;
13224 bool LastIntegerTrunc = false;
13225 bool NonZero = false;
13226 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
13227 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13228 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
13229 SDValue StoredVal = ST->getValue();
13230 bool IsElementZero = false;
13231 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
13232 IsElementZero = C->isNullValue();
13233 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
13234 IsElementZero = C->getConstantFPValue()->isNullValue();
13235 if (IsElementZero) {
13236 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
13237 FirstZeroAfterNonZero = i;
13238 }
13239 NonZero |= !IsElementZero;
13240
13241 // Find a legal type for the constant store.
13242 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13243 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13244 bool IsFast = false;
13245 if (TLI.isTypeLegal(StoreTy) &&
13246 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13247 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13248 FirstStoreAlign, &IsFast) &&
13249 IsFast) {
13250 LastIntegerTrunc = false;
13251 LastLegalType = i + 1;
13252 // Or check whether a truncstore is legal.
13253 } else if (TLI.getTypeAction(Context, StoreTy) ==
13254 TargetLowering::TypePromoteInteger) {
13255 EVT LegalizedStoredValueTy =
13256 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
13257 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13258 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13259 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13260 FirstStoreAlign, &IsFast) &&
13261 IsFast) {
13262 LastIntegerTrunc = true;
13263 LastLegalType = i + 1;
13264 }
13265 }
13266
13267 // We only use vectors if the constant is known to be zero or the target
13268 // allows it and the function is not marked with the noimplicitfloat
13269 // attribute.
13270 if ((!NonZero ||
13271 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
13272 !NoVectors) {
13273 // Find a legal type for the vector store.
13274 unsigned Elts = (i + 1) * NumMemElts;
13275 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13276 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
13277 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13278 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13279 FirstStoreAlign, &IsFast) &&
13280 IsFast)
13281 LastLegalVectorType = i + 1;
13282 }
13283 }
13284
13285 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
13286 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
13287
13288 // Check if we found a legal integer type that creates a meaningful merge.
13289 if (NumElem < 2) {
13290 // We know that candidate stores are in order and of correct
13291 // shape. While there is no mergeable sequence from the
13292 // beginning, one may start later in the sequence. The only
13293 // reason a merge of size N could have failed where another of
13294 // the same size would not have is if the alignment has
13295 // improved or we've dropped a non-zero value. Drop as many
13296 // candidates as we can here.
13297 unsigned NumSkip = 1;
13298 while (
13299 (NumSkip < NumConsecutiveStores) &&
13300 (NumSkip < FirstZeroAfterNonZero) &&
13301 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
13302 NumSkip++;
13303 }
13304 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13305 continue;
13306 }
13307
13308 bool Merged = MergeStoresOfConstantsOrVecElts(
13309 StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
13310 RV |= Merged;
13311
13312 // Remove merged stores for next iteration.
13313 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13314 continue;
13315 }
13316
13317 // When extracting multiple vector elements, try to store them
13318 // in one vector store rather than a sequence of scalar stores.
13319 if (IsExtractVecSrc) {
13320 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13321 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13322 unsigned FirstStoreAlign = FirstInChain->getAlignment();
13323 unsigned NumStoresToMerge = 1;
13324 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13325 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13326 SDValue StVal = peekThroughBitcast(St->getValue());
13327 // This restriction could be loosened.
13328 // Bail out if any stored values are not elements extracted from a
13329 // vector. It should be possible to handle mixed sources, but load
13330 // sources need more careful handling (see the block of code below that
13331 // handles consecutive loads).
13332 if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13333 StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13334 return RV;
13335
13336 // Find a legal type for the vector store.
13337 unsigned Elts = (i + 1) * NumMemElts;
13338 EVT Ty =
13339 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13340 bool IsFast;
13341 if (TLI.isTypeLegal(Ty) &&
13342 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13343 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13344 FirstStoreAlign, &IsFast) &&
13345 IsFast)
13346 NumStoresToMerge = i + 1;
13347 }
13348
13349 // Check if we found a legal vector type that creates a meaningful merge.
13350 if (NumStoresToMerge < 2) {
13351 // We know that candidate stores are in order and of correct
13352 // shape. While there is no mergeable sequence from the
13353 // beginning, one may start later in the sequence. The only
13354 // reason a merge of size N could have failed where another of
13355 // the same size would not have is if the alignment has
13356 // improved. Drop as many candidates as we can here.
13357 unsigned NumSkip = 1;
13358 while ((NumSkip < NumConsecutiveStores) &&
13359 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13360 NumSkip++;
13361
13362 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13363 continue;
13364 }
13365
13366 bool Merged = MergeStoresOfConstantsOrVecElts(
13367 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
13368 if (!Merged) {
13369 StoreNodes.erase(StoreNodes.begin(),
13370 StoreNodes.begin() + NumStoresToMerge);
13371 continue;
13372 }
13373 // Remove merged stores for next iteration.
13374 StoreNodes.erase(StoreNodes.begin(),
13375 StoreNodes.begin() + NumStoresToMerge);
13376 RV = true;
13377 continue;
13378 }
13379
13380 // Below we handle the case of multiple consecutive stores that
13381 // come from multiple consecutive loads. We merge them into a single
13382 // wide load and a single wide store.
13383
13384 // Look for load nodes which are used by the stored values.
13385 SmallVector<MemOpLink, 8> LoadNodes;
13386
13387 // Find acceptable loads. Loads need to have the same chain (token factor),
13388 // must not be zext, volatile, or indexed, and they must be consecutive.
13389 BaseIndexOffset LdBasePtr;
13390 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13391 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13392 SDValue Val = peekThroughBitcast(St->getValue());
13393 LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
13394 if (!Ld)
13395 break;
13396
13397 // Loads must only have one use.
13398 if (!Ld->hasNUsesOfValue(1, 0))
13399 break;
13400
13401 // The memory operands must not be volatile.
13402 if (Ld->isVolatile() || Ld->isIndexed())
13403 break;
13404
13405 // The stored memory type must be the same.
13406 if (Ld->getMemoryVT() != MemVT)
13407 break;
13408
13409 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
13410 // If this is not the first ptr that we check.
13411 int64_t LdOffset = 0;
13412 if (LdBasePtr.getBase().getNode()) {
13413 // The base ptr must be the same.
13414 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
13415 break;
13416 } else {
13417 // Record this base pointer; all subsequent ones must match it.
13418 LdBasePtr = LdPtr;
13419 }
13420
13421 // We found a potential memory operand to merge.
13422 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
13423 }
13424
13425 if (LoadNodes.size() < 2) {
13426 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
13427 continue;
13428 }
13429
13430 // If we have load/store pair instructions and we only have two values,
13431 // don't bother merging.
13432 unsigned RequiredAlignment;
13433 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
13434 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
13435 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
13436 continue;
13437 }
13438 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13439 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13440 unsigned FirstStoreAlign = FirstInChain->getAlignment();
13441 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
13442 unsigned FirstLoadAS = FirstLoad->getAddressSpace();
13443 unsigned FirstLoadAlign = FirstLoad->getAlignment();
13444
13445 // Scan the memory operations on the chain and find the first
13446 // non-consecutive load memory address. These variables hold the index in
13447 // the store node array.
13448 unsigned LastConsecutiveLoad = 1;
13449 // These variables refer to a size, not an index in the array.
13450 unsigned LastLegalVectorType = 1;
13451 unsigned LastLegalIntegerType = 1;
13452 bool isDereferenceable = true;
13453 bool DoIntegerTruncate = false;
13454 StartAddress = LoadNodes[0].OffsetFromBase;
13455 SDValue FirstChain = FirstLoad->getChain();
13456 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
13457 // All loads must share the same chain.
13458 if (LoadNodes[i].MemNode->getChain() != FirstChain)
13459 break;
13460
13461 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13462 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13463 break;
13464 LastConsecutiveLoad = i;
13465
13466 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13467 isDereferenceable = false;
13468
13469 // Find a legal type for the vector store.
13470 unsigned Elts = (i + 1) * NumMemElts;
13471 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13472
13473 bool IsFastSt, IsFastLd;
13474 if (TLI.isTypeLegal(StoreTy) &&
13475 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13476 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13477 FirstStoreAlign, &IsFastSt) &&
13478 IsFastSt &&
13479 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13480 FirstLoadAlign, &IsFastLd) &&
13481 IsFastLd) {
13482 LastLegalVectorType = i + 1;
13483 }
13484
13485 // Find a legal type for the integer store.
13486 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13487 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13488 if (TLI.isTypeLegal(StoreTy) &&
13489 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13490 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13491 FirstStoreAlign, &IsFastSt) &&
13492 IsFastSt &&
13493 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13494 FirstLoadAlign, &IsFastLd) &&
13495 IsFastLd) {
13496 LastLegalIntegerType = i + 1;
13497 DoIntegerTruncate = false;
13498 // Or check whether a truncstore and extload are legal.
13499 } else if (TLI.getTypeAction(Context, StoreTy) ==
13500 TargetLowering::TypePromoteInteger) {
13501 EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
13502 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13503 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13504 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
13505 StoreTy) &&
13506 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
13507 StoreTy) &&
13508 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13509 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13510 FirstStoreAlign, &IsFastSt) &&
13511 IsFastSt &&
13512 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13513 FirstLoadAlign, &IsFastLd) &&
13514 IsFastLd) {
13515 LastLegalIntegerType = i + 1;
13516 DoIntegerTruncate = true;
13517 }
13518 }
13519 }
13520
13521 // Only use vector types if the vector type is larger than the integer type.
13522 // If they are the same, use integers.
13523 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
13524 unsigned LastLegalType =
13525 std::max(LastLegalVectorType, LastLegalIntegerType);
13526
13527 // We add +1 here because the LastXXX variables refer to an index while
13528 // NumElem refers to a count of elements.
13529 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
13530 NumElem = std::min(LastLegalType, NumElem);
13531
13532 if (NumElem < 2) {
13533 // We know that candidate stores are in order and of correct
13534 // shape. While there is no mergeable sequence from the
13535 // beginning, one may start later in the sequence. The only
13536 // reason a merge of size N could have failed where another of
13537 // the same size would not have is if the alignment of either
13538 // the load or store has improved. Drop as many candidates as we
13539 // can here.
13540 unsigned NumSkip = 1;
13541 while ((NumSkip < LoadNodes.size()) &&
13542 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
13543 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13544 NumSkip++;
13545 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13546 continue;
13547 }
13548
13549 // Determine whether it is better to use vectors or integers to load and
13550 // store to memory.
13551 EVT JointMemOpVT;
13552 if (UseVectorTy) {
13553 // Find a legal type for the vector store.
13554 unsigned Elts = NumElem * NumMemElts;
13555 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13556 } else {
13557 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13558 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13559 }
13560
13561 SDLoc LoadDL(LoadNodes[0].MemNode);
13562 SDLoc StoreDL(StoreNodes[0].MemNode);
13563
13564 // The merged loads are required to have the same incoming chain, so
13565 // using the first's chain is acceptable.
13566
13567 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13568 AddToWorklist(NewStoreChain.getNode());
13569
13570 MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13571 MachineMemOperand::MODereferenceable:
13572 MachineMemOperand::MONone;
13573
13574 SDValue NewLoad, NewStore;
13575 if (UseVectorTy || !DoIntegerTruncate) {
13576 NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13577 FirstLoad->getBasePtr(),
13578 FirstLoad->getPointerInfo(), FirstLoadAlign,
13579 MMOFlags);
13580 NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13581 FirstInChain->getBasePtr(),
13582 FirstInChain->getPointerInfo(), FirstStoreAlign);
13583 } else { // This must be the truncstore/extload case
13584 EVT ExtendedTy =
13585 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13586 NewLoad =
13587 DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13588 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13589 JointMemOpVT, FirstLoadAlign, MMOFlags);
13590 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13591 FirstInChain->getBasePtr(),
13592 FirstInChain->getPointerInfo(), JointMemOpVT,
13593 FirstInChain->getAlignment(),
13594 FirstInChain->getMemOperand()->getFlags());
13595 }
13596
13597 // Transfer chain users from old loads to the new load.
13598 for (unsigned i = 0; i < NumElem; ++i) {
13599 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13600 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13601 SDValue(NewLoad.getNode(), 1));
13602 }
13603
13604 // Replace all the stores with the new store. Recursively remove the
13605 // corresponding value if it's no longer used.
13606 for (unsigned i = 0; i < NumElem; ++i) {
13607 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
13608 CombineTo(StoreNodes[i].MemNode, NewStore);
13609 if (Val.getNode()->use_empty())
13610 recursivelyDeleteUnusedNodes(Val.getNode());
13611 }
13612
13613 RV = true;
13614 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13615 }
13616 return RV;
13617}
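// For illustration, a hypothetical best case for the constant path: four
// consecutive i8 stores of constants to p, p+1, p+2 and p+3 can be
// replaced by a single i32 store of the combined bit pattern, provided
// the target reports i32 as legal and the access as fast.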
13618
13619SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13620 SDLoc SL(ST);
13621 SDValue ReplStore;
13622
13623 // Replace the chain to avoid dependency.
13624 if (ST->isTruncatingStore()) {
13625 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13626 ST->getBasePtr(), ST->getMemoryVT(),
13627 ST->getMemOperand());
13628 } else {
13629 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13630 ST->getMemOperand());
13631 }
13632
13633 // Create token to keep both nodes around.
13634 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13635 MVT::Other, ST->getChain(), ReplStore);
13636
13637 // Make sure the new and old chains are cleaned up.
13638 AddToWorklist(Token.getNode());
13639
13640 // Don't add users to work list.
13641 return CombineTo(ST, Token, false);
13642}
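// For illustration: if ST's original chain is C0 and BetterChain is C1,
// the store is rebuilt on C1 and TokenFactor(C0, NewStore) is returned,
// so users of ST's chain still depend on both C0 and the new store.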
13643
13644SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13645 SDValue Value = ST->getValue();
13646 if (Value.getOpcode() == ISD::TargetConstantFP)
13647 return SDValue();
13648
13649 SDLoc DL(ST);
13650
13651 SDValue Chain = ST->getChain();
13652 SDValue Ptr = ST->getBasePtr();
13653
13654 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13655
13656 // NOTE: If the original store is volatile, this transform must not increase
13657 // the number of stores. For example, on x86-32 an f64 can be stored in one
13658 // processor operation but an i64 (which is not legal) requires two. So the
13659 // transform should not be done in this case.
13660
13661 SDValue Tmp;
13662 switch (CFP->getSimpleValueType(0).SimpleTy) {
13663 default:
13664 llvm_unreachable("Unknown FP type")::llvm::llvm_unreachable_internal("Unknown FP type", "/build/llvm-toolchain-snapshot-7~svn326246/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 13664)
;
13665 case MVT::f16: // We don't do this for these yet.
13666 case MVT::f80:
13667 case MVT::f128:
13668 case MVT::ppcf128:
13669 return SDValue();
13670 case MVT::f32:
13671 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13672 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13673
13674 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13675 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13676 MVT::i32);
13677 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13678 }
13679
13680 return SDValue();
13681 case MVT::f64:
13682 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13683 !ST->isVolatile()) ||
13684 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13685
13686 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13687 getZExtValue(), SDLoc(CFP), MVT::i64);
13688 return DAG.getStore(Chain, DL, Tmp,
13689 Ptr, ST->getMemOperand());
13690 }
13691
13692 if (!ST->isVolatile() &&
13693 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13694 // Many FP stores are not made apparent until after legalize, e.g. for
13695 // argument passing. Since this is so common, custom legalize the
13696 // 64-bit integer store into two 32-bit stores.
13697 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13698 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13699 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13700 if (DAG.getDataLayout().isBigEndian())
13701 std::swap(Lo, Hi);
13702
13703 unsigned Alignment = ST->getAlignment();
13704 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13705 AAMDNodes AAInfo = ST->getAAInfo();
13706
13707 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13708 ST->getAlignment(), MMOFlags, AAInfo);
13709 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13710 DAG.getConstant(4, DL, Ptr.getValueType()));
13711 Alignment = MinAlign(Alignment, 4U);
13712 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13713 ST->getPointerInfo().getWithOffset(4),
13714 Alignment, MMOFlags, AAInfo);
13715 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13716 St0, St1);
13717 }
13718
13719 return SDValue();
13720 }
13721}
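// For illustration: a store of the f32 constant 1.0f becomes an i32 store
// of 0x3f800000, its IEEE-754 bit pattern. For an f64 store on a target
// where i64 stores are unavailable but i32 stores are legal, the value is
// split into two i32 stores at offsets 0 and 4 (swapped on big-endian).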
13722
13723SDValue DAGCombiner::visitSTORE(SDNode *N) {
13724 StoreSDNode *ST = cast<StoreSDNode>(N);
13725 SDValue Chain = ST->getChain();
13726 SDValue Value = ST->getValue();
13727 SDValue Ptr = ST->getBasePtr();
13728
13729 // If this is a store of a bit convert, store the input value if the
13730 // resultant store does not need a higher alignment than the original.
13731 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
13732 ST->isUnindexed()) {
13733 EVT SVT = Value.getOperand(0).getValueType();
13734 if (((!LegalOperations && !ST->isVolatile()) ||
13735 TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
13736 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
13737 unsigned OrigAlign = ST->getAlignment();
13738 bool Fast = false;
13739 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
13740 ST->getAddressSpace(), OrigAlign, &Fast) &&
13741 Fast) {
13742 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13743 ST->getPointerInfo(), OrigAlign,
13744 ST->getMemOperand()->getFlags(), ST->getAAInfo());
13745 }
13746 }
13747 }
13748
13749 // Turn 'store undef, Ptr' -> nothing.
13750 if (Value.isUndef() && ST->isUnindexed())
13751 return Chain;
13752
13753 // Try to infer better alignment information than the store already has.
13754 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
13755 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13756 if (Align > ST->getAlignment()) {
13757 SDValue NewStore =
13758 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
13759 ST->getMemoryVT(), Align,
13760 ST->getMemOperand()->getFlags(), ST->getAAInfo());
13761 if (NewStore.getNode() != N)
13762 return CombineTo(ST, NewStore, true);
13763 }
13764 }
13765 }
13766
13767 // Try transforming a pair of floating-point load / store ops to integer
13768 // load / store ops.
13769 if (SDValue NewST = TransformFPLoadStorePair(N))
13770 return NewST;
13771
13772 if (ST->isUnindexed()) {
13773 // Walk up chain skipping non-aliasing memory nodes, on this store and any
13774 // adjacent stores.
13775 if (findBetterNeighborChains(ST)) {
13776 // replaceStoreChain uses CombineTo, which handled all of the worklist
13777 // manipulation. Return the original node to not do anything else.
13778 return SDValue(ST, 0);
13779 }
13780 Chain = ST->getChain();
13781 }
13782
13783 // FIXME: is there such a thing as a truncating indexed store?
13784 if (ST->isTruncatingStore() && ST->isUnindexed() &&
13785 Value.getValueType().isInteger()) {
13786 // See if we can simplify the input to this truncstore with knowledge that
13787 // only the low bits are being used. For example:
13788 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
13789 SDValue Shorter = DAG.GetDemandedBits(
13790 Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13791 ST->getMemoryVT().getScalarSizeInBits()));
13792 AddToWorklist(Value.getNode());
13793 if (Shorter.getNode())
13794 return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
13795 Ptr, ST->getMemoryVT(), ST->getMemOperand());
13796
13797 // Otherwise, see if we can simplify the operation with
13798 // SimplifyDemandedBits, which only works if the value has a single use.
13799 if (SimplifyDemandedBits(
13800 Value,
13801 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13802 ST->getMemoryVT().getScalarSizeInBits()))) {
13803 // Re-visit the store if anything changed and the store hasn't been merged
13804 // with another node (N is deleted). SimplifyDemandedBits will add Value's
13805 // node back to the worklist if necessary, but we also need to re-visit
13806 // the Store node itself.
13807 if (N->getOpcode() != ISD::DELETED_NODE)
13808 AddToWorklist(N);
13809 return SDValue(N, 0);
13810 }
13811 }
13812
13813 // If this is a load followed by a store to the same location, then the store
13814 // is dead/noop.
13815 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
13816 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
13817 ST->isUnindexed() && !ST->isVolatile() &&
13818 // There can't be any side effects between the load and store, such as
13819 // a call or store.
13820 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
13821 // The store is dead, remove it.
13822 return Chain;
13823 }
13824 }
13825
13826 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
13827 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
13828 !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
13829 ST->getMemoryVT() == ST1->getMemoryVT()) {
13830 // If this is a store followed by a store with the same value to the same
13831 // location, then the store is dead/noop.
13832 if (ST1->getValue() == Value) {
13833 // The store is dead, remove it.
13834 return Chain;
13835 }
13836
13837 // If this store's preceding store is to the same location,
13838 // and no other node is chained to that store, we can effectively
13839 // drop the preceding store. Do not remove stores to undef as they
13840 // may be used as data sinks.
13841 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
13842 !ST1->getBasePtr().isUndef()) {
13843 // ST1 is fully overwritten and can be elided. Combine with its chain
13844 // value.
13845 CombineTo(ST1, ST1->getChain());
13846 return SDValue();
13847 }
13848 }
13849 }
13850
13851 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
13852 // truncating store. We can do this even if this is already a truncstore.
13853 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
13854 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
13855 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
13856 ST->getMemoryVT())) {
13857 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
13858 Ptr, ST->getMemoryVT(), ST->getMemOperand());
13859 }
13860
13861 // Always perform this optimization before types are legal. If the target
13862 // prefers, also try this after legalization to catch stores that were created
13863 // by intrinsics or other nodes.
13864 if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
13865 while (true) {
13866 // There can be multiple store sequences on the same chain.
13867 // Keep trying to merge store sequences until we are unable to do so
13868 // or until we merge the last store on the chain.
13869 bool Changed = MergeConsecutiveStores(ST);
13870 if (!Changed) break;
13871 // Return N, as the merge only uses CombineTo and no further worklist
13872 // cleanup is necessary.
13873 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
13874 return SDValue(N, 0);
13875 }
13876 }
13877
13878 // Try transforming N to an indexed store.
13879 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13880 return SDValue(N, 0);
13881
13882 // Turn 'store float 1.0, Ptr' -> 'store int 0x3f800000, Ptr'
13883 //
13884 // Make sure to do this only after attempting to merge stores in order to
13885 // avoid changing the types of some subset of stores due to visit order,
13886 // preventing their merging.
13887 if (isa<ConstantFPSDNode>(ST->getValue())) {
13888 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
13889 return NewSt;
13890 }
13891
13892 if (SDValue NewSt = splitMergedValStore(ST))
13893 return NewSt;
13894
13895 return ReduceLoadOpStoreWidth(N);
13896}
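// For illustration of the simpler folds above (hypothetical operands):
// 'store undef, Ptr' is deleted by returning the chain, and a store of a
// value just loaded from the same unindexed, non-volatile location with
// no intervening side effects is likewise elided as a no-op.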
13897
13898/// For the instruction sequence of store below, F and I values
13899/// are bundled together as an i64 value before being stored into memory.
13900 /// Sometimes it is more efficient to generate separate stores for F and I,
13901/// which can remove the bitwise instructions or sink them to colder places.
13902///
13903/// (store (or (zext (bitcast F to i32) to i64),
13904/// (shl (zext I to i64), 32)), addr) -->
13905/// (store F, addr) and (store I, addr+4)
13906///
13907/// Similarly, splitting for other merged store can also be beneficial, like:
13908/// For pair of {i32, i32}, i64 store --> two i32 stores.
13909/// For pair of {i32, i16}, i64 store --> two i32 stores.
13910/// For pair of {i16, i16}, i32 store --> two i16 stores.
13911/// For pair of {i16, i8}, i32 store --> two i16 stores.
13912/// For pair of {i8, i8}, i16 store --> two i8 stores.
13913///
13914/// We allow each target to determine specifically which kind of splitting is
13915/// supported.
13916///
13917/// The store patterns are commonly seen from the simple code snippet below
13918 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
13919/// void goo(const std::pair<int, float> &);
13920/// hoo() {
13921/// ...
13922/// goo(std::make_pair(tmp, ftmp));
13923/// ...
13924/// }
13925///
13926SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13927 if (OptLevel == CodeGenOpt::None)
13928 return SDValue();
13929
13930 SDValue Val = ST->getValue();
13931 SDLoc DL(ST);
13932
13933 // Match OR operand.
13934 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13935 return SDValue();
13936
13937 // Match SHL operand and get Lower and Higher parts of Val.
13938 SDValue Op1 = Val.getOperand(0);
13939 SDValue Op2 = Val.getOperand(1);
13940 SDValue Lo, Hi;
13941 if (Op1.getOpcode() != ISD::SHL) {
13942 std::swap(Op1, Op2);
13943 if (Op1.getOpcode() != ISD::SHL)
13944 return SDValue();
13945 }
13946 Lo = Op2;
13947 Hi = Op1.getOperand(0);
13948 if (!Op1.hasOneUse())
13949 return SDValue();
13950
13951 // Match shift amount to HalfValBitSize.
13952 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13953 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13954 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13955 return SDValue();
13956
13957 // Lo and Hi must be zero-extended from scalar integers no wider than
13958 // HalfValBitSize.
13959 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13960 !Lo.getOperand(0).getValueType().isScalarInteger() ||
13961 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13962 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13963 !Hi.getOperand(0).getValueType().isScalarInteger() ||
13964 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13965 return SDValue();
13966
13967 // Use the EVT of low and high parts before bitcast as the input
13968 // of the target query.
13969 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13970 ? Lo.getOperand(0).getValueType()
13971 : Lo.getValueType();
13972 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13973 ? Hi.getOperand(0).getValueType()
13974 : Hi.getValueType();
13975 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13976 return SDValue();
13977
13978 // Start to split store.
13979 unsigned Alignment = ST->getAlignment();
13980 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13981 AAMDNodes AAInfo = ST->getAAInfo();
13982
13983 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13984 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13985 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13986 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13987
13988 SDValue Chain = ST->getChain();
13989 SDValue Ptr = ST->getBasePtr();
13990 // Lower value store.
13991 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13992 ST->getAlignment(), MMOFlags, AAInfo);
13993 Ptr =
13994 DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13995 DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13996 // Higher value store.
13997 SDValue St1 =
13998 DAG.getStore(St0, DL, Hi, Ptr,
13999 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
14000 Alignment / 2, MMOFlags, AAInfo);
14001 return St1;
14002}
14003
14004/// Convert a disguised subvector insertion into a shuffle:
14005/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
14006/// bitcast(shuffle (bitcast V), (extended X), Mask)
14007/// Note: We do not use an insert_subvector node because that requires a legal
14008/// subvector type.
14009SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
14010 SDValue InsertVal = N->getOperand(1);
14011 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
14012 !InsertVal.getOperand(0).getValueType().isVector())
14013 return SDValue();
14014
14015 SDValue SubVec = InsertVal.getOperand(0);
14016 SDValue DestVec = N->getOperand(0);
14017 EVT SubVecVT = SubVec.getValueType();
14018 EVT VT = DestVec.getValueType();
14019 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
14020 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
14021 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
14022
14023 // Step 1: Create a shuffle mask that implements this insert operation. The
14024 // vector that we are inserting into will be operand 0 of the shuffle, so
14025 // those elements are just 'i'. The inserted subvector is in the first
14026 // positions of operand 1 of the shuffle. Example:
14027 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
14028 SmallVector<int, 16> Mask(NumMaskVals);
14029 for (unsigned i = 0; i != NumMaskVals; ++i) {
14030 if (i / NumSrcElts == InsIndex)
14031 Mask[i] = (i % NumSrcElts) + NumMaskVals;
14032 else
14033 Mask[i] = i;
14034 }
14035
14036 // Bail out if the target can not handle the shuffle we want to create.
14037 EVT SubVecEltVT = SubVecVT.getVectorElementType();
14038 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
14039 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
14040 return SDValue();
14041
14042 // Step 2: Create a wide vector from the inserted source vector by appending
14043 // undefined elements. This is the same size as our destination vector.
14044 SDLoc DL(N);
14045 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
14046 ConcatOps[0] = SubVec;
14047 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
14048
14049 // Step 3: Shuffle in the padded subvector.
14050 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
14051 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
14052 AddToWorklist(PaddedSubV.getNode());
14053 AddToWorklist(DestVecBC.getNode());
14054 AddToWorklist(Shuf.getNode());
14055 return DAG.getBitcast(VT, Shuf);
14056}
14057
14058SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
14059 SDValue InVec = N->getOperand(0);
14060 SDValue InVal = N->getOperand(1);
14061 SDValue EltNo = N->getOperand(2);
14062 SDLoc DL(N);
14063
14064 // If the inserted element is an UNDEF, just use the input vector.
14065 if (InVal.isUndef())
14066 return InVec;
14067
14068 EVT VT = InVec.getValueType();
14069
14070 // Remove redundant insertions:
14071 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
14072 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14073 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
14074 return InVec;
14075
14076 // We must know which element is being inserted for folds below here.
14077 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14078 if (!IndexC)
14079 return SDValue();
14080 unsigned Elt = IndexC->getZExtValue();
14081
14082 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
14083 return Shuf;
14084
14085 // Canonicalize insert_vector_elt dag nodes.
14086 // Example:
14087 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
14088 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
14089 //
14090 // Do this only if the child insert_vector node has one use; also
14091 // do this only if indices are both constants and Idx1 < Idx0.
14092 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
14093 && isa<ConstantSDNode>(InVec.getOperand(2))) {
14094 unsigned OtherElt = InVec.getConstantOperandVal(2);
14095 if (Elt < OtherElt) {
14096 // Swap nodes.
14097 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14098 InVec.getOperand(0), InVal, EltNo);
14099 AddToWorklist(NewOp.getNode());
14100 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
14101 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
14102 }
14103 }
14104
14105 // If we can't generate a legal BUILD_VECTOR, exit
14106 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
14107 return SDValue();
14108
14109 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
14110 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
14111 // vector elements.
14112 SmallVector<SDValue, 8> Ops;
14113 // Do not combine these two vectors if the output vector will not replace
14114 // the input vector.
14115 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
14116 Ops.append(InVec.getNode()->op_begin(),
14117 InVec.getNode()->op_end());
14118 } else if (InVec.isUndef()) {
14119 unsigned NElts = VT.getVectorNumElements();
14120 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
14121 } else {
14122 return SDValue();
14123 }
14124
14125 // Insert the element
14126 if (Elt < Ops.size()) {
14127 // All the operands of BUILD_VECTOR must have the same type;
14128 // we enforce that here.
14129 EVT OpVT = Ops[0].getValueType();
14130 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
14131 }
14132
14133 // Return the new vector
14134 return DAG.getBuildVector(VT, DL, Ops);
14135}
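// For illustration (hypothetical operands): inserting value x at index 1
// of a single-use (BUILD_VECTOR a, b, c, d) is folded here to
// (BUILD_VECTOR a, x, c, d), avoiding an INSERT_VECTOR_ELT node.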
14136
14137SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
14138 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
14139 assert(!OriginalLoad->isVolatile());
14140
14141 EVT ResultVT = EVE->getValueType(0);
14142 EVT VecEltVT = InVecVT.getVectorElementType();
14143 unsigned Align = OriginalLoad->getAlignment();
14144 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
14145 VecEltVT.getTypeForEVT(*DAG.getContext()));
14146
14147 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
14148 return SDValue();
14149
14150 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
14151 ISD::NON_EXTLOAD : ISD::EXTLOAD;
14152 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
14153 return SDValue();
14154
14155 Align = NewAlign;
14156
14157 SDValue NewPtr = OriginalLoad->getBasePtr();
14158 SDValue Offset;
14159 EVT PtrType = NewPtr.getValueType();
14160 MachinePointerInfo MPI;
14161 SDLoc DL(EVE);
14162 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
14163 int Elt = ConstEltNo->getZExtValue();
14164 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
14165 Offset = DAG.getConstant(PtrOff, DL, PtrType);
14166 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
14167 } else {
14168 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
14169 Offset = DAG.getNode(
14170 ISD::MUL, DL, PtrType, Offset,
14171 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
14172 MPI = OriginalLoad->getPointerInfo();
14173 }
14174 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
14175
14176 // The replacement we need to do here is a little tricky: we need to
14177 // replace an extractelement of a load with a load.
14178 // Use ReplaceAllUsesOfValuesWith to do the replacement.
14179 // Note that this replacement assumes that the extractelement is the only
14180 // use of the load; that's okay because we don't want to perform this
14181 // transformation in other cases anyway.
14182 SDValue Load;
14183 SDValue Chain;
14184 if (ResultVT.bitsGT(VecEltVT)) {
14185 // If the result type of vextract is wider than the load, then issue an
14186 // extending load instead.
14187 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
14188 VecEltVT)
14189 ? ISD::ZEXTLOAD
14190 : ISD::EXTLOAD;
14191 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
14192 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
14193 Align, OriginalLoad->getMemOperand()->getFlags(),
14194 OriginalLoad->getAAInfo());
14195 Chain = Load.getValue(1);
14196 } else {
14197 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
14198 MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
14199 OriginalLoad->getAAInfo());
14200 Chain = Load.getValue(1);
14201 if (ResultVT.bitsLT(VecEltVT))
14202 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
14203 else
14204 Load = DAG.getBitcast(ResultVT, Load);
14205 }
14206 WorklistRemover DeadNodes(*this);
14207 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
14208 SDValue To[] = { Load, Chain };
14209 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
14210 // Since we're explicitly calling ReplaceAllUses, add the new node to the
14211 // worklist explicitly as well.
14212 AddToWorklist(Load.getNode());
14213 AddUsersToWorklist(Load.getNode()); // Add users too
14214 // Make sure to revisit this node to clean it up; it will usually be dead.
14215 AddToWorklist(EVE);
14216 ++OpsNarrowed;
14217 return SDValue(EVE, 0);
14218}
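// A sketch of the narrowing this helper performs, assuming a constant index
// and a non-volatile, single-use v4f32 load (values hypothetical):
//   (f32 (extract_vector_elt (v4f32 load $addr), 2))
//   -> (f32 load $addr+8)    ; PtrOff = 32 bits * 2 / 8 = 8 bytes
// with the old load's chain result rewired to the new scalar load's chain.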
14219
14220SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
14221 // (vextract (scalar_to_vector val), 0) -> val
14222 SDValue InVec = N->getOperand(0);
14223 EVT VT = InVec.getValueType();
14224 EVT NVT = N->getValueType(0);
14225
14226 if (InVec.isUndef())
14227 return DAG.getUNDEF(NVT);
14228
14229 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
14230 // Check if the result type doesn't match the inserted element type. A
14231 // SCALAR_TO_VECTOR may truncate the inserted element and the
14232 // EXTRACT_VECTOR_ELT may widen the extracted vector.
14233 SDValue InOp = InVec.getOperand(0);
14234 if (InOp.getValueType() != NVT) {
14235 assert(InOp.getValueType().isInteger() && NVT.isInteger());
14236 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
14237 }
14238 return InOp;
14239 }
14240
14241 SDValue EltNo = N->getOperand(1);
14242 ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
14243
14244 // extract_vector_elt of out-of-bounds element -> UNDEF
14245 if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
14246 return DAG.getUNDEF(NVT);
14247
14248 // extract_vector_elt (build_vector x, y), 1 -> y
14249 if (ConstEltNo &&
14250 InVec.getOpcode() == ISD::BUILD_VECTOR &&
14251 TLI.isTypeLegal(VT) &&
14252 (InVec.hasOneUse() ||
14253 TLI.aggressivelyPreferBuildVectorSources(VT))) {
14254 SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
14255 EVT InEltVT = Elt.getValueType();
14256
14257 // Sometimes build_vector's scalar input types do not match result type.
14258 if (NVT == InEltVT)
14259 return Elt;
14260
14261 // TODO: It may be useful to truncate when the truncation is free and the
14262 // build_vector implicitly converts.
14263 }
14264
14265 // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
14266 bool isLE = DAG.getDataLayout().isLittleEndian();
14267 unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
14268 if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
14269 ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
14270 SDValue BCSrc = InVec.getOperand(0);
14271 if (BCSrc.getValueType().isScalarInteger())
14272 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
14273 }
14274
14275 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
14276 //
14277 // This only really matters if the index is non-constant since other combines
14278 // on the constant elements already work.
14279 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
14280 EltNo == InVec.getOperand(2)) {
14281 SDValue Elt = InVec.getOperand(1);
14282 return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
14283 }
14284
14285 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
14286 // We only perform this optimization before the op legalization phase because
14287 // we may introduce new vector instructions which are not backed by TD
14288 // patterns. For example on AVX, this could mean extracting elements from a
14289 // wide vector without using extract_subvector. However, if we can find an
14290 // underlying scalar value, then we can always use that.
14291 if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
14292 int NumElem = VT.getVectorNumElements();
14293 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
14294 // Find the new index to extract from.
14295 int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
14296
14297 // Extracting an undef index is undef.
14298 if (OrigElt == -1)
14299 return DAG.getUNDEF(NVT);
14300
14301 // Select the right vector half to extract from.
14302 SDValue SVInVec;
14303 if (OrigElt < NumElem) {
14304 SVInVec = InVec->getOperand(0);
14305 } else {
14306 SVInVec = InVec->getOperand(1);
14307 OrigElt -= NumElem;
14308 }
14309
14310 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
14311 SDValue InOp = SVInVec.getOperand(OrigElt);
14312 if (InOp.getValueType() != NVT) {
14313 assert(InOp.getValueType().isInteger() && NVT.isInteger());
14314 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
14315 }
14316
14317 return InOp;
14318 }
14319
14320 // FIXME: We should handle recursing on other vector shuffles and
14321 // scalar_to_vector here as well.
14322
14323 if (!LegalOperations ||
14324 // FIXME: Should really be just isOperationLegalOrCustom.
14325 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
14326 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
14327 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14328 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
14329 DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
14330 }
14331 }
14332
14333 bool BCNumEltsChanged = false;
14334 EVT ExtVT = VT.getVectorElementType();
14335 EVT LVT = ExtVT;
14336
14337 // If the result of the load has to be truncated, then it's not necessarily
14338 // profitable.
14339 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
14340 return SDValue();
14341
14342 if (InVec.getOpcode() == ISD::BITCAST) {
14343 // Don't duplicate a load with other uses.
14344 if (!InVec.hasOneUse())
14345 return SDValue();
14346
14347 EVT BCVT = InVec.getOperand(0).getValueType();
14348 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
14349 return SDValue();
14350 if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
14351 BCNumEltsChanged = true;
14352 InVec = InVec.getOperand(0);
14353 ExtVT = BCVT.getVectorElementType();
14354 }
14355
14356 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
14357 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
14358 ISD::isNormalLoad(InVec.getNode()) &&
14359 !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
14360 SDValue Index = N->getOperand(1);
14361 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
14362 if (!OrigLoad->isVolatile()) {
14363 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
14364 OrigLoad);
14365 }
14366 }
14367 }
14368
14369 // Perform only after legalization to ensure build_vector / vector_shuffle
14370 // optimizations have already been done.
14371 if (!LegalOperations) return SDValue();
14372
14373 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
14374 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
14375 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
14376
14377 if (ConstEltNo) {
14378 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
14379
14380 LoadSDNode *LN0 = nullptr;
14381 const ShuffleVectorSDNode *SVN = nullptr;
14382 if (ISD::isNormalLoad(InVec.getNode())) {
14383 LN0 = cast<LoadSDNode>(InVec);
14384 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14385 InVec.getOperand(0).getValueType() == ExtVT &&
14386 ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
14387 // Don't duplicate a load with other uses.
14388 if (!InVec.hasOneUse())
14389 return SDValue();
14390
14391 LN0 = cast<LoadSDNode>(InVec.getOperand(0));
14392 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
14393 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
14394 // =>
14395 // (load $addr+1*size)
14396
14397 // Don't duplicate a load with other uses.
14398 if (!InVec.hasOneUse())
14399 return SDValue();
14400
14401 // If the bit convert changed the number of elements, it is unsafe
14402 // to examine the mask.
14403 if (BCNumEltsChanged)
14404 return SDValue();
14405
14406 // Select the input vector, guarding against an out-of-range extract index.
14407 unsigned NumElems = VT.getVectorNumElements();
14408 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
14409 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
14410
14411 if (InVec.getOpcode() == ISD::BITCAST) {
14412 // Don't duplicate a load with other uses.
14413 if (!InVec.hasOneUse())
14414 return SDValue();
14415
14416 InVec = InVec.getOperand(0);
14417 }
14418 if (ISD::isNormalLoad(InVec.getNode())) {
14419 LN0 = cast<LoadSDNode>(InVec);
14420 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
14421 EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
14422 }
14423 }
14424
14425 // Make sure we found a non-volatile load and the extractelement is
14426 // the only use.
14427 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
14428 return SDValue();
14429
14430 // If Idx was -1 above, Elt is going to be -1, so just return undef.
14431 if (Elt == -1)
14432 return DAG.getUNDEF(LVT);
14433
14434 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
14435 }
14436
14437 return SDValue();
14438}
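// A sketch of the post-legalization shuffle case above (hypothetical values,
// assuming a non-volatile load whose only user is the extract):
//   (vextract (v4f32 vector_shuffle (load $addr), undef, <2,u,u,u>), 0)
//   -> (f32 load $addr+8)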
14439
14440// Simplify (build_vec (ext )) to (bitcast (build_vec ))
14441SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
14442 // We perform this optimization post type-legalization because
14443 // the type-legalizer often scalarizes integer-promoted vectors.
14444 // Performing this optimization before may create bit-casts which
14445 // will be type-legalized to complex code sequences.
14446 // We perform this optimization only before the operation legalizer because we
14447 // may introduce illegal operations.
14448 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
14449 return SDValue();
14450
14451 unsigned NumInScalars = N->getNumOperands();
14452 SDLoc DL(N);
14453 EVT VT = N->getValueType(0);
14454
14455 // Check to see if this is a BUILD_VECTOR of a bunch of values
14456 // which come from any_extend or zero_extend nodes. If so, we can create
14457 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
14458 // optimizations. We do not handle sign-extend because we can't fill the sign
14459 // using shuffles.
14460 EVT SourceType = MVT::Other;
14461 bool AllAnyExt = true;
14462
14463 for (unsigned i = 0; i != NumInScalars; ++i) {
14464 SDValue In = N->getOperand(i);
14465 // Ignore undef inputs.
14466 if (In.isUndef()) continue;
14467
14468 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
14469 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
14470
14471 // Abort if the element is not an extension.
14472 if (!ZeroExt && !AnyExt) {
14473 SourceType = MVT::Other;
14474 break;
14475 }
14476
14477 // The input is a ZeroExt or AnyExt. Check the original type.
14478 EVT InTy = In.getOperand(0).getValueType();
14479
14480 // Check that all of the widened source types are the same.
14481 if (SourceType == MVT::Other)
14482 // First time.
14483 SourceType = InTy;
14484 else if (InTy != SourceType) {
14485 // Multiple source types. Abort.
14486 SourceType = MVT::Other;
14487 break;
14488 }
14489
14490 // Check if all of the extends are ANY_EXTENDs.
14491 AllAnyExt &= AnyExt;
14492 }
14493
14494 // In order to have valid types, all of the inputs must be extended from the
14495 // same source type and all of the inputs must be any or zero extend.
14496 // Scalar sizes must be a power of two.
14497 EVT OutScalarTy = VT.getScalarType();
14498 bool ValidTypes = SourceType != MVT::Other &&
14499 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
14500 isPowerOf2_32(SourceType.getSizeInBits());
14501
14502 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
14503 // turn into a single shuffle instruction.
14504 if (!ValidTypes)
14505 return SDValue();
14506
14507 bool isLE = DAG.getDataLayout().isLittleEndian();
14508 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
14509 assert(ElemRatio > 1 && "Invalid element size ratio");
14510 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
14511 DAG.getConstant(0, DL, SourceType);
14512
14513 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
14514 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
14515
14516 // Populate the new build_vector
14517 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14518 SDValue Cast = N->getOperand(i);
14519 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
14520 Cast.getOpcode() == ISD::ZERO_EXTEND ||
14521 Cast.isUndef()) && "Invalid cast opcode");
14522 SDValue In;
14523 if (Cast.isUndef())
14524 In = DAG.getUNDEF(SourceType);
14525 else
14526 In = Cast->getOperand(0);
14527 unsigned Index = isLE ? (i * ElemRatio) :
14528 (i * ElemRatio + (ElemRatio - 1));
14529
14530 assert(Index < Ops.size() && "Invalid index");
14531 Ops[Index] = In;
14532 }
14533
14534 // The type of the new BUILD_VECTOR node.
14535 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
14536 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
14537 "Invalid vector size");
14538 // Check if the new vector type is legal.
14539 if (!isTypeLegal(VecVT)) return SDValue();
14540
14541 // Make the new BUILD_VECTOR.
14542 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
14543
14544 // The new BUILD_VECTOR node has the potential to be further optimized.
14545 AddToWorklist(BV.getNode());
14546 // Bitcast to the desired type.
14547 return DAG.getBitcast(VT, BV);
14548}
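// A little-endian sketch of this fold (types hypothetical):
//   (v4i32 build_vector (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d))
//   -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))
// When every input is an ANY_EXTEND, the zero filler becomes undef instead.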
14549
14550SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
14551 EVT VT = N->getValueType(0);
14552
14553 unsigned NumInScalars = N->getNumOperands();
14554 SDLoc DL(N);
14555
14556 EVT SrcVT = MVT::Other;
14557 unsigned Opcode = ISD::DELETED_NODE;
14558 unsigned NumDefs = 0;
14559
14560 for (unsigned i = 0; i != NumInScalars; ++i) {
14561 SDValue In = N->getOperand(i);
14562 unsigned Opc = In.getOpcode();
14563
14564 if (Opc == ISD::UNDEF)
14565 continue;
14566
14567 // All scalar values must be floats converted from integers; record the opcode.
14568 if (Opcode == ISD::DELETED_NODE &&
14569 (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
14570 Opcode = Opc;
14571 }
14572
14573 if (Opc != Opcode)
14574 return SDValue();
14575
14576 EVT InVT = In.getOperand(0).getValueType();
14577
14578 // If the scalar values are typed differently, bail out. Requiring a single
14579 // source type simplifies handling BUILD_VECTOR of integer types.
14580 if (SrcVT == MVT::Other)
14581 SrcVT = InVT;
14582 if (SrcVT != InVT)
14583 return SDValue();
14584 NumDefs++;
14585 }
14586
14587 // If the vector has just one element defined, it's not worth folding it
14588 // into a vectorized conversion.
14589 if (NumDefs < 2)
14590 return SDValue();
14591
14592 assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
14593 && "Should only handle conversion from integer to float.");
14594 assert(SrcVT != MVT::Other && "Cannot determine source type!");
14595
14596 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
14597
14598 if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
14599 return SDValue();
14600
14601 // Just because the floating-point vector type is legal does not necessarily
14602 // mean that the corresponding integer vector type is.
14603 if (!isTypeLegal(NVT))
14604 return SDValue();
14605
14606 SmallVector<SDValue, 8> Opnds;
14607 for (unsigned i = 0; i != NumInScalars; ++i) {
14608 SDValue In = N->getOperand(i);
14609
14610 if (In.isUndef())
14611 Opnds.push_back(DAG.getUNDEF(SrcVT));
14612 else
14613 Opnds.push_back(In.getOperand(0));
14614 }
14615 SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
14616 AddToWorklist(BV.getNode());
14617
14618 return DAG.getNode(Opcode, DL, VT, BV);
14619}
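// A sketch of this fold, assuming i32 sources and that v4i32 sint_to_fp is
// legal or custom (values hypothetical):
//   (v4f32 build_vector (sint_to_fp a), (sint_to_fp b), undef, (sint_to_fp c))
//   -> (v4f32 sint_to_fp (v4i32 build_vector a, b, undef, c))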
14620
14621SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
14622 ArrayRef<int> VectorMask,
14623 SDValue VecIn1, SDValue VecIn2,
14624 unsigned LeftIdx) {
14625 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14626 SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
14627
14628 EVT VT = N->getValueType(0);
14629 EVT InVT1 = VecIn1.getValueType();
14630 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
14631
14632 unsigned Vec2Offset = 0;
14633 unsigned NumElems = VT.getVectorNumElements();
14634 unsigned ShuffleNumElems = NumElems;
14635
14636 // If both input vectors are extracted from the same base
14637 // vector, we do not need the extra addend (Vec2Offset) while
14638 // computing the shuffle mask.
14639 if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
14640 !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
14641 !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
14642 Vec2Offset = InVT1.getVectorNumElements();
14643
14644 // We can't generate a shuffle node with mismatched input and output types.
14645 // Try to make the types match the type of the output.
14646 if (InVT1 != VT || InVT2 != VT) {
14647 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
14648 // If the output vector length is a multiple of both input lengths,
14649 // we can concatenate them and pad the rest with undefs.
14650 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
14651 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
14652 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
14653 ConcatOps[0] = VecIn1;
14654 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
14655 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14656 VecIn2 = SDValue();
14657 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
14658 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
14659 return SDValue();
14660
14661 if (!VecIn2.getNode()) {
14662 // If we only have one input vector, and it's twice the size of the
14663 // output, split it in two.
14664 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
14665 DAG.getConstant(NumElems, DL, IdxTy));
14666 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
14667 // Since we now have shorter input vectors, adjust the offset of the
14668 // second vector's start.
14669 Vec2Offset = NumElems;
14670 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
14671 // VecIn1 is wider than the output, and we have another, possibly
14672 // smaller input. Pad the smaller input with undefs, shuffle at the
14673 // input vector width, and extract the output.
14674 // The shuffle type is different than VT, so check legality again.
14675 if (LegalOperations &&
14676 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
14677 return SDValue();
14678
14679 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
14680 // lower it back into a BUILD_VECTOR. So if the inserted type is
14681 // illegal, don't even try.
14682 if (InVT1 != InVT2) {
14683 if (!TLI.isTypeLegal(InVT2))
14684 return SDValue();
14685 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
14686 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
14687 }
14688 ShuffleNumElems = NumElems * 2;
14689 } else {
14690 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
14691 // than VecIn1. We can't handle this for now - this case will disappear
14692 // when we start sorting the vectors by type.
14693 return SDValue();
14694 }
14695 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
14696 InVT1.getSizeInBits() == VT.getSizeInBits()) {
14697 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
14698 ConcatOps[0] = VecIn2;
14699 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14700 } else {
14701 // TODO: Support cases where the length mismatch isn't exactly by a
14702 // factor of 2.
14703 // TODO: Move this check upwards, so that if we have bad type
14704 // mismatches, we don't create any DAG nodes.
14705 return SDValue();
14706 }
14707 }
14708
14709 // Initialize mask to undef.
14710 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
14711
14712 // Only need to run up to the number of elements actually used, not the
14713 // total number of elements in the shuffle - if we are shuffling a wider
14714 // vector, the high lanes should be set to undef.
14715 for (unsigned i = 0; i != NumElems; ++i) {
14716 if (VectorMask[i] <= 0)
14717 continue;
14718
14719 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
14720 if (VectorMask[i] == (int)LeftIdx) {
14721 Mask[i] = ExtIndex;
14722 } else if (VectorMask[i] == (int)LeftIdx + 1) {
14723 Mask[i] = Vec2Offset + ExtIndex;
14724 }
14725 }
14726
14727 // The types of the input vectors may have changed above.
14728 InVT1 = VecIn1.getValueType();
14729
14730 // If we already have a VecIn2, it should have the same type as VecIn1.
14731 // If we don't, get an undef/zero vector of the appropriate type.
14732 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
14733 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
14734
14735 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
14736 if (ShuffleNumElems > NumElems)
14737 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
14738
14739 return Shuffle;
14740}
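// A worked instance of the mask computation above (hypothetical inputs): for
//   (v4i32 build_vector (extract t1, 0), (extract t2, 1), undef,
//                       (extract t1, 3))
// with VecIn1 = t1 and VecIn2 = t2 (both v4i32), Vec2Offset is 4, giving
//   (v4i32 vector_shuffle<0,5,u,3> t1, t2)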
14741
14742// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
14743// operations. If the types of the vectors we're extracting from allow it,
14744// turn this into a vector_shuffle node.
14745SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
14746 SDLoc DL(N);
14747 EVT VT = N->getValueType(0);
14748
14749 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
14750 if (!isTypeLegal(VT))
14751 return SDValue();
14752
14753 // May only combine to shuffle after legalize if shuffle is legal.
14754 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
14755 return SDValue();
14756
14757 bool UsesZeroVector = false;
14758 unsigned NumElems = N->getNumOperands();
14759
14760 // Record, for each element of the newly built vector, which input vector
14761 // that element comes from. -1 stands for undef, 0 for the zero vector,
14762 // and positive values for the input vectors.
14763 // VectorMask maps each element to its vector number, and VecIn maps vector
14764 // numbers to their initial SDValues.
14765
14766 SmallVector<int, 8> VectorMask(NumElems, -1);
14767 SmallVector<SDValue, 8> VecIn;
14768 VecIn.push_back(SDValue());
14769
14770 for (unsigned i = 0; i != NumElems; ++i) {
14771 SDValue Op = N->getOperand(i);
14772
14773 if (Op.isUndef())
14774 continue;
14775
14776 // See if we can use a blend with a zero vector.
14777 // TODO: Should we generalize this to a blend with an arbitrary constant
14778 // vector?
14779 if (isNullConstant(Op) || isNullFPConstant(Op)) {
14780 UsesZeroVector = true;
14781 VectorMask[i] = 0;
14782 continue;
14783 }
14784
14785 // Not an undef or zero. If the input is something other than an
14786 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
14787 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14788 !isa<ConstantSDNode>(Op.getOperand(1)))
14789 return SDValue();
14790 SDValue ExtractedFromVec = Op.getOperand(0);
14791
14792 APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
14793 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
14794 return SDValue();
14795
14796 // All inputs must have the same element type as the output.
14797 if (VT.getVectorElementType() !=
14798 ExtractedFromVec.getValueType().getVectorElementType())
14799 return SDValue();
14800
14801 // Have we seen this input vector before?
14802 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
14803 // a map back from SDValues to numbers isn't worth it.
14804 unsigned Idx = std::distance(
14805 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
14806 if (Idx == VecIn.size())
14807 VecIn.push_back(ExtractedFromVec);
14808
14809 VectorMask[i] = Idx;
14810 }
14811
14812 // If we didn't find at least one input vector, bail out.
14813 if (VecIn.size() < 2)
14814 return SDValue();
14815
14816 // If all the operands of the BUILD_VECTOR extract from the same
14817 // vector, then split that vector efficiently based on the maximum
14818 // vector access index and adjust the VectorMask and
14819 // VecIn accordingly.
14820 if (VecIn.size() == 2) {
14821 unsigned MaxIndex = 0;
14822 unsigned NearestPow2 = 0;
14823 SDValue Vec = VecIn.back();
14824 EVT InVT = Vec.getValueType();
14825 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14826 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
14827
14828 for (unsigned i = 0; i < NumElems; i++) {
14829 if (VectorMask[i] <= 0)
14830 continue;
14831 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
14832 IndexVec[i] = Index;
14833 MaxIndex = std::max(MaxIndex, Index);
14834 }
14835
14836 NearestPow2 = PowerOf2Ceil(MaxIndex);
14837 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
14838 NumElems * 2 < NearestPow2) {
14839 unsigned SplitSize = NearestPow2 / 2;
14840 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
14841 InVT.getVectorElementType(), SplitSize);
14842 if (TLI.isTypeLegal(SplitVT)) {
14843 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
14844 DAG.getConstant(SplitSize, DL, IdxTy));
14845 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
14846 DAG.getConstant(0, DL, IdxTy));
14847 VecIn.pop_back();
14848 VecIn.push_back(VecIn1);
14849 VecIn.push_back(VecIn2);
14850
14851 for (unsigned i = 0; i < NumElems; i++) {
14852 if (VectorMask[i] <= 0)
14853 continue;
14854 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
14855 }
14856 }
14857 }
14858 }
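// A sketch of the split above: a v2i32 build_vector extracting elements 1
// and 6 of a single v16i32 source has MaxIndex = 6, so NearestPow2 = 8 and
// SplitSize = 4; the source is split into two v4i32 subvectors and the
// VectorMask entries become 1 and 2 respectively.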
14859
14860 // TODO: We want to sort the vectors by descending length, so that adjacent
14861 // pairs have similar length, and the longer vector is always first in the
14862 // pair.
14863
14864 // TODO: Should this fire if some of the input vectors has illegal type (like
14865 // it does now), or should we let legalization run its course first?
14866
14867 // Shuffle phase:
14868 // Take pairs of vectors, and shuffle them so that the result has elements
14869 // from these vectors in the correct places.
14870 // For example, given:
14871 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
14872 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
14873 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
14874 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
14875 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
14876 // We will generate:
14877 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
14878 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
14879 SmallVector<SDValue, 4> Shuffles;
14880 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
14881 unsigned LeftIdx = 2 * In + 1;
14882 SDValue VecLeft = VecIn[LeftIdx];
14883 SDValue VecRight =
14884 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
14885
14886 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
14887 VecRight, LeftIdx))
14888 Shuffles.push_back(Shuffle);
14889 else
14890 return SDValue();
14891 }
14892
14893 // If we need the zero vector as an "ingredient" in the blend tree, add it
14894 // to the list of shuffles.
14895 if (UsesZeroVector)
14896 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
14897 : DAG.getConstantFP(0.0, DL, VT));
14898
14899 // If we only have one shuffle, we're done.
14900 if (Shuffles.size() == 1)
14901 return Shuffles[0];
14902
14903 // Update the vector mask to point to the post-shuffle vectors.
14904 for (int &Vec : VectorMask)
14905 if (Vec == 0)
14906 Vec = Shuffles.size() - 1;
14907 else
14908 Vec = (Vec - 1) / 2;
14909
14910 // More than one shuffle. Generate a binary tree of blends, e.g. if from
14911 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
14912 // generate:
14913 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
14914 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
14915 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
14916 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
14917 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
14918 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
14919 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
14920
14921 // Make sure the initial size of the shuffle list is even.
14922 if (Shuffles.size() % 2)
14923 Shuffles.push_back(DAG.getUNDEF(VT));
14924
14925 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
14926 if (CurSize % 2) {
14927 Shuffles[CurSize] = DAG.getUNDEF(VT);
14928 CurSize++;
14929 }
14930 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
14931 int Left = 2 * In;
14932 int Right = 2 * In + 1;
14933 SmallVector<int, 8> Mask(NumElems, -1);
14934 for (unsigned i = 0; i != NumElems; ++i) {
14935 if (VectorMask[i] == Left) {
14936 Mask[i] = i;
14937 VectorMask[i] = In;
14938 } else if (VectorMask[i] == Right) {
14939 Mask[i] = i + NumElems;
14940 VectorMask[i] = In;
14941 }
14942 }
14943
14944 Shuffles[In] =
14945 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
14946 }
14947 }
14948 return Shuffles[0];
14949}
14950
14951SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
14952 EVT VT = N->getValueType(0);
14953
14954 // A vector built entirely of undefs is undef.
14955 if (ISD::allOperandsUndef(N))
14956 return DAG.getUNDEF(VT);
14957
14958 // If this is a splat of a bitcast from another vector, change to a
14959 // concat_vector.
14960 // For example:
14961 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
14962 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
14963 //
14964 // If X is a build_vector itself, the concat can become a larger build_vector.
14965 // TODO: Maybe this is useful for non-splat too?
14966 if (!LegalOperations) {
14967 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
14968 Splat = peekThroughBitcast(Splat);
14969 EVT SrcVT = Splat.getValueType();
14970 if (SrcVT.isVector()) {
14971 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
14972 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
14973 SrcVT.getVectorElementType(), NumElts);
14974 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
14975 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
14976 return DAG.getBitcast(VT, Concat);
14977 }
14978 }
14979 }
14980
14981 // Check if we can express the BUILD_VECTOR via a subvector extract.
14982 if (!LegalTypes && (N->getNumOperands() > 1)) {
14983 SDValue Op0 = N->getOperand(0);
14984 auto checkElem = [&](SDValue Op) -> uint64_t {
14985 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
14986 (Op0.getOperand(0) == Op.getOperand(0)))
14987 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
14988 return CNode->getZExtValue();
14989 return -1;
14990 };
14991
14992 int Offset = checkElem(Op0);
14993 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
14994 if (Offset + i != checkElem(N->getOperand(i))) {
14995 Offset = -1;
14996 break;
14997 }
14998 }
14999
15000 if ((Offset == 0) &&
15001 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
15002 return Op0.getOperand(0);
15003 if ((Offset != -1) &&
15004 ((Offset % N->getValueType(0).getVectorNumElements()) ==
15005 0)) // IDX must be multiple of output size.
15006 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
15007 Op0.getOperand(0), Op0.getOperand(1));
15008 }
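// A sketch of the subvector-extract case above (hypothetical types): if t1
// is v8i32 and the operands extract consecutive elements starting at 4,
//   (v4i32 build_vector (extract t1, 4), ..., (extract t1, 7))
//   -> (v4i32 extract_subvector t1, 4)
// since the start offset is a multiple of the output element count.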
15009
15010 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
15011 return V;
15012
15013 if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
15014 return V;
15015
15016 if (SDValue V = reduceBuildVecToShuffle(N))
15017 return V;
15018
15019 return SDValue();
15020}
15021
15022static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
15023 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15024 EVT OpVT = N->getOperand(0).getValueType();
15025
15026 // If the operands are legal vectors, leave them alone.
15027 if (TLI.isTypeLegal(OpVT))
15028 return SDValue();
15029
15030 SDLoc DL(N);
15031 EVT VT = N->getValueType(0);
15032 SmallVector<SDValue, 8> Ops;
15033
15034 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
15035 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
15036
15037 // Keep track of what we encounter.
15038 bool AnyInteger = false;
15039 bool AnyFP = false;
15040 for (const SDValue &Op : N->ops()) {
15041 if (ISD::BITCAST == Op.getOpcode() &&
15042 !Op.getOperand(0).getValueType().isVector())
15043 Ops.push_back(Op.getOperand(0));
15044 else if (ISD::UNDEF == Op.getOpcode())
15045 Ops.push_back(ScalarUndef);
15046 else
15047 return SDValue();
15048
15049 // Note whether we encounter an integer or floating point scalar.
15050 // If it's neither, bail out, it could be something weird like x86mmx.
15051 EVT LastOpVT = Ops.back().getValueType();
15052 if (LastOpVT.isFloatingPoint())
15053 AnyFP = true;
15054 else if (LastOpVT.isInteger())
15055 AnyInteger = true;
15056 else
15057 return SDValue();
15058 }
15059
15060 // If any of the operands is a floating point scalar bitcast to a vector,
15061 // use floating point types throughout, and bitcast everything.
15062 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
15063 if (AnyFP) {
15064 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
15065 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
15066 if (AnyInteger) {
15067 for (SDValue &Op : Ops) {
15068 if (Op.getValueType() == SVT)
15069 continue;
15070 if (Op.isUndef())
15071 Op = ScalarUndef;
15072 else
15073 Op = DAG.getBitcast(SVT, Op);
15074 }
15075 }
15076 }
15077
15078 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
15079 VT.getSizeInBits() / SVT.getSizeInBits());
15080 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
15081}
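// A sketch, assuming v2f32 is not a legal type (values hypothetical):
//   (v4f32 concat_vectors (v2f32 bitcast (f64 x)), undef)
//   -> (v4f32 bitcast (v2f64 build_vector x, undef))
// The f64 scalar switches SVT to floating point, and the integer undef
// placeholder is replaced by an f64 undef.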
15082
15083// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
15084// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
15085// most two distinct vectors the same size as the result, attempt to turn this
15086// into a legal shuffle.
15087static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
15088 EVT VT = N->getValueType(0);
15089 EVT OpVT = N->getOperand(0).getValueType();
15090 int NumElts = VT.getVectorNumElements();
15091 int NumOpElts = OpVT.getVectorNumElements();
15092
15093 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
15094 SmallVector<int, 8> Mask;
15095
15096 for (SDValue Op : N->ops()) {
15097 // Peek through any bitcast.
15098 Op = peekThroughBitcast(Op);
15099
15100 // UNDEF nodes convert to UNDEF shuffle mask values.
15101 if (Op.isUndef()) {
15102 Mask.append((unsigned)NumOpElts, -1);
15103 continue;
15104 }
15105
15106 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15107 return SDValue();
15108
15109 // What vector are we extracting the subvector from and at what index?
15110 SDValue ExtVec = Op.getOperand(0);
15111
15112 // We want the EVT of the original extraction to correctly scale the
15113 // extraction index.
15114 EVT ExtVT = ExtVec.getValueType();
15115
15116 // Peek through any bitcast.
15117 ExtVec = peekThroughBitcast(ExtVec);
15118
15119 // UNDEF nodes convert to UNDEF shuffle mask values.
15120 if (ExtVec.isUndef()) {
15121 Mask.append((unsigned)NumOpElts, -1);
15122 continue;
15123 }
15124
15125 if (!isa<ConstantSDNode>(Op.getOperand(1)))
15126 return SDValue();
15127 int ExtIdx = Op.getConstantOperandVal(1);
15128
15129 // Ensure that we are extracting a subvector from a vector the same
15130 // size as the result.
15131 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
15132 return SDValue();
15133
15134 // Scale the subvector index to account for any bitcast.
15135 int NumExtElts = ExtVT.getVectorNumElements();
15136 if (0 == (NumExtElts % NumElts))
15137 ExtIdx /= (NumExtElts / NumElts);
15138 else if (0 == (NumElts % NumExtElts))
15139 ExtIdx *= (NumElts / NumExtElts);
15140 else
15141 return SDValue();
15142
15143 // At most we can reference 2 inputs in the final shuffle.
15144 if (SV0.isUndef() || SV0 == ExtVec) {
15145 SV0 = ExtVec;
15146 for (int i = 0; i != NumOpElts; ++i)
15147 Mask.push_back(i + ExtIdx);
15148 } else if (SV1.isUndef() || SV1 == ExtVec) {
15149 SV1 = ExtVec;
15150 for (int i = 0; i != NumOpElts; ++i)
15151 Mask.push_back(i + ExtIdx + NumElts);
15152 } else {
15153 return SDValue();
15154 }
15155 }
15156
15157 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
15158 return SDValue();
15159
15160 return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
15161 DAG.getBitcast(VT, SV1), Mask);
15162}
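// A sketch (t1 and t2 hypothetical v4i32 vectors):
//   (v4i32 concat_vectors (v2i32 extract_subvector t1, 0),
//                         (v2i32 extract_subvector t2, 2))
//   -> (v4i32 vector_shuffle<0,1,6,7> t1, t2)
// provided the target reports the <0,1,6,7> mask as legal.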
15163
15164SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
15165 // If we only have one input vector, we don't need to do any concatenation.
15166 if (N->getNumOperands() == 1)
15167 return N->getOperand(0);
15168
15169 // Check if all of the operands are undefs.
15170 EVT VT = N->getValueType(0);
15171 if (ISD::allOperandsUndef(N))
15172 return DAG.getUNDEF(VT);
15173
15174 // Optimize concat_vectors where all but the first of the vectors are undef.
15175 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
15176 return Op.isUndef();
15177 })) {
15178 SDValue In = N->getOperand(0);
15179 assert(In.getValueType().isVector() && "Must concat vectors");
15180
15181 // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
15182 if (In->getOpcode() == ISD::BITCAST &&
15183 !In->getOperand(0).getValueType().isVector()) {
15184 SDValue Scalar = In->getOperand(0);
15185
15186 // If the bitcast type isn't legal, it might be a trunc of a legal type;
15187 // look through the trunc so we can still do the transform:
15188 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
15189 if (Scalar->getOpcode() == ISD::TRUNCATE &&
15190 !TLI.isTypeLegal(Scalar.getValueType()) &&
15191 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
15192 Scalar = Scalar->getOperand(0);
15193
15194 EVT SclTy = Scalar->getValueType(0);
15195
15196 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
15197 return SDValue();
15198
15199 // Bail out if the vector size is not a multiple of the scalar size.
15200 if (VT.getSizeInBits() % SclTy.getSizeInBits())
15201 return SDValue();
15202
15203 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
15204 if (VNTNumElms < 2)
15205 return SDValue();
15206
15207 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
15208 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
15209 return SDValue();
15210
15211 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
15212 return DAG.getBitcast(VT, Res);
15213 }
15214 }
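// A sketch of the scalar_to_vector path above, assuming i64 and v2i64 are
// legal types (values hypothetical):
//   (v4i32 concat_vectors (v2i32 bitcast (i64 x)), undef)
//   -> (v4i32 bitcast (v2i64 scalar_to_vector x))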
15215
15216 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
15217 // We have already tested above for an UNDEF only concatenation.
15218 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
15219 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
15220 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
15221 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
15222 };
15223 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
15224 SmallVector<SDValue, 8> Opnds;
15225 EVT SVT = VT.getScalarType();
15226
15227 EVT MinVT = SVT;
15228 if (!SVT.isFloatingPoint()) {
15229 // If the BUILD_VECTORs are built from integers, they may have different
15230 // operand types. Get the smallest type and truncate all operands to it.
15231 bool FoundMinVT = false;
15232 for (const SDValue &Op : N->ops())
15233 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
15234 EVT OpSVT = Op.getOperand(0).getValueType();
15235 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
15236 FoundMinVT = true;
15237 }
15238 assert(FoundMinVT && "Concat vector type mismatch");
15239 }
15240
15241 for (const SDValue &Op : N->ops()) {
15242 EVT OpVT = Op.getValueType();
15243 unsigned NumElts = OpVT.getVectorNumElements();
15244
15245 if (ISD::UNDEF == Op.getOpcode())
15246 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
15247
15248 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
15249 if (SVT.isFloatingPoint()) {
15250 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
15251 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
15252 } else {
15253 for (unsigned i = 0; i != NumElts; ++i)
15254 Opnds.push_back(
15255 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
15256 }
15257 }
15258 }
15259
15260 assert(VT.getVectorNumElements() == Opnds.size() &&
15261 "Concat vector type mismatch");
15262 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
15263 }
15264
15265 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
15266 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
15267 return V;
15268
15269 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
15270 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15271 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
15272 return V;
15273
15274 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
15275 // nodes often generate nop CONCAT_VECTORS nodes.
15276 // Scan the CONCAT_VECTORS operands and look for concat operations that
15277 // place the incoming vectors at the exact same location.
15278 SDValue SingleSource = SDValue();
15279 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
15280
15281 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15282 SDValue Op = N->getOperand(i);
15283
15284 if (Op.isUndef())
15285 continue;
15286
15287 // Check if this is the identity extract:
15288 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15289 return SDValue();
15290
15291 // Find the single incoming vector for the extract_subvector.
15292 if (SingleSource.getNode()) {
15293 if (Op.getOperand(0) != SingleSource)
15294 return SDValue();
15295 } else {
15296 SingleSource = Op.getOperand(0);
15297
15298 // Check that the source type is the same as the type of the result.
15299 // If not, this concat may extend the vector, so we cannot
15300 // optimize it away.
15301 if (SingleSource.getValueType() != N->getValueType(0))
15302 return SDValue();
15303 }
15304
15305 unsigned IdentityIndex = i * PartNumElem;
15306 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15307 // The extract index must be constant.
15308 if (!CS)
15309 return SDValue();
15310
15311 // Check that we are reading from the identity index.
15312 if (CS->getZExtValue() != IdentityIndex)
15313 return SDValue();
15314 }
15315
15316 if (SingleSource.getNode())
15317 return SingleSource;
15318
15319 return SDValue();
15320}
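// A sketch of the identity-concat case above: with t1 a v8i32 source,
//   (v8i32 concat_vectors (v4i32 extract_subvector t1, 0),
//                         (v4i32 extract_subvector t1, 4))
// reads every part from its identity index and folds to t1 itself.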
15321
15322 /// If we are extracting a subvector produced by a wide binary operator with
15323 /// at least one operand that was the result of a vector concatenation, then try
15324/// to use the narrow vector operands directly to avoid the concatenation and
15325/// extraction.
15326static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
15327 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
15328 // some of these bailouts with other transforms.
15329
15330 // The extract index must be a constant, so we can map it to a concat operand.
15331 auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15332 if (!ExtractIndex)
15333 return SDValue();
15334
15335 // Only handle the case where we are doubling and then halving. A larger ratio
15336 // may require more than two narrow binops to replace the wide binop.
15337 EVT VT = Extract->getValueType(0);
15338 unsigned NumElems = VT.getVectorNumElements();
15339 assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
15340 "Extract index is not a multiple of the vector length.");
15341 if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
15342 return SDValue();
15343
15344 // We are looking for an optionally bitcasted wide vector binary operator
15345 // feeding an extract subvector.
15346 SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
15347
15348 // TODO: The motivating case for this transform is an x86 AVX1 target. That
15349 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
15350 // flavors, but no other 256-bit integer support. This could be extended to
15351 // handle any binop, but that may require fixing/adding other folds to avoid
15352 // codegen regressions.
15353 unsigned BOpcode = BinOp.getOpcode();
15354 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
15355 return SDValue();
15356
15357 // The binop must be a vector type, so we can chop it in half.
15358 EVT WideBVT = BinOp.getValueType();
15359 if (!WideBVT.isVector())
15360 return SDValue();
15361
15362 // Bail out if the target does not support a narrower version of the binop.
15363 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
15364 WideBVT.getVectorNumElements() / 2);
15365 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15366 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
15367 return SDValue();
15368
15369 // Peek through bitcasts of the binary operator operands if needed.
15370 SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
15371 SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
15372
15373 // We need at least one concatenation operation of a binop operand to make
15374 // this transform worthwhile. The concat must double the input vector sizes.
15375 // TODO: Should we also handle INSERT_SUBVECTOR patterns?
15376 bool ConcatL =
15377 LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
15378 bool ConcatR =
15379 RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
15380 if (!ConcatL && !ConcatR)
15381 return SDValue();
15382
15383 // If one of the binop operands was not the result of a concat, we must
15384 // extract a half-sized operand for our new narrow binop. We can't just reuse
15385 // the original extract index operand because we may have bitcasted.
15386 unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
15387 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
15388 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
15389 SDLoc DL(Extract);
15390
15391 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
15392 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
15393 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
15394 SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
15395 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
15396 BinOp.getOperand(0),
15397 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
15398
15399 SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
15400 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
15401 BinOp.getOperand(1),
15402 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
15403
15404 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
15405 return DAG.getBitcast(VT, NarrowBinOp);
15406}
15407
15408/// If we are extracting a subvector from a wide vector load, convert to a
15409/// narrow load to eliminate the extraction:
15410/// (extract_subvector (load wide vector)) --> (load narrow vector)
15411static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
15412 // TODO: Add support for big-endian. The offset calculation must be adjusted.
15413 if (DAG.getDataLayout().isBigEndian())
15414 return SDValue();
15415
15416 // TODO: The one-use check is overly conservative. Check the cost of the
15417 // extract instead or remove that condition entirely.
15418 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
15419 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15420 if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
15421 !ExtIdx)
15422 return SDValue();
15423
15424 // The narrow load will be offset from the base address of the old load if
15425 // we are extracting from something besides index 0 (little-endian).
15426 EVT VT = Extract->getValueType(0);
15427 SDLoc DL(Extract);
15428 SDValue BaseAddr = Ld->getOperand(1);
15429 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
15430
15431 // TODO: Use "BaseIndexOffset" to make this more effective.
15432 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
15433 MachineFunction &MF = DAG.getMachineFunction();
15434 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
15435 VT.getStoreSize());
15436 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
15437 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
15438 return NewLd;
15439}
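// Sketch of the offset math above, assuming hypothetical types: extracting
// v2f64 at index 2 from a v4f64 load gives Offset = 2 * 8 = 16 bytes, so the
// replacement is a 16-byte load at BaseAddr+16 with an adjusted MMO.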
15440
15441SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
15442 EVT NVT = N->getValueType(0);
15443 SDValue V = N->getOperand(0);
15444
15445 // Extract from UNDEF is UNDEF.
15446 if (V.isUndef())
15447 return DAG.getUNDEF(NVT);
15448
15449 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
15450 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
15451 return NarrowLoad;
15452
15453 // Combine:
15454 // (extract_subvec (concat V1, V2, ...), i)
15455 // Into:
15456 // Vi if possible
15457 // Only operand 0 is checked, as 'concat' assumes all inputs are of the
15458 // same type.
15459 if (V->getOpcode() == ISD::CONCAT_VECTORS &&
15460 isa<ConstantSDNode>(N->getOperand(1)) &&
15461 V->getOperand(0).getValueType() == NVT) {
15462 unsigned Idx = N->getConstantOperandVal(1);
15463 unsigned NumElems = NVT.getVectorNumElements();
15464 assert((Idx % NumElems) == 0 &&
15465 "IDX in concat is not a multiple of the result vector length.");
15466 return V->getOperand(Idx / NumElems);
15467 }
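// E.g. (extract_subvec (concat V1, V2), 4) with NVT = v4i32 (hypothetical
// types) returns V2, since Idx / NumElems = 4 / 4 selects concat operand 1.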
15468
15469 // Skip bitcasting
15470 V = peekThroughBitcast(V);
15471
15472 // If the input is a build vector, try to make a smaller build vector.
15473 if (V->getOpcode() == ISD::BUILD_VECTOR) {
15474 if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
15475 EVT InVT = V->getValueType(0);
15476 unsigned ExtractSize = NVT.getSizeInBits();
15477 unsigned EltSize = InVT.getScalarSizeInBits();
15478 // Only do this if we won't split any elements.
15479 if (ExtractSize % EltSize == 0) {
15480 unsigned NumElems = ExtractSize / EltSize;
15481 EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
15482 InVT.getVectorElementType(), NumElems);
15483 if ((!LegalOperations ||
15484 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
15485 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
15486 unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
15487 EltSize;
15488
15489 // Extract the pieces from the original build_vector.
15490 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
15491 makeArrayRef(V->op_begin() + IdxVal,
15492 NumElems));
15493 return DAG.getBitcast(NVT, BuildVec);
15494 }
15495 }
15496 }
15497 }
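// E.g. extracting v2i64 at index 1 from a bitcast v8i32 build_vector
// (hypothetical types): IdxVal = 1 * 64 / 32 = 2, so a new v4i32
// build_vector is formed from operands [2,6) and bitcast back to v2i64.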
15498
15499 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
15500 // Handle only the simple case where the vector being inserted and the
15501 // vector being extracted are of the same size.
15502 EVT SmallVT = V->getOperand(1).getValueType();
15503 if (!NVT.bitsEq(SmallVT))
15504 return SDValue();
15505
15506 // Only handle cases where both indexes are constants.
15507 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
15508 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
15509
15510 if (InsIdx && ExtIdx) {
15511 // Combine:
15512 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
15513 // Into:
15514 // indices are equal or bit offsets are equal => V1
15515 // otherwise => (extract_subvec V1, ExtIdx)
15516 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
15517 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
15518 return DAG.getBitcast(NVT, V->getOperand(1));
15519 return DAG.getNode(
15520 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
15521 DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
15522 N->getOperand(1));
15523 }
15524 }
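// E.g. (extract_subvec (insert_subvec V1, V2, 2), 2) returns V2 when the bit
// offsets match; otherwise the extract is applied to V1 at the original index.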
15525
15526 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
15527 return NarrowBOp;
15528
15529 return SDValue();
15530}
15531
15532// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
15533// or to turn a shuffle of a single concat into a simpler shuffle followed by a concat.
15534static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
15535 EVT VT = N->getValueType(0);
15536 unsigned NumElts = VT.getVectorNumElements();
15537
15538 SDValue N0 = N->getOperand(0);
15539 SDValue N1 = N->getOperand(1);
15540 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15541
15542 SmallVector<SDValue, 4> Ops;
15543 EVT ConcatVT = N0.getOperand(0).getValueType();
15544 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
15545 unsigned NumConcats = NumElts / NumElemsPerConcat;
15546
15547 // Special case: shuffle(concat(A,B)) can be more efficiently represented
15548 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
15549 // half vector elements.
15550 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
15551 std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
15552 SVN->getMask().end(), [](int i) { return i == -1; })) {
15553 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
15554 makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
15555 N1 = DAG.getUNDEF(ConcatVT);
15556 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
15557 }
15558
15559 // Look at every vector that's inserted. We're looking for exact
15560 // subvector-sized copies from a concatenated vector.
15561 for (unsigned I = 0; I != NumConcats; ++I) {
15562 // Make sure we're dealing with a copy.
15563 unsigned Begin = I * NumElemsPerConcat;
15564 bool AllUndef = true, NoUndef = true;
15565 for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
15566 if (SVN->getMaskElt(J) >= 0)
15567 AllUndef = false;
15568 else
15569 NoUndef = false;
15570 }
15571
15572 if (NoUndef) {
15573 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
15574 return SDValue();
15575
15576 for (unsigned J = 1; J != NumElemsPerConcat; ++J)
15577 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
15578 return SDValue();
15579
15580 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
15581 if (FirstElt < N0.getNumOperands())
15582 Ops.push_back(N0.getOperand(FirstElt));
15583 else
15584 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
15585
15586 } else if (AllUndef) {
15587 Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
15588 } else { // Mixed with general masks and undefs, can't do optimization.
15589 return SDValue();
15590 }
15591 }
15592
15593 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15594}
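// Worked example with hypothetical v4i32 operands: shuffle(concat(A,B),
// concat(C,D)) with mask <2,3,4,5> and NumElemsPerConcat = 2 copies whole
// subvectors, so the loop above collects Ops = {B, C} and the result is
// concat(B, C).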
15595
15596// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15597// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15598//
15599// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
15600// a simplification in some sense, but it isn't appropriate in general: some
15601// BUILD_VECTORs are substantially cheaper than others. The general case
15602// of a BUILD_VECTOR requires inserting each element individually (or
15603// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
15604// all constants is a single constant pool load. A BUILD_VECTOR where each
15605// element is identical is a splat. A BUILD_VECTOR where most of the operands
15606// are undef lowers to a small number of element insertions.
15607//
15608// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
15609// We don't fold shuffles where one side is a non-zero constant, and we don't
15610// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
15611// non-constant operands. This seems to work out reasonably well in practice.
15612static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
15613 SelectionDAG &DAG,
15614 const TargetLowering &TLI) {
15615 EVT VT = SVN->getValueType(0);
15616 unsigned NumElts = VT.getVectorNumElements();
15617 SDValue N0 = SVN->getOperand(0);
15618 SDValue N1 = SVN->getOperand(1);
15619
15620 if (!N0->hasOneUse() || !N1->hasOneUse())
15621 return SDValue();
15622
15623 // If only one of N0,N1 is constant, bail out if it is not all-zeros, as
15624 // discussed above.
15625 if (!N1.isUndef()) {
15626 bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
15627 bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
15628 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
15629 return SDValue();
15630 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
15631 return SDValue();
15632 }
15633
15634 // If both inputs are splats of the same value then we can safely merge this
15635 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
15636 bool IsSplat = false;
15637 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
15638 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
15639 if (BV0 && BV1)
15640 if (SDValue Splat0 = BV0->getSplatValue())
15641 IsSplat = (Splat0 == BV1->getSplatValue());
15642
15643 SmallVector<SDValue, 8> Ops;
15644 SmallSet<SDValue, 16> DuplicateOps;
15645 for (int M : SVN->getMask()) {
15646 SDValue Op = DAG.getUNDEF(VT.getScalarType());
15647 if (M >= 0) {
15648 int Idx = M < (int)NumElts ? M : M - NumElts;
15649 SDValue &S = (M < (int)NumElts ? N0 : N1);
15650 if (S.getOpcode() == ISD::BUILD_VECTOR) {
15651 Op = S.getOperand(Idx);
15652 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15653 assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
15654 Op = S.getOperand(0);
15655 } else {
15656 // Operand can't be combined - bail out.
15657 return SDValue();
15658 }
15659 }
15660
15661 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
15662 // generating a splat; semantically, this is fine, but it's likely to
15663 // generate low-quality code if the target can't reconstruct an appropriate
15664 // shuffle.
15665 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
15666 if (!IsSplat && !DuplicateOps.insert(Op).second)
15667 return SDValue();
15668
15669 Ops.push_back(Op);
15670 }
15671
15672 // BUILD_VECTOR requires all inputs to be of the same type, find the
15673 // maximum type and extend them all.
15674 EVT SVT = VT.getScalarType();
15675 if (SVT.isInteger())
15676 for (SDValue &Op : Ops)
15677 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
15678 if (SVT != VT.getScalarType())
15679 for (SDValue &Op : Ops)
15680 Op = TLI.isZExtFree(Op.getValueType(), SVT)
15681 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
15682 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
15683 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
15684}
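// E.g. with hypothetical values, shuffle(BUILD_VECTOR(a,b,c,d), undef,
// <3,1,u,0>) becomes BUILD_VECTOR(d,b,u,a). A mask such as <0,0,1,2> would
// be rejected for non-constant 'a' because DuplicateOps sees 'a' twice.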
15685
15686// Match shuffles that can be converted to any_vector_extend_in_reg.
15687// This is often generated during legalization.
15688// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15689// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15690static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15691 SelectionDAG &DAG,
15692 const TargetLowering &TLI,
15693 bool LegalOperations,
15694 bool LegalTypes) {
15695 EVT VT = SVN->getValueType(0);
15696 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15697
15698 // TODO Add support for big-endian when we have a test case.
15699 if (!VT.isInteger() || IsBigEndian)
15700 return SDValue();
15701
15702 unsigned NumElts = VT.getVectorNumElements();
15703 unsigned EltSizeInBits = VT.getScalarSizeInBits();
15704 ArrayRef<int> Mask = SVN->getMask();
15705 SDValue N0 = SVN->getOperand(0);
15706
15707 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15708 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15709 for (unsigned i = 0; i != NumElts; ++i) {
15710 if (Mask[i] < 0)
15711 continue;
15712 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15713 continue;
15714 return false;
15715 }
15716 return true;
15717 };
15718
15719 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
15720 // power-of-2 extensions, as they are the most likely.
15721 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15722 // Skip scales that do not evenly divide the vector size.
15723 if (NumElts % Scale != 0)
15724 continue;
15725 if (!isAnyExtend(Scale))
15726 continue;
15727
15728 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15729 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15730 if (!LegalTypes || TLI.isTypeLegal(OutVT))
15731 if (!LegalOperations ||
15732 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15733 return DAG.getBitcast(VT,
15734 DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15735 }
15736
15737 return SDValue();
15738}
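// E.g. a v8i16 shuffle <0,u,u,u,1,u,u,u> (hypothetical types) fails
// isAnyExtend at Scale = 2 but matches at Scale = 4, producing
// (v2i64 any_extend_vector_inreg(v8i16 src)).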
15739
15740// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15741// each source element of a large type into the lowest elements of a smaller
15742// destination type. This is often generated during legalization.
15743// If the source node itself was a '*_extend_vector_inreg' node, we should
15744// then be able to remove it.
15745static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15746 SelectionDAG &DAG) {
15747 EVT VT = SVN->getValueType(0);
15748 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15749
15750 // TODO Add support for big-endian when we have a test case.
15751 if (!VT.isInteger() || IsBigEndian)
15752 return SDValue();
15753
15754 SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
15755
15756 unsigned Opcode = N0.getOpcode();
15757 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15758 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15759 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15760 return SDValue();
15761
15762 SDValue N00 = N0.getOperand(0);
15763 ArrayRef<int> Mask = SVN->getMask();
15764 unsigned NumElts = VT.getVectorNumElements();
15765 unsigned EltSizeInBits = VT.getScalarSizeInBits();
15766 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15767 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
15768
15769 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
15770 return SDValue();
15771 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
15772
15773 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
15774 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15775 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15776 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15777 for (unsigned i = 0; i != NumElts; ++i) {
15778 if (Mask[i] < 0)
15779 continue;
15780 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15781 continue;
15782 return false;
15783 }
15784 return true;
15785 };
15786
15787 // At the moment we just handle the case where we've truncated back to the
15788 // same size as before the extension.
15789 // TODO: handle more extension/truncation cases as cases arise.
15790 if (EltSizeInBits != ExtSrcSizeInBits)
15791 return SDValue();
15792
15793 // We can remove *extend_vector_inreg only if the truncation happens at
15794 // the same scale as the extension.
15795 if (isTruncate(ExtScale))
15796 return DAG.getBitcast(VT, N00);
15797
15798 return SDValue();
15799}
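// E.g. shuffle<0,2,4,6,u,u,u,u> of (v8i16 bitcast (v4i32
// zero_extend_vector_inreg(v8i16 X))) has ExtScale = 32/16 = 2 and truncates
// at that same scale, so the whole pattern folds to X (hypothetical types).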
15800
15801// Combine shuffles of splat-shuffles of the form:
15802// shuffle (shuffle V, undef, splat-mask), undef, M
15803// If splat-mask contains undef elements, we need to be careful about
15804// introducing undefs in the folded mask that are not the result of composing
15805// the masks of the shuffles.
15806static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15807 ShuffleVectorSDNode *Splat,
15808 SelectionDAG &DAG) {
15809 ArrayRef<int> SplatMask = Splat->getMask();
15810 assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15811
15812 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15813 // every undef mask element in the splat-shuffle has a corresponding undef
15814 // element in the user-shuffle's mask or if the composition of mask elements
15815 // would result in undef.
15816 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15817 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15818 // In this case it is not legal to simplify to the splat-shuffle because we
15819 // may be exposing to the users of the shuffle an undef element at index 1
15820 // that was not there before the combine.
15821 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15822 // In this case the composition of masks yields SplatMask, so it's ok to
15823 // simplify to the splat-shuffle.
15824 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15825 // In this case the composed mask includes all undef elements of SplatMask
15826 // and in addition sets element zero to undef. It is safe to simplify to
15827 // the splat-shuffle.
15828 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15829 ArrayRef<int> SplatMask) {
15830 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15831 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15832 SplatMask[UserMask[i]] != -1)
15833 return false;
15834 return true;
15835 };
15836 if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15837 return SDValue(Splat, 0);
15838
15839 // Create a new shuffle with a mask that is composed of the two shuffles'
15840 // masks.
15841 SmallVector<int, 32> NewMask;
15842 for (int Idx : UserMask)
15843 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15844
15845 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15846 Splat->getOperand(0), Splat->getOperand(1),
15847 NewMask);
15848}
15849
15850/// If the shuffle mask is taking exactly one element from the first vector
15851/// operand and passing through all other elements from the second vector
15852/// operand, return the index of the mask element that is choosing an element
15853/// from the first operand. Otherwise, return -1.
15854static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
15855 int MaskSize = Mask.size();
15856 int EltFromOp0 = -1;
15857 // TODO: This does not match if there are undef elements in the shuffle mask.
15858 // Should we ignore undefs in the shuffle mask instead? The trade-off is
15859 // removing an instruction (a shuffle), but losing the knowledge that some
15860 // vector lanes are not needed.
15861 for (int i = 0; i != MaskSize; ++i) {
15862 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
15863 // We're looking for a shuffle of exactly one element from operand 0.
15864 if (EltFromOp0 != -1)
15865 return -1;
15866 EltFromOp0 = i;
15867 } else if (Mask[i] != i + MaskSize) {
15868 // Nothing from operand 1 can change lanes.
15869 return -1;
15870 }
15871 }
15872 return EltFromOp0;
15873}
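// E.g. for Mask = <4,5,0,7> with MaskSize = 4 this returns 2: only mask
// element 2 reads from operand 0, and every other lane passes through the
// corresponding lane of operand 1.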
15874
15875/// If a shuffle inserts exactly one element from a source vector operand into
15876/// another vector operand and we can access the specified element as a scalar,
15877/// then we can eliminate the shuffle.
15878static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
15879 SelectionDAG &DAG) {
15880 // First, check if we are taking one element of a vector and shuffling that
15881 // element into another vector.
15882 ArrayRef<int> Mask = Shuf->getMask();
15883 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
15884 SDValue Op0 = Shuf->getOperand(0);
15885 SDValue Op1 = Shuf->getOperand(1);
15886 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
15887 if (ShufOp0Index == -1) {
15888 // Commute mask and check again.
15889 ShuffleVectorSDNode::commuteMask(CommutedMask);
15890 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
15891 if (ShufOp0Index == -1)
15892 return SDValue();
15893 // Commute operands to match the commuted shuffle mask.
15894 std::swap(Op0, Op1);
15895 Mask = CommutedMask;
15896 }
15897
15898 // The shuffle inserts exactly one element from operand 0 into operand 1.
15899 // Now see if we can access that element as a scalar via a real insert element
15900 // instruction.
15901 // TODO: We can try harder to locate the element as a scalar. Examples: it
15902 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
15903 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
15904 "Shuffle mask value must be from operand 0");
15905 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
15906 return SDValue();
15907
15908 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
15909 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
15910 return SDValue();
15911
15912 // There's an existing insertelement with constant insertion index, so we
15913 // don't need to check the legality/profitability of a replacement operation
15914 // that differs at most in the constant value. The target should be able to
15915 // lower any of those in a similar way. If not, legalization will expand this
15916 // to a scalar-to-vector plus shuffle.
15917 //
15918 // Note that the shuffle may move the scalar from the position that the insert
15919 // element used. Therefore, our new insert element occurs at the shuffle's
15920 // mask index value, not the insert's index value.
15921 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
15922 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
15923 Op0.getOperand(2).getValueType());
15924 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
15925 Op1, Op0.getOperand(1), NewInsIndex);
15926}
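// Worked example with hypothetical values:
// shuffle (insertelt v1, x, 0), v2, <4,5,0,7> --> insertelt v2, x, 2
// because the shuffle moves the scalar x into lane 2 of the result.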
15927
15928SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
15929 EVT VT = N->getValueType(0);
15930 unsigned NumElts = VT.getVectorNumElements();
15931
15932 SDValue N0 = N->getOperand(0);
15933 SDValue N1 = N->getOperand(1);
15934
15935 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
15936
15937 // Canonicalize shuffle undef, undef -> undef
15938 if (N0.isUndef() && N1.isUndef())
15939 return DAG.getUNDEF(VT);
15940
15941 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15942
15943 // Canonicalize shuffle v, v -> v, undef
15944 if (N0 == N1) {
15945 SmallVector<int, 8> NewMask;
15946 for (unsigned i = 0; i != NumElts; ++i) {
15947 int Idx = SVN->getMaskElt(i);
15948 if (Idx >= (int)NumElts) Idx -= NumElts;
15949 NewMask.push_back(Idx);
15950 }
15951 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
15952 }
15953
15954 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
15955 if (N0.isUndef())
15956 return DAG.getCommutedVectorShuffle(*SVN);
15957
15958 // Remove references to rhs if it is undef
15959 if (N1.isUndef()) {
15960 bool Changed = false;
15961 SmallVector<int, 8> NewMask;
15962 for (unsigned i = 0; i != NumElts; ++i) {
15963 int Idx = SVN->getMaskElt(i);
15964 if (Idx >= (int)NumElts) {
15965 Idx = -1;
15966 Changed = true;
15967 }
15968 NewMask.push_back(Idx);
15969 }
15970 if (Changed)
15971 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
15972 }
15973
15974 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
15975 return InsElt;
15976
15977 // A shuffle of a single vector that is a splat can always be folded.
15978 if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
15979 if (N1->isUndef() && N0Shuf->isSplat())
15980 return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
15981
15982 // If it is a splat, check if the argument vector is another splat or a
15983 // build_vector.
15984 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
15985 SDNode *V = N0.getNode();
15986
15987 // If this is a bit convert that changes the element type of the vector but
15988 // not the number of vector elements, look through it. Be careful not to
15989 // look through conversions that change things like v4f32 to v2f64.
15990 if (V->getOpcode() == ISD::BITCAST) {
15991 SDValue ConvInput = V->getOperand(0);
15992 if (ConvInput.getValueType().isVector() &&
15993 ConvInput.getValueType().getVectorNumElements() == NumElts)
15994 V = ConvInput.getNode();
15995 }
15996
15997 if (V->getOpcode() == ISD::BUILD_VECTOR) {
15998 assert(V->getNumOperands() == NumElts &&
15999 "BUILD_VECTOR has wrong number of operands");
16000 SDValue Base;
16001 bool AllSame = true;
16002 for (unsigned i = 0; i != NumElts; ++i) {
16003 if (!V->getOperand(i).isUndef()) {
16004 Base = V->getOperand(i);
16005 break;
16006 }
16007 }
16008 // Splat of <u, u, u, u>, return <u, u, u, u>
16009 if (!Base.getNode())
16010 return N0;
16011 for (unsigned i = 0; i != NumElts; ++i) {
16012 if (V->getOperand(i) != Base) {
16013 AllSame = false;
16014 break;
16015 }
16016 }
16017 // Splat of <x, x, x, x>, return <x, x, x, x>
16018 if (AllSame)
16019 return N0;
16020
16021 // Canonicalize any other splat as a build_vector.
16022 const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
16023 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
16024 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
16025
16026 // We may have jumped through bitcasts, so the type of the
16027 // BUILD_VECTOR may not match the type of the shuffle.
16028 if (V->getValueType(0) != VT)
16029 NewBV = DAG.getBitcast(VT, NewBV);
16030 return NewBV;
16031 }
16032 }
16033
16034 // Simplify source operands based on shuffle mask.
16035 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16036 return SDValue(N, 0);
16037
16038 // Match shuffles that can be converted to any_vector_extend_in_reg.
16039 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
16040 return V;
16041
16042 // Combine "truncate_vector_in_reg" style shuffles.
16043 if (SDValue V = combineTruncationShuffle(SVN, DAG))
16044 return V;
16045
16046 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
16047 Level < AfterLegalizeVectorOps &&
16048 (N1.isUndef() ||
16049 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
16050 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
16051 if (SDValue V = partitionShuffleOfConcats(N, DAG))
16052 return V;
16053 }
16054
16055 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16056 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16057 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16058 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
16059 return Res;
16060
16061 // If this shuffle only has a single input that is a bitcasted shuffle,
16062 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
16063 // back to their original types.
16064 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16065 N1.isUndef() && Level < AfterLegalizeVectorOps &&
16066 TLI.isTypeLegal(VT)) {
16067
16068 // Peek through the bitcast only if there is one user.
16069 SDValue BC0 = N0;
16070 while (BC0.getOpcode() == ISD::BITCAST) {
16071 if (!BC0.hasOneUse())
16072 break;
16073 BC0 = BC0.getOperand(0);
16074 }
16075
16076 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
16077 if (Scale == 1)
16078 return SmallVector<int, 8>(Mask.begin(), Mask.end());
16079
16080 SmallVector<int, 8> NewMask;
16081 for (int M : Mask)
16082 for (int s = 0; s != Scale; ++s)
16083 NewMask.push_back(M < 0 ? -1 : Scale * M + s);
16084 return NewMask;
16085 };
16086
16087 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
16088 EVT SVT = VT.getScalarType();
16089 EVT InnerVT = BC0->getValueType(0);
16090 EVT InnerSVT = InnerVT.getScalarType();
16091
16092 // Determine which shuffle works with the smaller scalar type.
16093 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
16094 EVT ScaleSVT = ScaleVT.getScalarType();
16095
16096 if (TLI.isTypeLegal(ScaleVT) &&
16097 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
16098 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
16099 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
16100 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
16101
16102 // Scale the shuffle masks to the smaller scalar type.
16103 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
16104 SmallVector<int, 8> InnerMask =
16105 ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
16106 SmallVector<int, 8> OuterMask =
16107 ScaleShuffleMask(SVN->getMask(), OuterScale);
16108
16109 // Merge the shuffle masks.
16110 SmallVector<int, 8> NewMask;
16111 for (int M : OuterMask)
16112 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
16113
16114 // Test for shuffle mask legality over both commutations.
16115 SDValue SV0 = BC0->getOperand(0);
16116 SDValue SV1 = BC0->getOperand(1);
16117 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
16118 if (!LegalMask) {
16119 std::swap(SV0, SV1);
16120 ShuffleVectorSDNode::commuteMask(NewMask);
16121 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
16122 }
16123
16124 if (LegalMask) {
16125 SV0 = DAG.getBitcast(ScaleVT, SV0);
16126 SV1 = DAG.getBitcast(ScaleVT, SV1);
16127 return DAG.getBitcast(
16128 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
16129 }
16130 }
16131 }
16132 }
16133
16134 // Canonicalize shuffles according to rules:
16135 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
16136 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
16137 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
16138 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
16139 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
16140 TLI.isTypeLegal(VT)) {
16141 // The incoming shuffle must be of the same type as the result of the
16142 // current shuffle.
16143 assert(N1->getOperand(0).getValueType() == VT &&
16144 "Shuffle types don't match");
16145
16146 SDValue SV0 = N1->getOperand(0);
16147 SDValue SV1 = N1->getOperand(1);
16148 bool HasSameOp0 = N0 == SV0;
16149 bool IsSV1Undef = SV1.isUndef();
16150 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
16151 // Commute the operands of this shuffle so that next rule
16152 // will trigger.
16153 return DAG.getCommutedVectorShuffle(*SVN);
16154 }
16155
16156 // Try to fold according to rules:
16157 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16158 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16159 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16160 // Don't try to fold shuffles with illegal type.
16161 // Only fold if this shuffle is the only user of the other shuffle.
16162 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
16163 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
16164 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
16165
16166 // Don't try to fold splats; they're likely to simplify somehow, or they
16167 // might be free.
16168 if (OtherSV->isSplat())
16169 return SDValue();
16170
16171 // The incoming shuffle must be of the same type as the result of the
16172 // current shuffle.
16173 assert(OtherSV->getOperand(0).getValueType() == VT &&
16174 "Shuffle types don't match");
16175
16176 SDValue SV0, SV1;
16177 SmallVector<int, 4> Mask;
16178 // Compute the combined shuffle mask for a shuffle with SV0 as the first
16179 // operand, and SV1 as the second operand.
16180 for (unsigned i = 0; i != NumElts; ++i) {
16181 int Idx = SVN->getMaskElt(i);
16182 if (Idx < 0) {
16183 // Propagate Undef.
16184 Mask.push_back(Idx);
16185 continue;
16186 }
16187
16188 SDValue CurrentVec;
16189 if (Idx < (int)NumElts) {
16190 // This shuffle index refers to the inner shuffle N0. Lookup the inner
16191 // shuffle mask to identify which vector is actually referenced.
16192 Idx = OtherSV->getMaskElt(Idx);
16193 if (Idx < 0) {
16194 // Propagate Undef.
16195 Mask.push_back(Idx);
16196 continue;
16197 }
16198
16199 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
16200 : OtherSV->getOperand(1);
16201 } else {
16202 // This shuffle index references an element within N1.
16203 CurrentVec = N1;
16204 }
16205
16206 // Simple case where 'CurrentVec' is UNDEF.
16207 if (CurrentVec.isUndef()) {
16208 Mask.push_back(-1);
16209 continue;
16210 }
16211
16212 // Canonicalize the shuffle index. We don't know yet if CurrentVec
16213 // will be the first or second operand of the combined shuffle.
16214 Idx = Idx % NumElts;
16215 if (!SV0.getNode() || SV0 == CurrentVec) {
16216 // Ok. CurrentVec is the left hand side.
16217 // Update the mask accordingly.
16218 SV0 = CurrentVec;
16219 Mask.push_back(Idx);
16220 continue;
16221 }
16222
16223 // Bail out if we cannot convert the shuffle pair into a single shuffle.
16224 if (SV1.getNode() && SV1 != CurrentVec)
16225 return SDValue();
16226
16227 // Ok. CurrentVec is the right hand side.
16228 // Update the mask accordingly.
16229 SV1 = CurrentVec;
16230 Mask.push_back(Idx + NumElts);
16231 }
16232
16233 // Check if all indices in Mask are Undef. If so, propagate Undef.
16234 bool isUndefMask = true;
16235 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
16236 isUndefMask &= Mask[i] < 0;
16237
16238 if (isUndefMask)
16239 return DAG.getUNDEF(VT);
16240
16241 if (!SV0.getNode())
16242 SV0 = DAG.getUNDEF(VT);
16243 if (!SV1.getNode())
16244 SV1 = DAG.getUNDEF(VT);
16245
16246 // Avoid introducing shuffles with illegal mask.
16247 if (!TLI.isShuffleMaskLegal(Mask, VT)) {
16248 ShuffleVectorSDNode::commuteMask(Mask);
16249
16250 if (!TLI.isShuffleMaskLegal(Mask, VT))
16251 return SDValue();
16252
16253 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
16254 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
16255 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
16256 std::swap(SV0, SV1);
16257 }
16258
16259 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16260 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16261 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16262 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
16263 }
16264
16265 return SDValue();
16266}
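// Worked example of the two-shuffle merge above (hypothetical values):
// shuffle(shuffle(A, B, <0,4,1,5>), C, <0,2,4,6>) resolves outer indices 0
// and 2 through the inner mask to A[0] and A[1], and indices 4 and 6 to C
// lanes 0 and 2, yielding shuffle(A, C, <0,1,4,6>) if that mask is legal.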
16267
16268SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
16269 SDValue InVal = N->getOperand(0);
16270 EVT VT = N->getValueType(0);
16271
16272 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
16273 // with a VECTOR_SHUFFLE and possible truncate.
16274 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16275 SDValue InVec = InVal->getOperand(0);
16276 SDValue EltNo = InVal->getOperand(1);
16277 auto InVecT = InVec.getValueType();
16278 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
16279 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
16280 int Elt = C0->getZExtValue();
16281 NewMask[0] = Elt;
16282 SDValue Val;
16284 // If we have an implicit truncate, do the truncate here as long as it's
16285 // legal; if it's not legal, the combine simply does not fire.
16285 if (VT.getScalarType() != InVal.getValueType() &&
16286 InVal.getValueType().isScalarInteger() &&
16287 isTypeLegal(VT.getScalarType())) {
16288 Val =
16289 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
16290 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
16291 }
16292 if (VT.getScalarType() == InVecT.getScalarType() &&
16293 VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
16294 TLI.isShuffleMaskLegal(NewMask, VT)) {
16295 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
16296 DAG.getUNDEF(InVecT), NewMask);
16297 // If the initial vector is the correct size, this shuffle is a
16298 // valid result.
16299 if (VT == InVecT)
16300 return Val;
16301 // If not, we must truncate the vector.
16302 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
16303 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16304 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
16305 EVT SubVT =
16306 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
16307 VT.getVectorNumElements());
16308 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
16309 ZeroIdx);
16310 return Val;
16311 }
16312 }
16313 }
16314 }
16315
16316 return SDValue();
16317}
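// E.g. scalar_to_vector(extract_vector_elt(v4i32 V, 3)) with VT = v4i32
// (hypothetical types) becomes shuffle V, undef, <3,u,u,u>: lane 0 takes
// element 3 and all other lanes are undef.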
16318
16319SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
16320 EVT VT = N->getValueType(0);
16321 SDValue N0 = N->getOperand(0);
16322 SDValue N1 = N->getOperand(1);
16323 SDValue N2 = N->getOperand(2);
16324
16325 // If inserting an UNDEF, just return the original vector.
16326 if (N1.isUndef())
16327 return N0;
16328
16329 // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
16330 // us to pull BITCASTs from input to output.
16331 if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
16332 if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
16333 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
16334
16335 // If this is an insert of an extracted vector into an undef vector, we can
16336 // just use the input to the extract.
16337 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16338 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
16339 return N1.getOperand(0);
16340
16341 // If we are inserting a bitcast value into an undef, with the same
16342 // number of elements, just use the bitcast input of the extract.
16343 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
16344 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
16345 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
16346 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16347 N1.getOperand(0).getOperand(1) == N2 &&
16348 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
16349 VT.getVectorNumElements() &&
16350 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
16351 VT.getSizeInBits()) {
16352 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
16353 }
16354
16355 // If both N0 and N1 are bitcast values on which insert_subvector
16356 // would make sense, pull the bitcast through.
16357 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
16358 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
16359 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
16360 SDValue CN0 = N0.getOperand(0);
16361 SDValue CN1 = N1.getOperand(0);
16362 EVT CN0VT = CN0.getValueType();
16363 EVT CN1VT = CN1.getValueType();
16364 if (CN0VT.isVector() && CN1VT.isVector() &&
16365 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
16366 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
16367 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
16368 CN0.getValueType(), CN0, CN1, N2);
16369 return DAG.getBitcast(VT, NewINSERT);
16370 }
16371 }
16372
16373 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
16374 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
16375 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
16376 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
16377 N0.getOperand(1).getValueType() == N1.getValueType() &&
16378 N0.getOperand(2) == N2)
16379 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
16380 N1, N2);
16381
16382 if (!isa<ConstantSDNode>(N2))
16383 return SDValue();
16384
16385 unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
16386
16387 // Canonicalize insert_subvector dag nodes.
16388 // Example:
16389 // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
16390 // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0) when Idx1 < Idx0
16391 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
16392 N1.getValueType() == N0.getOperand(1).getValueType() &&
16393 isa<ConstantSDNode>(N0.getOperand(2))) {
16394 unsigned OtherIdx = N0.getConstantOperandVal(2);
16395 if (InsIdx < OtherIdx) {
16396 // Swap nodes.
16397 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
16398 N0.getOperand(0), N1, N2);
16399 AddToWorklist(NewOp.getNode());
16400 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
16401 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
16402 }
16403 }
16404
16405 // If the input vector is a concatenation, and the insert replaces
16406 // one of the pieces, we can optimize into a single concat_vectors.
16407 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
16408 N0.getOperand(0).getValueType() == N1.getValueType()) {
16409 unsigned Factor = N1.getValueType().getVectorNumElements();
16410
16411 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
16412 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
16413
16414 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16415 }
16416
16417 return SDValue();
16418}
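// Worked example of the index canonicalization above (hypothetical values):
// insert_subvector(insert_subvector(A, B, 4), C, 0) is rewritten as
// insert_subvector(insert_subvector(A, C, 0), B, 4), ordering the inserts by
// increasing index.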
16419
16420SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
16421 SDValue N0 = N->getOperand(0);
16422
16423 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
16424 if (N0->getOpcode() == ISD::FP16_TO_FP)
16425 return N0->getOperand(0);
16426
16427 return SDValue();
16428}
16429
16430SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
16431 SDValue N0 = N->getOperand(0);
16432
16433 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
16434 if (N0->getOpcode() == ISD::AND) {
16435 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
16436 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
16437 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
16438 N0.getOperand(0));
16439 }
16440 }
16441
16442 return SDValue();
16443}
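// The fold above is sound because FP16_TO_FP only reads the low 16 bits of
// its operand, so masking with 0xffff cannot change the result.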
16444
16445 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
16446 /// with the destination vector and a zero vector.
16447 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
16448 /// vector_shuffle V, Zero, <0, 4, 2, 4>
16449SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
16450 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
16451
16452 EVT VT = N->getValueType(0);
16453 SDValue LHS = N->getOperand(0);
16454 SDValue RHS = peekThroughBitcast(N->getOperand(1));
16455 SDLoc DL(N);
16456
16457 // Make sure we're not running after operation legalization where it
16458 // may have custom lowered the vector shuffles.
16459 if (LegalOperations)
1. Assuming the condition is false
2. Taking false branch
16460 return SDValue();
16461
16462 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3. Assuming the condition is false
4. Taking false branch
16463 return SDValue();
16464
16465 EVT RVT = RHS.getValueType();
16466 unsigned NumElts = RHS.getNumOperands();
16467
16468 // Attempt to create a valid clear mask, splitting the mask into
16469 // sub elements and checking to see if each is
16470 // all zeros or all ones - suitable for shuffle masking.
16471 auto BuildClearMask = [&](int Split) {
16472 int NumSubElts = NumElts * Split;
16473 int NumSubBits = RVT.getScalarSizeInBits() / Split;
16474
16475 SmallVector<int, 8> Indices;
16476 for (int i = 0; i != NumSubElts; ++i) {
10. Assuming 'i' is not equal to 'NumSubElts'
11. Loop condition is true. Entering loop body
19. Assuming 'i' is not equal to 'NumSubElts'
20. Loop condition is true. Entering loop body
28. Assuming 'i' is not equal to 'NumSubElts'
29. Loop condition is true. Entering loop body
16477 int EltIdx = i / Split;
16478 int SubIdx = i % Split;
16479 SDValue Elt = RHS.getOperand(EltIdx);
16480 if (Elt.isUndef()) {
12. Taking false branch
21. Taking false branch
30. Taking false branch
16481 Indices.push_back(-1);
16482 continue;
16483 }
16484
16485 APInt Bits;
16486 if (isa<ConstantSDNode>(Elt))
13. Taking false branch
22. Taking false branch
31. Taking true branch
16487 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
16488 else if (isa<ConstantFPSDNode>(Elt))
14. Taking true branch
23. Taking true branch
16489 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
16490 else
16491 return SDValue();
16492
16493 // Extract the sub element from the constant bit mask.
16494 if (DAG.getDataLayout().isBigEndian()) {
15. Assuming the condition is false
16. Taking false branch
24. Assuming the condition is false
25. Taking false branch
32. Assuming the condition is false
33. Taking false branch
16495 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
16496 } else {
16497 Bits.lshrInPlace(SubIdx * NumSubBits);
16498 }
16499
16500 if (Split > 1)
17. Taking false branch
26. Taking false branch
34. Taking false branch
16501 Bits = Bits.trunc(NumSubBits);
16502
16503 if (Bits.isAllOnesValue())
18. Taking true branch
27. Taking true branch
35. Calling 'APInt::isAllOnesValue'
16504 Indices.push_back(i);
16505 else if (Bits == 0)
16506 Indices.push_back(i + NumSubElts);
16507 else
16508 return SDValue();
16509 }
16510
16511 // Let's see if the target supports this vector_shuffle.
16512 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
16513 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
16514 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
16515 return SDValue();
16516
16517 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
16518 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
16519 DAG.getBitcast(ClearVT, LHS),
16520 Zero, Indices));
16521 };
16522
16523 // Determine maximum split level (byte level masking).
16524 int MaxSplit = 1;
16525 if (RVT.getScalarSizeInBits() % 8 == 0)
5. Assuming the condition is false
6. Taking false branch
16526 MaxSplit = RVT.getScalarSizeInBits() / 8;
16527
16528 for (int Split = 1; Split <= MaxSplit; ++Split)
7. Loop condition is true. Entering loop body
16529 if (RVT.getScalarSizeInBits() % Split == 0)
8. Taking true branch
16530 if (SDValue S = BuildClearMask(Split))
9. Calling 'operator()'
16531 return S;
16532
16533 return SDValue();
16534}
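// Worked example with hypothetical values: for a v4i32 AND with constant
// mask <0xffffffff, 0, 0xffffffff, 0> and Split = 1, BuildClearMask emits
// Indices = <0,5,2,7>; any index into the zero vector is equivalent since
// all of its lanes are zero.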
16535
16536/// Visit a binary vector operation, like ADD.
16537SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
16538 assert(N->getValueType(0).isVector() &&
16539 "SimplifyVBinOp only works on vectors!");
16540
16541 SDValue LHS = N->getOperand(0);
16542 SDValue RHS = N->getOperand(1);
16543 SDValue Ops[] = {LHS, RHS};
16544
16545 // See if we can constant fold the vector operation.
16546 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
16547 N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
16548 return Fold;
16549
16550 // Type legalization might introduce new shuffles in the DAG.
16551 // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
16552 // -> (shuffle (VBinOp (A, B)), Undef, Mask).
16553 if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
16554 isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
16555 LHS.getOperand(1).isUndef() &&
16556 RHS.getOperand(1).isUndef()) {
16557 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
16558 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
16559
16560 if (SVN0->getMask().equals(SVN1->getMask())) {
16561 EVT VT = N->getValueType(0);
16562 SDValue UndefVector = LHS.getOperand(1);
16563 SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
16564 LHS.getOperand(0), RHS.getOperand(0),
16565 N->getFlags());
16566 AddUsersToWorklist(N);
16567 return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
16568 SVN0->getMask());
16569 }
16570 }
16571
16572 return SDValue();
16573}
16574
16575SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
16576 SDValue N2) {
16577 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
16578
16579 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
16580 cast<CondCodeSDNode>(N0.getOperand(2))->get());
16581
16582 // If we got a simplified select_cc node back from SimplifySelectCC, then
16583 // break it down into a new SETCC node, and a new SELECT node, and then return
16584 // the SELECT node, since we were called with a SELECT node.
16585 if (SCC.getNode()) {
16586 // Check to see if we got a select_cc back (to turn into setcc/select).
16587 // Otherwise, just return whatever node we got back, like fabs.
16588 if (SCC.getOpcode() == ISD::SELECT_CC) {
16589 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
16590 N0.getValueType(),
16591 SCC.getOperand(0), SCC.getOperand(1),
16592 SCC.getOperand(4));
16593 AddToWorklist(SETCC.getNode());
16594 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
16595 SCC.getOperand(2), SCC.getOperand(3));
16596 }
16597
16598 return SCC;
16599 }
16600 return SDValue();
16601}
16602
16603/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
16604/// being selected between, see if we can simplify the select. Callers of this
16605/// should assume that TheSelect is deleted if this returns true. As such, they
16606/// should return the appropriate thing (e.g. the node) back to the top-level of
16607/// the DAG combiner loop to avoid it being looked at.
16608bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
16609 SDValue RHS) {
16610 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16611 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
16612 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
16613 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
16614 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
16615 SDValue Sqrt = RHS;
16616 ISD::CondCode CC;
16617 SDValue CmpLHS;
16618 const ConstantFPSDNode *Zero = nullptr;
16619
16620 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
16621 CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
16622 CmpLHS = TheSelect->getOperand(0);
16623 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
16624 } else {
16625 // SELECT or VSELECT
16626 SDValue Cmp = TheSelect->getOperand(0);
16627 if (Cmp.getOpcode() == ISD::SETCC) {
16628 CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
16629 CmpLHS = Cmp.getOperand(0);
16630 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
16631 }
16632 }
16633 if (Zero && Zero->isZero() &&
16634 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
16635 CC == ISD::SETULT || CC == ISD::SETLT)) {
16636 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16637 CombineTo(TheSelect, Sqrt);
16638 return true;
16639 }
16640 }
16641 }
16642 // Cannot simplify select with vector condition
16643 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
16644
16645 // If this is a select from two identical things, try to pull the operation
16646 // through the select.
16647 if (LHS.getOpcode() != RHS.getOpcode() ||
16648 !LHS.hasOneUse() || !RHS.hasOneUse())
16649 return false;
16650
16651 // If this is a load and the token chain is identical, replace the select
16652 // of two loads with a load through a select of the address to load from.
16653 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
16654 // constants have been dropped into the constant pool.
16655 if (LHS.getOpcode() == ISD::LOAD) {
16656 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
16657 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
16658
16659 // Token chains must be identical.
16660 if (LHS.getOperand(0) != RHS.getOperand(0) ||
16661 // Do not let this transformation reduce the number of volatile loads.
16662 LLD->isVolatile() || RLD->isVolatile() ||
16663 // FIXME: If either is a pre/post inc/dec load,
16664 // we'd need to split out the address adjustment.
16665 LLD->isIndexed() || RLD->isIndexed() ||
16666 // If this is an EXTLOAD, the VT's must match.
16667 LLD->getMemoryVT() != RLD->getMemoryVT() ||
16668 // If this is an EXTLOAD, the kind of extension must match.
16669 (LLD->getExtensionType() != RLD->getExtensionType() &&
16670 // The only exception is if one of the extensions is anyext.
16671 LLD->getExtensionType() != ISD::EXTLOAD &&
16672 RLD->getExtensionType() != ISD::EXTLOAD) ||
16673 // FIXME: this discards src value information. This is
16674 // over-conservative. It would be beneficial to be able to remember
16675 // both potential memory locations. Since we are discarding
16676 // src value info, don't do the transformation if the memory
16677 // locations are not in the default address space.
16678 LLD->getPointerInfo().getAddrSpace() != 0 ||
16679 RLD->getPointerInfo().getAddrSpace() != 0 ||
16680 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
16681 LLD->getBasePtr().getValueType()))
16682 return false;
16683
16684 // Check that the select condition doesn't reach either load. If so,
16685 // folding this will induce a cycle into the DAG. If not, this is safe to
16686 // xform, so create a select of the addresses.
16687 SDValue Addr;
16688 if (TheSelect->getOpcode() == ISD::SELECT) {
16689 SDNode *CondNode = TheSelect->getOperand(0).getNode();
16690 if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
16691 (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
16692 return false;
16693 // The loads must not depend on one another.
16694 if (LLD->isPredecessorOf(RLD) ||
16695 RLD->isPredecessorOf(LLD))
16696 return false;
16697 Addr = DAG.getSelect(SDLoc(TheSelect),
16698 LLD->getBasePtr().getValueType(),
16699 TheSelect->getOperand(0), LLD->getBasePtr(),
16700 RLD->getBasePtr());
16701 } else { // Otherwise SELECT_CC
16702 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
16703 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
16704
16705 if ((LLD->hasAnyUseOfValue(1) &&
16706 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
16707 (RLD->hasAnyUseOfValue(1) &&
16708 (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
16709 return false;
16710
16711 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
16712 LLD->getBasePtr().getValueType(),
16713 TheSelect->getOperand(0),
16714 TheSelect->getOperand(1),
16715 LLD->getBasePtr(), RLD->getBasePtr(),
16716 TheSelect->getOperand(4));
16717 }
16718
16719 SDValue Load;
16720 // It is safe to replace the two loads if they have different alignments,
16721 // but the new load must be the minimum (most restrictive) alignment of the
16722 // inputs.
16723 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
16724 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
16725 if (!RLD->isInvariant())
16726 MMOFlags &= ~MachineMemOperand::MOInvariant;
16727 if (!RLD->isDereferenceable())
16728 MMOFlags &= ~MachineMemOperand::MODereferenceable;
16729 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16730 // FIXME: Discards pointer and AA info.
16731 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16732 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16733 MMOFlags);
16734 } else {
16735 // FIXME: Discards pointer and AA info.
16736 Load = DAG.getExtLoad(
16737 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16738 : LLD->getExtensionType(),
16739 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16740 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16741 }
16742
16743 // Users of the select now use the result of the load.
16744 CombineTo(TheSelect, Load);
16745
16746 // Users of the old loads now use the new load's chain. We know the
16747 // old-load value is dead now.
16748 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16749 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16750 return true;
16751 }
16752
16753 return false;
16754}
16755
16756/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16757/// bitwise 'and'.
16758SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16759 SDValue N1, SDValue N2, SDValue N3,
16760 ISD::CondCode CC) {
16761 // If this is a select where the false operand is zero and the compare is a
16762 // check of the sign bit, see if we can perform the "gzip trick":
16763 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16764 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16765 EVT XType = N0.getValueType();
16766 EVT AType = N2.getValueType();
16767 if (!isNullConstant(N3) || !XType.bitsGE(AType))
16768 return SDValue();
16769
16770 // If the comparison is testing for a positive value, we have to invert
16771 // the sign bit mask, so only do that transform if the target has a bitwise
16772 // 'and not' instruction (the invert is free).
16773 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16774 // (X > -1) ? A : 0
16775 // (X > 0) ? X : 0 <-- This is canonical signed max.
16776 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16777 return SDValue();
16778 } else if (CC == ISD::SETLT) {
16779 // (X < 0) ? A : 0
16780 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
16781 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16782 return SDValue();
16783 } else {
16784 return SDValue();
16785 }
16786
16787 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16788 // constant.
16789 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16790 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16791 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16792 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16793 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16794 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16795 AddToWorklist(Shift.getNode());
16796
16797 if (XType.bitsGT(AType)) {
16798 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16799 AddToWorklist(Shift.getNode());
16800 }
16801
16802 if (CC == ISD::SETGT)
16803 Shift = DAG.getNOT(DL, Shift, AType);
16804
16805 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16806 }
16807
16808 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16809 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16810 AddToWorklist(Shift.getNode());
16811
16812 if (XType.bitsGT(AType)) {
16813 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16814 AddToWorklist(Shift.getNode());
16815 }
16816
16817 if (CC == ISD::SETGT)
16818 Shift = DAG.getNOT(DL, Shift, AType);
16819
16820 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16821}
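// Editor's note: a minimal scalar sketch of the two folds above, assuming a
// 32-bit X and an arithmetic right shift for signed operands (what ISD::SRA
// provides); the helper names are hypothetical and not part of this file.
#include <cstdint>
// select_cc setlt X, 0, A, 0  ->  and (sra X, 31), A
static inline uint32_t selectLtZero(int32_t X, uint32_t A) {
  return (uint32_t)(X >> 31) & A;  // X >> 31 is all-ones iff X < 0
}
// select_cc setgt X, -1, A, 0  ->  and (not (sra X, 31)), A
static inline uint32_t selectGtMinusOne(int32_t X, uint32_t A) {
  return ~(uint32_t)(X >> 31) & A; // the mask inverts, hence hasAndNot above
}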
16822
16823/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
16824/// where 'cond' is the comparison specified by CC.
16825SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
16826 SDValue N2, SDValue N3, ISD::CondCode CC,
16827 bool NotExtCompare) {
16828 // (x ? y : y) -> y.
16829 if (N2 == N3) return N2;
16830
16831 EVT VT = N2.getValueType();
16832 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
16833 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16834
16835 // Determine if the condition we're dealing with is constant
16836 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
16837 N0, N1, CC, DL, false);
16838 if (SCC.getNode()) AddToWorklist(SCC.getNode());
16839
16840 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
16841 // fold select_cc true, x, y -> x
16842 // fold select_cc false, x, y -> y
16843 return !SCCC->isNullValue() ? N2 : N3;
16844 }
16845
16846 // Check to see if we can simplify the select into an fabs node
16847 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
16848 // Allow either -0.0 or 0.0
16849 if (CFP->isZero()) {
16850 // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
16851 if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
16852 N0 == N2 && N3.getOpcode() == ISD::FNEG &&
16853 N2 == N3.getOperand(0))
16854 return DAG.getNode(ISD::FABS, DL, VT, N0);
16855
16856 // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
16857 if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
16858 N0 == N3 && N2.getOpcode() == ISD::FNEG &&
16859 N2.getOperand(0) == N3)
16860 return DAG.getNode(ISD::FABS, DL, VT, N3);
16861 }
16862 }
16863
16864 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
16865 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
16866 // in it. This is a win when the constant is not otherwise available because
16867 // it replaces two constant pool loads with one. We only do this if the FP
16868 // type is known to be legal, because if it isn't, then we are before legalize
16869 // types and we want the other legalization to happen first (e.g. to avoid
16870 // messing with soft float) and if the ConstantFP is not legal, because if
16871 // it is legal, we may not need to store the FP constant in a constant pool.
16872 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
16873 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
16874 if (TLI.isTypeLegal(N2.getValueType()) &&
16875 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
16876 TargetLowering::Legal &&
16877 !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
16878 !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
16879 // If both constants have multiple uses, then we won't need to do an
16880 // extra load, they are likely around in registers for other users.
16881 (TV->hasOneUse() || FV->hasOneUse())) {
16882 Constant *Elts[] = {
16883 const_cast<ConstantFP*>(FV->getConstantFPValue()),
16884 const_cast<ConstantFP*>(TV->getConstantFPValue())
16885 };
16886 Type *FPTy = Elts[0]->getType();
16887 const DataLayout &TD = DAG.getDataLayout();
16888
16889 // Create a ConstantArray of the two constants.
16890 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
16891 SDValue CPIdx =
16892 DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
16893 TD.getPrefTypeAlignment(FPTy));
16894 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
16895
16896 // Get the offsets to the 0 and 1 element of the array so that we can
16897 // select between them.
16898 SDValue Zero = DAG.getIntPtrConstant(0, DL);
16899 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
16900 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
16901
16902 SDValue Cond = DAG.getSetCC(DL,
16903 getSetCCResultType(N0.getValueType()),
16904 N0, N1, CC);
16905 AddToWorklist(Cond.getNode());
16906 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
16907 Cond, One, Zero);
16908 AddToWorklist(CstOffset.getNode());
16909 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
16910 CstOffset);
16911 AddToWorklist(CPIdx.getNode());
16912 return DAG.getLoad(
16913 TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
16914 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
16915 Alignment);
16916 }
16917 }
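// Editor's note: concretely, for "(a < b) ? 1.0f : 2.0f" the code above
// emits a pool array {2.0f, 1.0f} (false value first, matching Elts[]) and
// loads from CPIdx + ((a < b) ? 4 : 0), replacing two constant-pool loads
// with one.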
16918
16919 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
16920 return V;
16921
16922 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
16923 // where y has a single bit set.
16924 // In plain terms: we can turn the SELECT_CC into an AND
16925 // when the condition can be materialized as an all-ones register. Any
16926 // single bit-test can be materialized as an all-ones register with
16927 // shift-left and shift-right-arith.
16928 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
16929 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
16930 SDValue AndLHS = N0->getOperand(0);
16931 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16932 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
16933 // Shift the tested bit over the sign bit.
16934 const APInt &AndMask = ConstAndRHS->getAPIntValue();
16935 SDValue ShlAmt =
16936 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
16937 getShiftAmountTy(AndLHS.getValueType()));
16938 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
16939
16940 // Now arithmetic right shift it all the way over, so the result is either
16941 // all-ones, or zero.
16942 SDValue ShrAmt =
16943 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
16944 getShiftAmountTy(Shl.getValueType()));
16945 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
16946
16947 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
16948 }
16949 }
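// Editor's note: worked example of the fold above for i32 with y = 0x10:
// countLeadingZeros(0x10) = 27, so (shl x, 27) moves bit 4 into the sign
// bit and (sra ..., 31) yields all-ones iff that bit was set; ANDing with A
// then gives A exactly when (x & 0x10) != 0, matching the select_cc.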
16950
16951 // fold select C, 16, 0 -> shl C, 4
16952 if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
16953 TLI.getBooleanContents(N0.getValueType()) ==
16954 TargetLowering::ZeroOrOneBooleanContent) {
16955
16956 // If the caller doesn't want us to simplify this into a zext of a compare,
16957 // don't do it.
16958 if (NotExtCompare && N2C->isOne())
16959 return SDValue();
16960
16961 // Get a SetCC of the condition
16962 // NOTE: Don't create a SETCC if it's not legal on this target.
16963 if (!LegalOperations ||
16964 TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
16965 SDValue Temp, SCC;
16966 // cast from setcc result type to select result type
16967 if (LegalTypes) {
16968 SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
16969 N0, N1, CC);
16970 if (N2.getValueType().bitsLT(SCC.getValueType()))
16971 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
16972 N2.getValueType());
16973 else
16974 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16975 N2.getValueType(), SCC);
16976 } else {
16977 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
16978 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
16979 N2.getValueType(), SCC);
16980 }
16981
16982 AddToWorklist(SCC.getNode());
16983 AddToWorklist(Temp.getNode());
16984
16985 if (N2C->isOne())
16986 return Temp;
16987
16988 // shl setcc result by log2 n2c
16989 return DAG.getNode(
16990 ISD::SHL, DL, N2.getValueType(), Temp,
16991 DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
16992 getShiftAmountTy(Temp.getValueType())));
16993 }
16994 }
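// Editor's note: e.g. "select C, 16, 0" with a zero-or-one boolean C becomes
// "shl (zext C), 4", since zext(C) is 0 or 1 and 1 << log2(16) == 16.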
16995
16996 // Check to see if this is an integer abs.
16997 // select_cc setg[te] X, 0, X, -X ->
16998 // select_cc setgt X, -1, X, -X ->
16999 // select_cc setl[te] X, 0, -X, X ->
17000 // select_cc setlt X, 1, -X, X ->
17001 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
17002 if (N1C) {
17003 ConstantSDNode *SubC = nullptr;
17004 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
17005 (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
17006 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
17007 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
17008 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
17009 (N1C->isOne() && CC == ISD::SETLT)) &&
17010 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
17011 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
17012
17013 EVT XType = N0.getValueType();
17014 if (SubC && SubC->isNullValue() && XType.isInteger()) {
17015 SDLoc DL(N0);
17016 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
17017 N0,
17018 DAG.getConstant(XType.getSizeInBits() - 1, DL,
17019 getShiftAmountTy(N0.getValueType())));
17020 SDValue Add = DAG.getNode(ISD::ADD, DL,
17021 XType, N0, Shift);
17022 AddToWorklist(Shift.getNode());
17023 AddToWorklist(Add.getNode());
17024 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
17025 }
17026 }
17027
17028 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
17029 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
17030 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
17031 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
17032 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
17033 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
17034 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
17035 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
17036 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
17037 SDValue ValueOnZero = N2;
17038 SDValue Count = N3;
17039 // If the condition is NE instead of EQ, swap the operands.
17040 if (CC == ISD::SETNE)
17041 std::swap(ValueOnZero, Count);
17042 // Check if the value on zero is a constant equal to the bits in the type.
17043 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
17044 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
17045 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
17046 // legal, combine to just cttz.
17047 if ((Count.getOpcode() == ISD::CTTZ ||
17048 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
17049 N0 == Count.getOperand(0) &&
17050 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
17051 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
17052 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
17053 // legal, combine to just ctlz.
17054 if ((Count.getOpcode() == ISD::CTLZ ||
17055 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
17056 N0 == Count.getOperand(0) &&
17057 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
17058 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
17059 }
17060 }
17061 }
17062
17063 return SDValue();
17064}
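// Editor's note: a scalar sketch of the integer-abs lowering produced by the
// SubC fold above, assuming a 32-bit X and an arithmetic right shift; the
// helper name is hypothetical (and INT32_MIN wraps in the DAG's arithmetic,
// whereas in C++ that addition would overflow).
#include <cstdint>
static inline int32_t absViaShift(int32_t X) {
  int32_t Y = X >> 31;  // Y = sra(X, size(X)-1): 0 for X >= 0, -1 for X < 0
  return (X + Y) ^ Y;   // xor(add(X, Y), Y): X when Y == 0, -X when Y == -1
}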
17065
17066/// This is a stub for TargetLowering::SimplifySetCC.
17067SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
17068 ISD::CondCode Cond, const SDLoc &DL,
17069 bool foldBooleans) {
17070 TargetLowering::DAGCombinerInfo
17071 DagCombineInfo(DAG, Level, false, this);
17072 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
17073}
17074
17075/// Given an ISD::SDIV node expressing a divide by constant, return
17076/// a DAG expression to select that will generate the same value by multiplying
17077/// by a magic number.
17078/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17079SDValue DAGCombiner::BuildSDIV(SDNode *N) {
17080 // When optimizing for minimum size, we don't want to expand a div to a mul
17081 // and a shift.
17082 if (DAG.getMachineFunction().getFunction().optForMinSize())
17083 return SDValue();
17084
17085 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17086 if (!C)
17087 return SDValue();
17088
17089 // Avoid division by zero.
17090 if (C->isNullValue())
17091 return SDValue();
17092
17093 std::vector<SDNode *> Built;
17094 SDValue S =
17095 TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17096
17097 for (SDNode *N : Built)
17098 AddToWorklist(N);
17099 return S;
17100}
17101
17102/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
17103/// DAG expression that will generate the same value by right shifting.
17104SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
17105 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17106 if (!C)
17107 return SDValue();
17108
17109 // Avoid division by zero.
17110 if (C->isNullValue())
17111 return SDValue();
17112
17113 std::vector<SDNode *> Built;
17114 SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
17115
17116 for (SDNode *N : Built)
17117 AddToWorklist(N);
17118 return S;
17119}
17120
17121/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
17122/// expression that will generate the same value by multiplying by a magic
17123/// number.
17124/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17125SDValue DAGCombiner::BuildUDIV(SDNode *N) {
17126 // When optimizing for minimum size, we don't want to expand a div to a mul
17127 // and a shift.
17128 if (DAG.getMachineFunction().getFunction().optForMinSize())
17129 return SDValue();
17130
17131 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17132 if (!C)
17133 return SDValue();
17134
17135 // Avoid division by zero.
17136 if (C->isNullValue())
17137 return SDValue();
17138
17139 std::vector<SDNode *> Built;
17140 SDValue S =
17141 TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17142
17143 for (SDNode *N : Built)
17144 AddToWorklist(N);
17145 return S;
17146}
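// Editor's note: the "magic number" idea in scalar form, using the classic
// Hacker's Delight constant for unsigned division by 3; TLI.BuildUDIV
// derives such multiplier/shift pairs for arbitrary divisors. The helper
// name is hypothetical.
#include <cstdint>
static inline uint32_t udiv3(uint32_t x) {
  // 0xAAAAAAAB == ceil(2^33 / 3); the widened product shifted right by 33
  // equals x / 3 for every 32-bit x.
  return (uint32_t)(((uint64_t)x * 0xAAAAAAABu) >> 33);
}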
17147
17148/// Determines the LogBase2 value for a non-null input value using the
17149/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
17150SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
17151 EVT VT = V.getValueType();
17152 unsigned EltBits = VT.getScalarSizeInBits();
17153 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
17154 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
17155 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
17156 return LogBase2;
17157}
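// Editor's note: worked example of the transform above for a 32-bit element:
// V = 16 has ctlz(16) = 27, so LogBase2 = (32 - 1) - 27 = 4, i.e. 16 == 1u << 4.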
17158
17159/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17160/// For the reciprocal, we need to find the zero of the function:
17161/// F(X) = A X - 1 [which has a zero at X = 1/A]
17162/// =>
17163/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
17164/// does not require additional intermediate precision]
17165SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
17166 if (Level >= AfterLegalizeDAG)
17167 return SDValue();
17168
17169 // TODO: Handle half and/or extended types?
17170 EVT VT = Op.getValueType();
17171 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17172 return SDValue();
17173
17174 // If estimates are explicitly disabled for this function, we're done.
17175 MachineFunction &MF = DAG.getMachineFunction();
17176 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
17177 if (Enabled == TLI.ReciprocalEstimate::Disabled)
17178 return SDValue();
17179
17180 // Estimates may be explicitly enabled for this type with a custom number of
17181 // refinement steps.
17182 int Iterations = TLI.getDivRefinementSteps(VT, MF);
17183 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
17184 AddToWorklist(Est.getNode());
17185
17186 if (Iterations) {
17187 EVT VT = Op.getValueType();
17188 SDLoc DL(Op);
17189 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17190
17191 // Newton iterations: Est = Est + Est (1 - Arg * Est)
17192 for (int i = 0; i < Iterations; ++i) {
17193 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
17194 AddToWorklist(NewEst.getNode());
17195
17196 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
17197 AddToWorklist(NewEst.getNode());
17198
17199 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17200 AddToWorklist(NewEst.getNode());
17201
17202 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
17203 AddToWorklist(Est.getNode());
17204 }
17205 }
17206 return Est;
17207 }
17208
17209 return SDValue();
17210}
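// Editor's note: one worked iteration for A = 3 with initial estimate
// X0 = 0.3: X1 = X0 + X0*(1 - A*X0) = 0.3 + 0.3*0.1 = 0.33, then
// X2 = 0.33 + 0.33*(1 - 0.99) = 0.3333; convergence is quadratic, roughly
// doubling the number of correct digits per step.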
17211
17212/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17213/// For the reciprocal sqrt, we need to find the zero of the function:
17214/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17215/// =>
17216/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
17217/// As a result, we precompute A/2 prior to the iteration loop.
17218SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
17219 unsigned Iterations,
17220 SDNodeFlags Flags, bool Reciprocal) {
17221 EVT VT = Arg.getValueType();
17222 SDLoc DL(Arg);
17223 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
17224
17225 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
17226 // this entire sequence requires only one FP constant.
17227 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
17228 AddToWorklist(HalfArg.getNode());
17229
17230 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
17231 AddToWorklist(HalfArg.getNode());
17232
17233 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
17234 for (unsigned i = 0; i < Iterations; ++i) {
17235 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
17236 AddToWorklist(NewEst.getNode());
17237
17238 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
17239 AddToWorklist(NewEst.getNode());
17240
17241 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
17242 AddToWorklist(NewEst.getNode());
17243
17244 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17245 AddToWorklist(Est.getNode());
17246 }
17247
17248 // If non-reciprocal square root is requested, multiply the result by Arg.
17249 if (!Reciprocal) {
17250 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
17251 AddToWorklist(Est.getNode());
17252 }
17253
17254 return Est;
17255}
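// Editor's note: worked step for Arg = 4, Est0 = 0.4 (target 1/sqrt(4) = 0.5):
// HalfArg = 1.5*4 - 4 = 2, then Est1 = 0.4 * (1.5 - 2*0.4*0.4) = 0.472; with
// Reciprocal == false the final multiply by Arg approximates sqrt(4).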
17256
17257/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17258/// For the reciprocal sqrt, we need to find the zero of the function:
17259/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17260/// =>
17261/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
17262SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
17263 unsigned Iterations,
17264 SDNodeFlags Flags, bool Reciprocal) {
17265 EVT VT = Arg.getValueType();
17266 SDLoc DL(Arg);
17267 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
17268 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
17269
17270 // This routine must enter the loop below to work correctly
17271 // when (Reciprocal == false).
17272 assert(Iterations > 0);
17273
17274 // Newton iterations for reciprocal square root:
17275 // E = (E * -0.5) * ((A * E) * E + -3.0)
17276 for (unsigned i = 0; i < Iterations; ++i) {
17277 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
17278 AddToWorklist(AE.getNode());
17279
17280 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
17281 AddToWorklist(AEE.getNode());
17282
17283 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
17284 AddToWorklist(RHS.getNode());
17285
17286 // When calculating a square root at the last iteration build:
17287 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
17288 // (notice a common subexpression)
17289 SDValue LHS;
17290 if (Reciprocal || (i + 1) < Iterations) {
17291 // RSQRT: LHS = (E * -0.5)
17292 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
17293 } else {
17294 // SQRT: LHS = (A * E) * -0.5
17295 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
17296 }
17297 AddToWorklist(LHS.getNode());
17298
17299 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
17300 AddToWorklist(Est.getNode());
17301 }
17302
17303 return Est;
17304}
17305
17306/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
17307/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
17308/// Op can be zero.
17309SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
17310 bool Reciprocal) {
17311 if (Level >= AfterLegalizeDAG)
17312 return SDValue();
17313
17314 // TODO: Handle half and/or extended types?
17315 EVT VT = Op.getValueType();
17316 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17317 return SDValue();
17318
17319 // If estimates are explicitly disabled for this function, we're done.
17320 MachineFunction &MF = DAG.getMachineFunction();
17321 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
17322 if (Enabled == TLI.ReciprocalEstimate::Disabled)
17323 return SDValue();
17324
17325 // Estimates may be explicitly enabled for this type with a custom number of
17326 // refinement steps.
17327 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
17328
17329 bool UseOneConstNR = false;
17330 if (SDValue Est =
17331 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
17332 Reciprocal)) {
17333 AddToWorklist(Est.getNode());
17334
17335 if (Iterations) {
17336 Est = UseOneConstNR
17337 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
17338 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
17339
17340 if (!Reciprocal) {
17341 // The estimate is now completely wrong if the input was exactly 0.0 or
17342 // possibly a denormal. Force the answer to 0.0 for those cases.
17343 EVT VT = Op.getValueType();
17344 SDLoc DL(Op);
17345 EVT CCVT = getSetCCResultType(VT);
17346 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
17347 const Function &F = DAG.getMachineFunction().getFunction();
17348 Attribute Denorms = F.getFnAttribute("denormal-fp-math");
17349 if (Denorms.getValueAsString().equals("ieee")) {
17350 // fabs(X) < SmallestNormal ? 0.0 : Est
17351 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
17352 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
17353 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
17354 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17355 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
17356 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
17357 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
17358 AddToWorklist(Fabs.getNode());
17359 AddToWorklist(IsDenorm.getNode());
17360 AddToWorklist(Est.getNode());
17361 } else {
17362 // X == 0.0 ? 0.0 : Est
17363 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17364 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
17365 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
17366 AddToWorklist(IsZero.getNode());
17367 AddToWorklist(Est.getNode());
17368 }
17369 }
17370 }
17371 return Est;
17372 }
17373
17374 return SDValue();
17375}
17376
17377SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17378 return buildSqrtEstimateImpl(Op, Flags, true);
17379}
17380
17381SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17382 return buildSqrtEstimateImpl(Op, Flags, false);
17383}
17384
17385/// Return true if there is any possibility that the two addresses overlap.
17386bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
17387 // If they are the same then they must be aliases.
17388 if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
17389
17390 // If they are both volatile then they cannot be reordered.
17391 if (Op0->isVolatile() && Op1->isVolatile()) return true;
17392
17393 // If one operation reads from invariant memory, and the other may store, they
17394 // cannot alias. These should really be checking the equivalent of mayWrite,
17395 // but it only matters for memory nodes other than load/store.
17396 if (Op0->isInvariant() && Op1->writeMem())
17397 return false;
17398
17399 if (Op1->isInvariant() && Op0->writeMem())
17400 return false;
17401
17402 unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
17403 unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
17404
17405 // Check for BaseIndexOffset matching.
17406 BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
17407 BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
17408 int64_t PtrDiff;
17409 if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
17410 if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
17411 return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
17412
17413 // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
17414 // able to calculate their relative offset if at least one arises
17415 // from an alloca. However, these allocas cannot overlap and we
17416 // can infer there is no alias.
17417 if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
17418 if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
17419 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17420 // If the bases are the same frame index but we couldn't find a
17421 // constant offset (the indices are different), be conservative.
17422 if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
17423 !MFI.isFixedObjectIndex(B->getIndex())))
17424 return false;
17425 }
17426
17427 bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
17428 bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
17429 bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
17430 bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
17431 bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
17432 bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
17433
17434 // If the indices match, or the base kinds differ, and both bases are
17435 // of a checkable kind, we can prove they do not alias.
17436 if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
17437 (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
17438 (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
17439 return false;
17440 }
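// Editor's note: in the PtrDiff check above, two 4-byte accesses with
// BasePtr1 == BasePtr0 + 4 give PtrDiff = 4; NumBytes0 (4) <= PtrDiff, so
// the byte ranges [0,4) and [4,8) are disjoint and no alias is reported.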
17441
17442 // If we know that SrcValue1 and SrcValue2 have relatively large
17443 // alignment compared to the size and offset of the access, we may be able
17444 // to prove they do not alias. This check is conservative for now to catch
17445 // cases created by splitting vector types.
17446 int64_t SrcValOffset0 = Op0->getSrcValueOffset();
17447 int64_t SrcValOffset1 = Op1->getSrcValueOffset();
17448 unsigned OrigAlignment0 = Op0->getOriginalAlignment();
17449 unsigned OrigAlignment1 = Op1->getOriginalAlignment();
17450 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
17451 NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
17452 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
17453 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
17454
17455 // There is no overlap between these relatively aligned accesses of
17456 // similar size. Return no alias.
17457 if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
17458 (OffAlign1 + NumBytes1) <= OffAlign0)
17459 return false;
17460 }
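// Editor's note: for the alignment check above, two 4-byte accesses with
// original alignment 8 at source offsets 0 and 4 give OffAlign0 = 0 and
// OffAlign1 = 4; since OffAlign0 + NumBytes0 <= OffAlign1, the accesses
// cannot overlap within any aligned block and no alias is reported.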
17461
17462 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
17463 ? CombinerGlobalAA
17464 : DAG.getSubtarget().useAA();
17465#ifndef NDEBUG
17466 if (CombinerAAOnlyFunc.getNumOccurrences() &&
17467 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
17468 UseAA = false;
17469#endif
17470
17471 if (UseAA && AA &&
17472 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
17473 // Use alias analysis information.
17474 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
17475 int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
17476 int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
17477 AliasResult AAResult =
17478 AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
17479 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
17480 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
17481 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
17482 if (AAResult == NoAlias)
17483 return false;
17484 }
17485
17486 // Otherwise we have to assume they alias.
17487 return true;
17488}
17489
17490/// Walk up chain skipping non-aliasing memory nodes,
17491/// looking for aliasing nodes and adding them to the Aliases vector.
17492void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
17493 SmallVectorImpl<SDValue> &Aliases) {
17494 SmallVector<SDValue, 8> Chains; // List of chains to visit.
17495 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
17496
17497 // Get alias information for node.
17498 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
17499
17500 // Starting off.
17501 Chains.push_back(OriginalChain);
17502 unsigned Depth = 0;
17503
17504 // Look at each chain and determine if it is an alias. If so, add it to the
17505 // aliases list. If not, then continue up the chain looking for the next
17506 // candidate.
17507 while (!Chains.empty()) {
17508 SDValue Chain = Chains.pop_back_val();
17509
17510 // For TokenFactor nodes, look at each operand and only continue up the
17511 // chain until we reach the depth limit.
17512 //
17513 // FIXME: The depth check could be made to return the last non-aliasing
17514 // chain we found before we hit a tokenfactor rather than the original
17515 // chain.
17516 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
17517 Aliases.clear();
17518 Aliases.push_back(OriginalChain);
17519 return;
17520 }
17521
17522 // Don't bother if we've been before.
17523 if (!Visited.insert(Chain.getNode()).second)
17524 continue;
17525
17526 switch (Chain.getOpcode()) {
17527 case ISD::EntryToken:
17528 // Entry token is ideal chain operand, but handled in FindBetterChain.
17529 break;
17530
17531 case ISD::LOAD:
17532 case ISD::STORE: {
17533 // Get alias information for Chain.
17534 bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
17535 !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
17536
17537 // If chain is alias then stop here.
17538 if (!(IsLoad && IsOpLoad) &&
17539 isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
17540 Aliases.push_back(Chain);
17541 } else {
17542 // Look further up the chain.
17543 Chains.push_back(Chain.getOperand(0));
17544 ++Depth;
17545 }
17546 break;
17547 }
17548
17549 case ISD::TokenFactor:
17550 // We have to check each of the operands of the token factor for "small"
17551 // token factors, so we queue them up. Adding the operands to the queue
17552 // (stack) in reverse order maintains the original order and increases the
17553 // likelihood that getNode will find a matching token factor (CSE).
17554 if (Chain.getNumOperands() > 16) {
17555 Aliases.push_back(Chain);
17556 break;
17557 }
17558 for (unsigned n = Chain.getNumOperands(); n;)
17559 Chains.push_back(Chain.getOperand(--n));
17560 ++Depth;
17561 break;
17562
17563 case ISD::CopyFromReg:
17564 // Forward past CopyFromReg.
17565 Chains.push_back(Chain.getOperand(0));
17566 ++Depth;
17567 break;
17568
17569 default:
17570 // For all other instructions we will just have to take what we can get.
17571 Aliases.push_back(Chain);
17572 break;
17573 }
17574 }
17575}
17576
17577/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17578/// (aliasing node.)
17579SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17580 if (OptLevel == CodeGenOpt::None)
17581 return OldChain;
17582
17583 // Ops for replacing token factor.
17584 SmallVector<SDValue, 8> Aliases;
17585
17586 // Accumulate all the aliases to this node.
17587 GatherAllAliases(N, OldChain, Aliases);
17588
17589 // If no operands then chain to entry token.
17590 if (Aliases.size() == 0)
17591 return DAG.getEntryNode();
17592
17593 // If a single operand then chain to it. We don't need to revisit it.
17594 if (Aliases.size() == 1)
17595 return Aliases[0];
17596
17597 // Construct a custom tailored token factor.
17598 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17599}
17600
17601// This function tries to collect a bunch of potentially interesting
17602// nodes to improve the chains of, all at once. This might seem
17603// redundant, as this function gets called when visiting every store
17604// node, so why not let the work be done on each store as it's visited?
17605//
17606// I believe this is mainly important because MergeConsecutiveStores
17607// is unable to deal with merging stores of different sizes, so unless
17608// we improve the chains of all the potential candidates up-front
17609// before running MergeConsecutiveStores, it might only see some of
17610// the nodes that will eventually be candidates, and then not be able
17611// to go from a partially-merged state to the desired final
17612// fully-merged state.
17613bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
17614 if (OptLevel == CodeGenOpt::None)
17615 return false;
17616
17617 // This holds the base pointer, index, and the offset in bytes from the base
17618 // pointer.
17619 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17620
17621 // We must have a base and an offset.
17622 if (!BasePtr.getBase().getNode())
17623 return false;
17624
17625 // Do not handle stores to undef base pointers.
17626 if (BasePtr.getBase().isUndef())
17627 return false;
17628
17629 SmallVector<StoreSDNode *, 8> ChainedStores;
17630 ChainedStores.push_back(St);
17631
17632 // Walk up the chain and look for nodes with offsets from the same
17633 // base pointer. Stop when reaching an instruction with a different kind
17634 // or an instruction which has a different base pointer.
17635 StoreSDNode *Index = St;
17636 while (Index) {
17637 // If the chain has more than one use, then we can't reorder the mem ops.
17638 if (Index != St && !SDValue(Index, 0)->hasOneUse())
17639 break;
17640
17641 if (Index->isVolatile() || Index->isIndexed())
17642 break;
17643
17644 // Find the base pointer and offset for this memory node.
17645 BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
17646
17647 // Check that the base pointer is the same as the original one.
17648 if (!BasePtr.equalBaseIndex(Ptr, DAG))
17649 break;
17650
17651 // Walk up the chain to find the next store node, ignoring any
17652 // intermediate loads. Any other kind of node will halt the loop.
17653 SDNode *NextInChain = Index->getChain().getNode();
17654 while (true) {
17655 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
17656 // We found a store node. Use it for the next iteration.
17657 if (STn->isVolatile() || STn->isIndexed()) {
17658 Index = nullptr;
17659 break;
17660 }
17661 ChainedStores.push_back(STn);
17662 Index = STn;
17663 break;
17664 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
17665 NextInChain = Ldn->getChain().getNode();
17666 continue;
17667 } else {
17668 Index = nullptr;
17669 break;
17670 }
17671 } // end while
17672 }
17673
17674 // At this point, ChainedStores lists all of the Store nodes
17675 // reachable by iterating up through chain nodes matching the above
17676 // conditions. For each such store identified, try to find an
17677 // earlier chain to attach the store to which won't violate the
17678 // required ordering.
17679 bool MadeChangeToSt = false;
17680 SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
17681
17682 for (StoreSDNode *ChainedStore : ChainedStores) {
17683 SDValue Chain = ChainedStore->getChain();
17684 SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
17685
17686 if (Chain != BetterChain) {
17687 if (ChainedStore == St)
17688 MadeChangeToSt = true;
17689 BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
17690 }
17691 }
17692
17693 // Do all replacements after finding the replacements to make to avoid making
17694 // the chains more complicated by introducing new TokenFactors.
17695 for (auto Replacement : BetterChains)
17696 replaceStoreChain(Replacement.first, Replacement.second);
17697
17698 return MadeChangeToSt;
17699}
17700
17701/// This is the entry point for the file.
17702void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17703 CodeGenOpt::Level OptLevel) {
17704 /// This is the main entry point to this class.
17705 DAGCombiner(*this, AA, OptLevel).Run(Level);
17706}

/build/llvm-toolchain-snapshot-7~svn326246/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a class to represent arbitrary precision
12/// integral constant values and operations on them.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_ADT_APINT_H
17#define LLVM_ADT_APINT_H
18
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22#include <climits>
23#include <cstring>
24#include <string>
25
26namespace llvm {
27class FoldingSetNodeID;
28class StringRef;
29class hash_code;
30class raw_ostream;
31
32template <typename T> class SmallVectorImpl;
33template <typename T> class ArrayRef;
34
35class APInt;
36
37inline APInt operator-(APInt);
38
39//===----------------------------------------------------------------------===//
40// APInt Class
41//===----------------------------------------------------------------------===//
42
43/// \brief Class for arbitrary precision integers.
44///
45/// APInt is a functional replacement for the common-case unsigned integer types like
46/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
47/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
48/// than 64-bits of precision. APInt provides a variety of arithmetic operators
49/// and methods to manipulate integer values of any bit-width. It supports both
50/// the typical integer arithmetic and comparison operations as well as bitwise
51/// manipulation.
52///
53/// The class has several invariants worth noting:
54/// * All bit, byte, and word positions are zero-based.
55/// * Once the bit width is set, it doesn't change except by the Truncate,
56/// SignExtend, or ZeroExtend operations.
57/// * All binary operators must be on APInt instances of the same bit width.
58/// Attempting to use these operators on instances with different bit
59/// widths will yield an assertion.
60/// * The value is stored canonically as an unsigned value. For operations
61/// where it makes a difference, there are both signed and unsigned variants
62/// of the operation. For example, sdiv and udiv. However, because the bit
63/// widths must be the same, operations such as Mul and Add produce the same
64/// results regardless of whether the values are interpreted as signed or
65/// not.
66/// * In general, the class tries to follow the style of computation that LLVM
67/// uses in its IR. This simplifies its use for LLVM.
68///
69class LLVM_NODISCARD APInt {
70public:
71 typedef uint64_t WordType;
72
73 /// This enum is used to hold the constants we needed for APInt.
74 enum : unsigned {
75 /// Byte size of a word.
76 APINT_WORD_SIZE = sizeof(WordType),
77 /// Bits in a word.
78 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT
79 };
80
81 static const WordType WORD_MAX = ~WordType(0);
82
83private:
84 /// This union is used to store the integer value. When the
85 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
86 union {
87 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
88 uint64_t *pVal; ///< Used to store the >64 bits integer value.
89 } U;
90
91 unsigned BitWidth; ///< The number of bits in this APInt.
92
93 friend struct DenseMapAPIntKeyInfo;
94
95 friend class APSInt;
96
97 /// \brief Fast internal constructor
98 ///
99 /// This constructor is used only internally for speed of construction of
100 /// temporaries. It is unsafe for general use so it is not public.
101 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
102 U.pVal = val;
103 }
104
105 /// \brief Determine if this APInt just has one word to store value.
106 ///
107 /// \returns true if the number of bits <= 64, false otherwise.
108 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
109
110 /// \brief Determine which word a bit is in.
111 ///
112 /// \returns the word position for the specified bit position.
113 static unsigned whichWord(unsigned bitPosition) {
114 return bitPosition / APINT_BITS_PER_WORD;
115 }
116
117 /// \brief Determine which bit in a word a bit is in.
118 ///
119 /// \returns the bit position in a word for the specified bit position
120 /// in the APInt.
121 static unsigned whichBit(unsigned bitPosition) {
122 return bitPosition % APINT_BITS_PER_WORD;
123 }
124
125 /// \brief Get a single bit mask.
126 ///
127 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
128 /// This method generates and returns a uint64_t (word) mask for a single
129 /// bit at a specific bit position. This is used to mask the bit in the
130 /// corresponding word.
131 static uint64_t maskBit(unsigned bitPosition) {
132 return 1ULL << whichBit(bitPosition);
133 }
134
135 /// \brief Clear unused high order bits
136 ///
137 /// This method is used internally to clear the top "N" bits in the high order
138 /// word that are not used by the APInt. This is needed after the most
139 /// significant word is assigned a value to ensure that those bits are
140 /// zero'd out.
141 APInt &clearUnusedBits() {
142 // Compute how many bits are used in the final word
143 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
144
145 // Mask out the high bits.
146 uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits);
147 if (isSingleWord())
148 U.VAL &= mask;
149 else
150 U.pVal[getNumWords() - 1] &= mask;
151 return *this;
152 }
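// Editor's note: the shift above is always defined for BitWidth >= 1:
// WordBits = ((BitWidth - 1) % 64) + 1 lies in [1, 64], so the shift amount
// APINT_BITS_PER_WORD - WordBits lies in [0, 63]; contrast this with the
// flagged expression in isAllOnesValue() below, which shifts by 64 - BitWidth.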
153
154 /// \brief Get the word corresponding to a bit position
155 /// \returns the corresponding word for the specified bit position.
156 uint64_t getWord(unsigned bitPosition) const {
157 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
158 }
159
160 /// Utility method to change the bit width of this APInt to a new bit width,
161 /// allocating and/or deallocating as necessary. There is no guarantee on the
162 /// value of any bits upon return. Caller should populate the bits after.
163 void reallocate(unsigned NewBitWidth);
164
165 /// \brief Convert a char array into an APInt
166 ///
167 /// \param radix 2, 8, 10, 16, or 36
168 /// Converts a string into a number. The string must be non-empty
169 /// and well-formed as a number of the given base. The bit-width
170 /// must be sufficient to hold the result.
171 ///
172 /// This is used by the constructors that take string arguments.
173 ///
174 /// StringRef::getAsInteger is superficially similar but (1) does
175 /// not assume that the string is well-formed and (2) grows the
176 /// result to hold the input.
177 void fromString(unsigned numBits, StringRef str, uint8_t radix);
178
179 /// \brief An internal division function for dividing APInts.
180 ///
181 /// This is used by the toString method to divide by the radix. It simply
182 /// provides a more convenient form of divide for internal use since KnuthDiv
183 /// has specific constraints on its inputs. If those constraints are not met
184 /// then it provides a simpler form of divide.
185 static void divide(const WordType *LHS, unsigned lhsWords,
186 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
187 WordType *Remainder);
188
189 /// out-of-line slow case for inline constructor
190 void initSlowCase(uint64_t val, bool isSigned);
191
192 /// shared code between two array constructors
193 void initFromArray(ArrayRef<uint64_t> array);
194
195 /// out-of-line slow case for inline copy constructor
196 void initSlowCase(const APInt &that);
197
198 /// out-of-line slow case for shl
199 void shlSlowCase(unsigned ShiftAmt);
200
201 /// out-of-line slow case for lshr.
202 void lshrSlowCase(unsigned ShiftAmt);
203
204 /// out-of-line slow case for ashr.
205 void ashrSlowCase(unsigned ShiftAmt);
206
207 /// out-of-line slow case for operator=
208 void AssignSlowCase(const APInt &RHS);
209
210 /// out-of-line slow case for operator==
211 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY;
212
213 /// out-of-line slow case for countLeadingZeros
214 unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
215
216 /// out-of-line slow case for countLeadingOnes.
217 unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
218
219 /// out-of-line slow case for countTrailingZeros.
220 unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
221
222 /// out-of-line slow case for countTrailingOnes
223 unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
224
225 /// out-of-line slow case for countPopulation
226 unsigned countPopulationSlowCase() const LLVM_READONLY;
227
228 /// out-of-line slow case for intersects.
229 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
230
231 /// out-of-line slow case for isSubsetOf.
232 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
233
234 /// out-of-line slow case for setBits.
235 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
236
237 /// out-of-line slow case for flipAllBits.
238 void flipAllBitsSlowCase();
239
240 /// out-of-line slow case for operator&=.
241 void AndAssignSlowCase(const APInt& RHS);
242
243 /// out-of-line slow case for operator|=.
244 void OrAssignSlowCase(const APInt& RHS);
245
246 /// out-of-line slow case for operator^=.
247 void XorAssignSlowCase(const APInt& RHS);
248
249 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
250 /// to, or greater than RHS.
251 int compare(const APInt &RHS) const LLVM_READONLY;
252
253 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
254 /// to, or greater than RHS.
255 int compareSigned(const APInt &RHS) const LLVM_READONLY;
256
257public:
258 /// \name Constructors
259 /// @{
260
261 /// \brief Create a new APInt of numBits width, initialized as val.
262 ///
263 /// If isSigned is true then val is treated as if it were a signed value
264 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
265 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
266 /// the range of val are zero filled).
267 ///
268 /// \param numBits the bit width of the constructed APInt
269 /// \param val the initial value of the APInt
270 /// \param isSigned how to treat signedness of val
271 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
272 : BitWidth(numBits) {
273 assert(BitWidth && "bitwidth too small");
274 if (isSingleWord()) {
275 U.VAL = val;
276 clearUnusedBits();
277 } else {
278 initSlowCase(val, isSigned);
279 }
280 }
281
282 /// \brief Construct an APInt of numBits width, initialized as bigVal[].
283 ///
284 /// Note that bigVal.size() can be smaller or larger than the corresponding
285 /// bit width but any extraneous bits will be dropped.
286 ///
287 /// \param numBits the bit width of the constructed APInt
288 /// \param bigVal a sequence of words to form the initial value of the APInt
289 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
290
291 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
292 /// deprecated because this constructor is prone to ambiguity with the
293 /// APInt(unsigned, uint64_t, bool) constructor.
294 ///
295 /// If this overload is ever deleted, care should be taken to prevent calls
296 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
297 /// constructor.
298 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
299
300 /// \brief Construct an APInt from a string representation.
301 ///
302 /// This constructor interprets the string \p str in the given radix. The
303 /// interpretation stops when the first character that is not suitable for the
304 /// radix is encountered, or the end of the string. Acceptable radix values
305 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
306 /// string to require more bits than numBits.
307 ///
308 /// \param numBits the bit width of the constructed APInt
309 /// \param str the string to be interpreted
310 /// \param radix the radix to use for the conversion
311 APInt(unsigned numBits, StringRef str, uint8_t radix);
312
313 /// Simply makes *this a copy of that.
314 /// @brief Copy Constructor.
315 APInt(const APInt &that) : BitWidth(that.BitWidth) {
316 if (isSingleWord())
317 U.VAL = that.U.VAL;
318 else
319 initSlowCase(that);
320 }
321
322 /// \brief Move Constructor.
323 APInt(APInt &&that) : BitWidth(that.BitWidth) {
324 memcpy(&U, &that.U, sizeof(U));
325 that.BitWidth = 0;
326 }
327
328 /// \brief Destructor.
329 ~APInt() {
330 if (needsCleanup())
331 delete[] U.pVal;
332 }
333
334 /// \brief Default constructor that creates an uninteresting APInt
335 /// representing a 1-bit zero value.
336 ///
337 /// This is useful for object deserialization (pair this with the static
338 /// method Read).
339 explicit APInt() : BitWidth(1) { U.VAL = 0; }
340
341 /// \brief Returns whether this instance allocated memory.
342 bool needsCleanup() const { return !isSingleWord(); }
343
344 /// Used to insert APInt objects, or objects that contain APInt objects, into
345 /// FoldingSets.
346 void Profile(FoldingSetNodeID &id) const;
347
348 /// @}
349 /// \name Value Tests
350 /// @{
351
352 /// \brief Determine sign of this APInt.
353 ///
354 /// This tests the high bit of this APInt to determine if it is set.
355 ///
356 /// \returns true if this APInt is negative, false otherwise
357 bool isNegative() const { return (*this)[BitWidth - 1]; }
358
359 /// \brief Determine if this APInt Value is non-negative (>= 0)
360 ///
361 /// This tests the high bit of the APInt to determine if it is unset.
362 bool isNonNegative() const { return !isNegative(); }
363
364 /// \brief Determine if sign bit of this APInt is set.
365 ///
366 /// This tests the high bit of this APInt to determine if it is set.
367 ///
368 /// \returns true if this APInt has its sign bit set, false otherwise.
369 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
370
371 /// \brief Determine if sign bit of this APInt is clear.
372 ///
373 /// This tests the high bit of this APInt to determine if it is clear.
374 ///
375 /// \returns true if this APInt has its sign bit clear, false otherwise.
376 bool isSignBitClear() const { return !isSignBitSet(); }
377
378 /// \brief Determine if this APInt Value is positive.
379 ///
380 /// This tests if the value of this APInt is positive (> 0). Note
381 /// that 0 is not a positive value.
382 ///
383 /// \returns true if this APInt is positive.
384 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
385
386 /// \brief Determine if all bits are set
387 ///
388 /// This checks to see if all bits of the APInt are set or not.
389 bool isAllOnesValue() const {
390 if (isSingleWord())
36: Taking true branch
391 return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth);
37: The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::APInt::WordType'
392 return countTrailingOnesSlowCase() == BitWidth;
393 }
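The analyzer's path reaches this shift with BitWidth == 0: the move constructor at line 325 leaves the moved-from object with BitWidth = 0, the isSingleWord() branch is then taken (step 36), and APINT_BITS_PER_WORD - 0 == 64. A standalone sketch of the flagged expression, assuming the 64-bit WordType noted elsewhere in this header:

  #include <cstdint>

  uint64_t allOnesMask(unsigned BitWidth) {
    const uint64_t WORD_MAX = ~uint64_t(0);
    // With BitWidth == 0 this shifts a 64-bit operand by 64, which is
    // undefined behaviour in C++ ([expr.shift]); any result may be produced.
    return WORD_MAX >> (64 - BitWidth);
  }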
394
395 /// \brief Determine if all bits are clear
396 ///
397 /// This checks to see if all bits of the APInt are clear or
398 /// not.
399 bool isNullValue() const { return !*this; }
400
401 /// \brief Determine if this is a value of 1.
402 ///
403 /// This checks to see if the value of this APInt is one.
404 bool isOneValue() const {
405 if (isSingleWord())
406 return U.VAL == 1;
407 return countLeadingZerosSlowCase() == BitWidth - 1;
408 }
409
410 /// \brief Determine if this is the largest unsigned value.
411 ///
412 /// This checks to see if the value of this APInt is the maximum unsigned
413 /// value for the APInt's bit width.
414 bool isMaxValue() const { return isAllOnesValue(); }
415
416 /// \brief Determine if this is the largest signed value.
417 ///
418 /// This checks to see if the value of this APInt is the maximum signed
419 /// value for the APInt's bit width.
420 bool isMaxSignedValue() const {
421 if (isSingleWord())
422 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
423 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
424 }
425
426 /// \brief Determine if this is the smallest unsigned value.
427 ///
428 /// This checks to see if the value of this APInt is the minimum unsigned
429 /// value for the APInt's bit width.
430 bool isMinValue() const { return isNullValue(); }
431
432 /// \brief Determine if this is the smallest signed value.
433 ///
434 /// This checks to see if the value of this APInt is the minimum signed
435 /// value for the APInt's bit width.
436 bool isMinSignedValue() const {
437 if (isSingleWord())
438 return U.VAL == (WordType(1) << (BitWidth - 1));
439 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
440 }
441
442 /// \brief Check if this APInt has an N-bits unsigned integer value.
443 bool isIntN(unsigned N) const {
444    assert(N && "N == 0 ???");
445 return getActiveBits() <= N;
446 }
447
448 /// \brief Check if this APInt has an N-bits signed integer value.
449 bool isSignedIntN(unsigned N) const {
450    assert(N && "N == 0 ???");
451 return getMinSignedBits() <= N;
452 }
453
454 /// \brief Check if this APInt's value is a power of two greater than zero.
455 ///
456 /// \returns true if the argument APInt value is a power of two > 0.
457 bool isPowerOf2() const {
458 if (isSingleWord())
459 return isPowerOf2_64(U.VAL);
460 return countPopulationSlowCase() == 1;
461 }
462
463 /// \brief Check if the APInt's value is returned by getSignMask.
464 ///
465 /// \returns true if this is the value returned by getSignMask.
466 bool isSignMask() const { return isMinSignedValue(); }
467
468 /// \brief Convert APInt to a boolean value.
469 ///
470 /// This converts the APInt to a boolean value as a test against zero.
471 bool getBoolValue() const { return !!*this; }
472
473 /// If this value is smaller than the specified limit, return it, otherwise
474 /// return the limit value. This causes the value to saturate to the limit.
475  uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
476 return ugt(Limit) ? Limit : getZExtValue();
477 }
478
479 /// \brief Check if the APInt consists of a repeated bit pattern.
480 ///
481 /// e.g. 0x01010101 satisfies isSplat(8).
482 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
483 /// width without remainder.
484 bool isSplat(unsigned SplatSizeInBits) const;
485
486  /// \returns true if this APInt value is a sequence of \p numBits ones
487 /// starting at the least significant bit with the remainder zero.
488 bool isMask(unsigned numBits) const {
489    assert(numBits != 0 && "numBits must be non-zero");
490    assert(numBits <= BitWidth && "numBits out of range");
491 if (isSingleWord())
492 return U.VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits));
493 unsigned Ones = countTrailingOnesSlowCase();
494 return (numBits == Ones) &&
495 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
496 }
497
498 /// \returns true if this APInt is a non-empty sequence of ones starting at
499 /// the least significant bit with the remainder zero.
500 /// Ex. isMask(0x0000FFFFU) == true.
501 bool isMask() const {
502 if (isSingleWord())
503 return isMask_64(U.VAL);
504 unsigned Ones = countTrailingOnesSlowCase();
505 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
506 }
507
508 /// \brief Return true if this APInt value contains a sequence of ones with
509 /// the remainder zero.
510 bool isShiftedMask() const {
511 if (isSingleWord())
512 return isShiftedMask_64(U.VAL);
513 unsigned Ones = countPopulationSlowCase();
514 unsigned LeadZ = countLeadingZerosSlowCase();
515 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
516 }
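A few concrete checks of the three mask predicates, as a sketch on 32-bit values:

  APInt M(32, 0x0000FFF0);
  bool a = M.isShiftedMask(); // true: ones in bits 4..15, remainder zero
  APInt N(32, 0x0000FFFF);
  bool b = N.isMask();        // true: non-empty run of trailing ones
  bool c = N.isMask(16);      // true: exactly 16 trailing ones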
517
518 /// @}
519 /// \name Value Generators
520 /// @{
521
522 /// \brief Gets maximum unsigned value of APInt for specific bit width.
523 static APInt getMaxValue(unsigned numBits) {
524 return getAllOnesValue(numBits);
525 }
526
527 /// \brief Gets maximum signed value of APInt for a specific bit width.
528 static APInt getSignedMaxValue(unsigned numBits) {
529 APInt API = getAllOnesValue(numBits);
530 API.clearBit(numBits - 1);
531 return API;
532 }
533
534 /// \brief Gets minimum unsigned value of APInt for a specific bit width.
535 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
536
537 /// \brief Gets minimum signed value of APInt for a specific bit width.
538 static APInt getSignedMinValue(unsigned numBits) {
539 APInt API(numBits, 0);
540 API.setBit(numBits - 1);
541 return API;
542 }
543
544 /// \brief Get the SignMask for a specific bit width.
545 ///
546 /// This is just a wrapper function of getSignedMinValue(), and it helps code
547 /// readability when we want to get a SignMask.
548 static APInt getSignMask(unsigned BitWidth) {
549 return getSignedMinValue(BitWidth);
550 }
551
552 /// \brief Get the all-ones value.
553 ///
554 /// \returns the all-ones value for an APInt of the specified bit-width.
555 static APInt getAllOnesValue(unsigned numBits) {
556 return APInt(numBits, WORD_MAX, true);
557 }
558
559 /// \brief Get the '0' value.
560 ///
561 /// \returns the '0' value for an APInt of the specified bit-width.
562 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
563
564 /// \brief Compute an APInt containing numBits highbits from this APInt.
565 ///
566 /// Get an APInt with the same BitWidth as this APInt, just zero mask
567 /// the low bits and right shift to the least significant bit.
568 ///
569 /// \returns the high "numBits" bits of this APInt.
570 APInt getHiBits(unsigned numBits) const;
571
572 /// \brief Compute an APInt containing numBits lowbits from this APInt.
573 ///
574 /// Get an APInt with the same BitWidth as this APInt, just zero mask
575 /// the high bits.
576 ///
577 /// \returns the low "numBits" bits of this APInt.
578 APInt getLoBits(unsigned numBits) const;
579
580 /// \brief Return an APInt with exactly one bit set in the result.
581 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
582 APInt Res(numBits, 0);
583 Res.setBit(BitNo);
584 return Res;
585 }
586
587 /// \brief Get a value with a block of bits set.
588 ///
589 /// Constructs an APInt value that has a contiguous range of bits set. The
590 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
591 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
592 /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
593 /// example, with parameters (32, 28, 4), you would get 0xF000000F.
594 ///
595 /// \param numBits the intended bit width of the result
596 /// \param loBit the index of the lowest bit set.
597 /// \param hiBit the index of the highest bit set.
598 ///
599 /// \returns An APInt value with the requested bits set.
600 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
601 APInt Res(numBits, 0);
602 Res.setBits(loBit, hiBit);
603 return Res;
604 }
605
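For instance, as a sketch mirroring the example in the comment above:

  APInt S = APInt::getBitsSet(32, 0, 16); // 0x0000FFFF
  APInt T = APInt::getBitsSet(32, 4, 16); // bits 4..15 set: 0x0000FFF0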
606 /// \brief Get a value with upper bits starting at loBit set.
607 ///
608 /// Constructs an APInt value that has a contiguous range of bits set. The
609 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
610 /// bits will be zero. For example, with parameters (32, 12) you would get
611 /// 0xFFFFF000.
612 ///
613 /// \param numBits the intended bit width of the result
614 /// \param loBit the index of the lowest bit to set.
615 ///
616 /// \returns An APInt value with the requested bits set.
617 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
618 APInt Res(numBits, 0);
619 Res.setBitsFrom(loBit);
620 return Res;
621 }
622
623 /// \brief Get a value with high bits set
624 ///
625 /// Constructs an APInt value that has the top hiBitsSet bits set.
626 ///
627 /// \param numBits the bitwidth of the result
628 /// \param hiBitsSet the number of high-order bits set in the result.
629 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
630 APInt Res(numBits, 0);
631 Res.setHighBits(hiBitsSet);
632 return Res;
633 }
634
635 /// \brief Get a value with low bits set
636 ///
637 /// Constructs an APInt value that has the bottom loBitsSet bits set.
638 ///
639 /// \param numBits the bitwidth of the result
640 /// \param loBitsSet the number of low-order bits set in the result.
641 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
642 APInt Res(numBits, 0);
643 Res.setLowBits(loBitsSet);
644 return Res;
645 }
646
647 /// \brief Return a value containing V broadcasted over NewLen bits.
648 static APInt getSplat(unsigned NewLen, const APInt &V);
649
650 /// \brief Determine if two APInts have the same value, after zero-extending
651 /// one of them (if needed!) to ensure that the bit-widths match.
652 static bool isSameValue(const APInt &I1, const APInt &I2) {
653 if (I1.getBitWidth() == I2.getBitWidth())
654 return I1 == I2;
655
656 if (I1.getBitWidth() > I2.getBitWidth())
657 return I1 == I2.zext(I1.getBitWidth());
658
659 return I1.zext(I2.getBitWidth()) == I2;
660 }
661
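A sketch of the width-normalizing comparison:

  APInt I1(32, 5), I2(64, 5);
  bool Same = APInt::isSameValue(I1, I2); // true: I1 is zero-extended to 64 bits
  // By contrast, I1 == I2 would assert, since operator== requires equal widths.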
662 /// \brief Overload to compute a hash_code for an APInt value.
663 friend hash_code hash_value(const APInt &Arg);
664
665 /// This function returns a pointer to the internal storage of the APInt.
666 /// This is useful for writing out the APInt in binary form without any
667 /// conversions.
668 const uint64_t *getRawData() const {
669 if (isSingleWord())
670 return &U.VAL;
671 return &U.pVal[0];
672 }
673
674 /// @}
675 /// \name Unary Operators
676 /// @{
677
678 /// \brief Postfix increment operator.
679 ///
680 /// Increments *this by 1.
681 ///
682 /// \returns a new APInt value representing the original value of *this.
683 const APInt operator++(int) {
684 APInt API(*this);
685 ++(*this);
686 return API;
687 }
688
689 /// \brief Prefix increment operator.
690 ///
691 /// \returns *this incremented by one
692 APInt &operator++();
693
694 /// \brief Postfix decrement operator.
695 ///
696 /// Decrements *this by 1.
697 ///
698 /// \returns a new APInt value representing the original value of *this.
699 const APInt operator--(int) {
700 APInt API(*this);
701 --(*this);
702 return API;
703 }
704
705 /// \brief Prefix decrement operator.
706 ///
707 /// \returns *this decremented by one.
708 APInt &operator--();
709
710 /// \brief Logical negation operator.
711 ///
712 /// Performs logical negation operation on this APInt.
713 ///
714 /// \returns true if *this is zero, false otherwise.
715 bool operator!() const {
716 if (isSingleWord())
717 return U.VAL == 0;
718 return countLeadingZerosSlowCase() == BitWidth;
719 }
720
721 /// @}
722 /// \name Assignment Operators
723 /// @{
724
725 /// \brief Copy assignment operator.
726 ///
727 /// \returns *this after assignment of RHS.
728 APInt &operator=(const APInt &RHS) {
729 // If the bitwidths are the same, we can avoid mucking with memory
730 if (isSingleWord() && RHS.isSingleWord()) {
731 U.VAL = RHS.U.VAL;
732 BitWidth = RHS.BitWidth;
733 return clearUnusedBits();
734 }
735
736 AssignSlowCase(RHS);
737 return *this;
738 }
739
740 /// @brief Move assignment operator.
741 APInt &operator=(APInt &&that) {
742    assert(this != &that && "Self-move not supported");
743 if (!isSingleWord())
744 delete[] U.pVal;
745
746 // Use memcpy so that type based alias analysis sees both VAL and pVal
747 // as modified.
748 memcpy(&U, &that.U, sizeof(U));
749
750 BitWidth = that.BitWidth;
751 that.BitWidth = 0;
752
753 return *this;
754 }
755
756 /// \brief Assignment operator.
757 ///
758 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
759 /// the bit width, the excess bits are truncated. If the bit width is larger
760 /// than 64, the value is zero filled in the unspecified high order bits.
761 ///
762 /// \returns *this after assignment of RHS value.
763 APInt &operator=(uint64_t RHS) {
764 if (isSingleWord()) {
765 U.VAL = RHS;
766 clearUnusedBits();
767 } else {
768 U.pVal[0] = RHS;
769 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
770 }
771 return *this;
772 }
773
774 /// \brief Bitwise AND assignment operator.
775 ///
776 /// Performs a bitwise AND operation on this APInt and RHS. The result is
777 /// assigned to *this.
778 ///
779 /// \returns *this after ANDing with RHS.
780 APInt &operator&=(const APInt &RHS) {
781    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
782 if (isSingleWord())
783 U.VAL &= RHS.U.VAL;
784 else
785 AndAssignSlowCase(RHS);
786 return *this;
787 }
788
789 /// \brief Bitwise AND assignment operator.
790 ///
791 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
792 /// logically zero-extended or truncated to match the bit-width of
793 /// the LHS.
794 APInt &operator&=(uint64_t RHS) {
795 if (isSingleWord()) {
796 U.VAL &= RHS;
797 return *this;
798 }
799 U.pVal[0] &= RHS;
800 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
801 return *this;
802 }
803
804 /// \brief Bitwise OR assignment operator.
805 ///
806 /// Performs a bitwise OR operation on this APInt and RHS. The result is
807  /// assigned to *this.
808 ///
809 /// \returns *this after ORing with RHS.
810 APInt &operator|=(const APInt &RHS) {
811    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
812 if (isSingleWord())
813 U.VAL |= RHS.U.VAL;
814 else
815 OrAssignSlowCase(RHS);
816 return *this;
817 }
818
819 /// \brief Bitwise OR assignment operator.
820 ///
821 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
822 /// logically zero-extended or truncated to match the bit-width of
823 /// the LHS.
824 APInt &operator|=(uint64_t RHS) {
825 if (isSingleWord()) {
826 U.VAL |= RHS;
827 clearUnusedBits();
828 } else {
829 U.pVal[0] |= RHS;
830 }
831 return *this;
832 }
833
834 /// \brief Bitwise XOR assignment operator.
835 ///
836 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
837 /// assigned to *this.
838 ///
839 /// \returns *this after XORing with RHS.
840 APInt &operator^=(const APInt &RHS) {
841    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
842 if (isSingleWord())
843 U.VAL ^= RHS.U.VAL;
844 else
845 XorAssignSlowCase(RHS);
846 return *this;
847 }
848
849 /// \brief Bitwise XOR assignment operator.
850 ///
851 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
852 /// logically zero-extended or truncated to match the bit-width of
853 /// the LHS.
854 APInt &operator^=(uint64_t RHS) {
855 if (isSingleWord()) {
856 U.VAL ^= RHS;
857 clearUnusedBits();
858 } else {
859 U.pVal[0] ^= RHS;
860 }
861 return *this;
862 }
863
864 /// \brief Multiplication assignment operator.
865 ///
866 /// Multiplies this APInt by RHS and assigns the result to *this.
867 ///
868 /// \returns *this
869 APInt &operator*=(const APInt &RHS);
870 APInt &operator*=(uint64_t RHS);
871
872 /// \brief Addition assignment operator.
873 ///
874 /// Adds RHS to *this and assigns the result to *this.
875 ///
876 /// \returns *this
877 APInt &operator+=(const APInt &RHS);
878 APInt &operator+=(uint64_t RHS);
879
880 /// \brief Subtraction assignment operator.
881 ///
882 /// Subtracts RHS from *this and assigns the result to *this.
883 ///
884 /// \returns *this
885 APInt &operator-=(const APInt &RHS);
886 APInt &operator-=(uint64_t RHS);
887
888 /// \brief Left-shift assignment function.
889 ///
890 /// Shifts *this left by shiftAmt and assigns the result to *this.
891 ///
892 /// \returns *this after shifting left by ShiftAmt
893 APInt &operator<<=(unsigned ShiftAmt) {
894    assert(ShiftAmt <= BitWidth && "Invalid shift amount");
895 if (isSingleWord()) {
896 if (ShiftAmt == BitWidth)
897 U.VAL = 0;
898 else
899 U.VAL <<= ShiftAmt;
900 return clearUnusedBits();
901 }
902 shlSlowCase(ShiftAmt);
903 return *this;
904 }
905
906 /// \brief Left-shift assignment function.
907 ///
908 /// Shifts *this left by shiftAmt and assigns the result to *this.
909 ///
910 /// \returns *this after shifting left by ShiftAmt
911 APInt &operator<<=(const APInt &ShiftAmt);
912
913 /// @}
914 /// \name Binary Operators
915 /// @{
916
917 /// \brief Multiplication operator.
918 ///
919 /// Multiplies this APInt by RHS and returns the result.
920 APInt operator*(const APInt &RHS) const;
921
922 /// \brief Left logical shift operator.
923 ///
924 /// Shifts this APInt left by \p Bits and returns the result.
925 APInt operator<<(unsigned Bits) const { return shl(Bits); }
926
927 /// \brief Left logical shift operator.
928 ///
929 /// Shifts this APInt left by \p Bits and returns the result.
930 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
931
932 /// \brief Arithmetic right-shift function.
933 ///
934 /// Arithmetic right-shift this APInt by shiftAmt.
935 APInt ashr(unsigned ShiftAmt) const {
936 APInt R(*this);
937 R.ashrInPlace(ShiftAmt);
938 return R;
939 }
940
941 /// Arithmetic right-shift this APInt by ShiftAmt in place.
942 void ashrInPlace(unsigned ShiftAmt) {
943    assert(ShiftAmt <= BitWidth && "Invalid shift amount");
944 if (isSingleWord()) {
945 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
946 if (ShiftAmt == BitWidth)
947 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
948 else
949 U.VAL = SExtVAL >> ShiftAmt;
950 clearUnusedBits();
951 return;
952 }
953 ashrSlowCase(ShiftAmt);
954 }
955
956 /// \brief Logical right-shift function.
957 ///
958 /// Logical right-shift this APInt by shiftAmt.
959 APInt lshr(unsigned shiftAmt) const {
960 APInt R(*this);
961 R.lshrInPlace(shiftAmt);
962 return R;
963 }
964
965 /// Logical right-shift this APInt by ShiftAmt in place.
966 void lshrInPlace(unsigned ShiftAmt) {
967    assert(ShiftAmt <= BitWidth && "Invalid shift amount");
968 if (isSingleWord()) {
969 if (ShiftAmt == BitWidth)
970 U.VAL = 0;
971 else
972 U.VAL >>= ShiftAmt;
973 return;
974 }
975 lshrSlowCase(ShiftAmt);
976 }
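Unlike the built-in shift operators, these accept a shift amount equal to the bit width; a sketch on 8-bit values:

  APInt V(8, 0x80);
  V.lshrInPlace(8); // shifting by the full width is defined here: V == 0
  APInt W(8, 0x80);
  W.ashrInPlace(3); // arithmetic shift sign-fills: W == 0xF0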
977
978 /// \brief Left-shift function.
979 ///
980 /// Left-shift this APInt by shiftAmt.
981 APInt shl(unsigned shiftAmt) const {
982 APInt R(*this);
983 R <<= shiftAmt;
984 return R;
985 }
986
987 /// \brief Rotate left by rotateAmt.
988 APInt rotl(unsigned rotateAmt) const;
989
990 /// \brief Rotate right by rotateAmt.
991 APInt rotr(unsigned rotateAmt) const;
992
993 /// \brief Arithmetic right-shift function.
994 ///
995 /// Arithmetic right-shift this APInt by shiftAmt.
996 APInt ashr(const APInt &ShiftAmt) const {
997 APInt R(*this);
998 R.ashrInPlace(ShiftAmt);
999 return R;
1000 }
1001
1002 /// Arithmetic right-shift this APInt by shiftAmt in place.
1003 void ashrInPlace(const APInt &shiftAmt);
1004
1005 /// \brief Logical right-shift function.
1006 ///
1007 /// Logical right-shift this APInt by shiftAmt.
1008 APInt lshr(const APInt &ShiftAmt) const {
1009 APInt R(*this);
1010 R.lshrInPlace(ShiftAmt);
1011 return R;
1012 }
1013
1014 /// Logical right-shift this APInt by ShiftAmt in place.
1015 void lshrInPlace(const APInt &ShiftAmt);
1016
1017 /// \brief Left-shift function.
1018 ///
1019 /// Left-shift this APInt by shiftAmt.
1020 APInt shl(const APInt &ShiftAmt) const {
1021 APInt R(*this);
1022 R <<= ShiftAmt;
1023 return R;
1024 }
1025
1026 /// \brief Rotate left by rotateAmt.
1027 APInt rotl(const APInt &rotateAmt) const;
1028
1029 /// \brief Rotate right by rotateAmt.
1030 APInt rotr(const APInt &rotateAmt) const;
1031
1032 /// \brief Unsigned division operation.
1033 ///
1034 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1035 /// RHS are treated as unsigned quantities for purposes of this division.
1036 ///
1037 /// \returns a new APInt value containing the division result
1038 APInt udiv(const APInt &RHS) const;
1039 APInt udiv(uint64_t RHS) const;
1040
1041 /// \brief Signed division function for APInt.
1042 ///
1043 /// Signed divide this APInt by APInt RHS.
1044 APInt sdiv(const APInt &RHS) const;
1045 APInt sdiv(int64_t RHS) const;
1046
1047 /// \brief Unsigned remainder operation.
1048 ///
1049 /// Perform an unsigned remainder operation on this APInt with RHS being the
1050 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1051 /// of this operation. Note that this is a true remainder operation and not a
1052 /// modulo operation because the sign follows the sign of the dividend which
1053 /// is *this.
1054 ///
1055 /// \returns a new APInt value containing the remainder result
1056 APInt urem(const APInt &RHS) const;
1057 uint64_t urem(uint64_t RHS) const;
1058
1059 /// \brief Function for signed remainder operation.
1060 ///
1061 /// Signed remainder operation on APInt.
1062 APInt srem(const APInt &RHS) const;
1063 int64_t srem(int64_t RHS) const;
1064
1065 /// \brief Dual division/remainder interface.
1066 ///
1067 /// Sometimes it is convenient to divide two APInt values and obtain both the
1068 /// quotient and remainder. This function does both operations in the same
1069 /// computation making it a little more efficient. The pair of input arguments
1070 /// may overlap with the pair of output arguments. It is safe to call
1071 /// udivrem(X, Y, X, Y), for example.
1072 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1073 APInt &Remainder);
1074 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1075 uint64_t &Remainder);
1076
1077 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1078 APInt &Remainder);
1079 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1080 int64_t &Remainder);
1081
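A sketch of the combined interface:

  APInt X(32, 100), Y(32, 7);
  APInt Q(32, 0), R(32, 0);
  APInt::udivrem(X, Y, Q, R); // one computation: Q == 14, R == 2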
1082 // Operations that return overflow indicators.
1083 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1084 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1085 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1086 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1087 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1088 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1089 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1090 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1091 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1092
1093 /// \brief Array-indexing support.
1094 ///
1095 /// \returns the bit value at bitPosition
1096 bool operator[](unsigned bitPosition) const {
1097    assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
1098 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1099 }
1100
1101 /// @}
1102 /// \name Comparison Operators
1103 /// @{
1104
1105 /// \brief Equality operator.
1106 ///
1107 /// Compares this APInt with RHS for the validity of the equality
1108 /// relationship.
1109 bool operator==(const APInt &RHS) const {
1110    assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
1111 if (isSingleWord())
1112 return U.VAL == RHS.U.VAL;
1113 return EqualSlowCase(RHS);
1114 }
1115
1116 /// \brief Equality operator.
1117 ///
1118 /// Compares this APInt with a uint64_t for the validity of the equality
1119 /// relationship.
1120 ///
1121 /// \returns true if *this == Val
1122 bool operator==(uint64_t Val) const {
1123 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1124 }
1125
1126 /// \brief Equality comparison.
1127 ///
1128 /// Compares this APInt with RHS for the validity of the equality
1129 /// relationship.
1130 ///
1131 /// \returns true if *this == Val
1132 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1133
1134 /// \brief Inequality operator.
1135 ///
1136 /// Compares this APInt with RHS for the validity of the inequality
1137 /// relationship.
1138 ///
1139 /// \returns true if *this != Val
1140 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1141
1142 /// \brief Inequality operator.
1143 ///
1144 /// Compares this APInt with a uint64_t for the validity of the inequality
1145 /// relationship.
1146 ///
1147 /// \returns true if *this != Val
1148 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1149
1150 /// \brief Inequality comparison
1151 ///
1152 /// Compares this APInt with RHS for the validity of the inequality
1153 /// relationship.
1154 ///
1155 /// \returns true if *this != Val
1156 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1157
1158 /// \brief Unsigned less than comparison
1159 ///
1160 /// Regards both *this and RHS as unsigned quantities and compares them for
1161 /// the validity of the less-than relationship.
1162 ///
1163 /// \returns true if *this < RHS when both are considered unsigned.
1164 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1165
1166 /// \brief Unsigned less than comparison
1167 ///
1168  /// Regards *this as an unsigned quantity and compares it with RHS for
1169 /// the validity of the less-than relationship.
1170 ///
1171 /// \returns true if *this < RHS when considered unsigned.
1172 bool ult(uint64_t RHS) const {
1173 // Only need to check active bits if not a single word.
1174 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1175 }
1176
1177 /// \brief Signed less than comparison
1178 ///
1179 /// Regards both *this and RHS as signed quantities and compares them for
1180 /// validity of the less-than relationship.
1181 ///
1182 /// \returns true if *this < RHS when both are considered signed.
1183 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1184
1185 /// \brief Signed less than comparison
1186 ///
1187  /// Regards *this as a signed quantity and compares it with RHS for
1188 /// the validity of the less-than relationship.
1189 ///
1190 /// \returns true if *this < RHS when considered signed.
1191 bool slt(int64_t RHS) const {
1192 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1193 : getSExtValue() < RHS;
1194 }
1195
1196 /// \brief Unsigned less or equal comparison
1197 ///
1198 /// Regards both *this and RHS as unsigned quantities and compares them for
1199 /// validity of the less-or-equal relationship.
1200 ///
1201 /// \returns true if *this <= RHS when both are considered unsigned.
1202 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1203
1204 /// \brief Unsigned less or equal comparison
1205 ///
1206  /// Regards *this as an unsigned quantity and compares it with RHS for
1207 /// the validity of the less-or-equal relationship.
1208 ///
1209 /// \returns true if *this <= RHS when considered unsigned.
1210 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1211
1212 /// \brief Signed less or equal comparison
1213 ///
1214 /// Regards both *this and RHS as signed quantities and compares them for
1215 /// validity of the less-or-equal relationship.
1216 ///
1217 /// \returns true if *this <= RHS when both are considered signed.
1218 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1219
1220 /// \brief Signed less or equal comparison
1221 ///
1222  /// Regards *this as a signed quantity and compares it with RHS for the
1223 /// validity of the less-or-equal relationship.
1224 ///
1225 /// \returns true if *this <= RHS when considered signed.
1226 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1227
1228  /// \brief Unsigned greater than comparison
1229 ///
1230 /// Regards both *this and RHS as unsigned quantities and compares them for
1231 /// the validity of the greater-than relationship.
1232 ///
1233 /// \returns true if *this > RHS when both are considered unsigned.
1234 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1235
1236 /// \brief Unsigned greater than comparison
1237 ///
1238  /// Regards *this as an unsigned quantity and compares it with RHS for
1239 /// the validity of the greater-than relationship.
1240 ///
1241 /// \returns true if *this > RHS when considered unsigned.
1242 bool ugt(uint64_t RHS) const {
1243 // Only need to check active bits if not a single word.
1244 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1245 }
1246
1247  /// \brief Signed greater than comparison
1248 ///
1249 /// Regards both *this and RHS as signed quantities and compares them for the
1250 /// validity of the greater-than relationship.
1251 ///
1252 /// \returns true if *this > RHS when both are considered signed.
1253 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1254
1255 /// \brief Signed greater than comparison
1256 ///
1257  /// Regards *this as a signed quantity and compares it with RHS for
1258 /// the validity of the greater-than relationship.
1259 ///
1260 /// \returns true if *this > RHS when considered signed.
1261 bool sgt(int64_t RHS) const {
1262 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1263 : getSExtValue() > RHS;
1264 }
1265
1266 /// \brief Unsigned greater or equal comparison
1267 ///
1268 /// Regards both *this and RHS as unsigned quantities and compares them for
1269 /// validity of the greater-or-equal relationship.
1270 ///
1271 /// \returns true if *this >= RHS when both are considered unsigned.
1272 bool uge(const APInt &RHS) const { return !ult(RHS); }
1273
1274 /// \brief Unsigned greater or equal comparison
1275 ///
1276  /// Regards *this as an unsigned quantity and compares it with RHS for
1277 /// the validity of the greater-or-equal relationship.
1278 ///
1279 /// \returns true if *this >= RHS when considered unsigned.
1280 bool uge(uint64_t RHS) const { return !ult(RHS); }
1281
1282 /// \brief Signed greater or equal comparison
1283 ///
1284 /// Regards both *this and RHS as signed quantities and compares them for
1285 /// validity of the greater-or-equal relationship.
1286 ///
1287 /// \returns true if *this >= RHS when both are considered signed.
1288 bool sge(const APInt &RHS) const { return !slt(RHS); }
1289
1290 /// \brief Signed greater or equal comparison
1291 ///
1292  /// Regards *this as a signed quantity and compares it with RHS for
1293 /// the validity of the greater-or-equal relationship.
1294 ///
1295 /// \returns true if *this >= RHS when considered signed.
1296 bool sge(int64_t RHS) const { return !slt(RHS); }
1297
1298 /// This operation tests if there are any pairs of corresponding bits
1299 /// between this APInt and RHS that are both set.
1300 bool intersects(const APInt &RHS) const {
1301    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1302 if (isSingleWord())
1303 return (U.VAL & RHS.U.VAL) != 0;
1304 return intersectsSlowCase(RHS);
1305 }
1306
1307 /// This operation checks that all bits set in this APInt are also set in RHS.
1308 bool isSubsetOf(const APInt &RHS) const {
1309    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1310 if (isSingleWord())
1311 return (U.VAL & ~RHS.U.VAL) == 0;
1312 return isSubsetOfSlowCase(RHS);
1313 }
1314
1315 /// @}
1316 /// \name Resizing Operators
1317 /// @{
1318
1319 /// \brief Truncate to new width.
1320 ///
1321 /// Truncate the APInt to a specified width. It is an error to specify a width
1322 /// that is greater than or equal to the current width.
1323 APInt trunc(unsigned width) const;
1324
1325 /// \brief Sign extend to a new width.
1326 ///
1327 /// This operation sign extends the APInt to a new width. If the high order
1328 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1329 /// It is an error to specify a width that is less than or equal to the
1330 /// current width.
1331 APInt sext(unsigned width) const;
1332
1333 /// \brief Zero extend to a new width.
1334 ///
1335 /// This operation zero extends the APInt to a new width. The high order bits
1336 /// are filled with 0 bits. It is an error to specify a width that is less
1337 /// than or equal to the current width.
1338 APInt zext(unsigned width) const;
1339
1340 /// \brief Sign extend or truncate to width
1341 ///
1342 /// Make this APInt have the bit width given by \p width. The value is sign
1343 /// extended, truncated, or left alone to make it that width.
1344 APInt sextOrTrunc(unsigned width) const;
1345
1346 /// \brief Zero extend or truncate to width
1347 ///
1348 /// Make this APInt have the bit width given by \p width. The value is zero
1349 /// extended, truncated, or left alone to make it that width.
1350 APInt zextOrTrunc(unsigned width) const;
1351
1352 /// \brief Sign extend or truncate to width
1353 ///
1354 /// Make this APInt have the bit width given by \p width. The value is sign
1355 /// extended, or left alone to make it that width.
1356 APInt sextOrSelf(unsigned width) const;
1357
1358 /// \brief Zero extend or truncate to width
1359 ///
1360 /// Make this APInt have the bit width given by \p width. The value is zero
1361 /// extended, or left alone to make it that width.
1362 APInt zextOrSelf(unsigned width) const;
1363
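A sketch of the resizing operations on a 32-bit value whose high bit is set:

  APInt T(32, 0xFFFF0000);
  APInt Lo = T.trunc(16); // keeps the low 16 bits: 0x0000
  APInt Sx = T.sext(64);  // high bit set, so ones-filled: 0xFFFFFFFFFFFF0000
  APInt Zx = T.zext(64);  // zero-filled: 0x00000000FFFF0000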
1364 /// @}
1365 /// \name Bit Manipulation Operators
1366 /// @{
1367
1368 /// \brief Set every bit to 1.
1369 void setAllBits() {
1370 if (isSingleWord())
1371 U.VAL = WORD_MAX;
1372 else
1373 // Set all the bits in all the words.
1374 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1375 // Clear the unused ones
1376 clearUnusedBits();
1377 }
1378
1379 /// \brief Set a given bit to 1.
1380 ///
1381  /// Set to 1 the bit whose position is given by "BitPosition".
1382 void setBit(unsigned BitPosition) {
1383    assert(BitPosition <= BitWidth && "BitPosition out of range");
1384 WordType Mask = maskBit(BitPosition);
1385 if (isSingleWord())
1386 U.VAL |= Mask;
1387 else
1388 U.pVal[whichWord(BitPosition)] |= Mask;
1389 }
1390
1391 /// Set the sign bit to 1.
1392 void setSignBit() {
1393 setBit(BitWidth - 1);
1394 }
1395
1396 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1397 void setBits(unsigned loBit, unsigned hiBit) {
1398    assert(hiBit <= BitWidth && "hiBit out of range");
1399    assert(loBit <= BitWidth && "loBit out of range");
1400    assert(loBit <= hiBit && "loBit greater than hiBit");
1401 if (loBit == hiBit)
1402 return;
1403 if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
1404 uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1405 mask <<= loBit;
1406 if (isSingleWord())
1407 U.VAL |= mask;
1408 else
1409 U.pVal[0] |= mask;
1410 } else {
1411 setBitsSlowCase(loBit, hiBit);
1412 }
1413 }
1414
1415 /// Set the top bits starting from loBit.
1416 void setBitsFrom(unsigned loBit) {
1417 return setBits(loBit, BitWidth);
1418 }
1419
1420 /// Set the bottom loBits bits.
1421 void setLowBits(unsigned loBits) {
1422 return setBits(0, loBits);
1423 }
1424
1425 /// Set the top hiBits bits.
1426 void setHighBits(unsigned hiBits) {
1427 return setBits(BitWidth - hiBits, BitWidth);
1428 }
1429
1430 /// \brief Set every bit to 0.
1431 void clearAllBits() {
1432 if (isSingleWord())
1433 U.VAL = 0;
1434 else
1435 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1436 }
1437
1438 /// \brief Set a given bit to 0.
1439 ///
1440  /// Set to 0 the bit whose position is given by "BitPosition".
1441 void clearBit(unsigned BitPosition) {
1442    assert(BitPosition <= BitWidth && "BitPosition out of range");
1443 WordType Mask = ~maskBit(BitPosition);
1444 if (isSingleWord())
1445 U.VAL &= Mask;
1446 else
1447 U.pVal[whichWord(BitPosition)] &= Mask;
1448 }
1449
1450 /// Set the sign bit to 0.
1451 void clearSignBit() {
1452 clearBit(BitWidth - 1);
1453 }
1454
1455 /// \brief Toggle every bit to its opposite value.
1456 void flipAllBits() {
1457 if (isSingleWord()) {
1458 U.VAL ^= WORD_MAX;
1459 clearUnusedBits();
1460 } else {
1461 flipAllBitsSlowCase();
1462 }
1463 }
1464
1465 /// \brief Toggles a given bit to its opposite value.
1466 ///
1467 /// Toggle a given bit to its opposite value whose position is given
1468 /// as "bitPosition".
1469 void flipBit(unsigned bitPosition);
1470
1471 /// Negate this APInt in place.
1472 void negate() {
1473 flipAllBits();
1474 ++(*this);
1475 }
1476
1477 /// Insert the bits from a smaller APInt starting at bitPosition.
1478 void insertBits(const APInt &SubBits, unsigned bitPosition);
1479
1480 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1481 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1482
1483 /// @}
1484 /// \name Value Characterization Functions
1485 /// @{
1486
1487 /// \brief Return the number of bits in the APInt.
1488 unsigned getBitWidth() const { return BitWidth; }
1489
1490 /// \brief Get the number of words.
1491 ///
1492  /// Here one word's bitwidth equals that of uint64_t.
1493 ///
1494 /// \returns the number of words to hold the integer value of this APInt.
1495 unsigned getNumWords() const { return getNumWords(BitWidth); }
1496
1497 /// \brief Get the number of words.
1498 ///
1499  /// *NOTE* Here one word's bitwidth equals that of uint64_t.
1500 ///
1501 /// \returns the number of words to hold the integer value with a given bit
1502 /// width.
1503 static unsigned getNumWords(unsigned BitWidth) {
1504 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1505 }
1506
1507 /// \brief Compute the number of active bits in the value
1508 ///
1509 /// This function returns the number of active bits which is defined as the
1510 /// bit width minus the number of leading zeros. This is used in several
1511 /// computations to see how "wide" the value is.
1512 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1513
1514 /// \brief Compute the number of active words in the value of this APInt.
1515 ///
1516 /// This is used in conjunction with getActiveData to extract the raw value of
1517 /// the APInt.
1518 unsigned getActiveWords() const {
1519 unsigned numActiveBits = getActiveBits();
1520 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1521 }
1522
1523 /// \brief Get the minimum bit size for this signed APInt
1524 ///
1525 /// Computes the minimum bit width for this APInt while considering it to be a
1526 /// signed (and probably negative) value. If the value is not negative, this
1527 /// function returns the same value as getActiveBits()+1. Otherwise, it
1528 /// returns the smallest bit width that will retain the negative value. For
1529 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1530 /// for -1, this function will always return 1.
1531 unsigned getMinSignedBits() const {
1532 if (isNegative())
1533 return BitWidth - countLeadingOnes() + 1;
1534 return getActiveBits() + 1;
1535 }
1536
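Worked values for the two width measures, as a sketch:

  APInt P(32, 5);          // 0b101: getActiveBits() == 3, getMinSignedBits() == 4
  APInt N(32, 0xFFFFFFFF); // -1 as signed: getActiveBits() == 32,
                           // getMinSignedBits() == 1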
1537 /// \brief Get zero extended value
1538 ///
1539 /// This method attempts to return the value of this APInt as a zero extended
1540 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1541 /// uint64_t. Otherwise an assertion will result.
1542 uint64_t getZExtValue() const {
1543 if (isSingleWord())
1544 return U.VAL;
1545    assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
1546 return U.pVal[0];
1547 }
1548
1549 /// \brief Get sign extended value
1550 ///
1551 /// This method attempts to return the value of this APInt as a sign extended
1552 /// int64_t. The bit width must be <= 64 or the value must fit within an
1553 /// int64_t. Otherwise an assertion will result.
1554 int64_t getSExtValue() const {
1555 if (isSingleWord())
1556 return SignExtend64(U.VAL, BitWidth);
1557    assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
1558 return int64_t(U.pVal[0]);
1559 }
1560
1561 /// \brief Get bits required for string value.
1562 ///
1563 /// This method determines how many bits are required to hold the APInt
1564 /// equivalent of the string given by \p str.
1565 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1566
1567 /// \brief The APInt version of the countLeadingZeros functions in
1568 /// MathExtras.h.
1569 ///
1570 /// It counts the number of zeros from the most significant bit to the first
1571 /// one bit.
1572 ///
1573 /// \returns BitWidth if the value is zero, otherwise returns the number of
1574  /// zeros from the most significant bit to the first one bit.
1575 unsigned countLeadingZeros() const {
1576 if (isSingleWord()) {
1577 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1578 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1579 }
1580 return countLeadingZerosSlowCase();
1581 }
1582
1583 /// \brief Count the number of leading one bits.
1584 ///
1585 /// This function is an APInt version of the countLeadingOnes
1586 /// functions in MathExtras.h. It counts the number of ones from the most
1587 /// significant bit to the first zero bit.
1588 ///
1589 /// \returns 0 if the high order bit is not set, otherwise returns the number
1590 /// of 1 bits from the most significant to the least
1591 unsigned countLeadingOnes() const {
1592 if (isSingleWord())
1593 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1594 return countLeadingOnesSlowCase();
1595 }
1596
1597 /// Computes the number of leading bits of this APInt that are equal to its
1598 /// sign bit.
1599 unsigned getNumSignBits() const {
1600 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1601 }
1602
1603 /// \brief Count the number of trailing zero bits.
1604 ///
1605 /// This function is an APInt version of the countTrailingZeros
1606 /// functions in MathExtras.h. It counts the number of zeros from the least
1607 /// significant bit to the first set bit.
1608 ///
1609 /// \returns BitWidth if the value is zero, otherwise returns the number of
1610 /// zeros from the least significant bit to the first one bit.
1611 unsigned countTrailingZeros() const {
1612 if (isSingleWord())
1613 return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
1614 return countTrailingZerosSlowCase();
1615 }
1616
1617 /// \brief Count the number of trailing one bits.
1618 ///
1619 /// This function is an APInt version of the countTrailingOnes
1620 /// functions in MathExtras.h. It counts the number of ones from the least
1621 /// significant bit to the first zero bit.
1622 ///
1623 /// \returns BitWidth if the value is all ones, otherwise returns the number
1624 /// of ones from the least significant bit to the first zero bit.
1625 unsigned countTrailingOnes() const {
1626 if (isSingleWord())
1627 return llvm::countTrailingOnes(U.VAL);
1628 return countTrailingOnesSlowCase();
1629 }
1630
1631 /// \brief Count the number of bits set.
1632 ///
1633 /// This function is an APInt version of the countPopulation functions
1634 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1635 ///
1636 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1637 unsigned countPopulation() const {
1638 if (isSingleWord())
1639 return llvm::countPopulation(U.VAL);
1640 return countPopulationSlowCase();
1641 }
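The four counts on one 8-bit value, as a sketch:

  APInt C8(8, 0x0F);
  // countLeadingZeros() == 4, countTrailingZeros() == 0,
  // countTrailingOnes() == 4, countPopulation() == 4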
1642
1643 /// @}
1644 /// \name Conversion Functions
1645 /// @{
1646 void print(raw_ostream &OS, bool isSigned) const;
1647
1648  /// Converts an APInt to a string and appends it to Str. Str is commonly a
1649 /// SmallString.
1650 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1651 bool formatAsCLiteral = false) const;
1652
1653 /// Considers the APInt to be unsigned and converts it into a string in the
1654  /// radix given. The radix can be 2, 8, 10, 16, or 36.
1655 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1656 toString(Str, Radix, false, false);
1657 }
1658
1659 /// Considers the APInt to be signed and converts it into a string in the
1660 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1661 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1662 toString(Str, Radix, true, false);
1663 }
1664
1665 /// \brief Return the APInt as a std::string.
1666 ///
1667 /// Note that this is an inefficient method. It is better to pass in a
1668 /// SmallVector/SmallString to the methods above to avoid thrashing the heap
1669 /// for the string.
1670 std::string toString(unsigned Radix, bool Signed) const;
1671
1672 /// \returns a byte-swapped representation of this APInt Value.
1673 APInt byteSwap() const;
1674
1675  /// \returns the value with the bit representation of this APInt
1676  /// reversed.
1677 APInt reverseBits() const;
1678
1679 /// \brief Converts this APInt to a double value.
1680 double roundToDouble(bool isSigned) const;
1681
1682 /// \brief Converts this unsigned APInt to a double value.
1683 double roundToDouble() const { return roundToDouble(false); }
1684
1685 /// \brief Converts this signed APInt to a double value.
1686 double signedRoundToDouble() const { return roundToDouble(true); }
1687
1688 /// \brief Converts APInt bits to a double
1689 ///
1690 /// The conversion does not do a translation from integer to double, it just
1691 /// re-interprets the bits as a double. Note that it is valid to do this on
1692 /// any bit width. Exactly 64 bits will be translated.
1693 double bitsToDouble() const {
1694 return BitsToDouble(getWord(0));
1695 }
1696
1697  /// \brief Converts APInt bits to a float
1698 ///
1699 /// The conversion does not do a translation from integer to float, it just
1700 /// re-interprets the bits as a float. Note that it is valid to do this on
1701 /// any bit width. Exactly 32 bits will be translated.
1702 float bitsToFloat() const {
1703 return BitsToFloat(getWord(0));
1704 }
1705
1706 /// \brief Converts a double to APInt bits.
1707 ///
1708 /// The conversion does not do a translation from double to integer, it just
1709 /// re-interprets the bits of the double.
1710 static APInt doubleToBits(double V) {
1711    return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V));
1712 }
1713
1714 /// \brief Converts a float to APInt bits.
1715 ///
1716 /// The conversion does not do a translation from float to integer, it just
1717 /// re-interprets the bits of the float.
1718 static APInt floatToBits(float V) {
1719    return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V));
1720 }
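These two round-trip, as a sketch (assumes IEEE-754 doubles):

  APInt Bits = APInt::doubleToBits(1.0); // 64-bit pattern 0x3FF0000000000000
  double D = Bits.bitsToDouble();        // reinterprets back: exactly 1.0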
1721
1722 /// @}
1723 /// \name Mathematics Operations
1724 /// @{
1725
1726 /// \returns the floor log base 2 of this APInt.
1727 unsigned logBase2() const { return getActiveBits() - 1; }
1728
1729 /// \returns the ceil log base 2 of this APInt.
1730 unsigned ceilLogBase2() const {
1731 APInt temp(*this);
1732 --temp;
1733 return temp.getActiveBits();
1734 }
1735
1736 /// \returns the nearest log base 2 of this APInt. Ties round up.
1737 ///
1738 /// NOTE: When we have a BitWidth of 1, we define:
1739 ///
1740 /// log2(0) = UINT32_MAX
1741 /// log2(1) = 0
1742 ///
1743 /// to get around any mathematical concerns resulting from
1744  /// referencing 2 in a space where 2 does not exist.
1745 unsigned nearestLogBase2() const {
1746 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1747 // get 0. If VAL is 0, we get WORD_MAX which gets truncated to
1748 // UINT32_MAX.
1749 if (BitWidth == 1)
1750 return U.VAL - 1;
1751
1752 // Handle the zero case.
1753 if (isNullValue())
1754      return UINT32_MAX;
1755
1756 // The non-zero case is handled by computing:
1757 //
1758 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1759 //
1760 // where x[i] is referring to the value of the ith bit of x.
1761 unsigned lg = logBase2();
1762 return lg + unsigned((*this)[lg - 1]);
1763 }
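Worked values for the three logarithms, as a sketch:

  APInt V6(32, 6); // logBase2() == 2 (floor), ceilLogBase2() == 3,
                   // nearestLogBase2() == 3 (log2(6) ~ 2.58, ties round up)
  APInt V5(32, 5); // nearestLogBase2() == 2 (log2(5) ~ 2.32)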
1764
1765  /// \returns the log base 2 of this APInt if it is an exact power of two, -1
1766 /// otherwise
1767 int32_t exactLogBase2() const {
1768 if (!isPowerOf2())
1769 return -1;
1770 return logBase2();
1771 }
1772
1773 /// \brief Compute the square root
1774 APInt sqrt() const;
1775
1776  /// \brief Get the absolute value.
1777  ///
1778  /// If *this is < 0 then return -(*this), otherwise return *this.
1779 APInt abs() const {
1780 if (isNegative())
1781 return -(*this);
1782 return *this;
1783 }
1784
1785 /// \returns the multiplicative inverse for a given modulo.
1786 APInt multiplicativeInverse(const APInt &modulo) const;
1787
1788 /// @}
1789 /// \name Support for division by constant
1790 /// @{
1791
1792 /// Calculate the magic number for signed division by a constant.
1793 struct ms;
1794 ms magic() const;
1795
1796 /// Calculate the magic number for unsigned division by a constant.
1797 struct mu;
1798 mu magicu(unsigned LeadingZeros = 0) const;
1799
1800 /// @}
1801 /// \name Building-block Operations for APInt and APFloat
1802 /// @{
1803
1804 // These building block operations operate on a representation of arbitrary
1805 // precision, two's-complement, bignum integer values. They should be
1806 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1807 // generally a pointer to the base of an array of integer parts, representing
1808 // an unsigned bignum, and a count of how many parts there are.
1809
1810 /// Sets the least significant part of a bignum to the input value, and zeroes
1811 /// out higher parts.
1812 static void tcSet(WordType *, WordType, unsigned);
1813
1814 /// Assign one bignum to another.
1815 static void tcAssign(WordType *, const WordType *, unsigned);
1816
1817 /// Returns true if a bignum is zero, false otherwise.
1818 static bool tcIsZero(const WordType *, unsigned);
1819
1820 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1821 static int tcExtractBit(const WordType *, unsigned bit);
1822
1823 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1824 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1825 /// significant bit of DST. All high bits above srcBITS in DST are
1826 /// zero-filled.
1827 static void tcExtract(WordType *, unsigned dstCount,
1828 const WordType *, unsigned srcBits,
1829 unsigned srcLSB);
1830
1831 /// Set the given bit of a bignum. Zero-based.
1832 static void tcSetBit(WordType *, unsigned bit);
1833
1834 /// Clear the given bit of a bignum. Zero-based.
1835 static void tcClearBit(WordType *, unsigned bit);
1836
1837 /// Returns the bit number of the least or most significant set bit of a
1838 /// number. If the input number has no bits set, -1U is returned.
1839 static unsigned tcLSB(const WordType *, unsigned n);
1840 static unsigned tcMSB(const WordType *parts, unsigned n);
1841
1842 /// Negate a bignum in-place.
1843 static void tcNegate(WordType *, unsigned);
1844
1845 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1846 static WordType tcAdd(WordType *, const WordType *,
1847 WordType carry, unsigned);
1848 /// DST += RHS. Returns the carry flag.
1849 static WordType tcAddPart(WordType *, WordType, unsigned);
1850
1851 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1852 static WordType tcSubtract(WordType *, const WordType *,
1853 WordType carry, unsigned);
1854 /// DST -= RHS. Returns the carry flag.
1855 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1856
1857 /// DST += SRC * MULTIPLIER + PART if add is true
1858 /// DST = SRC * MULTIPLIER + PART if add is false
1859 ///
1860 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1861 /// start at the same point, i.e. DST == SRC.
1862 ///
1863 /// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is returned.
1864 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1865 /// result, and if all of the omitted higher parts were zero return zero,
1866 /// otherwise overflow occurred and return one.
1867 static int tcMultiplyPart(WordType *dst, const WordType *src,
1868 WordType multiplier, WordType carry,
1869 unsigned srcParts, unsigned dstParts,
1870 bool add);
1871
1872 /// DST = LHS * RHS, where DST has the same width as the operands and is
1873 /// filled with the least significant parts of the result. Returns one if
1874 /// overflow occurred, otherwise zero. DST must be disjoint from both
1875 /// operands.
1876 static int tcMultiply(WordType *, const WordType *, const WordType *,
1877 unsigned);
1878
1879 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1880 /// operands. No overflow occurs. DST must be disjoint from both operands.
1881 static void tcFullMultiply(WordType *, const WordType *,
1882 const WordType *, unsigned, unsigned);
1883
1884 /// If RHS is zero, LHS and REMAINDER are left unchanged; return one.
1885 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1886 /// REMAINDER to the remainder, return zero. i.e.
1887 ///
1888 /// OLD_LHS = RHS * LHS + REMAINDER
1889 ///
1890 /// SCRATCH is a bignum of the same size as the operands and result for use by
1891 /// the routine; its contents need not be initialized and are destroyed. LHS,
1892 /// REMAINDER and SCRATCH must be distinct.
1893 static int tcDivide(WordType *lhs, const WordType *rhs,
1894 WordType *remainder, WordType *scratch,
1895 unsigned parts);
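// Example (editor's sketch): 7 / 2 with single-word bignums.
//   APInt::WordType L[1] = {7}, R[1] = {2}, Rem[1], Scratch[1];
//   APInt::tcDivide(L, R, Rem, Scratch, 1); // returns 0; L[0] == 3, Rem[0] == 1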
1896
1897 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1898 /// restrictions on Count.
1899 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1900
1901 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1902 /// restrictions on Count.
1903 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
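// Example (editor's sketch): a count of a whole word or more is legal here
// because the implementation can move whole words instead of performing one
// native shift (shifting a 64-bit word by 64 in C++ would be undefined).
//   APInt::WordType Parts[2] = {~0ULL, 0}; // least significant word first
//   APInt::tcShiftLeft(Parts, 2, 64);      // Parts is now {0, ~0ULL}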
1904
1905 /// The obvious AND, OR and XOR and complement operations.
1906 static void tcAnd(WordType *, const WordType *, unsigned);
1907 static void tcOr(WordType *, const WordType *, unsigned);
1908 static void tcXor(WordType *, const WordType *, unsigned);
1909 static void tcComplement(WordType *, unsigned);
1910
1911 /// Comparison (unsigned) of two bignums.
1912 static int tcCompare(const WordType *, const WordType *, unsigned);
1913
1914 /// Increment a bignum in-place. Return the carry flag.
1915 static WordType tcIncrement(WordType *dst, unsigned parts) {
1916 return tcAddPart(dst, 1, parts);
1917 }
1918
1919 /// Decrement a bignum in-place. Return the borrow flag.
1920 static WordType tcDecrement(WordType *dst, unsigned parts) {
1921 return tcSubtractPart(dst, 1, parts);
1922 }
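// Example (editor's sketch): carry propagation out of the top word.
//   APInt::WordType W[1] = {~0ULL};
//   APInt::tcIncrement(W, 1); // W[0] becomes 0; the returned carry is 1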
1923
1924 /// Set the least significant BITS and clear the rest.
1925 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
1926
1927 /// \brief debug method
1928 void dump() const;
1929
1930 /// @}
1931};
1932
1933/// Magic data for optimising signed division by a constant.
1934struct APInt::ms {
1935 APInt m; ///< magic number
1936 unsigned s; ///< shift amount
1937};
1938
1939/// Magic data for optimising unsigned division by a constant.
1940struct APInt::mu {
1941 APInt m; ///< magic number
1942 bool a; ///< add indicator
1943 unsigned s; ///< shift amount
1944};
1945
1946inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1947
1948inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1949
1950/// \brief Unary bitwise complement operator.
1951///
1952/// \returns an APInt that is the bitwise complement of \p v.
1953inline APInt operator~(APInt v) {
1954 v.flipAllBits();
1955 return v;
1956}
1957
1958inline APInt operator&(APInt a, const APInt &b) {
1959 a &= b;
1960 return a;
1961}
1962
1963inline APInt operator&(const APInt &a, APInt &&b) {
1964 b &= a;
1965 return std::move(b);
1966}
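// Editor's note (not in the original source): the (const APInt &, APInt &&)
// overloads let an expression such as `x & (y ^ z)` reuse the temporary's
// heap allocation for multi-word values instead of copying; x, y and z here
// are illustrative APInts of equal width.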
1967
1968inline APInt operator&(APInt a, uint64_t RHS) {
1969 a &= RHS;
1970 return a;
1971}
1972
1973inline APInt operator&(uint64_t LHS, APInt b) {
1974 b &= LHS;
1975 return b;
1976}
1977
1978inline APInt operator|(APInt a, const APInt &b) {
1979 a |= b;
1980 return a;
1981}
1982
1983inline APInt operator|(const APInt &a, APInt &&b) {
1984 b |= a;
1985 return std::move(b);
1986}
1987
1988inline APInt operator|(APInt a, uint64_t RHS) {
1989 a |= RHS;
1990 return a;
1991}
1992
1993inline APInt operator|(uint64_t LHS, APInt b) {
1994 b |= LHS;
1995 return b;
1996}
1997
1998inline APInt operator^(APInt a, const APInt &b) {
1999 a ^= b;
2000 return a;
2001}
2002
2003inline APInt operator^(const APInt &a, APInt &&b) {
2004 b ^= a;
2005 return std::move(b);
2006}
2007
2008inline APInt operator^(APInt a, uint64_t RHS) {
2009 a ^= RHS;
2010 return a;
2011}
2012
2013inline APInt operator^(uint64_t LHS, APInt b) {
2014 b ^= LHS;
2015 return b;
2016}
2017
2018inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2019 I.print(OS, true);
2020 return OS;
2021}
2022
2023inline APInt operator-(APInt v) {
2024 v.negate();
2025 return v;
2026}
2027
2028inline APInt operator+(APInt a, const APInt &b) {
2029 a += b;
2030 return a;
2031}
2032
2033inline APInt operator+(const APInt &a, APInt &&b) {
2034 b += a;
2035 return std::move(b);
2036}
2037
2038inline APInt operator+(APInt a, uint64_t RHS) {
2039 a += RHS;
2040 return a;
2041}
2042
2043inline APInt operator+(uint64_t LHS, APInt b) {
2044 b += LHS;
2045 return b;
2046}
2047
2048inline APInt operator-(APInt a, const APInt &b) {
2049 a -= b;
2050 return a;
2051}
2052
2053inline APInt operator-(const APInt &a, APInt &&b) {
2054 b.negate();
2055 b += a;
2056 return std::move(b);
2057}
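// Editor's note: subtraction from a temporary is computed as (-b) + a, which
// is algebraically identical but, as above, reuses the temporary's storage.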
2058
2059inline APInt operator-(APInt a, uint64_t RHS) {
2060 a -= RHS;
2061 return a;
2062}
2063
2064inline APInt operator-(uint64_t LHS, APInt b) {
2065 b.negate();
2066 b += LHS;
2067 return b;
2068}
2069
2070inline APInt operator*(APInt a, uint64_t RHS) {
2071 a *= RHS;
2072 return a;
2073}
2074
2075inline APInt operator*(uint64_t LHS, APInt b) {
2076 b *= LHS;
2077 return b;
2078}
2079
2080
2081namespace APIntOps {
2082
2083/// \brief Determine the smaller of two APInts considered to be signed.
2084inline const APInt &smin(const APInt &A, const APInt &B) {
2085 return A.slt(B) ? A : B;
2086}
2087
2088/// \brief Determine the larger of two APInts considered to be signed.
2089inline const APInt &smax(const APInt &A, const APInt &B) {
2090 return A.sgt(B) ? A : B;
2091}
2092
2093/// \brief Determine the smaller of two APInts considered to be unsigned.
2094inline const APInt &umin(const APInt &A, const APInt &B) {
2095 return A.ult(B) ? A : B;
2096}
2097
2098/// \brief Determine the larger of two APInts considered to be unsigned.
2099inline const APInt &umax(const APInt &A, const APInt &B) {
2100 return A.ugt(B) ? A : B;
2101}
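// Example (editor's sketch): the same bit pattern orders differently under
// the two interpretations; 0xC8 is 200 unsigned but -56 signed in 8 bits.
//   APInt A(8, 0xC8), B(8, 100);   // illustrative values
//   APIntOps::umax(A, B);          // A (200 > 100 as unsigned)
//   APIntOps::smax(A, B);          // B (-56 < 100 as signed)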
2102
2103/// \brief Compute GCD of two unsigned APInt values.
2104///
2105/// This function returns the greatest common divisor of the two APInt values
2106/// using Stein's algorithm.
2107///
2108/// \returns the greatest common divisor of A and B.
2109APInt GreatestCommonDivisor(APInt A, APInt B);
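// Example (editor's sketch):
//   APIntOps::GreatestCommonDivisor(APInt(32, 12), APInt(32, 18)); // == 6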
2110
2111/// \brief Converts the given APInt to a double value.
2112///
2113/// Treats the APInt as an unsigned value for conversion purposes.
2114inline double RoundAPIntToDouble(const APInt &APIVal) {
2115 return APIVal.roundToDouble();
2116}
2117
2118/// \brief Converts the given APInt to a double value.
2119///
2120/// Treats the APInt as a signed value for conversion purposes.
2121inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2122 return APIVal.signedRoundToDouble();
2123}
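// Example (editor's sketch): the same 8-bit pattern 0xFF rounds to 255.0
// when treated as unsigned but to -1.0 when treated as signed.
//   APIntOps::RoundAPIntToDouble(APInt(8, 0xFF));       // 255.0
//   APIntOps::RoundSignedAPIntToDouble(APInt(8, 0xFF)); //  -1.0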
2124
2125/// \brief Converts the given APInt to a float value.
2126inline float RoundAPIntToFloat(const APInt &APIVal) {
2127 return float(RoundAPIntToDouble(APIVal));
2128}
2129
2130/// \brief Converts the given APInt to a float value.
2131///
2132/// Treats the APInt as a signed value for conversion purposes.
2133inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2134 return float(APIVal.signedRoundToDouble());
2135}
2136
2137/// \brief Converts the given double value into an APInt.
2138///
2139/// This function converts a double value to an APInt value.
2140APInt RoundDoubleToAPInt(double Double, unsigned width);
2141
2142/// \brief Converts a float value into an APInt.
2143///
2144/// Converts a float value into an APInt value.
2145inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2146 return RoundDoubleToAPInt(double(Float), width);
2147}
2148
2149} // End of APIntOps namespace
2150
2151// See friend declaration above. This additional declaration is required in
2152// order to compile LLVM with the IBM xlC compiler.
2153hash_code hash_value(const APInt &Arg);
2154} // End of llvm namespace
2155
2156#endif