Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1306, column 12
Called C++ object pointer is null
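
The warning class is easy to reproduce in isolation: the analyzer reports
"Called C++ object pointer is null" whenever a member function is invoked
through a pointer that is provably null on at least one path. A minimal,
hypothetical reproducer (illustrative only; it is not the actual defect site
in SelectionDAGNodes.h):

    struct Node { int value() const; };

    int read(Node *N, bool HasNode) {
      if (!HasNode)
        N = nullptr;     // N is null on this path...
      return N->value(); // ...so the analyzer flags this call
    }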

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name DAGCombiner.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/SelectionDAG -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/CodeGen/SelectionDAG -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/IntervalMap.h"
23#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallPtrSet.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/Analysis/MemoryLocation.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/DAGCombine.h"
35#include "llvm/CodeGen/ISDOpcodes.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineMemOperand.h"
39#include "llvm/CodeGen/RuntimeLibcalls.h"
40#include "llvm/CodeGen/SelectionDAG.h"
41#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42#include "llvm/CodeGen/SelectionDAGNodes.h"
43#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44#include "llvm/CodeGen/TargetLowering.h"
45#include "llvm/CodeGen/TargetRegisterInfo.h"
46#include "llvm/CodeGen/TargetSubtargetInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/Metadata.h"
55#include "llvm/Support/Casting.h"
56#include "llvm/Support/CodeGen.h"
57#include "llvm/Support/CommandLine.h"
58#include "llvm/Support/Compiler.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/KnownBits.h"
62#include "llvm/Support/MachineValueType.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Support/raw_ostream.h"
65#include "llvm/Target/TargetMachine.h"
66#include "llvm/Target/TargetOptions.h"
67#include <algorithm>
68#include <cassert>
69#include <cstdint>
70#include <functional>
71#include <iterator>
72#include <string>
73#include <tuple>
74#include <utility>
75
76using namespace llvm;
77
78#define DEBUG_TYPE "dagcombine"
79
80STATISTIC(NodesCombined   , "Number of dag nodes combined");
81STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
84STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
85STATISTIC(SlicedLoads, "Number of load sliced");
86STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87
88static cl::opt<bool>
89CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92static cl::opt<bool>
93UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
95
96#ifndef NDEBUG
97static cl::opt<std::string>
98CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
100 " function"));
101#endif
102
103/// Hidden option to stress test load slicing, i.e., when this option
104/// is enabled, load slicing bypasses most of its profitability guards.
105static cl::opt<bool>
106StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
108 cl::init(false));
109
110static cl::opt<bool>
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
113
114static cl::opt<bool>
115 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116 cl::desc("DAG combiner enable merging multiple stores "
117 "into a wider store"));
118
119static cl::opt<unsigned> TokenFactorInlineLimit(
120 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121 cl::desc("Limit the number of operands to inline for Token Factors"));
122
123static cl::opt<unsigned> StoreMergeDependenceLimit(
124 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125 cl::desc("Limit the number of times for the same StoreNode and RootNode "
126 "to bail out in store merging dependence check"));
127
128namespace {
129
130 class DAGCombiner {
131 SelectionDAG &DAG;
132 const TargetLowering &TLI;
133 CombineLevel Level;
134 CodeGenOpt::Level OptLevel;
135 bool LegalDAG = false;
136 bool LegalOperations = false;
137 bool LegalTypes = false;
138 bool ForCodeSize;
139
140 /// Worklist of all of the nodes that need to be simplified.
141 ///
142 /// This must behave as a stack -- new nodes to process are pushed onto the
143 /// back and when processing we pop off of the back.
144 ///
145 /// The worklist will not contain duplicates but may contain null entries
146 /// due to nodes being deleted from the underlying DAG.
147 SmallVector<SDNode *, 64> Worklist;
148
149 /// Mapping from an SDNode to its position on the worklist.
150 ///
151 /// This is used to find and remove nodes from the worklist (by nulling
152 /// them) when they are deleted from the underlying DAG. It relies on
153 /// stable indices of nodes within the worklist.
154 DenseMap<SDNode *, unsigned> WorklistMap;
155 /// This records all nodes attempted to be added to the worklist since we
156 /// last considered a new worklist entry. Since we do not add duplicate
157 /// nodes to the worklist, this is different from the tail of the worklist.
158 SmallSetVector<SDNode *, 32> PruningList;
159
160 /// Set of nodes which have been combined (at least once).
161 ///
162 /// This is used to allow us to reliably add any operands of a DAG node
163 /// which have not yet been combined to the worklist.
164 SmallPtrSet<SDNode *, 32> CombinedNodes;
165
166 /// Map from candidate StoreNode to the pair of RootNode and count.
167 /// The count is used to track how many times we have seen the StoreNode
168 /// with the same RootNode bail out in the dependence check. If the same
169 /// pair has bailed out more times than the limit, we won't consider the
170 /// StoreNode with the same RootNode as a store merging candidate again.
172 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
173
174 // AA - Used for DAG load/store alias analysis.
175 AliasAnalysis *AA;
176
177 /// When an instruction is simplified, add all users of the instruction to
178 /// the worklist because they might now be simplified further.
179 void AddUsersToWorklist(SDNode *N) {
180 for (SDNode *Node : N->uses())
181 AddToWorklist(Node);
182 }
183
184 /// Convenient shorthand to add a node and all of its users to the worklist.
185 void AddToWorklistWithUsers(SDNode *N) {
186 AddUsersToWorklist(N);
187 AddToWorklist(N);
188 }
189
190 // Prune potentially dangling nodes. This is called after
191 // any visit to a node, but should also be called during a visit after any
192 // failed combine which may have created a DAG node.
193 void clearAddedDanglingWorklistEntries() {
194 // Check any nodes added to the worklist to see if they are prunable.
195 while (!PruningList.empty()) {
196 auto *N = PruningList.pop_back_val();
197 if (N->use_empty())
198 recursivelyDeleteUnusedNodes(N);
199 }
200 }
201
202 SDNode *getNextWorklistEntry() {
203 // Before we do any work, remove nodes that are not in use.
204 clearAddedDanglingWorklistEntries();
205 SDNode *N = nullptr;
206 // The Worklist holds the SDNodes in order, but it may contain null
207 // entries.
208 while (!N && !Worklist.empty()) {
209 N = Worklist.pop_back_val();
210 }
211
212 if (N) {
213 bool GoodWorklistEntry = WorklistMap.erase(N);
214 (void)GoodWorklistEntry;
215 assert(GoodWorklistEntry &&
216        "Found a worklist entry without a corresponding map entry!");
217 }
218 return N;
219 }
220
221 /// Call the node-specific routine that folds each particular type of node.
222 SDValue visit(SDNode *N);
223
224 public:
225 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
226 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
227 OptLevel(OL), AA(AA) {
228 ForCodeSize = DAG.shouldOptForSize();
229
230 MaximumLegalStoreInBits = 0;
231 // We use the minimum store size here, since that's all we can guarantee
232 // for the scalable vector types.
233 for (MVT VT : MVT::all_valuetypes())
234 if (EVT(VT).isSimple() && VT != MVT::Other &&
235 TLI.isTypeLegal(EVT(VT)) &&
236 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
237 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
238 }
239
240 void ConsiderForPruning(SDNode *N) {
241 // Mark this for potential pruning.
242 PruningList.insert(N);
243 }
244
245 /// Add to the worklist, making sure its instance is at the back (next to
246 /// be processed).
247 void AddToWorklist(SDNode *N) {
248 assert(N->getOpcode() != ISD::DELETED_NODE &&
249        "Deleted Node added to Worklist");
250
251 // Skip handle nodes as they can't usefully be combined and confuse the
252 // zero-use deletion strategy.
253 if (N->getOpcode() == ISD::HANDLENODE)
254 return;
255
256 ConsiderForPruning(N);
257
258 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
259 Worklist.push_back(N);
260 }
261
262 /// Remove all instances of N from the worklist.
263 void removeFromWorklist(SDNode *N) {
264 CombinedNodes.erase(N);
265 PruningList.remove(N);
266 StoreRootCountMap.erase(N);
267
268 auto It = WorklistMap.find(N);
269 if (It == WorklistMap.end())
270 return; // Not in the worklist.
271
272 // Null out the entry rather than erasing it to avoid a linear operation.
273 Worklist[It->second] = nullptr;
274 WorklistMap.erase(It);
275 }
276
277 void deleteAndRecombine(SDNode *N);
278 bool recursivelyDeleteUnusedNodes(SDNode *N);
279
280 /// Replaces all uses of the results of one DAG node with new values.
281 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
282 bool AddTo = true);
283
284 /// Replaces all uses of the results of one DAG node with new values.
285 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
286 return CombineTo(N, &Res, 1, AddTo);
287 }
288
289 /// Replaces all uses of the results of one DAG node with new values.
290 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
291 bool AddTo = true) {
292 SDValue To[] = { Res0, Res1 };
293 return CombineTo(N, To, 2, AddTo);
294 }
295
296 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
297
298 private:
299 unsigned MaximumLegalStoreInBits;
300
301 /// Check the specified integer node value to see if it can be simplified or
302 /// if things it uses can be simplified by bit propagation.
303 /// If so, return true.
304 bool SimplifyDemandedBits(SDValue Op) {
305 unsigned BitWidth = Op.getScalarValueSizeInBits();
306 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
307 return SimplifyDemandedBits(Op, DemandedBits);
308 }
309
310 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
311 EVT VT = Op.getValueType();
312 unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
313 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
314 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
315 }
316
317 /// Check the specified vector node value to see if it can be simplified or
318 /// if things it uses can be simplified as it only uses some of the
319 /// elements. If so, return true.
320 bool SimplifyDemandedVectorElts(SDValue Op) {
321 unsigned NumElts = Op.getValueType().getVectorNumElements();
322 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
323 return SimplifyDemandedVectorElts(Op, DemandedElts);
324 }
325
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
327 const APInt &DemandedElts,
328 bool AssumeSingleUse = false);
329 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
330 bool AssumeSingleUse = false);
331
332 bool CombineToPreIndexedLoadStore(SDNode *N);
333 bool CombineToPostIndexedLoadStore(SDNode *N);
334 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
335 bool SliceUpLoad(SDNode *N);
336
337 // Scalars have size 0 to distinguish from singleton vectors.
338 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
339 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
340 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
341
342 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
343 /// load.
344 ///
345 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
346 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
347 /// \param EltNo index of the vector element to load.
348 /// \param OriginalLoad load that EVE came from to be replaced.
349 /// \returns EVE on success SDValue() on failure.
350 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
351 SDValue EltNo,
352 LoadSDNode *OriginalLoad);
353 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
354 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
355 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
356 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
357 SDValue PromoteIntBinOp(SDValue Op);
358 SDValue PromoteIntShiftOp(SDValue Op);
359 SDValue PromoteExtend(SDValue Op);
360 bool PromoteLoad(SDValue Op);
361
362 /// Call the node-specific routine that knows how to fold each
363 /// particular type of node. If that doesn't do anything, try the
364 /// target-specific DAG combines.
365 SDValue combine(SDNode *N);
366
367 // Visitation implementation - Implement dag node combining for different
368 // node types. The semantics are as follows:
369 // Return Value:
370 // SDValue.getNode() == 0 - No change was made
371 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
372 // otherwise - N should be replaced by the returned Operand.
373 //
374 SDValue visitTokenFactor(SDNode *N);
375 SDValue visitMERGE_VALUES(SDNode *N);
376 SDValue visitADD(SDNode *N);
377 SDValue visitADDLike(SDNode *N);
378 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
379 SDValue visitSUB(SDNode *N);
380 SDValue visitADDSAT(SDNode *N);
381 SDValue visitSUBSAT(SDNode *N);
382 SDValue visitADDC(SDNode *N);
383 SDValue visitADDO(SDNode *N);
384 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
385 SDValue visitSUBC(SDNode *N);
386 SDValue visitSUBO(SDNode *N);
387 SDValue visitADDE(SDNode *N);
388 SDValue visitADDCARRY(SDNode *N);
389 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
390 SDValue visitSUBE(SDNode *N);
391 SDValue visitSUBCARRY(SDNode *N);
392 SDValue visitMUL(SDNode *N);
393 SDValue visitMULFIX(SDNode *N);
394 SDValue useDivRem(SDNode *N);
395 SDValue visitSDIV(SDNode *N);
396 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
397 SDValue visitUDIV(SDNode *N);
398 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
399 SDValue visitREM(SDNode *N);
400 SDValue visitMULHU(SDNode *N);
401 SDValue visitMULHS(SDNode *N);
402 SDValue visitSMUL_LOHI(SDNode *N);
403 SDValue visitUMUL_LOHI(SDNode *N);
404 SDValue visitMULO(SDNode *N);
405 SDValue visitIMINMAX(SDNode *N);
406 SDValue visitAND(SDNode *N);
407 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
408 SDValue visitOR(SDNode *N);
409 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
410 SDValue visitXOR(SDNode *N);
411 SDValue SimplifyVBinOp(SDNode *N);
412 SDValue visitSHL(SDNode *N);
413 SDValue visitSRA(SDNode *N);
414 SDValue visitSRL(SDNode *N);
415 SDValue visitFunnelShift(SDNode *N);
416 SDValue visitRotate(SDNode *N);
417 SDValue visitABS(SDNode *N);
418 SDValue visitBSWAP(SDNode *N);
419 SDValue visitBITREVERSE(SDNode *N);
420 SDValue visitCTLZ(SDNode *N);
421 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
422 SDValue visitCTTZ(SDNode *N);
423 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
424 SDValue visitCTPOP(SDNode *N);
425 SDValue visitSELECT(SDNode *N);
426 SDValue visitVSELECT(SDNode *N);
427 SDValue visitSELECT_CC(SDNode *N);
428 SDValue visitSETCC(SDNode *N);
429 SDValue visitSETCCCARRY(SDNode *N);
430 SDValue visitSIGN_EXTEND(SDNode *N);
431 SDValue visitZERO_EXTEND(SDNode *N);
432 SDValue visitANY_EXTEND(SDNode *N);
433 SDValue visitAssertExt(SDNode *N);
434 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
435 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
436 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
437 SDValue visitTRUNCATE(SDNode *N);
438 SDValue visitBITCAST(SDNode *N);
439 SDValue visitBUILD_PAIR(SDNode *N);
440 SDValue visitFADD(SDNode *N);
441 SDValue visitFSUB(SDNode *N);
442 SDValue visitFMUL(SDNode *N);
443 SDValue visitFMA(SDNode *N);
444 SDValue visitFDIV(SDNode *N);
445 SDValue visitFREM(SDNode *N);
446 SDValue visitFSQRT(SDNode *N);
447 SDValue visitFCOPYSIGN(SDNode *N);
448 SDValue visitFPOW(SDNode *N);
449 SDValue visitSINT_TO_FP(SDNode *N);
450 SDValue visitUINT_TO_FP(SDNode *N);
451 SDValue visitFP_TO_SINT(SDNode *N);
452 SDValue visitFP_TO_UINT(SDNode *N);
453 SDValue visitFP_ROUND(SDNode *N);
454 SDValue visitFP_EXTEND(SDNode *N);
455 SDValue visitFNEG(SDNode *N);
456 SDValue visitFABS(SDNode *N);
457 SDValue visitFCEIL(SDNode *N);
458 SDValue visitFTRUNC(SDNode *N);
459 SDValue visitFFLOOR(SDNode *N);
460 SDValue visitFMINNUM(SDNode *N);
461 SDValue visitFMAXNUM(SDNode *N);
462 SDValue visitFMINIMUM(SDNode *N);
463 SDValue visitFMAXIMUM(SDNode *N);
464 SDValue visitBRCOND(SDNode *N);
465 SDValue visitBR_CC(SDNode *N);
466 SDValue visitLOAD(SDNode *N);
467
468 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
469 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
470
471 SDValue visitSTORE(SDNode *N);
472 SDValue visitLIFETIME_END(SDNode *N);
473 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
474 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
475 SDValue visitBUILD_VECTOR(SDNode *N);
476 SDValue visitCONCAT_VECTORS(SDNode *N);
477 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
478 SDValue visitVECTOR_SHUFFLE(SDNode *N);
479 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
480 SDValue visitINSERT_SUBVECTOR(SDNode *N);
481 SDValue visitMLOAD(SDNode *N);
482 SDValue visitMSTORE(SDNode *N);
483 SDValue visitMGATHER(SDNode *N);
484 SDValue visitMSCATTER(SDNode *N);
485 SDValue visitFP_TO_FP16(SDNode *N);
486 SDValue visitFP16_TO_FP(SDNode *N);
487 SDValue visitVECREDUCE(SDNode *N);
488
489 SDValue visitFADDForFMACombine(SDNode *N);
490 SDValue visitFSUBForFMACombine(SDNode *N);
491 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
492
493 SDValue XformToShuffleWithZero(SDNode *N);
494 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
495 const SDLoc &DL, SDValue N0,
496 SDValue N1);
497 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
498 SDValue N1);
499 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
500 SDValue N1, SDNodeFlags Flags);
501
502 SDValue visitShiftByConstant(SDNode *N);
503
504 SDValue foldSelectOfConstants(SDNode *N);
505 SDValue foldVSelectOfConstants(SDNode *N);
506 SDValue foldBinOpIntoSelect(SDNode *BO);
507 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
508 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
509 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
510 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
511 SDValue N2, SDValue N3, ISD::CondCode CC,
512 bool NotExtCompare = false);
513 SDValue convertSelectOfFPConstantsToLoadOffset(
514 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
515 ISD::CondCode CC);
516 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
517 SDValue N2, SDValue N3, ISD::CondCode CC);
518 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
519 const SDLoc &DL);
520 SDValue unfoldMaskedMerge(SDNode *N);
521 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
522 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
523 const SDLoc &DL, bool foldBooleans);
524 SDValue rebuildSetCC(SDValue N);
525
526 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
527 SDValue &CC, bool MatchStrict = false) const;
528 bool isOneUseSetCC(SDValue N) const;
529 bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
530
531 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
532 unsigned HiOp);
533 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
534 SDValue CombineExtLoad(SDNode *N);
535 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
536 SDValue combineRepeatedFPDivisors(SDNode *N);
537 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
538 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
539 SDValue BuildSDIV(SDNode *N);
540 SDValue BuildSDIVPow2(SDNode *N);
541 SDValue BuildUDIV(SDNode *N);
542 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
543 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
544 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
545 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
546 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
547 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
548 SDNodeFlags Flags, bool Reciprocal);
549 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
550 SDNodeFlags Flags, bool Reciprocal);
551 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
552 bool DemandHighBits = true);
553 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
554 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
555 SDValue InnerPos, SDValue InnerNeg,
556 unsigned PosOpcode, unsigned NegOpcode,
557 const SDLoc &DL);
558 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
559 SDValue MatchLoadCombine(SDNode *N);
560 SDValue MatchStoreCombine(StoreSDNode *N);
561 SDValue ReduceLoadWidth(SDNode *N);
562 SDValue ReduceLoadOpStoreWidth(SDNode *N);
563 SDValue splitMergedValStore(StoreSDNode *ST);
564 SDValue TransformFPLoadStorePair(SDNode *N);
565 SDValue convertBuildVecZextToZext(SDNode *N);
566 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
567 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
568 SDValue reduceBuildVecToShuffle(SDNode *N);
569 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
570 ArrayRef<int> VectorMask, SDValue VecIn1,
571 SDValue VecIn2, unsigned LeftIdx,
572 bool DidSplitVec);
573 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
574
575 /// Walk up the chain, skipping non-aliasing memory nodes,
576 /// looking for aliasing nodes and adding them to the Aliases vector.
577 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
578 SmallVectorImpl<SDValue> &Aliases);
579
580 /// Return true if there is any possibility that the two addresses overlap.
581 bool isAlias(SDNode *Op0, SDNode *Op1) const;
582
583 /// Walk up the chain, skipping non-aliasing memory nodes, looking for a
584 /// better chain (aliasing node).
585 SDValue FindBetterChain(SDNode *N, SDValue Chain);
586
587 /// Try to replace a store and any possibly adjacent stores on
588 /// consecutive chains with better chains. Return true only if St is
589 /// replaced.
590 ///
591 /// Notice that other chains may still be replaced even if the function
592 /// returns false.
593 bool findBetterNeighborChains(StoreSDNode *St);
594
595 // Helper for findBetterNeighborChains. Walks up the store chain, adding
596 // additional chained stores that do not overlap and can be parallelized.
597 bool parallelizeChainedStores(StoreSDNode *St);
598
599 /// Holds a pointer to an LSBaseSDNode as well as information on where it
600 /// is located in a sequence of memory operations connected by a chain.
601 struct MemOpLink {
602 // Ptr to the mem node.
603 LSBaseSDNode *MemNode;
604
605 // Offset from the base ptr.
606 int64_t OffsetFromBase;
607
608 MemOpLink(LSBaseSDNode *N, int64_t Offset)
609 : MemNode(N), OffsetFromBase(Offset) {}
610 };
611
612 /// This is a helper function for visitMUL to check the profitability
613 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
614 /// MulNode is the original multiply, AddNode is (add x, c1),
615 /// and ConstNode is c2.
616 bool isMulAddWithConstProfitable(SDNode *MulNode,
617 SDValue &AddNode,
618 SDValue &ConstNode);
619
620 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
621 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
622 /// the type of the loaded value to be extended.
623 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
624 EVT LoadResultTy, EVT &ExtVT);
625
626 /// Helper function to calculate whether the given Load/Store can have its
627 /// width reduced to ExtVT.
628 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
629 EVT &MemVT, unsigned ShAmt = 0);
630
631 /// Used by BackwardsPropagateMask to find suitable loads.
632 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
633 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
634 ConstantSDNode *Mask, SDNode *&NodeToMask);
635 /// Attempt to propagate a given AND node back to load leaves so that they
636 /// can be combined into narrow loads.
637 bool BackwardsPropagateMask(SDNode *N);
638
639 /// Helper function for MergeConsecutiveStores which merges the
640 /// component store chains.
641 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
642 unsigned NumStores);
643
644 /// This is a helper function for MergeConsecutiveStores. When the
645 /// source elements of the consecutive stores are all constants or
646 /// all extracted vector elements, try to merge them into one
647 /// larger store introducing bitcasts if necessary. \return True
648 /// if a merged store was created.
649 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
650 EVT MemVT, unsigned NumStores,
651 bool IsConstantSrc, bool UseVector,
652 bool UseTrunc);
653
654 /// This is a helper function for MergeConsecutiveStores. Stores
655 /// that potentially may be merged with St are placed in
656 /// StoreNodes. RootNode is a chain predecessor to all store
657 /// candidates.
658 void getStoreMergeCandidates(StoreSDNode *St,
659 SmallVectorImpl<MemOpLink> &StoreNodes,
660 SDNode *&Root);
661
662 /// Helper function for MergeConsecutiveStores. Checks if
663 /// candidate stores have indirect dependency through their
664 /// operands. RootNode is the predecessor to all stores calculated
665 /// by getStoreMergeCandidates and is used to prune the dependency check.
666 /// \return True if safe to merge.
667 bool checkMergeStoreCandidatesForDependencies(
668 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
669 SDNode *RootNode);
670
671 /// Merge consecutive store operations into a wide store.
672 /// This optimization uses wide integers or vectors when possible.
673 /// \return true if stores were merged into a wider store (the affected
674 /// nodes are stored as a prefix in \p StoreNodes).
675 bool MergeConsecutiveStores(StoreSDNode *St);
676
677 /// Try to transform a truncation where C is a constant:
678 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
679 ///
680 /// \p N needs to be a truncation and its first operand an AND. Other
681 /// requirements are checked by the function (e.g. that trunc is
682 /// single-use); if they are not met, an empty SDValue is returned.
683 SDValue distributeTruncateThroughAnd(SDNode *N);
684
685 /// Helper function to determine whether the target supports the operation
686 /// given by \p Opcode for type \p VT, that is, whether the operation
687 /// is legal or custom before legalizing operations, and whether it is
688 /// legal (but not custom) after legalization.
689 bool hasOperation(unsigned Opcode, EVT VT) {
690 if (LegalOperations)
691 return TLI.isOperationLegal(Opcode, VT);
692 return TLI.isOperationLegalOrCustom(Opcode, VT);
693 }
694
695 public:
696 /// Runs the dag combiner on all nodes in the work list
697 void Run(CombineLevel AtLevel);
698
699 SelectionDAG &getDAG() const { return DAG; }
700
701 /// Returns a type large enough to hold any valid shift amount - before type
702 /// legalization these can be huge.
703 EVT getShiftAmountTy(EVT LHSTy) {
704 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
705 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
706 }
707
708 /// This method returns true if we are running before type legalization or
709 /// if the specified VT is legal.
710 bool isTypeLegal(const EVT &VT) {
711 if (!LegalTypes) return true;
712 return TLI.isTypeLegal(VT);
713 }
714
715 /// Convenience wrapper around TargetLowering::getSetCCResultType
716 EVT getSetCCResultType(EVT VT) const {
717 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
718 }
719
720 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
721 SDValue OrigLoad, SDValue ExtLoad,
722 ISD::NodeType ExtType);
723 };
724
725/// This class is a DAGUpdateListener that removes any deleted
726/// nodes from the worklist.
727class WorklistRemover : public SelectionDAG::DAGUpdateListener {
728 DAGCombiner &DC;
729
730public:
731 explicit WorklistRemover(DAGCombiner &dc)
732 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
733
734 void NodeDeleted(SDNode *N, SDNode *E) override {
735 DC.removeFromWorklist(N);
736 }
737};
738
739class WorklistInserter : public SelectionDAG::DAGUpdateListener {
740 DAGCombiner &DC;
741
742public:
743 explicit WorklistInserter(DAGCombiner &dc)
744 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
745
746 // FIXME: Ideally we could add N to the worklist, but this causes exponential
747 // compile time costs in large DAGs, e.g. Halide.
748 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
749};
750
751} // end anonymous namespace
752
753//===----------------------------------------------------------------------===//
754// TargetLowering::DAGCombinerInfo implementation
755//===----------------------------------------------------------------------===//
756
757void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
758 ((DAGCombiner*)DC)->AddToWorklist(N);
759}
760
761SDValue TargetLowering::DAGCombinerInfo::
762CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
763 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
764}
765
766SDValue TargetLowering::DAGCombinerInfo::
767CombineTo(SDNode *N, SDValue Res, bool AddTo) {
768 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
769}
770
771SDValue TargetLowering::DAGCombinerInfo::
772CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
773 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
774}
775
776bool TargetLowering::DAGCombinerInfo::
777recursivelyDeleteUnusedNodes(SDNode *N) {
778 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
779}
780
781void TargetLowering::DAGCombinerInfo::
782CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
783 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
784}
785
786//===----------------------------------------------------------------------===//
787// Helper Functions
788//===----------------------------------------------------------------------===//
789
790void DAGCombiner::deleteAndRecombine(SDNode *N) {
791 removeFromWorklist(N);
792
793 // If the operands of this node are only used by the node, they will now be
794 // dead. Make sure to re-visit them and recursively delete dead nodes.
795 for (const SDValue &Op : N->ops())
796 // For an operand generating multiple values, one of the values may
797 // become dead allowing further simplification (e.g. split index
798 // arithmetic from an indexed load).
799 if (Op->hasOneUse() || Op->getNumValues() > 1)
800 AddToWorklist(Op.getNode());
801
802 DAG.DeleteNode(N);
803}
804
805 // APInts must be the same size for most operations; this helper
806 // function zero-extends the shorter of the pair so that they match.
807// We provide an Offset so that we can create bitwidths that won't overflow.
808static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
809 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
810 LHS = LHS.zextOrSelf(Bits);
811 RHS = RHS.zextOrSelf(Bits);
812}
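
A minimal usage sketch of the helper above (the operand values and the one
bit of Offset headroom are illustrative only):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    void zeroExtendToMatchExample() {
      APInt LHS(8, 0xFF);                        // 8-bit operand
      APInt RHS(16, 0x0100);                     // 16-bit operand
      zeroExtendToMatch(LHS, RHS, /*Offset=*/1); // both now 17 bits wide,
                                                 // so LHS + RHS cannot overflow
    }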
813
814// Return true if this node is a setcc, or is a select_cc
815// that selects between the target values used for true and false, making it
816// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
817// the appropriate nodes based on the type of node we are checking. This
818// simplifies life a bit for the callers.
819bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
820 SDValue &CC, bool MatchStrict) const {
821 if (N.getOpcode() == ISD::SETCC) {
822 LHS = N.getOperand(0);
823 RHS = N.getOperand(1);
824 CC = N.getOperand(2);
825 return true;
826 }
827
828 if (MatchStrict &&
829 (N.getOpcode() == ISD::STRICT_FSETCC ||
830 N.getOpcode() == ISD::STRICT_FSETCCS)) {
831 LHS = N.getOperand(1);
832 RHS = N.getOperand(2);
833 CC = N.getOperand(3);
834 return true;
835 }
836
837 if (N.getOpcode() != ISD::SELECT_CC ||
838 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
839 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
840 return false;
841
842 if (TLI.getBooleanContents(N.getValueType()) ==
843 TargetLowering::UndefinedBooleanContent)
844 return false;
845
846 LHS = N.getOperand(0);
847 RHS = N.getOperand(1);
848 CC = N.getOperand(4);
849 return true;
850}
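
For intuition, here is a scalar model of the SELECT_CC equivalence the
function accepts (plain integers stand in for SDValues; a target with
ZeroOrOneBooleanContent is assumed, so 1/0 are the canonical true/false
values):

    int setcc_lt(int L, int R) { return L < R ? 1 : 0; }
    int select_cc_lt(int L, int R, int TrueV, int FalseV) {
      return L < R ? TrueV : FalseV;
    }
    // select_cc_lt(L, R, /*TrueV=*/1, /*FalseV=*/0) == setcc_lt(L, R),
    // which is why such a SELECT_CC can be treated as a SETCC.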
851
852/// Return true if this is a SetCC-equivalent operation with only one use.
853/// If this is true, it allows the users to invert the operation for free when
854/// it is profitable to do so.
855bool DAGCombiner::isOneUseSetCC(SDValue N) const {
856 SDValue N0, N1, N2;
857 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
858 return true;
859 return false;
860}
861
862// Returns the SDNode if it is a constant float BuildVector
863// or constant float.
864static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
865 if (isa<ConstantFPSDNode>(N))
866 return N.getNode();
867 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
868 return N.getNode();
869 return nullptr;
870}
871
872// Determines if it is a constant integer or a build vector of constant
873// integers (and undefs).
874// Do not permit build vector implicit truncation.
875static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
876 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
877 return !(Const->isOpaque() && NoOpaques);
878 if (N.getOpcode() != ISD::BUILD_VECTOR)
879 return false;
880 unsigned BitWidth = N.getScalarValueSizeInBits();
881 for (const SDValue &Op : N->op_values()) {
882 if (Op.isUndef())
883 continue;
884 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
885 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
886 (Const->isOpaque() && NoOpaques))
887 return false;
888 }
889 return true;
890}
891
892 // Determines if a BUILD_VECTOR is composed of all constants, possibly
893 // mixed with undefs.
894static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
895 if (V.getOpcode() != ISD::BUILD_VECTOR)
896 return false;
897 return isConstantOrConstantVector(V, NoOpaques) ||
898 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
899}
900
901bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
902 const SDLoc &DL,
903 SDValue N0,
904 SDValue N1) {
905 // Currently this only tries to ensure we don't undo the GEP splits done by
906 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
907 // we check if the following transformation would be problematic:
908 // (load/store (add, (add, x, offset1), offset2)) ->
909 // (load/store (add, x, offset1+offset2)).
910
911 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
912 return false;
913
914 if (N0.hasOneUse())
915 return false;
916
917 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
918 auto *C2 = dyn_cast<ConstantSDNode>(N1);
919 if (!C1 || !C2)
920 return false;
921
922 const APInt &C1APIntVal = C1->getAPIntValue();
923 const APInt &C2APIntVal = C2->getAPIntValue();
924 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
925 return false;
926
927 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
928 if (CombinedValueIntVal.getBitWidth() > 64)
929 return false;
930 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
931
932 for (SDNode *Node : N0->uses()) {
933 auto LoadStore = dyn_cast<MemSDNode>(Node);
934 if (LoadStore) {
935 // Is x[offset2] already not a legal addressing mode? If so then
936 // reassociating the constants breaks nothing (we test offset2 because
937 // that's the one we hope to fold into the load or store).
938 TargetLoweringBase::AddrMode AM;
939 AM.HasBaseReg = true;
940 AM.BaseOffs = C2APIntVal.getSExtValue();
941 EVT VT = LoadStore->getMemoryVT();
942 unsigned AS = LoadStore->getAddressSpace();
943 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
944 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
945 continue;
946
947 // Would x[offset1+offset2] still be a legal addressing mode?
948 AM.BaseOffs = CombinedValue;
949 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
950 return true;
951 }
952 }
953
954 return false;
955}
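
To make the guard concrete, here is a hedged scalar sketch of the decision it
encodes, using a toy target that can only fold signed 8-bit offsets (the
constraint and both helpers are hypothetical stand-ins for
TLI.isLegalAddressingMode):

    #include <cstdint>

    static bool isLegalToyOffset(int64_t Offs) {
      return Offs >= -128 && Offs <= 127; // toy addressing-mode constraint
    }

    // Would folding (add (add x, Off1), Off2) into (add x, Off1 + Off2)
    // lose a previously legal addressing mode?
    static bool wouldBreakAddressingMode(int64_t Off1, int64_t Off2) {
      if (!isLegalToyOffset(Off2))
        return false; // x[Off2] was never foldable; nothing to lose
      return !isLegalToyOffset(Off1 + Off2);
    }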
956
957// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
958// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
959SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
960 SDValue N0, SDValue N1) {
961 EVT VT = N0.getValueType();
962
963 if (N0.getOpcode() != Opc)
964 return SDValue();
965
966 // Don't reassociate reductions.
967 if (N0->getFlags().hasVectorReduction())
968 return SDValue();
969
970 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
971 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
972 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
973 if (SDValue OpNode =
974 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
975 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
976 return SDValue();
977 }
978 if (N0.hasOneUse()) {
979 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
980 // iff (op x, c1) has one use
981 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
982 if (!OpNode.getNode())
983 return SDValue();
984 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
985 }
986 }
987 return SDValue();
988}
989
990// Try to reassociate commutative binops.
991SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
992 SDValue N1, SDNodeFlags Flags) {
993 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
994 // Don't reassociate reductions.
995 if (Flags.hasVectorReduction())
996 return SDValue();
997
998 // Floating-point reassociation is not allowed without loose FP math.
999 if (N0.getValueType().isFloatingPoint() ||
1000 N1.getValueType().isFloatingPoint())
1001 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1002 return SDValue();
1003
1004 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1005 return Combined;
1006 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1007 return Combined;
1008 return SDValue();
1009}
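
As a scalar illustration of the two rewrites applied by
reassociateOpsCommutative (plain integers stand in for SDValues; the
constants are illustrative):

    // Rule 1: (op (op x, c1), c2) -> (op x, (op c1, c2)); the two constants
    // fold into one at compile time.
    int rule1_add(int x) { return x + (3 + 5); } // was (x + 3) + 5

    // Rule 2: (op (op x, c1), y) -> (op (op x, y), c1); legal only when
    // (op x, c1) has a single use.
    int rule2_add(int x, int y) { return (x + y) + 3; } // was (x + 3) + y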
1010
1011SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1012 bool AddTo) {
1013 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1014 ++NodesCombined;
1015 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1016            To[0].getNode()->dump(&DAG);
1017            dbgs() << " and " << NumTo - 1 << " other values\n");
1018 for (unsigned i = 0, e = NumTo; i != e; ++i)
1019 assert((!To[i].getNode() ||
1020         N->getValueType(i) == To[i].getValueType()) &&
1021        "Cannot combine value to value of different type!");
1022
1023 WorklistRemover DeadNodes(*this);
1024 DAG.ReplaceAllUsesWith(N, To);
1025 if (AddTo) {
1026 // Push the new nodes and any users onto the worklist
1027 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1028 if (To[i].getNode()) {
1029 AddToWorklist(To[i].getNode());
1030 AddUsersToWorklist(To[i].getNode());
1031 }
1032 }
1033 }
1034
1035 // Finally, if the node is now dead, remove it from the graph. The node
1036 // may not be dead if the replacement process recursively simplified to
1037 // something else needing this node.
1038 if (N->use_empty())
1039 deleteAndRecombine(N);
1040 return SDValue(N, 0);
1041}
1042
1043void DAGCombiner::
1044CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1045 // Replace all uses. If any nodes become isomorphic to other nodes and
1046 // are deleted, make sure to remove them from our worklist.
1047 WorklistRemover DeadNodes(*this);
1048 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1049
1050 // Push the new node and any (possibly new) users onto the worklist.
1051 AddToWorklistWithUsers(TLO.New.getNode());
1052
1053 // Finally, if the node is now dead, remove it from the graph. The node
1054 // may not be dead if the replacement process recursively simplified to
1055 // something else needing this node.
1056 if (TLO.Old.getNode()->use_empty())
1057 deleteAndRecombine(TLO.Old.getNode());
1058}
1059
1060/// Check the specified integer node value to see if it can be simplified or if
1061/// things it uses can be simplified by bit propagation. If so, return true.
1062bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1063 const APInt &DemandedElts,
1064 bool AssumeSingleUse) {
1065 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1066 KnownBits Known;
1067 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1068 AssumeSingleUse))
1069 return false;
1070
1071 // Revisit the node.
1072 AddToWorklist(Op.getNode());
1073
1074 // Replace the old value with the new one.
1075 ++NodesCombined;
1076 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1077            dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1078            dbgs() << '\n');
1079
1080 CommitTargetLoweringOpt(TLO);
1081 return true;
1082}
1083
1084/// Check the specified vector node value to see if it can be simplified or
1085/// if things it uses can be simplified as it only uses some of the elements.
1086/// If so, return true.
1087bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1088 const APInt &DemandedElts,
1089 bool AssumeSingleUse) {
1090 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1091 APInt KnownUndef, KnownZero;
1092 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1093 TLO, 0, AssumeSingleUse))
1094 return false;
1095
1096 // Revisit the node.
1097 AddToWorklist(Op.getNode());
1098
1099 // Replace the old value with the new one.
1100 ++NodesCombined;
1101 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1102            dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1103            dbgs() << '\n');
1104
1105 CommitTargetLoweringOpt(TLO);
1106 return true;
1107}
1108
1109void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1110 SDLoc DL(Load);
1111 EVT VT = Load->getValueType(0);
1112 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1113
1114 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1115            Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1116 WorklistRemover DeadNodes(*this);
1117 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1118 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1119 deleteAndRecombine(Load);
1120 AddToWorklist(Trunc.getNode());
1121}
1122
1123SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1124 Replace = false;
1125 SDLoc DL(Op);
1126 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1127 LoadSDNode *LD = cast<LoadSDNode>(Op);
1128 EVT MemVT = LD->getMemoryVT();
1129 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1130 : LD->getExtensionType();
1131 Replace = true;
1132 return DAG.getExtLoad(ExtType, DL, PVT,
1133 LD->getChain(), LD->getBasePtr(),
1134 MemVT, LD->getMemOperand());
1135 }
1136
1137 unsigned Opc = Op.getOpcode();
1138 switch (Opc) {
1139 default: break;
1140 case ISD::AssertSext:
1141 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1142 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1143 break;
1144 case ISD::AssertZext:
1145 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1146 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1147 break;
1148 case ISD::Constant: {
1149 unsigned ExtOpc =
1150 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1151 return DAG.getNode(ExtOpc, DL, PVT, Op);
1152 }
1153 }
1154
1155 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1156 return SDValue();
1157 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1158}
1159
1160SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1161 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1162 return SDValue();
1163 EVT OldVT = Op.getValueType();
1164 SDLoc DL(Op);
1165 bool Replace = false;
1166 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1167 if (!NewOp.getNode())
1168 return SDValue();
1169 AddToWorklist(NewOp.getNode());
1170
1171 if (Replace)
1172 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1173 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1174 DAG.getValueType(OldVT));
1175}
1176
1177SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1178 EVT OldVT = Op.getValueType();
1179 SDLoc DL(Op);
1180 bool Replace = false;
1181 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1182 if (!NewOp.getNode())
1183 return SDValue();
1184 AddToWorklist(NewOp.getNode());
1185
1186 if (Replace)
1187 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1188 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1189}
1190
1191/// Promote the specified integer binary operation if the target indicates it
1192/// is beneficial, e.g. on x86 it's usually better to promote i16 operations to
1193/// i32, since i16 instructions are longer.
1194SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1195 if (!LegalOperations)
1196 return SDValue();
1197
1198 EVT VT = Op.getValueType();
1199 if (VT.isVector() || !VT.isInteger())
1200 return SDValue();
1201
1202 // If operation type is 'undesirable', e.g. i16 on x86, consider
1203 // promoting it.
1204 unsigned Opc = Op.getOpcode();
1205 if (TLI.isTypeDesirableForOp(Opc, VT))
1206 return SDValue();
1207
1208 EVT PVT = VT;
1209 // Consult target whether it is a good idea to promote this operation and
1210 // what's the right type to promote it to.
1211 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1212 assert(PVT != VT && "Don't know what type to promote to!");
1213
1214 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1215
1216 bool Replace0 = false;
1217 SDValue N0 = Op.getOperand(0);
1218 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1219
1220 bool Replace1 = false;
1221 SDValue N1 = Op.getOperand(1);
1222 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1223 SDLoc DL(Op);
1224
1225 SDValue RV =
1226 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1227
1228 // We are always replacing N0/N1's use in N and only need
1229 // additional replacements if there are additional uses.
1230 Replace0 &= !N0->hasOneUse();
1231 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1232
1233 // Combine Op here so it is preserved past replacements.
1234 CombineTo(Op.getNode(), RV);
1235
1236 // If operands have a use ordering, make sure we deal with
1237 // the predecessor first.
1238 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1239 std::swap(N0, N1);
1240 std::swap(NN0, NN1);
1241 }
1242
1243 if (Replace0) {
1244 AddToWorklist(NN0.getNode());
1245 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1246 }
1247 if (Replace1) {
1248 AddToWorklist(NN1.getNode());
1249 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1250 }
1251 return Op;
1252 }
1253 return SDValue();
1254}
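
// A minimal standalone sketch (hypothetical checkPromotedBinOp helper, not
// part of this file) of the identity PromoteIntBinOp relies on: for
// add/sub/mul/and/or/xor, the low bits of the result depend only on the low
// bits of the operands, so widening, operating in the wider type, and
// truncating back is value-preserving.
#include <cassert>
#include <cstdint>
static void checkPromotedBinOp(uint16_t A, uint16_t B) {
  uint16_t Narrow = (uint16_t)(A * B);        // the original i16 operation
  uint32_t Wide = (uint32_t)A * (uint32_t)B;  // ANY_EXTEND both, operate in i32
  assert((uint16_t)Wide == Narrow);           // the final TRUNCATE restores it
}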
1255
1256/// Promote the specified integer shift operation if the target indicates it
1257/// is beneficial, e.g. on x86 it's usually better to promote i16 operations to
1258/// i32, since i16 instructions are longer.
1259SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1260 if (!LegalOperations)
1261 return SDValue();
1262
1263 EVT VT = Op.getValueType();
1264 if (VT.isVector() || !VT.isInteger())
1265 return SDValue();
1266
1267 // If operation type is 'undesirable', e.g. i16 on x86, consider
1268 // promoting it.
1269 unsigned Opc = Op.getOpcode();
1270 if (TLI.isTypeDesirableForOp(Opc, VT))
1271 return SDValue();
1272
1273 EVT PVT = VT;
1274 // Consult target whether it is a good idea to promote this operation and
1275 // what's the right type to promote it to.
1276 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1277 assert(PVT != VT && "Don't know what type to promote to!");
1278
1279 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1280
1281 bool Replace = false;
1282 SDValue N0 = Op.getOperand(0);
1283 SDValue N1 = Op.getOperand(1);
1284 if (Opc == ISD::SRA)
1285 N0 = SExtPromoteOperand(N0, PVT);
1286 else if (Opc == ISD::SRL)
1287 N0 = ZExtPromoteOperand(N0, PVT);
1288 else
1289 N0 = PromoteOperand(N0, PVT, Replace);
1290
1291 if (!N0.getNode())
1292 return SDValue();
1293
1294 SDLoc DL(Op);
1295 SDValue RV =
1296 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1297
1298 if (Replace)
1299 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1300
1301 // Deal with Op being deleted.
1302 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1303 return RV;
1304 }
1305 return SDValue();
1306}
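
// A standalone sketch (hypothetical checkPromotedSrl helper) of why SRL needs
// a zero-extended operand while SRA needs a sign-extended one: a right shift
// in the wider type pulls the extension bits into the result, so they must
// match the narrow shift's semantics. Assumes Amt < 16.
#include <cassert>
#include <cstdint>
static void checkPromotedSrl(uint16_t X, unsigned Amt) {
  uint16_t Narrow = (uint16_t)(X >> Amt);  // the original i16 srl
  uint32_t Wide = (uint32_t)X >> Amt;      // ZExtPromoteOperand keeps bits 16..31 zero
  assert((uint16_t)Wide == Narrow);        // truncation recovers the i16 result
}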
1307
1308SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1309 if (!LegalOperations)
1310 return SDValue();
1311
1312 EVT VT = Op.getValueType();
1313 if (VT.isVector() || !VT.isInteger())
1314 return SDValue();
1315
1316 // If operation type is 'undesirable', e.g. i16 on x86, consider
1317 // promoting it.
1318 unsigned Opc = Op.getOpcode();
1319 if (TLI.isTypeDesirableForOp(Opc, VT))
1320 return SDValue();
1321
1322 EVT PVT = VT;
1323 // Consult target whether it is a good idea to promote this operation and
1324 // what's the right type to promote it to.
1325 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1326 assert(PVT != VT && "Don't know what type to promote to!");
1327 // fold (aext (aext x)) -> (aext x)
1328 // fold (aext (zext x)) -> (zext x)
1329 // fold (aext (sext x)) -> (sext x)
1330 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1331 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1332 }
1333 return SDValue();
1334}
1335
1336bool DAGCombiner::PromoteLoad(SDValue Op) {
1337 if (!LegalOperations)
1338 return false;
1339
1340 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1341 return false;
1342
1343 EVT VT = Op.getValueType();
1344 if (VT.isVector() || !VT.isInteger())
1345 return false;
1346
1347 // If operation type is 'undesirable', e.g. i16 on x86, consider
1348 // promoting it.
1349 unsigned Opc = Op.getOpcode();
1350 if (TLI.isTypeDesirableForOp(Opc, VT))
1351 return false;
1352
1353 EVT PVT = VT;
1354 // Consult target whether it is a good idea to promote this operation and
1355 // what's the right type to promote it to.
1356 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1357 assert(PVT != VT && "Don't know what type to promote to!");
1358
1359 SDLoc DL(Op);
1360 SDNode *N = Op.getNode();
1361 LoadSDNode *LD = cast<LoadSDNode>(N);
1362 EVT MemVT = LD->getMemoryVT();
1363 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1364 : LD->getExtensionType();
1365 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1366 LD->getChain(), LD->getBasePtr(),
1367 MemVT, LD->getMemOperand());
1368 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1369
1370 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1371 Result.getNode()->dump(&DAG); dbgs() << '\n');
1372 WorklistRemover DeadNodes(*this);
1373 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1374 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1375 deleteAndRecombine(N);
1376 AddToWorklist(Result.getNode());
1377 return true;
1378 }
1379 return false;
1380}
1381
1382/// Recursively delete a node which has no uses and any operands for
1383/// which it is the only use.
1384///
1385/// Note that this both deletes the nodes and removes them from the worklist.
1386/// It also adds any nodes that have had a user deleted to the worklist, as
1387/// they may now have only one use and be subject to other combines.
1388bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1389 if (!N->use_empty())
1390 return false;
1391
1392 SmallSetVector<SDNode *, 16> Nodes;
1393 Nodes.insert(N);
1394 do {
1395 N = Nodes.pop_back_val();
1396 if (!N)
1397 continue;
1398
1399 if (N->use_empty()) {
1400 for (const SDValue &ChildN : N->op_values())
1401 Nodes.insert(ChildN.getNode());
1402
1403 removeFromWorklist(N);
1404 DAG.DeleteNode(N);
1405 } else {
1406 AddToWorklist(N);
1407 }
1408 } while (!Nodes.empty());
1409 return true;
1410}
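
// A compact standalone analog (hypothetical UseCountedNode type, not SDNode)
// of the worklist pattern above: deleting a node releases one use of each of
// its operands, and any operand whose use count drops to zero is queued for
// deletion in turn. Nodes are assumed heap-allocated and acyclic.
#include <vector>
struct UseCountedNode {
  std::vector<UseCountedNode *> Operands;
  unsigned NumUses = 0;
};
static void deleteIfUnused(UseCountedNode *N) {
  if (N->NumUses != 0)
    return;
  std::vector<UseCountedNode *> Worklist{N};
  while (!Worklist.empty()) {
    UseCountedNode *Cur = Worklist.back();
    Worklist.pop_back();
    for (UseCountedNode *Op : Cur->Operands)
      if (--Op->NumUses == 0)  // Cur's death released one use of Op
        Worklist.push_back(Op);
    delete Cur;
  }
}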
1411
1412//===----------------------------------------------------------------------===//
1413// Main DAG Combiner implementation
1414//===----------------------------------------------------------------------===//
1415
1416void DAGCombiner::Run(CombineLevel AtLevel) {
1417 // Set the instance variables so that the various visit routines may use them.
1418 Level = AtLevel;
1419 LegalDAG = Level >= AfterLegalizeDAG;
1420 LegalOperations = Level >= AfterLegalizeVectorOps;
1421 LegalTypes = Level >= AfterLegalizeTypes;
1422
1423 WorklistInserter AddNodes(*this);
1424
1425 // Add all the dag nodes to the worklist.
1426 for (SDNode &Node : DAG.allnodes())
1427 AddToWorklist(&Node);
1428
1429 // Create a dummy node (which is not added to allnodes) that adds a reference
1430 // to the root node, preventing it from being deleted and tracking any
1431 // changes of the root.
1432 HandleSDNode Dummy(DAG.getRoot());
1433
1434 // While we have a valid worklist entry node, try to combine it.
1435 while (SDNode *N = getNextWorklistEntry()) {
1436 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1437 // N is deleted from the DAG, since they too may now be dead or may have a
1438 // reduced number of uses, allowing other xforms.
1439 if (recursivelyDeleteUnusedNodes(N))
1440 continue;
1441
1442 WorklistRemover DeadNodes(*this);
1443
1444 // If this combine is running after legalizing the DAG, re-legalize any
1445 // nodes pulled off the worklist.
1446 if (LegalDAG) {
1447 SmallSetVector<SDNode *, 16> UpdatedNodes;
1448 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1449
1450 for (SDNode *LN : UpdatedNodes)
1451 AddToWorklistWithUsers(LN);
1452
1453 if (!NIsValid)
1454 continue;
1455 }
1456
1457 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1458
1459 // Add any operands of the new node which have not yet been combined to the
1460 // worklist as well. Because the worklist uniques things already, this
1461 // won't repeatedly process the same operand.
1462 CombinedNodes.insert(N);
1463 for (const SDValue &ChildN : N->op_values())
1464 if (!CombinedNodes.count(ChildN.getNode()))
1465 AddToWorklist(ChildN.getNode());
1466
1467 SDValue RV = combine(N);
1468
1469 if (!RV.getNode())
1470 continue;
1471
1472 ++NodesCombined;
1473
1474 // If we get back the same node we passed in, rather than a new node or
1475 // zero, we know that the node must have defined multiple values and
1476 // CombineTo was used. Since CombineTo takes care of the worklist
1477 // mechanics for us, we have no work to do in this case.
1478 if (RV.getNode() == N)
1479 continue;
1480
1481 assert(N->getOpcode() != ISD::DELETED_NODE &&
1482 RV.getOpcode() != ISD::DELETED_NODE &&
1483 "Node was deleted but visit returned new node!");
1484
1485 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1486
1487 if (N->getNumValues() == RV.getNode()->getNumValues())
1488 DAG.ReplaceAllUsesWith(N, RV.getNode());
1489 else {
1490 assert(N->getValueType(0) == RV.getValueType() &&
1491 N->getNumValues() == 1 && "Type mismatch");
1492 DAG.ReplaceAllUsesWith(N, &RV);
1493 }
1494
1495 // Push the new node and any users onto the worklist
1496 AddToWorklist(RV.getNode());
1497 AddUsersToWorklist(RV.getNode());
1498
1499 // Finally, if the node is now dead, remove it from the graph. The node
1500 // may not be dead if the replacement process recursively simplified to
1501 // something else needing this node. This will also take care of adding any
1502 // operands which have lost a user to the worklist.
1503 recursivelyDeleteUnusedNodes(N);
1504 }
1505
1506 // If the root changed (e.g. it was a dead load), update the root.
1507 DAG.setRoot(Dummy.getValue());
1508 DAG.RemoveDeadNodes();
1509}
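
// A stripped-down standalone sketch (hypothetical runToFixedPoint helper) of
// the driver loop above: pull items until the worklist drains, and let each
// visit requeue whatever it may have invalidated, so the pass only terminates
// once nothing changes.
#include <functional>
#include <vector>
template <typename T>
static void runToFixedPoint(std::vector<T> Worklist,
                            const std::function<void(T, std::vector<T> &)> &Visit) {
  while (!Worklist.empty()) {
    T Item = Worklist.back();
    Worklist.pop_back();
    Visit(Item, Worklist);  // a rewrite may push new or affected items back
  }
}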
1510
1511SDValue DAGCombiner::visit(SDNode *N) {
1512 switch (N->getOpcode()) {
1513 default: break;
1514 case ISD::TokenFactor: return visitTokenFactor(N);
1515 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1516 case ISD::ADD: return visitADD(N);
1517 case ISD::SUB: return visitSUB(N);
1518 case ISD::SADDSAT:
1519 case ISD::UADDSAT: return visitADDSAT(N);
1520 case ISD::SSUBSAT:
1521 case ISD::USUBSAT: return visitSUBSAT(N);
1522 case ISD::ADDC: return visitADDC(N);
1523 case ISD::SADDO:
1524 case ISD::UADDO: return visitADDO(N);
1525 case ISD::SUBC: return visitSUBC(N);
1526 case ISD::SSUBO:
1527 case ISD::USUBO: return visitSUBO(N);
1528 case ISD::ADDE: return visitADDE(N);
1529 case ISD::ADDCARRY: return visitADDCARRY(N);
1530 case ISD::SUBE: return visitSUBE(N);
1531 case ISD::SUBCARRY: return visitSUBCARRY(N);
1532 case ISD::SMULFIX:
1533 case ISD::SMULFIXSAT:
1534 case ISD::UMULFIX:
1535 case ISD::UMULFIXSAT: return visitMULFIX(N);
1536 case ISD::MUL: return visitMUL(N);
1537 case ISD::SDIV: return visitSDIV(N);
1538 case ISD::UDIV: return visitUDIV(N);
1539 case ISD::SREM:
1540 case ISD::UREM: return visitREM(N);
1541 case ISD::MULHU: return visitMULHU(N);
1542 case ISD::MULHS: return visitMULHS(N);
1543 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1544 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1545 case ISD::SMULO:
1546 case ISD::UMULO: return visitMULO(N);
1547 case ISD::SMIN:
1548 case ISD::SMAX:
1549 case ISD::UMIN:
1550 case ISD::UMAX: return visitIMINMAX(N);
1551 case ISD::AND: return visitAND(N);
1552 case ISD::OR: return visitOR(N);
1553 case ISD::XOR: return visitXOR(N);
1554 case ISD::SHL: return visitSHL(N);
1555 case ISD::SRA: return visitSRA(N);
1556 case ISD::SRL: return visitSRL(N);
1557 case ISD::ROTR:
1558 case ISD::ROTL: return visitRotate(N);
1559 case ISD::FSHL:
1560 case ISD::FSHR: return visitFunnelShift(N);
1561 case ISD::ABS: return visitABS(N);
1562 case ISD::BSWAP: return visitBSWAP(N);
1563 case ISD::BITREVERSE: return visitBITREVERSE(N);
1564 case ISD::CTLZ: return visitCTLZ(N);
1565 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1566 case ISD::CTTZ: return visitCTTZ(N);
1567 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1568 case ISD::CTPOP: return visitCTPOP(N);
1569 case ISD::SELECT: return visitSELECT(N);
1570 case ISD::VSELECT: return visitVSELECT(N);
1571 case ISD::SELECT_CC: return visitSELECT_CC(N);
1572 case ISD::SETCC: return visitSETCC(N);
1573 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1574 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1575 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1576 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1577 case ISD::AssertSext:
1578 case ISD::AssertZext: return visitAssertExt(N);
1579 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1580 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1581 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1582 case ISD::TRUNCATE: return visitTRUNCATE(N);
1583 case ISD::BITCAST: return visitBITCAST(N);
1584 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1585 case ISD::FADD: return visitFADD(N);
1586 case ISD::FSUB: return visitFSUB(N);
1587 case ISD::FMUL: return visitFMUL(N);
1588 case ISD::FMA: return visitFMA(N);
1589 case ISD::FDIV: return visitFDIV(N);
1590 case ISD::FREM: return visitFREM(N);
1591 case ISD::FSQRT: return visitFSQRT(N);
1592 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1593 case ISD::FPOW: return visitFPOW(N);
1594 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1595 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1596 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1597 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1598 case ISD::FP_ROUND: return visitFP_ROUND(N);
1599 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1600 case ISD::FNEG: return visitFNEG(N);
1601 case ISD::FABS: return visitFABS(N);
1602 case ISD::FFLOOR: return visitFFLOOR(N);
1603 case ISD::FMINNUM: return visitFMINNUM(N);
1604 case ISD::FMAXNUM: return visitFMAXNUM(N);
1605 case ISD::FMINIMUM: return visitFMINIMUM(N);
1606 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1607 case ISD::FCEIL: return visitFCEIL(N);
1608 case ISD::FTRUNC: return visitFTRUNC(N);
1609 case ISD::BRCOND: return visitBRCOND(N);
1610 case ISD::BR_CC: return visitBR_CC(N);
1611 case ISD::LOAD: return visitLOAD(N);
1612 case ISD::STORE: return visitSTORE(N);
1613 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1614 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1615 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1616 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1617 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1618 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1619 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1620 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1621 case ISD::MGATHER: return visitMGATHER(N);
1622 case ISD::MLOAD: return visitMLOAD(N);
1623 case ISD::MSCATTER: return visitMSCATTER(N);
1624 case ISD::MSTORE: return visitMSTORE(N);
1625 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1626 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1627 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1628 case ISD::VECREDUCE_FADD:
1629 case ISD::VECREDUCE_FMUL:
1630 case ISD::VECREDUCE_ADD:
1631 case ISD::VECREDUCE_MUL:
1632 case ISD::VECREDUCE_AND:
1633 case ISD::VECREDUCE_OR:
1634 case ISD::VECREDUCE_XOR:
1635 case ISD::VECREDUCE_SMAX:
1636 case ISD::VECREDUCE_SMIN:
1637 case ISD::VECREDUCE_UMAX:
1638 case ISD::VECREDUCE_UMIN:
1639 case ISD::VECREDUCE_FMAX:
1640 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1641 }
1642 return SDValue();
1643}
1644
1645SDValue DAGCombiner::combine(SDNode *N) {
1646 SDValue RV = visit(N);
1647
1648 // If nothing happened, try a target-specific DAG combine.
1649 if (!RV.getNode()) {
1650 assert(N->getOpcode() != ISD::DELETED_NODE &&
1651 "Node was deleted but visit returned NULL!");
1652
1653 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1654 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1655
1656 // Expose the DAG combiner to the target combiner impls.
1657 TargetLowering::DAGCombinerInfo
1658 DagCombineInfo(DAG, Level, false, this);
1659
1660 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1661 }
1662 }
1663
1664 // If nothing happened still, try promoting the operation.
1665 if (!RV.getNode()) {
1666 switch (N->getOpcode()) {
1667 default: break;
1668 case ISD::ADD:
1669 case ISD::SUB:
1670 case ISD::MUL:
1671 case ISD::AND:
1672 case ISD::OR:
1673 case ISD::XOR:
1674 RV = PromoteIntBinOp(SDValue(N, 0));
1675 break;
1676 case ISD::SHL:
1677 case ISD::SRA:
1678 case ISD::SRL:
1679 RV = PromoteIntShiftOp(SDValue(N, 0));
1680 break;
1681 case ISD::SIGN_EXTEND:
1682 case ISD::ZERO_EXTEND:
1683 case ISD::ANY_EXTEND:
1684 RV = PromoteExtend(SDValue(N, 0));
1685 break;
1686 case ISD::LOAD:
1687 if (PromoteLoad(SDValue(N, 0)))
1688 RV = SDValue(N, 0);
1689 break;
1690 }
1691 }
1692
1693 // If N is a commutative binary node, try to eliminate it if the commuted
1694 // version is already present in the DAG.
1695 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1696 N->getNumValues() == 1) {
1697 SDValue N0 = N->getOperand(0);
1698 SDValue N1 = N->getOperand(1);
1699
1700 // Constant operands are canonicalized to RHS.
1701 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1702 SDValue Ops[] = {N1, N0};
1703 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1704 N->getFlags());
1705 if (CSENode)
1706 return SDValue(CSENode, 0);
1707 }
1708 }
1709
1710 return RV;
1711}
1712
1713/// Given a node, return its input chain if it has one; otherwise return a
1714/// null SDValue.
1715static SDValue getInputChainForNode(SDNode *N) {
1716 if (unsigned NumOps = N->getNumOperands()) {
1717 if (N->getOperand(0).getValueType() == MVT::Other)
1718 return N->getOperand(0);
1719 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1720 return N->getOperand(NumOps-1);
1721 for (unsigned i = 1; i < NumOps-1; ++i)
1722 if (N->getOperand(i).getValueType() == MVT::Other)
1723 return N->getOperand(i);
1724 }
1725 return SDValue();
1726}
1727
1728SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1729 // If N has two operands, where one has an input chain equal to the other,
1730 // the 'other' chain is redundant.
1731 if (N->getNumOperands() == 2) {
1732 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1733 return N->getOperand(0);
1734 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1735 return N->getOperand(1);
1736 }
1737
1738 // Don't simplify token factors if optnone.
1739 if (OptLevel == CodeGenOpt::None)
1740 return SDValue();
1741
1742 // If the sole user is a token factor, we should make sure we have a
1743 // chance to merge them together. This prevents TF chains from inhibiting
1744 // optimizations.
1745 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1746 AddToWorklist(*(N->use_begin()));
1747
1748 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1749 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1750 SmallPtrSet<SDNode*, 16> SeenOps;
1751 bool Changed = false; // If we should replace this token factor.
1752
1753 // Start out with this token factor.
1754 TFs.push_back(N);
1755
1756 // Iterate through the token factors. TFs grows as new token factors are
1757 // encountered.
1758 for (unsigned i = 0; i < TFs.size(); ++i) {
1759 // Limit number of nodes to inline, to avoid quadratic compile times.
1760 // We have to add the outstanding Token Factors to Ops, otherwise we might
1761 // drop Ops from the resulting Token Factors.
1762 if (Ops.size() > TokenFactorInlineLimit) {
1763 for (unsigned j = i; j < TFs.size(); j++)
1764 Ops.emplace_back(TFs[j], 0);
1765 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1766 // combiner worklist later.
1767 TFs.resize(i);
1768 break;
1769 }
1770
1771 SDNode *TF = TFs[i];
1772 // Check each of the operands.
1773 for (const SDValue &Op : TF->op_values()) {
1774 switch (Op.getOpcode()) {
1775 case ISD::EntryToken:
1776 // Entry tokens don't need to be added to the list. They are
1777 // redundant.
1778 Changed = true;
1779 break;
1780
1781 case ISD::TokenFactor:
1782 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1783 // Queue up for processing.
1784 TFs.push_back(Op.getNode());
1785 Changed = true;
1786 break;
1787 }
1788 LLVM_FALLTHROUGH;
1789
1790 default:
1791 // Only add if it isn't already in the list.
1792 if (SeenOps.insert(Op.getNode()).second)
1793 Ops.push_back(Op);
1794 else
1795 Changed = true;
1796 break;
1797 }
1798 }
1799 }
1800
1801 // Re-visit inlined Token Factors, to clean them up in case they have been
1802 // removed. Skip the first Token Factor, as this is the current node.
1803 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1804 AddToWorklist(TFs[i]);
1805
1806 // Remove nodes that are chained to another node in the list. Do so
1807 // by walking up chains breadth-first, stopping when we've seen
1808 // another operand. In general we must climb to the EntryNode, but we can exit
1809 // early if we find all remaining work is associated with just one operand, as
1810 // no further pruning is possible.
1811
1812 // List of nodes to search through and original Ops from which they originate.
1813 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1814 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1815 SmallPtrSet<SDNode *, 16> SeenChains;
1816 bool DidPruneOps = false;
1817
1818 unsigned NumLeftToConsider = 0;
1819 for (const SDValue &Op : Ops) {
1820 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1821 OpWorkCount.push_back(1);
1822 }
1823
1824 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1825 // If this is an Op, we can remove the op from the list. Re-mark any
1826 // search associated with it as being from the current OpNumber.
1827 if (SeenOps.count(Op) != 0) {
1828 Changed = true;
1829 DidPruneOps = true;
1830 unsigned OrigOpNumber = 0;
1831 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1832 OrigOpNumber++;
1833 assert((OrigOpNumber != Ops.size()) &&
1834 "expected to find TokenFactor Operand");
1835 // Re-mark worklist from OrigOpNumber to OpNumber
1836 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1837 if (Worklist[i].second == OrigOpNumber) {
1838 Worklist[i].second = OpNumber;
1839 }
1840 }
1841 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1842 OpWorkCount[OrigOpNumber] = 0;
1843 NumLeftToConsider--;
1844 }
1845 // Add if it's a new chain
1846 if (SeenChains.insert(Op).second) {
1847 OpWorkCount[OpNumber]++;
1848 Worklist.push_back(std::make_pair(Op, OpNumber));
1849 }
1850 };
1851
1852 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1853 // We need to consider at least 2 Ops to prune.
1854 if (NumLeftToConsider <= 1)
1855 break;
1856 auto CurNode = Worklist[i].first;
1857 auto CurOpNumber = Worklist[i].second;
1858 assert((OpWorkCount[CurOpNumber] > 0) &&
1859 "Node should not appear in worklist");
1860 switch (CurNode->getOpcode()) {
1861 case ISD::EntryToken:
1862 // Hitting EntryToken is the only way for the search to terminate
1863 // without hitting another operand's search.
1864 // Prevent us from marking this operand as
1865 // considered.
1866 NumLeftToConsider++;
1867 break;
1868 case ISD::TokenFactor:
1869 for (const SDValue &Op : CurNode->op_values())
1870 AddToWorklist(i, Op.getNode(), CurOpNumber);
1871 break;
1872 case ISD::LIFETIME_START:
1873 case ISD::LIFETIME_END:
1874 case ISD::CopyFromReg:
1875 case ISD::CopyToReg:
1876 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1877 break;
1878 default:
1879 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1880 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1881 break;
1882 }
1883 OpWorkCount[CurOpNumber]--;
1884 if (OpWorkCount[CurOpNumber] == 0)
1885 NumLeftToConsider--;
1886 }
1887
1888 // If we've changed things around then replace token factor.
1889 if (Changed) {
1890 SDValue Result;
1891 if (Ops.empty()) {
1892 // The entry token is the only possible outcome.
1893 Result = DAG.getEntryNode();
1894 } else {
1895 if (DidPruneOps) {
1896 SmallVector<SDValue, 8> PrunedOps;
1897 //
1898 for (const SDValue &Op : Ops) {
1899 if (SeenChains.count(Op.getNode()) == 0)
1900 PrunedOps.push_back(Op);
1901 }
1902 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1903 } else {
1904 Result = DAG.getTokenFactor(SDLoc(N), Ops);
1905 }
1906 }
1907 return Result;
1908 }
1909 return SDValue();
1910}
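
// A simplified standalone sketch (hypothetical Chain type) of the pruning idea
// above: a token factor operand is redundant if it is reachable by walking
// input chains from another operand, since that operand already orders it.
// The code above interleaves the per-operand searches and counts outstanding
// work; this quadratic form only shows the reachability test.
#include <queue>
#include <set>
#include <vector>
struct Chain { std::vector<Chain *> Inputs; };
static std::vector<Chain *> pruneChainedOps(const std::vector<Chain *> &Ops) {
  std::set<Chain *> OpSet(Ops.begin(), Ops.end()), Covered;
  for (Chain *Start : Ops) {
    std::set<Chain *> Seen;
    std::queue<Chain *> Pending;
    for (Chain *In : Start->Inputs)
      Pending.push(In);
    while (!Pending.empty()) {
      Chain *C = Pending.front();
      Pending.pop();
      if (!Seen.insert(C).second)
        continue;
      if (OpSet.count(C))
        Covered.insert(C);  // C is already ordered by Start's chain
      for (Chain *In : C->Inputs)
        Pending.push(In);
    }
  }
  std::vector<Chain *> Pruned;
  for (Chain *Op : Ops)
    if (!Covered.count(Op))
      Pruned.push_back(Op);
  return Pruned;
}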
1911
1912/// MERGE_VALUES can always be eliminated.
1913SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1914 WorklistRemover DeadNodes(*this);
1915 // Replacing results may cause a different MERGE_VALUES to suddenly
1916 // be CSE'd with N, and carry its uses with it. Iterate until no
1917 // uses remain, to ensure that the node can be safely deleted.
1918 // First add the users of this node to the work list so that they
1919 // can be tried again once they have new operands.
1920 AddUsersToWorklist(N);
1921 do {
1922 // Do as a single replacement to avoid rewalking use lists.
1923 SmallVector<SDValue, 8> Ops;
1924 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1925 Ops.push_back(N->getOperand(i));
1926 DAG.ReplaceAllUsesWith(N, Ops.data());
1927 } while (!N->use_empty());
1928 deleteAndRecombine(N);
1929 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1930}
1931
1932/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1933/// ConstantSDNode pointer; else return nullptr.
1934static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1935 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1936 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1937}
1938
1939SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1940 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
1941 "Unexpected binary operator");
1942
1943 // Don't do this unless the old select is going away. We want to eliminate the
1944 // binary operator, not replace a binop with a select.
1945 // TODO: Handle ISD::SELECT_CC.
1946 unsigned SelOpNo = 0;
1947 SDValue Sel = BO->getOperand(0);
1948 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1949 SelOpNo = 1;
1950 Sel = BO->getOperand(1);
1951 }
1952
1953 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1954 return SDValue();
1955
1956 SDValue CT = Sel.getOperand(1);
1957 if (!isConstantOrConstantVector(CT, true) &&
1958 !isConstantFPBuildVectorOrConstantFP(CT))
1959 return SDValue();
1960
1961 SDValue CF = Sel.getOperand(2);
1962 if (!isConstantOrConstantVector(CF, true) &&
1963 !isConstantFPBuildVectorOrConstantFP(CF))
1964 return SDValue();
1965
1966 // Bail out if any constants are opaque because we can't constant fold those.
1967 // The exception is "and" and "or" with either 0 or -1 in which case we can
1968 // propagate non constant operands into select. I.e.:
1969 // and (select Cond, 0, -1), X --> select Cond, 0, X
1970 // or X, (select Cond, -1, 0) --> select Cond, -1, X
1971 auto BinOpcode = BO->getOpcode();
1972 bool CanFoldNonConst =
1973 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1974 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1975 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1976
1977 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1978 if (!CanFoldNonConst &&
1979 !isConstantOrConstantVector(CBO, true) &&
1980 !isConstantFPBuildVectorOrConstantFP(CBO))
1981 return SDValue();
1982
1983 EVT VT = Sel.getValueType();
1984
1985 // In the case of a shift, the value and the shift amount may have different
1986 // VTs; for instance, on x86 the shift amount is i8 regardless of the LHS type.
1987 // Bail out if we have swapped operands and the value types do not match.
1988 // NB: x86 is fine if the operands are not swapped and the shift amount VT is
1989 // no bigger than that of the shifted value. TODO: it is possible to check for
1990 // a shift operation, correct the VTs, and still optimize on x86 if needed.
1991 if (SelOpNo && VT != CBO.getValueType())
1992 return SDValue();
1993
1994 // We have a select-of-constants followed by a binary operator with a
1995 // constant. Eliminate the binop by pulling the constant math into the select.
1996 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1997 SDLoc DL(Sel);
1998 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1999 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2000 if (!CanFoldNonConst && !NewCT.isUndef() &&
2001 !isConstantOrConstantVector(NewCT, true) &&
2002 !isConstantFPBuildVectorOrConstantFP(NewCT))
2003 return SDValue();
2004
2005 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2006 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2007 if (!CanFoldNonConst && !NewCF.isUndef() &&
2008 !isConstantOrConstantVector(NewCF, true) &&
2009 !isConstantFPBuildVectorOrConstantFP(NewCF))
2010 return SDValue();
2011
2012 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2013 SelectOp->setFlags(BO->getFlags());
2014 return SelectOp;
2015}
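
// A standalone check (hypothetical helper, wrap-around arithmetic) of the
// distribution this fold performs: applying the binop to each select arm
// first yields the same value, so the binop disappears once both arms fold
// to constants.
#include <cassert>
static void checkBinOpIntoSelect(bool Cond, unsigned CT, unsigned CF,
                                 unsigned CBO) {
  unsigned Fused = (Cond ? CT : CF) + CBO;          // add (select Cond, CT, CF), CBO
  unsigned Split = Cond ? (CT + CBO) : (CF + CBO);  // select Cond, CT+CBO, CF+CBO
  assert(Fused == Split);
}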
2016
2017static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2018 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2019 "Expecting add or sub");
2020
2021 // Match a constant operand and a zext operand for the math instruction:
2022 // add Z, C
2023 // sub C, Z
2024 bool IsAdd = N->getOpcode() == ISD::ADD;
2025 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2026 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2027 auto *CN = dyn_cast<ConstantSDNode>(C);
2028 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2029 return SDValue();
2030
2031 // Match the zext operand as a setcc of a boolean.
2032 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2033 Z.getOperand(0).getValueType() != MVT::i1)
2034 return SDValue();
2035
2036 // Match the compare as: setcc (X & 1), 0, eq.
2037 SDValue SetCC = Z.getOperand(0);
2038 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2039 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2040 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2041 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2042 return SDValue();
2043
2044 // We are adding/subtracting a constant and an inverted low bit. Turn that
2045 // into a subtract/add of the low bit with incremented/decremented constant:
2046 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2047 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2048 EVT VT = C.getValueType();
2049 SDLoc DL(N);
2050 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2051 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2052 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2053 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2054}
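
// A standalone check (hypothetical helper) of the rewrite above, using 32-bit
// wrap-around arithmetic in place of the DAG types: Z = zext(seteq (X & 1), 0)
// equals 1 - (X & 1), so add Z, C equals sub C+1, (zext (X & 1)).
#include <cassert>
#include <cstdint>
static void checkMaskedBoolFold(uint32_t X, uint32_t C) {
  uint32_t Z = ((X & 1) == 0) ? 1u : 0u;  // zext i1 (seteq (X & 1), 0)
  assert(Z + C == (C + 1) - (X & 1));     // sub C+1, (zext (X & 1))
}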
2055
2056/// Try to fold an add/sub with a constant operand and a shifted 'not' of the
2057/// sign bit into a shift and an add with a different constant.
2058static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2059 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2060 "Expecting add or sub");
2061
2062 // We need a constant operand for the add/sub, and the other operand is a
2063 // logical shift right: add (srl), C or sub C, (srl).
2064 // TODO - support non-uniform vector amounts.
2065 bool IsAdd = N->getOpcode() == ISD::ADD;
2066 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2067 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2068 ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2069 if (!C || ShiftOp.getOpcode() != ISD::SRL)
2070 return SDValue();
2071
2072 // The shift must be of a 'not' value.
2073 SDValue Not = ShiftOp.getOperand(0);
2074 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2075 return SDValue();
2076
2077 // The shift must be moving the sign bit to the least-significant-bit.
2078 EVT VT = ShiftOp.getValueType();
2079 SDValue ShAmt = ShiftOp.getOperand(1);
2080 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2081 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2082 return SDValue();
2083
2084 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2085 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2086 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2087 SDLoc DL(N);
2088 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2089 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2090 APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2091 return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2092}
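
// A standalone check (hypothetical helper) of the 'add' form of this rewrite:
// srl(not X, 31) equals 1 - srl(X, 31) and sra(X, 31) equals -srl(X, 31), so
// srl(not X, 31) + C == sra(X, 31) + (C + 1) modulo 2^32. Assumes arithmetic
// right shift of negative values (guaranteed since C++20, universal before).
#include <cassert>
#include <cstdint>
static void checkSignBitFold(int32_t X, uint32_t C) {
  uint32_t NotSrl = (uint32_t)~X >> 31;  // srl (not X), 31: 0 or 1
  uint32_t Sra = (uint32_t)(X >> 31);    // sra X, 31: 0 or -1
  assert(NotSrl + C == Sra + (C + 1));
}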
2093
2094/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2095/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2096/// are no common bits set in the operands).
2097SDValue DAGCombiner::visitADDLike(SDNode *N) {
2098 SDValue N0 = N->getOperand(0);
2099 SDValue N1 = N->getOperand(1);
2100 EVT VT = N0.getValueType();
2101 SDLoc DL(N);
2102
2103 // fold vector ops
2104 if (VT.isVector()) {
2105 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2106 return FoldedVOp;
2107
2108 // fold (add x, 0) -> x, vector edition
2109 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2110 return N0;
2111 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2112 return N1;
2113 }
2114
2115 // fold (add x, undef) -> undef
2116 if (N0.isUndef())
2117 return N0;
2118
2119 if (N1.isUndef())
2120 return N1;
2121
2122 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2123 // canonicalize constant to RHS
2124 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2125 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2126 // fold (add c1, c2) -> c1+c2
2127 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2128 }
2129
2130 // fold (add x, 0) -> x
2131 if (isNullConstant(N1))
2132 return N0;
2133
2134 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2135 // fold ((A-c1)+c2) -> (A+(c2-c1))
2136 if (N0.getOpcode() == ISD::SUB &&
2137 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2138 SDValue Sub =
2139 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2140 assert(Sub && "Constant folding failed");
2141 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2142 }
2143
2144 // fold ((c1-A)+c2) -> (c1+c2)-A
2145 if (N0.getOpcode() == ISD::SUB &&
2146 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2147 SDValue Add =
2148 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2149 assert(Add && "Constant folding failed");
2150 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2151 }
2152
2153 // add (sext i1 X), 1 -> zext (not i1 X)
2154 // We don't transform this pattern:
2155 // add (zext i1 X), -1 -> sext (not i1 X)
2156 // because most (?) targets generate better code for the zext form.
2157 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2158 isOneOrOneSplat(N1)) {
2159 SDValue X = N0.getOperand(0);
2160 if ((!LegalOperations ||
2161 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2162 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2163 X.getScalarValueSizeInBits() == 1) {
2164 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2165 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2166 }
2167 }
2168
2169 // Undo the add -> or combine to merge constant offsets from a frame index.
2170 if (N0.getOpcode() == ISD::OR &&
2171 isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2172 isa<ConstantSDNode>(N0.getOperand(1)) &&
2173 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2174 SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2175 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2176 }
2177 }
2178
2179 if (SDValue NewSel = foldBinOpIntoSelect(N))
2180 return NewSel;
2181
2182 // reassociate add
2183 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2184 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2185 return RADD;
2186 }
2187 // fold ((0-A) + B) -> B-A
2188 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2189 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2190
2191 // fold (A + (0-B)) -> A-B
2192 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2193 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2194
2195 // fold (A+(B-A)) -> B
2196 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2197 return N1.getOperand(0);
2198
2199 // fold ((B-A)+A) -> B
2200 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2201 return N0.getOperand(0);
2202
2203 // fold ((A-B)+(C-A)) -> (C-B)
2204 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2205 N0.getOperand(0) == N1.getOperand(1))
2206 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2207 N0.getOperand(1));
2208
2209 // fold ((A-B)+(B-C)) -> (A-C)
2210 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2211 N0.getOperand(1) == N1.getOperand(0))
2212 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2213 N1.getOperand(1));
2214
2215 // fold (A+(B-(A+C))) to (B-C)
2216 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2217 N0 == N1.getOperand(1).getOperand(0))
2218 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2219 N1.getOperand(1).getOperand(1));
2220
2221 // fold (A+(B-(C+A))) to (B-C)
2222 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2223 N0 == N1.getOperand(1).getOperand(1))
2224 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2225 N1.getOperand(1).getOperand(0));
2226
2227 // fold (A+((B-A)+or-C)) to (B+or-C)
2228 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2229 N1.getOperand(0).getOpcode() == ISD::SUB &&
2230 N0 == N1.getOperand(0).getOperand(1))
2231 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2232 N1.getOperand(1));
2233
2234 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2235 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2236 SDValue N00 = N0.getOperand(0);
2237 SDValue N01 = N0.getOperand(1);
2238 SDValue N10 = N1.getOperand(0);
2239 SDValue N11 = N1.getOperand(1);
2240
2241 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2242 return DAG.getNode(ISD::SUB, DL, VT,
2243 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2244 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2245 }
2246
2247 // fold (add (umax X, C), -C) --> (usubsat X, C)
2248 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2249 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2250 return (!Max && !Op) ||
2251 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2252 };
2253 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2254 /*AllowUndefs*/ true))
2255 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2256 N0.getOperand(1));
2257 }
2258
2259 if (SimplifyDemandedBits(SDValue(N, 0)))
2260 return SDValue(N, 0);
2261
2262 if (isOneOrOneSplat(N1)) {
2263 // fold (add (xor a, -1), 1) -> (sub 0, a)
2264 if (isBitwiseNot(N0))
2265 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2266 N0.getOperand(0));
2267
2268 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2269 if (N0.getOpcode() == ISD::ADD ||
2270 N0.getOpcode() == ISD::UADDO ||
2271 N0.getOpcode() == ISD::SADDO) {
2272 SDValue A, Xor;
2273
2274 if (isBitwiseNot(N0.getOperand(0))) {
2275 A = N0.getOperand(1);
2276 Xor = N0.getOperand(0);
2277 } else if (isBitwiseNot(N0.getOperand(1))) {
2278 A = N0.getOperand(0);
2279 Xor = N0.getOperand(1);
2280 }
2281
2282 if (Xor)
2283 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2284 }
2285
2286 // Look for:
2287 // add (add x, y), 1
2288 // And if the target does not like this form then turn into:
2289 // sub y, (xor x, -1)
2290 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2291 N0.getOpcode() == ISD::ADD) {
2292 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2293 DAG.getAllOnesConstant(DL, VT));
2294 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2295 }
2296 }
2297
2298 // (x - y) + -1 -> add (xor y, -1), x
2299 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2300 isAllOnesOrAllOnesSplat(N1)) {
2301 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2302 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2303 }
2304
2305 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2306 return Combined;
2307
2308 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2309 return Combined;
2310
2311 return SDValue();
2312}
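
// A standalone spot-check (hypothetical helper) of one of the reassociation
// folds above, in wrap-around arithmetic: (A+(B-(A+C))) -> (B-C) holds for
// all inputs modulo 2^32, since the A terms cancel.
#include <cassert>
#include <cstdint>
static void checkAddOfNestedSub(uint32_t A, uint32_t B, uint32_t C) {
  assert(A + (B - (A + C)) == B - C);
}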
2313
2314SDValue DAGCombiner::visitADD(SDNode *N) {
2315 SDValue N0 = N->getOperand(0);
2316 SDValue N1 = N->getOperand(1);
2317 EVT VT = N0.getValueType();
2318 SDLoc DL(N);
2319
2320 if (SDValue Combined = visitADDLike(N))
2321 return Combined;
2322
2323 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2324 return V;
2325
2326 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2327 return V;
2328
2329 // fold (a+b) -> (a|b) iff a and b share no bits.
2330 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2331 DAG.haveNoCommonBitsSet(N0, N1))
2332 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2333
2334 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2335 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2336 APInt C0 = N0->getConstantOperandAPInt(0);
2337 APInt C1 = N1->getConstantOperandAPInt(0);
2338 return DAG.getVScale(DL, VT, C0 + C1);
2339 }
2340
2341 return SDValue();
2342}
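
// A standalone check (hypothetical helper) of the add->or fold above: when
// the operands share no set bits, no bit position produces a carry, so
// addition and bitwise-or agree.
#include <cassert>
#include <cstdint>
static void checkAddAsOr(uint32_t A, uint32_t B) {
  if ((A & B) == 0)  // the haveNoCommonBitsSet precondition
    assert(A + B == (A | B));
}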
2343
2344SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2345 unsigned Opcode = N->getOpcode();
2346 SDValue N0 = N->getOperand(0);
2347 SDValue N1 = N->getOperand(1);
2348 EVT VT = N0.getValueType();
2349 SDLoc DL(N);
2350
2351 // fold vector ops
2352 if (VT.isVector()) {
2353 // TODO SimplifyVBinOp
2354
2355 // fold (add_sat x, 0) -> x, vector edition
2356 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2357 return N0;
2358 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2359 return N1;
2360 }
2361
2362 // fold (add_sat x, undef) -> -1
2363 if (N0.isUndef() || N1.isUndef())
2364 return DAG.getAllOnesConstant(DL, VT);
2365
2366 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2367 // canonicalize constant to RHS
2368 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2369 return DAG.getNode(Opcode, DL, VT, N1, N0);
2370 // fold (add_sat c1, c2) -> c3
2371 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2372 }
2373
2374 // fold (add_sat x, 0) -> x
2375 if (isNullConstant(N1))
2376 return N0;
2377
2378 // If it cannot overflow, transform into an add.
2379 if (Opcode == ISD::UADDSAT)
2380 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2381 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2382
2383 return SDValue();
2384}
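
// A standalone model (hypothetical uaddsat32 helper) of unsigned saturating
// add, showing why UADDSAT can become a plain ADD when overflow is provably
// impossible (the OFK_Never case above): the two differ only on wrap-around.
#include <cstdint>
static uint32_t uaddsat32(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;
  return Sum < A ? UINT32_MAX : Sum;  // clamp instead of wrapping
}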
2385
2386static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2387 bool Masked = false;
2388
2389 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2390 while (true) {
2391 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2392 V = V.getOperand(0);
2393 continue;
2394 }
2395
2396 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2397 Masked = true;
2398 V = V.getOperand(0);
2399 continue;
2400 }
2401
2402 break;
2403 }
2404
2405 // If this is not a carry, return.
2406 if (V.getResNo() != 1)
2407 return SDValue();
2408
2409 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2410 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2411 return SDValue();
2412
2413 EVT VT = V.getNode()->getValueType(0);
2414 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2415 return SDValue();
2416
2417 // If the result is masked, then no matter what kind of bool it is we can
2418 // return. If it isn't, then we need to make sure the bool type is either 0 or
2419 // 1 and not other values.
2420 if (Masked ||
2421 TLI.getBooleanContents(V.getValueType()) ==
2422 TargetLoweringBase::ZeroOrOneBooleanContent)
2423 return V;
2424
2425 return SDValue();
2426}
2427
2428/// Given the operands of an add/sub operation, see if the 2nd operand is a
2429/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2430/// the opcode and bypass the mask operation.
2431static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2432 SelectionDAG &DAG, const SDLoc &DL) {
2433 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2434 return SDValue();
2435
2436 EVT VT = N0.getValueType();
2437 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2438 return SDValue();
2439
2440 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2441 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2442 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2443}
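
// A standalone check (hypothetical helper) of the masked-1 fold: if Y is
// known to be all sign bits (0 or -1, the ComputeNumSignBits == bitwidth case
// above), then (Y & 1) == -Y modulo 2^32, so add N0, (and Y, 1) equals
// sub N0, Y.
#include <cassert>
#include <cstdint>
static void checkAddSubMasked1(uint32_t N0, int32_t Y) {
  assert(Y == 0 || Y == -1);  // the precondition established by the fold
  assert(N0 + ((uint32_t)Y & 1) == N0 - (uint32_t)Y);
}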
2444
2445/// Helper for doing combines based on N0 and N1 being added to each other.
2446SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2447 SDNode *LocReference) {
2448 EVT VT = N0.getValueType();
2449 SDLoc DL(LocReference);
2450
2451 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2452 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2453 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2454 return DAG.getNode(ISD::SUB, DL, VT, N0,
2455 DAG.getNode(ISD::SHL, DL, VT,
2456 N1.getOperand(0).getOperand(1),
2457 N1.getOperand(1)));
2458
2459 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2460 return V;
2461
2462 // Look for:
2463 // add (add x, 1), y
2464 // And if the target does not like this form then turn into:
2465 // sub y, (xor x, -1)
2466 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2467 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2468 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2469 DAG.getAllOnesConstant(DL, VT));
2470 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2471 }
2472
2473 // Hoist one-use subtraction by non-opaque constant:
2474 // (x - C) + y -> (x + y) - C
2475 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2476 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2477 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2478 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2479 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2480 }
2481 // Hoist one-use subtraction from non-opaque constant:
2482 // (C - x) + y -> (y - x) + C
2483 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2484 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2485 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2486 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2487 }
2488
2489 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2490 // rather than 'add 0/-1' (the zext should get folded).
2491 // add (sext i1 Y), X --> sub X, (zext i1 Y)
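// (sext i1 Y is 0 or -1 while zext i1 Y is 0 or 1, so adding the former is
// the same as subtracting the latter.)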
2492 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2493 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2494 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2495 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2496 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2497 }
2498
2499 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2500 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2501 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2502 if (TN->getVT() == MVT::i1) {
2503 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2504 DAG.getConstant(1, DL, VT));
2505 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2506 }
2507 }
2508
2509 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2510 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2511 N1.getResNo() == 0)
2512 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2513 N0, N1.getOperand(0), N1.getOperand(2));
2514
2515 // (add X, Carry) -> (addcarry X, 0, Carry)
2516 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2517 if (SDValue Carry = getAsCarry(TLI, N1))
2518 return DAG.getNode(ISD::ADDCARRY, DL,
2519 DAG.getVTList(VT, Carry.getValueType()), N0,
2520 DAG.getConstant(0, DL, VT), Carry);
2521
2522 return SDValue();
2523}
2524
2525SDValue DAGCombiner::visitADDC(SDNode *N) {
2526 SDValue N0 = N->getOperand(0);
2527 SDValue N1 = N->getOperand(1);
2528 EVT VT = N0.getValueType();
2529 SDLoc DL(N);
2530
2531 // If the flag result is dead, turn this into an ADD.
2532 if (!N->hasAnyUseOfValue(1))
2533 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2534 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2535
2536 // canonicalize constant to RHS.
2537 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2538 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2539 if (N0C && !N1C)
2540 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2541
2542 // fold (addc x, 0) -> x + no carry out
2543 if (isNullConstant(N1))
2544 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2545 DL, MVT::Glue));
2546
2547 // If it cannot overflow, transform into an add.
2548 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2549 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2550 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2551
2552 return SDValue();
2553}
2554
2555static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2556 SelectionDAG &DAG, const TargetLowering &TLI) {
2557 EVT VT = V.getValueType();
2558
2559 SDValue Cst;
2560 switch (TLI.getBooleanContents(VT)) {
2561 case TargetLowering::ZeroOrOneBooleanContent:
2562 case TargetLowering::UndefinedBooleanContent:
2563 Cst = DAG.getConstant(1, DL, VT);
2564 break;
2565 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2566 Cst = DAG.getAllOnesConstant(DL, VT);
2567 break;
2568 }
2569
2570 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2571}
2572
2573/**
2574 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2575 * then the flip also occurs if computing the inverse has the same cost.
2576 * This function returns an empty SDValue in case it cannot flip the boolean
2577 * without increasing the cost of the computation. If you want to flip a boolean
2578 * no matter what, use flipBoolean.
2579 */
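// For example (illustrative): with ZeroOrOneBooleanContent,
// extractBooleanFlip of (xor X, 1) returns X, i.e. the inverted boolean is
// obtained for free by simply dropping the XOR.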
2580static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2581 const TargetLowering &TLI,
2582 bool Force) {
2583 if (Force && isa<ConstantSDNode>(V))
2584 return flipBoolean(V, SDLoc(V), DAG, TLI);
2585
2586 if (V.getOpcode() != ISD::XOR)
2587 return SDValue();
2588
2589 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2590 if (!Const)
2591 return SDValue();
2592
2593 EVT VT = V.getValueType();
2594
2595 bool IsFlip = false;
2596 switch(TLI.getBooleanContents(VT)) {
2597 case TargetLowering::ZeroOrOneBooleanContent:
2598 IsFlip = Const->isOne();
2599 break;
2600 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2601 IsFlip = Const->isAllOnesValue();
2602 break;
2603 case TargetLowering::UndefinedBooleanContent:
2604 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2605 break;
2606 }
2607
2608 if (IsFlip)
2609 return V.getOperand(0);
2610 if (Force)
2611 return flipBoolean(V, SDLoc(V), DAG, TLI);
2612 return SDValue();
2613}
2614
2615SDValue DAGCombiner::visitADDO(SDNode *N) {
2616 SDValue N0 = N->getOperand(0);
2617 SDValue N1 = N->getOperand(1);
2618 EVT VT = N0.getValueType();
2619 bool IsSigned = (ISD::SADDO == N->getOpcode());
2620
2621 EVT CarryVT = N->getValueType(1);
2622 SDLoc DL(N);
2623
2624 // If the flag result is dead, turn this into an ADD.
2625 if (!N->hasAnyUseOfValue(1))
2626 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2627 DAG.getUNDEF(CarryVT));
2628
2629 // canonicalize constant to RHS.
2630 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2631 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2632 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2633
2634 // fold (addo x, 0) -> x + no carry out
2635 if (isNullOrNullSplat(N1))
2636 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2637
2638 if (!IsSigned) {
2639 // If it cannot overflow, transform into an add.
2640 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2641 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2642 DAG.getConstant(0, DL, CarryVT));
2643
2644 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
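// (Illustrative 8-bit check: for a == 0, 0xFF + 1 wraps to 0x00 with carry
// 1 while 0 - 0 == 0 with borrow 0; for a == 1, 0xFE + 1 == 0xFF with carry
// 0 while 0 - 1 borrows; the borrow is always the inverse of the carry.)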
2645 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2646 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2647 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2648 return CombineTo(N, Sub,
2649 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2650 }
2651
2652 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2653 return Combined;
2654
2655 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2656 return Combined;
2657 }
2658
2659 return SDValue();
2660}
2661
2662SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2663 EVT VT = N0.getValueType();
2664 if (VT.isVector())
2665 return SDValue();
2666
2667 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2668 // If Y + 1 cannot overflow.
2669 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2670 SDValue Y = N1.getOperand(0);
2671 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2672 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2673 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2674 N1.getOperand(2));
2675 }
2676
2677 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2678 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2679 if (SDValue Carry = getAsCarry(TLI, N1))
2680 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2681 DAG.getConstant(0, SDLoc(N), VT), Carry);
2682
2683 return SDValue();
2684}
2685
2686SDValue DAGCombiner::visitADDE(SDNode *N) {
2687 SDValue N0 = N->getOperand(0);
2688 SDValue N1 = N->getOperand(1);
2689 SDValue CarryIn = N->getOperand(2);
2690
2691 // canonicalize constant to RHS
2692 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2693 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2694 if (N0C && !N1C)
2695 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2696 N1, N0, CarryIn);
2697
2698 // fold (adde x, y, false) -> (addc x, y)
2699 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2700 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2701
2702 return SDValue();
2703}
2704
2705SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2706 SDValue N0 = N->getOperand(0);
2707 SDValue N1 = N->getOperand(1);
2708 SDValue CarryIn = N->getOperand(2);
2709 SDLoc DL(N);
2710
2711 // canonicalize constant to RHS
2712 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2713 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2714 if (N0C && !N1C)
2715 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2716
2717 // fold (addcarry x, y, false) -> (uaddo x, y)
2718 if (isNullConstant(CarryIn)) {
2719 if (!LegalOperations ||
2720 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2721 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2722 }
2723
2724 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2725 if (isNullConstant(N0) && isNullConstant(N1)) {
2726 EVT VT = N0.getValueType();
2727 EVT CarryVT = CarryIn.getValueType();
2728 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2729 AddToWorklist(CarryExt.getNode());
2730 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2731 DAG.getConstant(1, DL, VT)),
2732 DAG.getConstant(0, DL, CarryVT));
2733 }
2734
2735 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2736 return Combined;
2737
2738 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2739 return Combined;
2740
2741 return SDValue();
2742}
2743
2744/**
2745 * If we are facing some sort of diamond carry propagation pattern try to
2746 * break it up to generate something like:
2747 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2748 *
2749 * The end result is usually an increase in the number of operations required, but
2750 * because the carry is now linearized, other transforms can kick in and optimize the DAG.
2751 *
2752 * Patterns typically look something like
2753 * (uaddo A, B)
2754 * / \
2755 * Carry Sum
2756 * | \
2757 * | (addcarry *, 0, Z)
2758 * | /
2759 * \ Carry
2760 * | /
2761 * (addcarry X, *, *)
2762 *
2763 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2764 * produce a combine with a single path for carry propagation.
2765 */
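// Why this is sound (illustrative aside): Z is a single carry bit, so for
// n-bit values A + B + Z <= (2^n - 1) + (2^n - 1) + 1 < 2^(n+1). The whole
// sum produces at most one carry-out, so the two partial carries in the
// diamond can never both be set and may be merged into the single carry of
// (addcarry A, B, Z).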
2766static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2767 SDValue X, SDValue Carry0, SDValue Carry1,
2768 SDNode *N) {
2769 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2770 return SDValue();
2771 if (Carry1.getOpcode() != ISD::UADDO)
2772 return SDValue();
2773
2774 SDValue Z;
2775
2776 /**
2777 * First look for a suitable Z. It will present itself in the form of
2778 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2779 */
2780 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2781 isNullConstant(Carry0.getOperand(1))) {
2782 Z = Carry0.getOperand(2);
2783 } else if (Carry0.getOpcode() == ISD::UADDO &&
2784 isOneConstant(Carry0.getOperand(1))) {
2785 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2786 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2787 } else {
2788 // We couldn't find a suitable Z.
2789 return SDValue();
2790 }
2791
2792
2793 auto cancelDiamond = [&](SDValue A, SDValue B) {
2794 SDLoc DL(N);
2795 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2796 Combiner.AddToWorklist(NewY.getNode());
2797 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2798 DAG.getConstant(0, DL, X.getValueType()),
2799 NewY.getValue(1));
2800 };
2801
2802 /**
2803 * (uaddo A, B)
2804 * |
2805 * Sum
2806 * |
2807 * (addcarry *, 0, Z)
2808 */
2809 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2810 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2811 }
2812
2813 /**
2814 * (addcarry A, 0, Z)
2815 * |
2816 * Sum
2817 * |
2818 * (uaddo *, B)
2819 */
2820 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2821 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2822 }
2823
2824 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2825 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2826 }
2827
2828 return SDValue();
2829}
2830
2831// If we are facing some sort of diamond carry/borrow in/out pattern try to
2832// match patterns like:
2833//
2834// (uaddo A, B) CarryIn
2835// | \ |
2836// | \ |
2837// PartialSum PartialCarryOutX /
2838// | | /
2839// | ____|____________/
2840// | / |
2841// (uaddo *, *) \________
2842// | \ \
2843// | \ |
2844// | PartialCarryOutY |
2845// | \ |
2846// | \ /
2847// AddCarrySum | ______/
2848// | /
2849// CarryOut = (or *, *)
2850//
2851// And generate ADDCARRY (or SUBCARRY) with two result values:
2852//
2853// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
2854//
2855// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
2856// a single path for carry/borrow out propagation:
2857static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2858 const TargetLowering &TLI, SDValue Carry0,
2859 SDValue Carry1, SDNode *N) {
2860 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
2861 return SDValue();
2862 unsigned Opcode = Carry0.getOpcode();
2863 if (Opcode != Carry1.getOpcode())
2864 return SDValue();
2865 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
2866 return SDValue();
2867
2868 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
2869 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
2870 // the above ASCII art.)
2871 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2872 Carry1.getOperand(1) != Carry0.getValue(0))
2873 std::swap(Carry0, Carry1);
2874 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2875 Carry1.getOperand(1) != Carry0.getValue(0))
2876 return SDValue();
2877
2878 // The carry-in value must be on the right-hand side for subtraction.
2879 unsigned CarryInOperandNum =
2880 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
2881 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
2882 return SDValue();
2883 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
2884
2885 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
2886 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
2887 return SDValue();
2888
2889 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
2890 // TODO: make getAsCarry() aware of how partial carries are merged.
2891 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
2892 return SDValue();
2893 CarryIn = CarryIn.getOperand(0);
2894 if (CarryIn.getValueType() != MVT::i1)
2895 return SDValue();
2896
2897 SDLoc DL(N);
2898 SDValue Merged =
2899 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
2900 Carry0.getOperand(1), CarryIn);
2901
2902 // Note that because the result of the UADDO/USUBO of A and B has been proven
2903 // to feed into the UADDO/USUBO that consumes the carry/borrow in, if the
2904 // first UADDO/USUBO overflows, the second one cannot.
2905 // For example, consider 8-bit numbers where 0xFF is the
2906 // maximum value.
2907 //
2908 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
2909 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
2910 //
2911 // This is important because it means that OR and XOR can be used to merge
2912 // carry flags, and that AND can return a constant zero.
2913 //
2914 // TODO: match other operations that can merge flags (ADD, etc)
2915 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
2916 if (N->getOpcode() == ISD::AND)
2917 return DAG.getConstant(0, DL, MVT::i1);
2918 return Merged.getValue(1);
2919}
2920
2921SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2922 SDNode *N) {
2923 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2924 if (isBitwiseNot(N0))
2925 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2926 SDLoc DL(N);
2927 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2928 N0.getOperand(0), NotC);
2929 return CombineTo(N, Sub,
2930 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2931 }
2932
2933 // Iff the flag result is dead:
2934 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2935 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2936 // or the dependency between the instructions.
2937 if ((N0.getOpcode() == ISD::ADD ||
2938 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2939 N0.getValue(1) != CarryIn)) &&
2940 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2941 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2942 N0.getOperand(0), N0.getOperand(1), CarryIn);
2943
2944 /**
2945 * When one of the addcarry arguments is itself a carry, we may be facing
2946 * a diamond carry propagation, in which case we try to transform the DAG
2947 * to ensure linear carry propagation if that is possible.
2948 */
2949 if (auto Y = getAsCarry(TLI, N1)) {
2950 // Because both are carries, Y and Z can be swapped.
2951 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2952 return R;
2953 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
2954 return R;
2955 }
2956
2957 return SDValue();
2958}
2959
2960 // Since it may not be valid to emit a fold to zero for vector initializers,
2961 // check if we can before folding.
2962static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2963 SelectionDAG &DAG, bool LegalOperations) {
2964 if (!VT.isVector())
2965 return DAG.getConstant(0, DL, VT);
2966 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2967 return DAG.getConstant(0, DL, VT);
2968 return SDValue();
2969}
2970
2971SDValue DAGCombiner::visitSUB(SDNode *N) {
2972 SDValue N0 = N->getOperand(0);
2973 SDValue N1 = N->getOperand(1);
2974 EVT VT = N0.getValueType();
2975 SDLoc DL(N);
2976
2977 // fold vector ops
2978 if (VT.isVector()) {
2979 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2980 return FoldedVOp;
2981
2982 // fold (sub x, 0) -> x, vector edition
2983 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2984 return N0;
2985 }
2986
2987 // fold (sub x, x) -> 0
2988 // FIXME: Refactor this and xor and other similar operations together.
2989 if (N0 == N1)
2990 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2991
2992 // fold (sub c1, c2) -> c3
2993 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
2994 return C;
2995
2996 if (SDValue NewSel = foldBinOpIntoSelect(N))
2997 return NewSel;
2998
2999 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3000
3001 // fold (sub x, c) -> (add x, -c)
3002 if (N1C) {
3003 return DAG.getNode(ISD::ADD, DL, VT, N0,
3004 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3005 }
3006
3007 if (isNullOrNullSplat(N0)) {
3008 unsigned BitWidth = VT.getScalarSizeInBits();
3009 // Right-shifting everything out but the sign bit followed by negation is
3010 // the same as flipping arithmetic/logical shift type without the negation:
3011 // -(X >>u 31) -> (X >>s 31)
3012 // -(X >>s 31) -> (X >>u 31)
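// (Illustrative for i32: if X has the sign bit set, X >>u 31 == 1 and
// -(1) == -1 == X >>s 31; likewise X >>s 31 == -1 and -(-1) == 1 == X >>u 31.
// If the sign bit is clear, both sides are 0.)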
3013 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3014 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3015 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3016 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3017 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3018 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3019 }
3020 }
3021
3022 // 0 - X --> 0 if the sub is NUW.
3023 if (N->getFlags().hasNoUnsignedWrap())
3024 return N0;
3025
3026 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3027 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3028 // N1 must be 0 because negating the minimum signed value is undefined.
3029 if (N->getFlags().hasNoSignedWrap())
3030 return N0;
3031
3032 // 0 - X --> X if X is 0 or the minimum signed value.
3033 return N1;
3034 }
3035 }
3036
3037 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3038 if (isAllOnesOrAllOnesSplat(N0))
3039 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3040
3041 // fold (A - (0-B)) -> A+B
3042 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3043 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3044
3045 // fold A-(A-B) -> B
3046 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3047 return N1.getOperand(1);
3048
3049 // fold (A+B)-A -> B
3050 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3051 return N0.getOperand(1);
3052
3053 // fold (A+B)-B -> A
3054 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3055 return N0.getOperand(0);
3056
3057 // fold (A+C1)-C2 -> A+(C1-C2)
3058 if (N0.getOpcode() == ISD::ADD &&
3059 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3060 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3061 SDValue NewC =
3062 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3063 assert(NewC && "Constant folding failed");
3064 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3065 }
3066
3067 // fold C2-(A+C1) -> (C2-C1)-A
3068 if (N1.getOpcode() == ISD::ADD) {
3069 SDValue N11 = N1.getOperand(1);
3070 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3071 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3072 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3073 assert(NewC && "Constant folding failed");
3074 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3075 }
3076 }
3077
3078 // fold (A-C1)-C2 -> A-(C1+C2)
3079 if (N0.getOpcode() == ISD::SUB &&
3080 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3081 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3082 SDValue NewC =
3083 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3084 assert(NewC && "Constant folding failed");
3085 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3086 }
3087
3088 // fold (c1-A)-c2 -> (c1-c2)-A
3089 if (N0.getOpcode() == ISD::SUB &&
3090 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3091 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3092 SDValue NewC =
3093 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3094 assert(NewC && "Constant folding failed");
3095 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3096 }
3097
3098 // fold ((A+(B+or-C))-B) -> A+or-C
3099 if (N0.getOpcode() == ISD::ADD &&
3100 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3101 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3102 N0.getOperand(1).getOperand(0) == N1)
3103 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3104 N0.getOperand(1).getOperand(1));
3105
3106 // fold ((A+(C+B))-B) -> A+C
3107 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3108 N0.getOperand(1).getOperand(1) == N1)
3109 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3110 N0.getOperand(1).getOperand(0));
3111
3112 // fold ((A-(B-C))-C) -> A-B
3113 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3114 N0.getOperand(1).getOperand(1) == N1)
3115 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3116 N0.getOperand(1).getOperand(0));
3117
3118 // fold (A-(B-C)) -> A+(C-B)
3119 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3120 return DAG.getNode(ISD::ADD, DL, VT, N0,
3121 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3122 N1.getOperand(0)));
3123
3124 // A - (A & B) -> A & (~B)
3125 if (N1.getOpcode() == ISD::AND) {
3126 SDValue A = N1.getOperand(0);
3127 SDValue B = N1.getOperand(1);
3128 if (A != N0)
3129 std::swap(A, B);
3130 if (A == N0 &&
3131 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3132 SDValue InvB =
3133 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3134 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3135 }
3136 }
3137
3138 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3139 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3140 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3141 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3142 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3143 N1.getOperand(0).getOperand(1),
3144 N1.getOperand(1));
3145 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3146 }
3147 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3148 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3149 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3150 N1.getOperand(0),
3151 N1.getOperand(1).getOperand(1));
3152 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3153 }
3154 }
3155
3156 // If either operand of a sub is undef, the result is undef
3157 if (N0.isUndef())
3158 return N0;
3159 if (N1.isUndef())
3160 return N1;
3161
3162 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3163 return V;
3164
3165 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3166 return V;
3167
3168 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3169 return V;
3170
3171 // (x - y) - 1 -> add (xor y, -1), x
3172 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3173 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3174 DAG.getAllOnesConstant(DL, VT));
3175 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3176 }
3177
3178 // Look for:
3179 // sub y, (xor x, -1)
3180 // And if the target does not like this form then turn into:
3181 // add (add x, y), 1
3182 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3183 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3184 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3185 }
3186
3187 // Hoist one-use addition by non-opaque constant:
3188 // (x + C) - y -> (x - y) + C
3189 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3190 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3191 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3192 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3193 }
3194 // y - (x + C) -> (y - x) - C
3195 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3196 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3197 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3198 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3199 }
3200 // (x - C) - y -> (x - y) - C
3201 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3202 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3203 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3204 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3205 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3206 }
3207 // (C - x) - y -> C - (x + y)
3208 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3209 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3210 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3211 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3212 }
3213
3214 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3215 // rather than 'sub 0/1' (the sext should get folded).
3216 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3217 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3218 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3219 TLI.getBooleanContents(VT) ==
3220 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3221 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3222 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3223 }
3224
3225 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
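// (This is the classic branchless abs: Y is 0 or -1, so (X ^ Y) - Y is
// either X - 0 == X or (~X) - (-1) == ~X + 1 == -X; e.g. X = -5 gives
// (-5 ^ -1) - (-1) == 4 + 1 == 5.)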
3226 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3227 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3228 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3229 SDValue S0 = N1.getOperand(0);
3230 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3231 unsigned OpSizeInBits = VT.getScalarSizeInBits();
3232 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3233 if (C->getAPIntValue() == (OpSizeInBits - 1))
3234 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3235 }
3236 }
3237 }
3238
3239 // If the relocation model supports it, consider symbol offsets.
3240 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3241 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3242 // fold (sub Sym, c) -> Sym-c
3243 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3244 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3245 GA->getOffset() -
3246 (uint64_t)N1C->getSExtValue());
3247 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3248 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3249 if (GA->getGlobal() == GB->getGlobal())
3250 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3251 DL, VT);
3252 }
3253
3254 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3255 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3256 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3257 if (TN->getVT() == MVT::i1) {
3258 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3259 DAG.getConstant(1, DL, VT));
3260 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3261 }
3262 }
3263
3264 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3265 if (N1.getOpcode() == ISD::VSCALE) {
3266 APInt IntVal = N1.getConstantOperandAPInt(0);
3267 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3268 }
3269
3270 // Prefer an add for more folding potential and possibly better codegen:
3271 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3272 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3273 SDValue ShAmt = N1.getOperand(1);
3274 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3275 if (ShAmtC &&
3276 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3277 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3278 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3279 }
3280 }
3281
3282 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3283 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
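// (Sound because Carry - X == (0 - X) + Carry; the rewrite lets the carry
// bit feed the addcarry's carry-in operand directly.)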
3284 if (SDValue Carry = getAsCarry(TLI, N0)) {
3285 SDValue X = N1;
3286 SDValue Zero = DAG.getConstant(0, DL, VT);
3287 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3288 return DAG.getNode(ISD::ADDCARRY, DL,
3289 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3290 Carry);
3291 }
3292 }
3293
3294 return SDValue();
3295}
3296
3297SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3298 SDValue N0 = N->getOperand(0);
3299 SDValue N1 = N->getOperand(1);
3300 EVT VT = N0.getValueType();
3301 SDLoc DL(N);
3302
3303 // fold vector ops
3304 if (VT.isVector()) {
3305 // TODO SimplifyVBinOp
3306
3307 // fold (sub_sat x, 0) -> x, vector edition
3308 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3309 return N0;
3310 }
3311
3312 // fold (sub_sat x, undef) -> 0
3313 if (N0.isUndef() || N1.isUndef())
3314 return DAG.getConstant(0, DL, VT);
3315
3316 // fold (sub_sat x, x) -> 0
3317 if (N0 == N1)
3318 return DAG.getConstant(0, DL, VT);
3319
3320 // fold (sub_sat c1, c2) -> c3
3321 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3322 return C;
3323
3324 // fold (sub_sat x, 0) -> x
3325 if (isNullConstant(N1))
3326 return N0;
3327
3328 return SDValue();
3329}
3330
3331SDValue DAGCombiner::visitSUBC(SDNode *N) {
3332 SDValue N0 = N->getOperand(0);
3333 SDValue N1 = N->getOperand(1);
3334 EVT VT = N0.getValueType();
3335 SDLoc DL(N);
3336
3337 // If the flag result is dead, turn this into a SUB.
3338 if (!N->hasAnyUseOfValue(1))
3339 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3340 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3341
3342 // fold (subc x, x) -> 0 + no borrow
3343 if (N0 == N1)
3344 return CombineTo(N, DAG.getConstant(0, DL, VT),
3345 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3346
3347 // fold (subc x, 0) -> x + no borrow
3348 if (isNullConstant(N1))
3349 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3350
3351 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3352 if (isAllOnesConstant(N0))
3353 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3354 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3355
3356 return SDValue();
3357}
3358
3359SDValue DAGCombiner::visitSUBO(SDNode *N) {
3360 SDValue N0 = N->getOperand(0);
3361 SDValue N1 = N->getOperand(1);
3362 EVT VT = N0.getValueType();
3363 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3364
3365 EVT CarryVT = N->getValueType(1);
3366 SDLoc DL(N);
3367
3368 // If the flag result is dead, turn this into a SUB.
3369 if (!N->hasAnyUseOfValue(1))
3370 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3371 DAG.getUNDEF(CarryVT));
3372
3373 // fold (subo x, x) -> 0 + no borrow
3374 if (N0 == N1)
3375 return CombineTo(N, DAG.getConstant(0, DL, VT),
3376 DAG.getConstant(0, DL, CarryVT));
3377
3378 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3379
3380 // fold (subo x, c) -> (addo x, -c)
3381 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3382 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3383 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3384 }
3385
3386 // fold (subo x, 0) -> x + no borrow
3387 if (isNullOrNullSplat(N1))
3388 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3389
3390 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3391 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3392 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3393 DAG.getConstant(0, DL, CarryVT));
3394
3395 return SDValue();
3396}
3397
3398SDValue DAGCombiner::visitSUBE(SDNode *N) {
3399 SDValue N0 = N->getOperand(0);
3400 SDValue N1 = N->getOperand(1);
3401 SDValue CarryIn = N->getOperand(2);
3402
3403 // fold (sube x, y, false) -> (subc x, y)
3404 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3405 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3406
3407 return SDValue();
3408}
3409
3410SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3411 SDValue N0 = N->getOperand(0);
3412 SDValue N1 = N->getOperand(1);
3413 SDValue CarryIn = N->getOperand(2);
3414
3415 // fold (subcarry x, y, false) -> (usubo x, y)
3416 if (isNullConstant(CarryIn)) {
3417 if (!LegalOperations ||
3418 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3419 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3420 }
3421
3422 return SDValue();
3423}
3424
3425// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3426// UMULFIXSAT here.
3427SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3428 SDValue N0 = N->getOperand(0);
3429 SDValue N1 = N->getOperand(1);
3430 SDValue Scale = N->getOperand(2);
3431 EVT VT = N0.getValueType();
3432
3433 // fold (mulfix x, undef, scale) -> 0
3434 if (N0.isUndef() || N1.isUndef())
3435 return DAG.getConstant(0, SDLoc(N), VT);
3436
3437 // Canonicalize constant to RHS (vector doesn't have to splat)
3438 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3439 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3440 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3441
3442 // fold (mulfix x, 0, scale) -> 0
3443 if (isNullConstant(N1))
3444 return DAG.getConstant(0, SDLoc(N), VT);
3445
3446 return SDValue();
3447}
3448
3449SDValue DAGCombiner::visitMUL(SDNode *N) {
3450 SDValue N0 = N->getOperand(0);
3451 SDValue N1 = N->getOperand(1);
3452 EVT VT = N0.getValueType();
3453
3454 // fold (mul x, undef) -> 0
3455 if (N0.isUndef() || N1.isUndef())
3456 return DAG.getConstant(0, SDLoc(N), VT);
3457
3458 bool N1IsConst = false;
3459 bool N1IsOpaqueConst = false;
3460 APInt ConstValue1;
3461
3462 // fold vector ops
3463 if (VT.isVector()) {
3464 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3465 return FoldedVOp;
3466
3467 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3468 assert((!N1IsConst ||
3469 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3470 "Splat APInt should be element width");
3471 } else {
3472 N1IsConst = isa<ConstantSDNode>(N1);
3473 if (N1IsConst) {
3474 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3475 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3476 }
3477 }
3478
3479 // fold (mul c1, c2) -> c1*c2
3480 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3481 return C;
3482
3483 // canonicalize constant to RHS (vector doesn't have to splat)
3484 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3485 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3486 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3487
3488 // fold (mul x, 0) -> 0
3489 if (N1IsConst && ConstValue1.isNullValue())
3490 return N1;
3491
3492 // fold (mul x, 1) -> x
3493 if (N1IsConst && ConstValue1.isOneValue())
3494 return N0;
3495
3496 if (SDValue NewSel = foldBinOpIntoSelect(N))
3497 return NewSel;
3498
3499 // fold (mul x, -1) -> 0-x
3500 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3501 SDLoc DL(N);
3502 return DAG.getNode(ISD::SUB, DL, VT,
3503 DAG.getConstant(0, DL, VT), N0);
3504 }
3505
3506 // fold (mul x, (1 << c)) -> x << c
3507 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3508 DAG.isKnownToBeAPowerOfTwo(N1) &&
3509 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3510 SDLoc DL(N);
3511 SDValue LogBase2 = BuildLogBase2(N1, DL);
3512 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3513 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3514 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3515 }
3516
3517 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3518 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3519 unsigned Log2Val = (-ConstValue1).logBase2();
3520 SDLoc DL(N);
3521 // FIXME: If the input is something that is easily negated (e.g. a
3522 // single-use add), we should put the negate there.
3523 return DAG.getNode(ISD::SUB, DL, VT,
3524 DAG.getConstant(0, DL, VT),
3525 DAG.getNode(ISD::SHL, DL, VT, N0,
3526 DAG.getConstant(Log2Val, DL,
3527 getShiftAmountTy(N0.getValueType()))));
3528 }
3529
3530 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3531 // mul x, (2^N + 1) --> add (shl x, N), x
3532 // mul x, (2^N - 1) --> sub (shl x, N), x
3533 // Examples: x * 33 --> (x << 5) + x
3534 // x * 15 --> (x << 4) - x
3535 // x * -33 --> -((x << 5) + x)
3536 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3537 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3538 // TODO: We could handle more general decomposition of any constant by
3539 // having the target set a limit on number of ops and making a
3540 // callback to determine that sequence (similar to sqrt expansion).
3541 unsigned MathOp = ISD::DELETED_NODE;
3542 APInt MulC = ConstValue1.abs();
3543 if ((MulC - 1).isPowerOf2())
3544 MathOp = ISD::ADD;
3545 else if ((MulC + 1).isPowerOf2())
3546 MathOp = ISD::SUB;
3547
3548 if (MathOp != ISD::DELETED_NODE) {
3549 unsigned ShAmt =
3550 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3551 assert(ShAmt < VT.getScalarSizeInBits() &&
3552 "multiply-by-constant generated out of bounds shift");
3553 SDLoc DL(N);
3554 SDValue Shl =
3555 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3556 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3557 if (ConstValue1.isNegative())
3558 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3559 return R;
3560 }
3561 }
3562
3563 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3564 if (N0.getOpcode() == ISD::SHL &&
3565 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3566 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3567 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3568 if (isConstantOrConstantVector(C3))
3569 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3570 }
3571
3572 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3573 // use.
3574 {
3575 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3576
3577 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3578 if (N0.getOpcode() == ISD::SHL &&
3579 isConstantOrConstantVector(N0.getOperand(1)) &&
3580 N0.getNode()->hasOneUse()) {
3581 Sh = N0; Y = N1;
3582 } else if (N1.getOpcode() == ISD::SHL &&
3583 isConstantOrConstantVector(N1.getOperand(1)) &&
3584 N1.getNode()->hasOneUse()) {
3585 Sh = N1; Y = N0;
3586 }
3587
3588 if (Sh.getNode()) {
3589 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3590 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3591 }
3592 }
3593
3594 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3595 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3596 N0.getOpcode() == ISD::ADD &&
3597 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3598 isMulAddWithConstProfitable(N, N0, N1))
3599 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3600 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3601 N0.getOperand(0), N1),
3602 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3603 N0.getOperand(1), N1));
3604
3605 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3606 if (N0.getOpcode() == ISD::VSCALE)
3607 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3608 APInt C0 = N0.getConstantOperandAPInt(0);
3609 APInt C1 = NC1->getAPIntValue();
3610 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3611 }
3612
3613 // reassociate mul
3614 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3615 return RMUL;
3616
3617 return SDValue();
3618}
3619
3620/// Return true if divmod libcall is available.
3621static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3622 const TargetLowering &TLI) {
3623 RTLIB::Libcall LC;
3624 EVT NodeType = Node->getValueType(0);
3625 if (!NodeType.isSimple())
3626 return false;
3627 switch (NodeType.getSimpleVT().SimpleTy) {
3628 default: return false; // No libcall for vector types.
3629 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3630 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3631 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3632 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3633 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3634 }
3635
3636 return TLI.getLibcallName(LC) != nullptr;
3637}
3638
3639/// Issue divrem if both quotient and remainder are needed.
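/// For example, if the DAG contains both (sdiv X, Y) and (srem X, Y), they
/// can be replaced by the two results of a single (sdivrem X, Y) node,
/// saving one division (or one divrem libcall) at the cost of keeping both
/// values live.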
3640SDValue DAGCombiner::useDivRem(SDNode *Node) {
3641 if (Node->use_empty())
3642 return SDValue(); // This is a dead node, leave it alone.
3643
3644 unsigned Opcode = Node->getOpcode();
3645 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3646 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3647
3649 // DivMod can still work on non-legal types when it is lowered as a libcall.
3649 EVT VT = Node->getValueType(0);
3650 if (VT.isVector() || !VT.isInteger())
3651 return SDValue();
3652
3653 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3654 return SDValue();
3655
3656 // If DIVREM is going to get expanded into a libcall,
3657 // but there is no libcall available, then don't combine.
3658 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3659 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3660 return SDValue();
3661
3662 // If div is legal, it's better to do the normal expansion
3663 unsigned OtherOpcode = 0;
3664 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3665 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3666 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3667 return SDValue();
3668 } else {
3669 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3670 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3671 return SDValue();
3672 }
3673
3674 SDValue Op0 = Node->getOperand(0);
3675 SDValue Op1 = Node->getOperand(1);
3676 SDValue combined;
3677 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3678 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3679 SDNode *User = *UI;
3680 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3681 User->use_empty())
3682 continue;
3683 // Convert the other matching node(s), too;
3684 // otherwise, the DIVREM may get target-legalized into something
3685 // target-specific that we won't be able to recognize.
3686 unsigned UserOpc = User->getOpcode();
3687 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3688 User->getOperand(0) == Op0 &&
3689 User->getOperand(1) == Op1) {
3690 if (!combined) {
3691 if (UserOpc == OtherOpcode) {
3692 SDVTList VTs = DAG.getVTList(VT, VT);
3693 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3694 } else if (UserOpc == DivRemOpc) {
3695 combined = SDValue(User, 0);
3696 } else {
3697 assert(UserOpc == Opcode);
3698 continue;
3699 }
3700 }
3701 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3702 CombineTo(User, combined);
3703 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3704 CombineTo(User, combined.getValue(1));
3705 }
3706 }
3707 return combined;
3708}
3709
3710static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3711 SDValue N0 = N->getOperand(0);
3712 SDValue N1 = N->getOperand(1);
3713 EVT VT = N->getValueType(0);
3714 SDLoc DL(N);
3715
3716 unsigned Opc = N->getOpcode();
3717 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3718 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3719
3720 // X / undef -> undef
3721 // X % undef -> undef
3722 // X / 0 -> undef
3723 // X % 0 -> undef
3724 // NOTE: This includes vectors where any divisor element is zero/undef.
3725 if (DAG.isUndef(Opc, {N0, N1}))
3726 return DAG.getUNDEF(VT);
3727
3728 // undef / X -> 0
3729 // undef % X -> 0
3730 if (N0.isUndef())
3731 return DAG.getConstant(0, DL, VT);
3732
3733 // 0 / X -> 0
3734 // 0 % X -> 0
3735 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3736 if (N0C && N0C->isNullValue())
3737 return N0;
3738
3739 // X / X -> 1
3740 // X % X -> 0
3741 if (N0 == N1)
3742 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3743
3744 // X / 1 -> X
3745 // X % 1 -> 0
3746 // If this is a boolean op (single-bit element type), we can't have
3747 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3748 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3749 // it's a 1.
3750 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3751 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3752
3753 return SDValue();
3754}
3755
3756SDValue DAGCombiner::visitSDIV(SDNode *N) {
3757 SDValue N0 = N->getOperand(0);
3758 SDValue N1 = N->getOperand(1);
3759 EVT VT = N->getValueType(0);
3760 EVT CCVT = getSetCCResultType(VT);
3761
3762 // fold vector ops
3763 if (VT.isVector())
3764 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3765 return FoldedVOp;
3766
3767 SDLoc DL(N);
3768
3769 // fold (sdiv c1, c2) -> c1/c2
3770 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3771 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
3772 return C;
3773
3774 // fold (sdiv X, -1) -> 0-X
3775 if (N1C && N1C->isAllOnesValue())
3776 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3777
3778 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
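// (Every X other than MIN_SIGNED has magnitude strictly smaller than
// |MIN_SIGNED|, so the truncating quotient is 0; only MIN_SIGNED divided by
// MIN_SIGNED yields 1.)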
3779 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3780 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3781 DAG.getConstant(1, DL, VT),
3782 DAG.getConstant(0, DL, VT));
3783
3784 if (SDValue V = simplifyDivRem(N, DAG))
3785 return V;
3786
3787 if (SDValue NewSel = foldBinOpIntoSelect(N))
3788 return NewSel;
3789
3790 // If we know the sign bits of both operands are zero, strength reduce to a
3791 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3792 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3793 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3794
3795 if (SDValue V = visitSDIVLike(N0, N1, N)) {
3796 // If the corresponding remainder node exists, update its users with
3797 // (Dividend - (Quotient * Divisor)).
3798 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3799 { N0, N1 })) {
3800 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3801 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3802 AddToWorklist(Mul.getNode());
3803 AddToWorklist(Sub.getNode());
3804 CombineTo(RemNode, Sub);
3805 }
3806 return V;
3807 }
3808
3809 // sdiv, srem -> sdivrem
3810 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3811 // true. Otherwise, we break the simplification logic in visitREM().
3812 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3813 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3814 if (SDValue DivRem = useDivRem(N))
3815 return DivRem;
3816
3817 return SDValue();
3818}
3819
3820SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3821 SDLoc DL(N);
3822 EVT VT = N->getValueType(0);
3823 EVT CCVT = getSetCCResultType(VT);
3824 unsigned BitWidth = VT.getScalarSizeInBits();
3825
3826 // Helper for determining whether a value is a power-2 constant scalar or a
3827 // vector of such elements.
3828 auto IsPowerOfTwo = [](ConstantSDNode *C) {
3829 if (C->isNullValue() || C->isOpaque())
3830 return false;
3831 if (C->getAPIntValue().isPowerOf2())
3832 return true;
3833 if ((-C->getAPIntValue()).isPowerOf2())
3834 return true;
3835 return false;
3836 };
3837
3838 // fold (sdiv X, pow2) -> simple ops after legalize
3839 // FIXME: We check for the exact bit here because the generic lowering gives
3840 // better results in that case. The target-specific lowering should learn how
3841 // to handle exact sdivs efficiently.
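// (Illustrative i32 walk-through for X / 4: Inexact == 32 - 2 == 30, so the
// sequence below computes (X + ((X >>s 31) >>u 30)) >>s 2, i.e. it adds
// divisor-1 only when X is negative so the arithmetic shift rounds toward
// zero; X == -7 gives (-7 + 3) >>s 2 == -1.)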
3842 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3843 // Target-specific implementation of sdiv x, pow2.
3844 if (SDValue Res = BuildSDIVPow2(N))
3845 return Res;
3846
3847 // Create constants that are functions of the shift amount value.
3848 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3849 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3850 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3851 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3852 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3853 if (!isConstantOrConstantVector(Inexact))
3854 return SDValue();
3855
3856 // Splat the sign bit into the register
3857 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3858 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3859 AddToWorklist(Sign.getNode());
3860
3861 // Add (N0 < 0) ? |pow2| - 1 : 0, so the SRA below rounds toward zero.
3862 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3863 AddToWorklist(Srl.getNode());
3864 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3865 AddToWorklist(Add.getNode());
3866 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3867 AddToWorklist(Sra.getNode());
3868
3869 // Special case: (sdiv X, 1) -> X
3870 // Special case: (sdiv X, -1) -> 0-X
3871 SDValue One = DAG.getConstant(1, DL, VT);
3872 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3873 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3874 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3875 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3876 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3877
3878 // If dividing by a positive value, we're done. Otherwise, the result must
3879 // be negated.
3880 SDValue Zero = DAG.getConstant(0, DL, VT);
3881 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3882
3883 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3884 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3885 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3886 return Res;
3887 }
3888
3889 // If integer divide is expensive and we satisfy the requirements, emit an
3890 // alternate sequence. Targets may check function attributes for size/speed
3891 // trade-offs.
3892 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3893 if (isConstantOrConstantVector(N1) &&
3894 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3895 if (SDValue Op = BuildSDIV(N))
3896 return Op;
3897
3898 return SDValue();
3899}
3900
3901SDValue DAGCombiner::visitUDIV(SDNode *N) {
3902 SDValue N0 = N->getOperand(0);
3903 SDValue N1 = N->getOperand(1);
3904 EVT VT = N->getValueType(0);
3905 EVT CCVT = getSetCCResultType(VT);
3906
3907 // fold vector ops
3908 if (VT.isVector())
3909 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3910 return FoldedVOp;
3911
3912 SDLoc DL(N);
3913
3914 // fold (udiv c1, c2) -> c1/c2
3915 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3916 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
3917 return C;
3918
3919 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3920 if (N1C && N1C->getAPIntValue().isAllOnesValue())
3921 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3922 DAG.getConstant(1, DL, VT),
3923 DAG.getConstant(0, DL, VT));
3924
3925 if (SDValue V = simplifyDivRem(N, DAG))
3926 return V;
3927
3928 if (SDValue NewSel = foldBinOpIntoSelect(N))
3929 return NewSel;
3930
3931 if (SDValue V = visitUDIVLike(N0, N1, N)) {
3932 // If the corresponding remainder node exists, update its users with
3933 // (Dividend - (Quotient * Divisor)).
3934 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3935 { N0, N1 })) {
3936 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3937 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3938 AddToWorklist(Mul.getNode());
3939 AddToWorklist(Sub.getNode());
3940 CombineTo(RemNode, Sub);
3941 }
3942 return V;
3943 }
3944
3945 // udiv, urem -> udivrem
3946 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3947 // true. Otherwise, we break the simplification logic in visitREM().
3948 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3949 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3950 if (SDValue DivRem = useDivRem(N))
3951 return DivRem;
3952
3953 return SDValue();
3954}
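The remainder rewrite above relies on the identity X urem C == X - (X udiv C) * C; a minimal scalar sketch (illustrative only; requires c != 0):

#include <cstdint>
uint32_t urem_from_udiv(uint32_t x, uint32_t c) {
  uint32_t q = x / c; // stands in for the optimized quotient V
  return x - q * c;   // the Sub(N0, Mul(V, N1)) that replaces the UREM node
}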
3955
3956SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3957 SDLoc DL(N);
3958 EVT VT = N->getValueType(0);
3959
3960 // fold (udiv x, (1 << c)) -> x >>u c
3961 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3962 DAG.isKnownToBeAPowerOfTwo(N1)) {
3963 SDValue LogBase2 = BuildLogBase2(N1, DL);
3964 AddToWorklist(LogBase2.getNode());
3965
3966 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3967 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3968 AddToWorklist(Trunc.getNode());
3969 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3970 }
3971
3972 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3973 if (N1.getOpcode() == ISD::SHL) {
3974 SDValue N10 = N1.getOperand(0);
3975 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3976 DAG.isKnownToBeAPowerOfTwo(N10)) {
3977 SDValue LogBase2 = BuildLogBase2(N10, DL);
3978 AddToWorklist(LogBase2.getNode());
3979
3980 EVT ADDVT = N1.getOperand(1).getValueType();
3981 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3982 AddToWorklist(Trunc.getNode());
3983 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3984 AddToWorklist(Add.getNode());
3985 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3986 }
3987 }
3988
3989 // fold (udiv x, c) -> alternate
3990 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3991 if (isConstantOrConstantVector(N1) &&
3992 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3993 if (SDValue Op = BuildUDIV(N))
3994 return Op;
3995
3996 return SDValue();
3997}
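Both power-of-two folds above reduce to plain shifts; a minimal scalar sketch (illustrative only; assumes the combined shift amount stays below the bit width):

#include <cstdint>
uint32_t udiv_by_16(uint32_t x) { return x >> 4; }  // x / 16
uint32_t udiv_by_8_shl(uint32_t x, unsigned y) {
  return x >> (3 + y);                              // x / (8 << y)
}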
3998
3999// handles ISD::SREM and ISD::UREM
4000SDValue DAGCombiner::visitREM(SDNode *N) {
4001 unsigned Opcode = N->getOpcode();
4002 SDValue N0 = N->getOperand(0);
4003 SDValue N1 = N->getOperand(1);
4004 EVT VT = N->getValueType(0);
4005 EVT CCVT = getSetCCResultType(VT);
4006
4007 bool isSigned = (Opcode == ISD::SREM);
4008 SDLoc DL(N);
4009
4010 // fold (rem c1, c2) -> c1%c2
4011 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4012 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4013 return C;
4014
4015 // fold (urem X, -1) -> select(X == -1, 0, X)
4016 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4017 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4018 DAG.getConstant(0, DL, VT), N0);
4019
4020 if (SDValue V = simplifyDivRem(N, DAG))
4021 return V;
4022
4023 if (SDValue NewSel = foldBinOpIntoSelect(N))
4024 return NewSel;
4025
4026 if (isSigned) {
4027 // If we know the sign bits of both operands are zero, strength reduce to a
4028 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4029 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4030 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4031 } else {
4032 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4033 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4034 // fold (urem x, pow2) -> (and x, pow2-1)
4035 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4036 AddToWorklist(Add.getNode());
4037 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4038 }
4039 if (N1.getOpcode() == ISD::SHL &&
4040 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4041 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4042 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4043 AddToWorklist(Add.getNode());
4044 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4045 }
4046 }
4047
4048 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4049
4050 // If X/C can be simplified by the division-by-constant logic, lower
4051 // X%C to the equivalent of X-X/C*C.
4052 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4053 // speculative DIV must not cause a DIVREM conversion. We guard against this
4054 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4055 // combine will not return a DIVREM. Regardless, checking cheapness here
4056 // makes sense since the simplification results in fatter code.
4057 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4058 SDValue OptimizedDiv =
4059 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4060 if (OptimizedDiv.getNode()) {
4061 // If the equivalent Div node also exists, update its users.
4062 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4063 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4064 { N0, N1 }))
4065 CombineTo(DivNode, OptimizedDiv);
4066 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4067 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4068 AddToWorklist(OptimizedDiv.getNode());
4069 AddToWorklist(Mul.getNode());
4070 return Sub;
4071 }
4072 }
4073
4074 // sdiv/udiv, srem/urem -> sdivrem/udivrem
4075 if (SDValue DivRem = useDivRem(N))
4076 return DivRem.getValue(1);
4077
4078 return SDValue();
4079}
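The urem-by-power-of-two fold above is the familiar mask trick; a one-line scalar sketch (illustrative only):

#include <cstdint>
uint32_t urem_by_16(uint32_t x) { return x & 15u; }  // (and x, pow2 - 1)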
4080
4081SDValue DAGCombiner::visitMULHS(SDNode *N) {
4082 SDValue N0 = N->getOperand(0);
4083 SDValue N1 = N->getOperand(1);
4084 EVT VT = N->getValueType(0);
4085 SDLoc DL(N);
4086
4087 if (VT.isVector()) {
4088 // fold (mulhs x, 0) -> 0
4089 // do not return N0/N1, because undef node may exist.
4090 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4091 ISD::isBuildVectorAllZeros(N1.getNode()))
4092 return DAG.getConstant(0, DL, VT);
4093 }
4094
4095 // fold (mulhs x, 0) -> 0
4096 if (isNullConstant(N1))
4097 return N1;
4098 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4099 if (isOneConstant(N1))
4100 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4101 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4102 getShiftAmountTy(N0.getValueType())));
4103
4104 // fold (mulhs x, undef) -> 0
4105 if (N0.isUndef() || N1.isUndef())
4106 return DAG.getConstant(0, DL, VT);
4107
4108 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4109 // plus a shift.
4110 if (VT.isSimple() && !VT.isVector()) {
4111 MVT Simple = VT.getSimpleVT();
4112 unsigned SimpleSize = Simple.getSizeInBits();
4113 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4114 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4115 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4116 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4117 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4118 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4119 DAG.getConstant(SimpleSize, DL,
4120 getShiftAmountTy(N1.getValueType())));
4121 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4122 }
4123 }
4124
4125 return SDValue();
4126}
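The widening transform corresponds to this scalar computation, sketched here for i32 with a legal i64 multiply (illustrative only):

#include <cstdint>
int32_t mulhs_i32(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a * (int64_t)b;   // SIGN_EXTEND both, MUL in NewVT
  return (int32_t)((uint64_t)wide >> 32);   // SRL by SimpleSize, then TRUNCATE
}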
4127
4128SDValue DAGCombiner::visitMULHU(SDNode *N) {
4129 SDValue N0 = N->getOperand(0);
4130 SDValue N1 = N->getOperand(1);
4131 EVT VT = N->getValueType(0);
4132 SDLoc DL(N);
4133
4134 if (VT.isVector()) {
4135 // fold (mulhu x, 0) -> 0
4136 // do not return N0/N1, because undef node may exist.
4137 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4138 ISD::isBuildVectorAllZeros(N1.getNode()))
4139 return DAG.getConstant(0, DL, VT);
4140 }
4141
4142 // fold (mulhu x, 0) -> 0
4143 if (isNullConstant(N1))
4144 return N1;
4145 // fold (mulhu x, 1) -> 0
4146 if (isOneConstant(N1))
4147 return DAG.getConstant(0, DL, N0.getValueType());
4148 // fold (mulhu x, undef) -> 0
4149 if (N0.isUndef() || N1.isUndef())
4150 return DAG.getConstant(0, DL, VT);
4151
4152 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4153 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4154 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4155 unsigned NumEltBits = VT.getScalarSizeInBits();
4156 SDValue LogBase2 = BuildLogBase2(N1, DL);
4157 SDValue SRLAmt = DAG.getNode(
4158 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4159 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4160 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4161 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4162 }
4163
4164 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4165 // plus a shift.
4166 if (VT.isSimple() && !VT.isVector()) {
4167 MVT Simple = VT.getSimpleVT();
4168 unsigned SimpleSize = Simple.getSizeInBits();
4169 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4170 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4171 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4172 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4173 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4174 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4175 DAG.getConstant(SimpleSize, DL,
4176 getShiftAmountTy(N1.getValueType())));
4177 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4178 }
4179 }
4180
4181 return SDValue();
4182}
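For the power-of-two case above, the high half of x * (1 << c) is x >> (bitwidth - c); sketched for c = 4 on i32 (illustrative only):

#include <cstdint>
uint32_t mulhu_by_16(uint32_t x) { return x >> 28; }  // high 32 bits of x * 16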
4183
4184/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4185 /// give the opcodes for the two computations that are being performed. Return
4186 /// a non-null SDValue if a simplification was made.
4187SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4188 unsigned HiOp) {
4189 // If the high half is not needed, just compute the low half.
4190 bool HiExists = N->hasAnyUseOfValue(1);
4191 if (!HiExists && (!LegalOperations ||
4192 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4193 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4194 return CombineTo(N, Res, Res);
4195 }
4196
4197 // If the low half is not needed, just compute the high half.
4198 bool LoExists = N->hasAnyUseOfValue(0);
4199 if (!LoExists && (!LegalOperations ||
4200 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4201 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4202 return CombineTo(N, Res, Res);
4203 }
4204
4205 // If both halves are used, return as it is.
4206 if (LoExists && HiExists)
4207 return SDValue();
4208
4209 // If the two computed results can be simplified separately, separate them.
4210 if (LoExists) {
4211 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4212 AddToWorklist(Lo.getNode());
4213 SDValue LoOpt = combine(Lo.getNode());
4214 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4215 (!LegalOperations ||
4216 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4217 return CombineTo(N, LoOpt, LoOpt);
4218 }
4219
4220 if (HiExists) {
4221 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4222 AddToWorklist(Hi.getNode());
4223 SDValue HiOpt = combine(Hi.getNode());
4224 if (HiOpt.getNode() && HiOpt != Hi &&
4225 (!LegalOperations ||
4226 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4227 return CombineTo(N, HiOpt, HiOpt);
4228 }
4229
4230 return SDValue();
4231}
4232
4233SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4234 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4235 return Res;
4236
4237 EVT VT = N->getValueType(0);
4238 SDLoc DL(N);
4239
4240 // If the type twice as wide is legal, transform this to a wider multiply
4241 // plus a shift.
4242 if (VT.isSimple() && !VT.isVector()) {
4243 MVT Simple = VT.getSimpleVT();
4244 unsigned SimpleSize = Simple.getSizeInBits();
4245 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4246 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4247 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4248 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4249 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4250 // Compute the high part as N1.
4251 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4252 DAG.getConstant(SimpleSize, DL,
4253 getShiftAmountTy(Lo.getValueType())));
4254 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4255 // Compute the low part as N0.
4256 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4257 return CombineTo(N, Lo, Hi);
4258 }
4259 }
4260
4261 return SDValue();
4262}
4263
4264SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4265 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4266 return Res;
4267
4268 EVT VT = N->getValueType(0);
4269 SDLoc DL(N);
4270
4271 // (umul_lohi N0, 0) -> (0, 0)
4272 if (isNullConstant(N->getOperand(1))) {
4273 SDValue Zero = DAG.getConstant(0, DL, VT);
4274 return CombineTo(N, Zero, Zero);
4275 }
4276
4277 // (umul_lohi N0, 1) -> (N0, 0)
4278 if (isOneConstant(N->getOperand(1))) {
4279 SDValue Zero = DAG.getConstant(0, DL, VT);
4280 return CombineTo(N, N->getOperand(0), Zero);
4281 }
4282
4283 // If the type twice as wide is legal, transform this to a wider multiply
4284 // plus a shift.
4285 if (VT.isSimple() && !VT.isVector()) {
4286 MVT Simple = VT.getSimpleVT();
4287 unsigned SimpleSize = Simple.getSizeInBits();
4288 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4289 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4290 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4291 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4292 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4293 // Compute the high part as N1.
4294 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4295 DAG.getConstant(SimpleSize, DL,
4296 getShiftAmountTy(Lo.getValueType())));
4297 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4298 // Compute the low part as N0.
4299 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4300 return CombineTo(N, Lo, Hi);
4301 }
4302 }
4303
4304 return SDValue();
4305}
4306
4307SDValue DAGCombiner::visitMULO(SDNode *N) {
4308 SDValue N0 = N->getOperand(0);
4309 SDValue N1 = N->getOperand(1);
4310 EVT VT = N0.getValueType();
4311 bool IsSigned = (ISD::SMULO == N->getOpcode());
4312
4313 EVT CarryVT = N->getValueType(1);
4314 SDLoc DL(N);
4315
4316 // canonicalize constant to RHS.
4317 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4318 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4319 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4320
4321 // fold (mulo x, 0) -> 0 + no carry out
4322 if (isNullOrNullSplat(N1))
4323 return CombineTo(N, DAG.getConstant(0, DL, VT),
4324 DAG.getConstant(0, DL, CarryVT));
4325
4326 // (mulo x, 2) -> (addo x, x)
4327 if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4328 if (C2->getAPIntValue() == 2)
4329 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4330 N->getVTList(), N0, N0);
4331
4332 return SDValue();
4333}
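The (mulo x, 2) -> (addo x, x) fold is sound because doubling and self-addition produce the same value and the same overflow flag; a scalar sketch using the GCC/Clang overflow builtin (illustrative only):

#include <cstdint>
bool smulo_by_2(int32_t x, int32_t &res) {
  return __builtin_add_overflow(x, x, &res);  // plays the role of SADDO
}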
4334
4335SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4336 SDValue N0 = N->getOperand(0);
4337 SDValue N1 = N->getOperand(1);
4338 EVT VT = N0.getValueType();
4339 unsigned Opcode = N->getOpcode();
4340
4341 // fold vector ops
4342 if (VT.isVector())
4343 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4344 return FoldedVOp;
4345
4346 // fold operation with constant operands.
4347 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4348 return C;
4349
4350 // canonicalize constant to RHS
4351 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4352 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4353 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4354
4355 // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4356 // Only do this if the current op isn't legal and the flipped is.
4357 if (!TLI.isOperationLegal(Opcode, VT) &&
4358 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4359 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4360 unsigned AltOpcode;
4361 switch (Opcode) {
4362 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4363 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4364 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4365 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4366 default: llvm_unreachable("Unknown MINMAX opcode");
4367 }
4368 if (TLI.isOperationLegal(AltOpcode, VT))
4369 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4370 }
4371
4372 return SDValue();
4373}
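The flip is sound because, with both sign bits known zero, signed and unsigned comparison order coincide; a scalar sketch (illustrative only):

#include <cstdint>
// Precondition (what SignBitIsZero establishes): a, b < 2^31.
uint32_t max_signed_or_unsigned(uint32_t a, uint32_t b) {
  return a > b ? a : b;  // SMAX and UMAX agree under the precondition
}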
4374
4375/// If this is a bitwise logic instruction and both operands have the same
4376/// opcode, try to sink the other opcode after the logic instruction.
4377SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4378 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4379 EVT VT = N0.getValueType();
4380 unsigned LogicOpcode = N->getOpcode();
4381 unsigned HandOpcode = N0.getOpcode();
4382 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4383 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4384 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4385
4386 // Bail early if none of these transforms apply.
4387 if (N0.getNumOperands() == 0)
4388 return SDValue();
4389
4390 // FIXME: We should check number of uses of the operands to not increase
4391 // the instruction count for all transforms.
4392
4393 // Handle size-changing casts.
4394 SDValue X = N0.getOperand(0);
4395 SDValue Y = N1.getOperand(0);
4396 EVT XVT = X.getValueType();
4397 SDLoc DL(N);
4398 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4399 HandOpcode == ISD::SIGN_EXTEND) {
4400 // If both operands have other uses, this transform would create extra
4401 // instructions without eliminating anything.
4402 if (!N0.hasOneUse() && !N1.hasOneUse())
4403 return SDValue();
4404 // We need matching integer source types.
4405 if (XVT != Y.getValueType())
4406 return SDValue();
4407 // Don't create an illegal op during or after legalization. Don't ever
4408 // create an unsupported vector op.
4409 if ((VT.isVector() || LegalOperations) &&
4410 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4411 return SDValue();
4412 // Avoid infinite looping with PromoteIntBinOp.
4413 // TODO: Should we apply desirable/legal constraints to all opcodes?
4414 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4415 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4416 return SDValue();
4417 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4418 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4419 return DAG.getNode(HandOpcode, DL, VT, Logic);
4420 }
4421
4422 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4423 if (HandOpcode == ISD::TRUNCATE) {
4424 // If both operands have other uses, this transform would create extra
4425 // instructions without eliminating anything.
4426 if (!N0.hasOneUse() && !N1.hasOneUse())
4427 return SDValue();
4428 // We need matching source types.
4429 if (XVT != Y.getValueType())
4430 return SDValue();
4431 // Don't create an illegal op during or after legalization.
4432 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4433 return SDValue();
4434 // Be extra careful sinking truncate. If it's free, there's no benefit in
4435 // widening a binop. Also, don't create a logic op on an illegal type.
4436 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4437 return SDValue();
4438 if (!TLI.isTypeLegal(XVT))
4439 return SDValue();
4440 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4441 return DAG.getNode(HandOpcode, DL, VT, Logic);
4442 }
4443
4444 // For binops SHL/SRL/SRA/AND:
4445 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4446 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4447 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4448 N0.getOperand(1) == N1.getOperand(1)) {
4449 // If either operand has other uses, this transform is not an improvement.
4450 if (!N0.hasOneUse() || !N1.hasOneUse())
4451 return SDValue();
4452 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4453 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4454 }
4455
4456 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4457 if (HandOpcode == ISD::BSWAP) {
4458 // If either operand has other uses, this transform is not an improvement.
4459 if (!N0.hasOneUse() || !N1.hasOneUse())
4460 return SDValue();
4461 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4462 return DAG.getNode(HandOpcode, DL, VT, Logic);
4463 }
4464
4465 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4466 // Only perform this optimization up until type legalization, before
4467 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4468 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4469 // we don't want to undo this promotion.
4470 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4471 // on scalars.
4472 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4473 Level <= AfterLegalizeTypes) {
4474 // Input types must be integer and the same.
4475 if (XVT.isInteger() && XVT == Y.getValueType() &&
4476 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4477 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4478 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4479 return DAG.getNode(HandOpcode, DL, VT, Logic);
4480 }
4481 }
4482
4483 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4484 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4485 // If both shuffles use the same mask, and both shuffle within a single
4486 // vector, then it is worthwhile to move the swizzle after the operation.
4487 // The type-legalizer generates this pattern when loading illegal
4488 // vector types from memory. In many cases this allows additional shuffle
4489 // optimizations.
4490 // There are other cases where moving the shuffle after the xor/and/or
4491 // is profitable even if shuffles don't perform a swizzle.
4492 // If both shuffles use the same mask, and both shuffles have the same first
4493 // or second operand, then it might still be profitable to move the shuffle
4494 // after the xor/and/or operation.
4495 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4496 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4497 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4498 assert(X.getValueType() == Y.getValueType() &&
4499 "Inputs to shuffles are not the same type");
4500
4501 // Check that both shuffles use the same mask. The masks are known to be of
4502 // the same length because the result vector type is the same.
4503 // Check also that shuffles have only one use to avoid introducing extra
4504 // instructions.
4505 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4506 !SVN0->getMask().equals(SVN1->getMask()))
4507 return SDValue();
4508
4509 // Don't try to fold this node if it requires introducing a
4510 // build vector of all zeros that might be illegal at this stage.
4511 SDValue ShOp = N0.getOperand(1);
4512 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4513 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4514
4515 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4516 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4517 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4518 N0.getOperand(0), N1.getOperand(0));
4519 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4520 }
4521
4522 // Don't try to fold this node if it requires introducing a
4523 // build vector of all zeros that might be illegal at this stage.
4524 ShOp = N0.getOperand(0);
4525 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4526 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4527
4528 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4529 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4530 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4531 N1.getOperand(1));
4532 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4533 }
4534 }
4535
4536 return SDValue();
4537}
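For the extension case, the hoist trades two extends for one, e.g. zext from i8 to i32 feeding an AND; a scalar sketch (illustrative only):

#include <cstdint>
uint32_t and_of_zexts(uint8_t x, uint8_t y) {
  return (uint32_t)x & (uint32_t)y;   // two hand_ops feed the logic_op
}
uint32_t zext_of_and(uint8_t x, uint8_t y) {
  return (uint32_t)(uint8_t)(x & y);  // one hand_op after the logic_op
}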
4538
4539/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4540SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4541 const SDLoc &DL) {
4542 SDValue LL, LR, RL, RR, N0CC, N1CC;
4543 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4544 !isSetCCEquivalent(N1, RL, RR, N1CC))
4545 return SDValue();
4546
4547 assert(N0.getValueType() == N1.getValueType() &&
4548 "Unexpected operand types for bitwise logic op");
4549 assert(LL.getValueType() == LR.getValueType() &&
4550 RL.getValueType() == RR.getValueType() &&
4551 "Unexpected operand types for setcc");
4552
4553 // If we're here post-legalization or the logic op type is not i1, the logic
4554 // op type must match a setcc result type. Also, all folds require new
4555 // operations on the left and right operands, so those types must match.
4556 EVT VT = N0.getValueType();
4557 EVT OpVT = LL.getValueType();
4558 if (LegalOperations || VT.getScalarType() != MVT::i1)
4559 if (VT != getSetCCResultType(OpVT))
4560 return SDValue();
4561 if (OpVT != RL.getValueType())
4562 return SDValue();
4563
4564 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4565 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4566 bool IsInteger = OpVT.isInteger();
4567 if (LR == RR && CC0 == CC1 && IsInteger) {
4568 bool IsZero = isNullOrNullSplat(LR);
4569 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4570
4571 // All bits clear?
4572 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4573 // All sign bits clear?
4574 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4575 // Any bits set?
4576 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4577 // Any sign bits set?
4578 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4579
4580 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4581 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4582 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4583 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4584 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4585 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4586 AddToWorklist(Or.getNode());
4587 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4588 }
4589
4590 // All bits set?
4591 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4592 // All sign bits set?
4593 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4594 // Any bits clear?
4595 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4596 // Any sign bits clear?
4597 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4598
4599 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4600 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4601 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4602 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4603 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4604 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4605 AddToWorklist(And.getNode());
4606 return DAG.getSetCC(DL, VT, And, LR, CC1);
4607 }
4608 }
4609
4610 // TODO: What is the 'or' equivalent of this fold?
4611 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4612 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4613 IsInteger && CC0 == ISD::SETNE &&
4614 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4615 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4616 SDValue One = DAG.getConstant(1, DL, OpVT);
4617 SDValue Two = DAG.getConstant(2, DL, OpVT);
4618 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4619 AddToWorklist(Add.getNode());
4620 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4621 }
4622
4623 // Try more general transforms if the predicates match and the only user of
4624 // the compares is the 'and' or 'or'.
4625 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4626 N0.hasOneUse() && N1.hasOneUse()) {
4627 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4628 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4629 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4630 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4631 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4632 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4633 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4634 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4635 }
4636
4637 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4638 // TODO - support non-uniform vector amounts.
4639 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4640 // Match a shared variable operand and 2 non-opaque constant operands.
4641 ConstantSDNode *C0 = isConstOrConstSplat(LR);
4642 ConstantSDNode *C1 = isConstOrConstSplat(RR);
4643 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4644 // Canonicalize larger constant as C0.
4645 if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4646 std::swap(C0, C1);
4647
4648 // The difference of the constants must be a single bit.
4649 const APInt &C0Val = C0->getAPIntValue();
4650 const APInt &C1Val = C1->getAPIntValue();
4651 if ((C0Val - C1Val).isPowerOf2()) {
4652 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4653 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4654 SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4655 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4656 SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4657 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4658 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4659 return DAG.getSetCC(DL, VT, And, Zero, CC0);
4660 }
4661 }
4662 }
4663 }
4664
4665 // Canonicalize equivalent operands to LL == RL.
4666 if (LL == RR && LR == RL) {
4667 CC1 = ISD::getSetCCSwappedOperands(CC1);
4668 std::swap(RL, RR);
4669 }
4670
4671 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4672 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4673 if (LL == RL && LR == RR) {
4674 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
4675 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
4676 if (NewCC != ISD::SETCC_INVALID &&
4677 (!LegalOperations ||
4678 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4679 TLI.isOperationLegal(ISD::SETCC, OpVT))))
4680 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4681 }
4682
4683 return SDValue();
4684}
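The first group of folds merges two compares against zero into one; in scalar terms (illustrative only):

#include <cstdint>
bool both_zero_before(uint32_t x, uint32_t y) { return x == 0 && y == 0; }
bool both_zero_after(uint32_t x, uint32_t y)  { return (x | y) == 0; }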
4685
4686/// This contains all DAGCombine rules which reduce two values combined by
4687/// an And operation to a single value. This makes them reusable in the context
4688/// of visitSELECT(). Rules involving constants are not included as
4689/// visitSELECT() already handles those cases.
4690SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4691 EVT VT = N1.getValueType();
4692 SDLoc DL(N);
4693
4694 // fold (and x, undef) -> 0
4695 if (N0.isUndef() || N1.isUndef())
4696 return DAG.getConstant(0, DL, VT);
4697
4698 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4699 return V;
4700
4701 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4702 VT.getSizeInBits() <= 64) {
4703 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4704 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4705 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4706 // immediate for an add, but it is legal if its top c2 bits are set,
4707 // transform the ADD so the immediate doesn't need to be materialized
4708 // in a register.
4709 APInt ADDC = ADDI->getAPIntValue();
4710 APInt SRLC = SRLI->getAPIntValue();
4711 if (ADDC.getMinSignedBits() <= 64 &&
4712 SRLC.ult(VT.getSizeInBits()) &&
4713 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4714 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4715 SRLC.getZExtValue());
4716 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4717 ADDC |= Mask;
4718 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4719 SDLoc DL0(N0);
4720 SDValue NewAdd =
4721 DAG.getNode(ISD::ADD, DL0, VT,
4722 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4723 CombineTo(N0.getNode(), NewAdd);
4724 // Return N so it doesn't get rechecked!
4725 return SDValue(N, 0);
4726 }
4727 }
4728 }
4729 }
4730 }
4731 }
4732
4733 // Reduce bit extract of low half of an integer to the narrower type.
4734 // (and (srl i64:x, K), KMask) ->
4735 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
4736 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4737 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4738 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4739 unsigned Size = VT.getSizeInBits();
4740 const APInt &AndMask = CAnd->getAPIntValue();
4741 unsigned ShiftBits = CShift->getZExtValue();
4742
4743 // Bail out, this node will probably disappear anyway.
4744 if (ShiftBits == 0)
4745 return SDValue();
4746
4747 unsigned MaskBits = AndMask.countTrailingOnes();
4748 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4749
4750 if (AndMask.isMask() &&
4751 // Required bits must not span the two halves of the integer and
4752 // must fit in the half size type.
4753 (ShiftBits + MaskBits <= Size / 2) &&
4754 TLI.isNarrowingProfitable(VT, HalfVT) &&
4755 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4756 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4757 TLI.isTruncateFree(VT, HalfVT) &&
4758 TLI.isZExtFree(HalfVT, VT)) {
4759 // The isNarrowingProfitable is to avoid regressions on PPC and
4760 // AArch64 which match a few 64-bit bit insert / bit extract patterns
4761 // on downstream users of this. Those patterns could probably be
4762 // extended to handle extensions mixed in.
4763
4764 SDLoc SL(N0);
4765 assert(MaskBits <= Size);
4766
4767 // Extracting the highest bit of the low half.
4768 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4769 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4770 N0.getOperand(0));
4771
4772 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4773 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4774 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4775 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4776 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4777 }
4778 }
4779 }
4780 }
4781
4782 return SDValue();
4783}
4784
4785bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4786 EVT LoadResultTy, EVT &ExtVT) {
4787 if (!AndC->getAPIntValue().isMask())
4788 return false;
4789
4790 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4791
4792 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4793 EVT LoadedVT = LoadN->getMemoryVT();
4794
4795 if (ExtVT == LoadedVT &&
4796 (!LegalOperations ||
4797 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4798 // ZEXTLOAD will match without needing to change the size of the value being
4799 // loaded.
4800 return true;
4801 }
4802
4803 // Do not change the width of volatile or atomic loads.
4804 if (!LoadN->isSimple())
4805 return false;
4806
4807 // Do not generate loads of non-round integer types since these can
4808 // be expensive (and would be wrong if the type is not byte sized).
4809 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4810 return false;
4811
4812 if (LegalOperations &&
4813 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4814 return false;
4815
4816 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4817 return false;
4818
4819 return true;
4820}
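Concretely, a mask that exactly covers the loaded bits is absorbed into a zero-extending load, e.g. (and (extload i8), 0xFF); a scalar sketch (illustrative only):

#include <cstdint>
uint32_t load_low_byte(const uint8_t *p) {
  return *p;  // a zextload i8 -> i32 already produces the masked value
}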
4821
4822bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4823 ISD::LoadExtType ExtType, EVT &MemVT,
4824 unsigned ShAmt) {
4825 if (!LDST)
4826 return false;
4827 // Only allow byte offsets.
4828 if (ShAmt % 8)
4829 return false;
4830
4831 // Do not generate loads of non-round integer types since these can
4832 // be expensive (and would be wrong if the type is not byte sized).
4833 if (!MemVT.isRound())
4834 return false;
4835
4836 // Don't change the width of volatile or atomic loads.
4837 if (!LDST->isSimple())
4838 return false;
4839
4840 // Verify that we are actually reducing a load width here.
4841 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4842 return false;
4843
4844 // Ensure that this isn't going to produce an unsupported memory access.
4845 if (ShAmt &&
4846 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4847 LDST->getAddressSpace(), ShAmt / 8,
4848 LDST->getMemOperand()->getFlags()))
4849 return false;
4850
4851 // It's not possible to generate a constant of extended or untyped type.
4852 EVT PtrType = LDST->getBasePtr().getValueType();
4853 if (PtrType == MVT::Untyped || PtrType.isExtended())
4854 return false;
4855
4856 if (isa<LoadSDNode>(LDST)) {
4857 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4858 // Don't transform one with multiple uses, this would require adding a new
4859 // load.
4860 if (!SDValue(Load, 0).hasOneUse())
4861 return false;
4862
4863 if (LegalOperations &&
4864 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4865 return false;
4866
4867 // For the transform to be legal, the load must produce only two values
4868 // (the value loaded and the chain). Don't transform a pre-increment
4869 // load, for example, which produces an extra value. Otherwise the
4870 // transformation is not equivalent, and the downstream logic to replace
4871 // uses gets things wrong.
4872 if (Load->getNumValues() > 2)
4873 return false;
4874
4875 // If the load that we're shrinking is an extload and we're not just
4876 // discarding the extension we can't simply shrink the load. Bail.
4877 // TODO: It would be possible to merge the extensions in some cases.
4878 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4879 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4880 return false;
4881
4882 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4883 return false;
4884 } else {
4885 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4886 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4887 // Can't write outside the original store
4888 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4889 return false;
4890
4891 if (LegalOperations &&
4892 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4893 return false;
4894 }
4895 return true;
4896}
4897
4898bool DAGCombiner::SearchForAndLoads(SDNode *N,
4899 SmallVectorImpl<LoadSDNode*> &Loads,
4900 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4901 ConstantSDNode *Mask,
4902 SDNode *&NodeToMask) {
4903 // Recursively search for the operands, looking for loads which can be
4904 // narrowed.
4905 for (SDValue Op : N->op_values()) {
4906 if (Op.getValueType().isVector())
4907 return false;
4908
4909 // Some constants may need fixing up later if they are too large.
4910 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4911 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4912 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4913 NodesWithConsts.insert(N);
4914 continue;
4915 }
4916
4917 if (!Op.hasOneUse())
4918 return false;
4919
4920 switch(Op.getOpcode()) {
4921 case ISD::LOAD: {
4922 auto *Load = cast<LoadSDNode>(Op);
4923 EVT ExtVT;
4924 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4925 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4926
4927 // ZEXTLOAD is already small enough.
4928 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4929 ExtVT.bitsGE(Load->getMemoryVT()))
4930 continue;
4931
4932 // Use LE to convert equal sized loads to zext.
4933 if (ExtVT.bitsLE(Load->getMemoryVT()))
4934 Loads.push_back(Load);
4935
4936 continue;
4937 }
4938 return false;
4939 }
4940 case ISD::ZERO_EXTEND:
4941 case ISD::AssertZext: {
4942 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4943 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4944 EVT VT = Op.getOpcode() == ISD::AssertZext ?
4945 cast<VTSDNode>(Op.getOperand(1))->getVT() :
4946 Op.getOperand(0).getValueType();
4947
4948 // We can accept extending nodes if the mask is wider or an equal
4949 // width to the original type.
4950 if (ExtVT.bitsGE(VT))
4951 continue;
4952 break;
4953 }
4954 case ISD::OR:
4955 case ISD::XOR:
4956 case ISD::AND:
4957 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4958 NodeToMask))
4959 return false;
4960 continue;
4961 }
4962
4963 // Allow one node which will be masked along with any loads found.
4964 if (NodeToMask)
4965 return false;
4966
4967 // Also ensure that the node to be masked only produces one data result.
4968 NodeToMask = Op.getNode();
4969 if (NodeToMask->getNumValues() > 1) {
4970 bool HasValue = false;
4971 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4972 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4973 if (VT != MVT::Glue && VT != MVT::Other) {
4974 if (HasValue) {
4975 NodeToMask = nullptr;
4976 return false;
4977 }
4978 HasValue = true;
4979 }
4980 }
4981 assert(HasValue && "Node to be masked has no data result?");
4982 }
4983 }
4984 return true;
4985}
4986
4987bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
4988 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4989 if (!Mask)
4990 return false;
4991
4992 if (!Mask->getAPIntValue().isMask())
4993 return false;
4994
4995 // No need to do anything if the and directly uses a load.
4996 if (isa<LoadSDNode>(N->getOperand(0)))
4997 return false;
4998
4999 SmallVector<LoadSDNode*, 8> Loads;
5000 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5001 SDNode *FixupNode = nullptr;
5002 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5003 if (Loads.size() == 0)
5004 return false;
5005
5006 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5007 SDValue MaskOp = N->getOperand(1);
5008
5009 // If it exists, fixup the single node we allow in the tree that needs
5010 // masking.
5011 if (FixupNode) {
5012 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5013 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5014 FixupNode->getValueType(0),
5015 SDValue(FixupNode, 0), MaskOp);
5016 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5017 if (And.getOpcode() == ISD::AND)
5018 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5019 }
5020
5021 // Narrow any constants that need it.
5022 for (auto *LogicN : NodesWithConsts) {
5023 SDValue Op0 = LogicN->getOperand(0);
5024 SDValue Op1 = LogicN->getOperand(1);
5025
5026 if (isa<ConstantSDNode>(Op0))
5027 std::swap(Op0, Op1);
5028
5029 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5030 Op1, MaskOp);
5031
5032 DAG.UpdateNodeOperands(LogicN, Op0, And);
5033 }
5034
5035 // Create narrow loads.
5036 for (auto *Load : Loads) {
5037 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5038 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5039 SDValue(Load, 0), MaskOp);
5040 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5041 if (And.getOpcode() == ISD::AND)
5042 And = SDValue(
5043 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5044 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5045 assert(NewLoad &&
5046 "Shouldn't be masking the load if it can't be narrowed");
5047 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5048 }
5049 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5050 return true;
5051 }
5052 return false;
5053}
5054
5055// Unfold
5056// x & (-1 'logical shift' y)
5057// To
5058// (x 'opposite logical shift' y) 'logical shift' y
5059// if it is better for performance.
5060SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5061 assert(N->getOpcode() == ISD::AND);
5062
5063 SDValue N0 = N->getOperand(0);
5064 SDValue N1 = N->getOperand(1);
5065
5066 // Do we actually prefer shifts over mask?
5067 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5068 return SDValue();
5069
5070 // Try to match (-1 '[outer] logical shift' y)
5071 unsigned OuterShift;
5072 unsigned InnerShift; // The opposite direction to the OuterShift.
5073 SDValue Y; // Shift amount.
5074 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5075 if (!M.hasOneUse())
5076 return false;
5077 OuterShift = M->getOpcode();
5078 if (OuterShift == ISD::SHL)
5079 InnerShift = ISD::SRL;
5080 else if (OuterShift == ISD::SRL)
5081 InnerShift = ISD::SHL;
5082 else
5083 return false;
5084 if (!isAllOnesConstant(M->getOperand(0)))
5085 return false;
5086 Y = M->getOperand(1);
5087 return true;
5088 };
5089
5090 SDValue X;
5091 if (matchMask(N1))
5092 X = N0;
5093 else if (matchMask(N0))
5094 X = N1;
5095 else
5096 return SDValue();
5097
5098 SDLoc DL(N);
5099 EVT VT = N->getValueType(0);
5100
5101 // tmp = x 'opposite logical shift' y
5102 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5103 // ret = tmp 'logical shift' y
5104 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5105
5106 return T1;
5107}
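In scalar terms, the unfold replaces a variable mask with a shift pair, e.g. clearing the top y bits of an i32 (illustrative only; valid for y in [0, 31]):

#include <cstdint>
uint32_t clear_high_with_mask(uint32_t x, unsigned y)   { return x & (~0u >> y); }
uint32_t clear_high_with_shifts(uint32_t x, unsigned y) { return (x << y) >> y; }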
5108
5109/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5110/// For a target with a bit test, this is expected to become test + set and save
5111/// at least 1 instruction.
5112static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5113 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5114
5115 // This is probably not worthwhile without a supported type.
5116 EVT VT = And->getValueType(0);
5117 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5118 if (!TLI.isTypeLegal(VT))
5119 return SDValue();
5120
5121 // Look through an optional extension and find a 'not'.
5122 // TODO: Should we favor test+set even without the 'not' op?
5123 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5124 if (Not.getOpcode() == ISD::ANY_EXTEND)
5125 Not = Not.getOperand(0);
5126 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5127 return SDValue();
5128
5129 // Look through an optional truncation. The source operand may not be the same
5130 // type as the original 'and', but that is ok because we are masking off
5131 // everything but the low bit.
5132 SDValue Srl = Not.getOperand(0);
5133 if (Srl.getOpcode() == ISD::TRUNCATE)
5134 Srl = Srl.getOperand(0);
5135
5136 // Match a shift-right by constant.
5137 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5138 !isa<ConstantSDNode>(Srl.getOperand(1)))
5139 return SDValue();
5140
5141 // We might have looked through casts that make this transform invalid.
5142 // TODO: If the source type is wider than the result type, do the mask and
5143 // compare in the source type.
5144 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5145 unsigned VTBitWidth = VT.getSizeInBits();
5146 if (ShiftAmt.uge(VTBitWidth))
5147 return SDValue();
5148
5149 // Turn this into a bit-test pattern using mask op + setcc:
5150 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5151 SDLoc DL(And);
5152 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5153 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5154 SDValue Mask = DAG.getConstant(
5155 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5156 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5157 SDValue Zero = DAG.getConstant(0, DL, VT);
5158 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5159 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5160}
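The resulting bit-test pattern, sketched for C = 5 (illustrative only):

#include <cstdint>
uint32_t bit5_clear_before(uint32_t x) { return ~(x >> 5) & 1u; }
uint32_t bit5_clear_after(uint32_t x)  { return (x & (1u << 5)) == 0 ? 1u : 0u; }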
5161
5162SDValue DAGCombiner::visitAND(SDNode *N) {
5163 SDValue N0 = N->getOperand(0);
5164 SDValue N1 = N->getOperand(1);
5165 EVT VT = N1.getValueType();
5166
5167 // x & x --> x
5168 if (N0 == N1)
5169 return N0;
5170
5171 // fold vector ops
5172 if (VT.isVector()) {
5173 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5174 return FoldedVOp;
5175
5176 // fold (and x, 0) -> 0, vector edition
5177 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5178 // do not return N0, because undef node may exist in N0
5179 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5180 SDLoc(N), N0.getValueType());
5181 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5182 // do not return N1, because undef node may exist in N1
5183 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5184 SDLoc(N), N1.getValueType());
5185
5186 // fold (and x, -1) -> x, vector edition
5187 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5188 return N1;
5189 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5190 return N0;
5191 }
5192
5193 // fold (and c1, c2) -> c1&c2
5194 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5195 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5196 return C;
5197
5198 // canonicalize constant to RHS
5199 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5200 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5201 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5202
5203 // fold (and x, -1) -> x
5204 if (isAllOnesConstant(N1))
5205 return N0;
5206
5207 // if (and x, c) is known to be zero, return 0
5208 unsigned BitWidth = VT.getScalarSizeInBits();
5209 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5210 APInt::getAllOnesValue(BitWidth)))
5211 return DAG.getConstant(0, SDLoc(N), VT);
5212
5213 if (SDValue NewSel = foldBinOpIntoSelect(N))
5214 return NewSel;
5215
5216 // reassociate and
5217 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5218 return RAND;
5219
5220 // Try to convert a constant mask AND into a shuffle clear mask.
5221 if (VT.isVector())
5222 if (SDValue Shuffle = XformToShuffleWithZero(N))
5223 return Shuffle;
5224
5225 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5226 return Combined;
5227
5228 // fold (and (or x, C), D) -> D if (C & D) == D
5229 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5230 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5231 };
5232 if (N0.getOpcode() == ISD::OR &&
5233 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5234 return N1;
5235 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5236 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5237 SDValue N0Op0 = N0.getOperand(0);
5238 APInt Mask = ~N1C->getAPIntValue();
5239 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5240 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5241 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5242 N0.getValueType(), N0Op0);
5243
5244 // Replace uses of the AND with uses of the Zero extend node.
5245 CombineTo(N, Zext);
5246
5247 // We actually want to replace all uses of the any_extend with the
5248 // zero_extend, to avoid duplicating things. This will later cause this
5249 // AND to be folded.
5250 CombineTo(N0.getNode(), Zext);
5251 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5252 }
5253 }
5254
5255 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5256 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5257 // already be zero by virtue of the width of the base type of the load.
5258 //
5259 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5260 // more cases.
5261 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5262 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5263 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5264 N0.getOperand(0).getResNo() == 0) ||
5265 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5266 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5267 N0 : N0.getOperand(0) );
5268
5269 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5270 // This can be a pure constant or a vector splat, in which case we treat the
5271 // vector as a scalar and use the splat value.
5272 APInt Constant = APInt::getNullValue(1);
5273 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5274 Constant = C->getAPIntValue();
5275 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5276 APInt SplatValue, SplatUndef;
5277 unsigned SplatBitSize;
5278 bool HasAnyUndefs;
5279 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5280 SplatBitSize, HasAnyUndefs);
5281 if (IsSplat) {
5282 // Undef bits can contribute to a possible optimisation if set, so
5283 // set them.
5284 SplatValue |= SplatUndef;
5285
5286 // The splat value may be something like "0x00FFFFFF", which means 0 for
5287 // the first vector value and FF for the rest, repeating. We need a mask
5288 // that will apply equally to all members of the vector, so AND all the
5289 // lanes of the constant together.
5290 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5291
5292 // If the splat value has been compressed to a bitlength lower
5293 // than the size of the vector lane, we need to re-expand it to
5294 // the lane size.
5295 if (EltBitWidth > SplatBitSize)
5296 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5297 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5298 SplatValue |= SplatValue.shl(SplatBitSize);
5299
5300 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5301 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5302 if ((SplatBitSize % EltBitWidth) == 0) {
5303 Constant = APInt::getAllOnesValue(EltBitWidth);
5304 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5305 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5306 }
5307 }
5308 }
5309
5310 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5311 // actually legal and isn't going to get expanded, else this is a false
5312 // optimisation.
5313 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5314 Load->getValueType(0),
5315 Load->getMemoryVT());
5316
5317 // Resize the constant to the same size as the original memory access before
5318 // extension. If it is still the AllOnesValue then this AND is completely
5319 // unneeded.
5320 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5321
5322 bool B;
5323 switch (Load->getExtensionType()) {
5324 default: B = false; break;
5325 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5326 case ISD::ZEXTLOAD:
5327 case ISD::NON_EXTLOAD: B = true; break;
5328 }
5329
5330 if (B && Constant.isAllOnesValue()) {
5331 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5332 // preserve semantics once we get rid of the AND.
5333 SDValue NewLoad(Load, 0);
5334
5335 // Fold the AND away. NewLoad may get replaced immediately.
5336 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5337
5338 if (Load->getExtensionType() == ISD::EXTLOAD) {
5339 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5340 Load->getValueType(0), SDLoc(Load),
5341 Load->getChain(), Load->getBasePtr(),
5342 Load->getOffset(), Load->getMemoryVT(),
5343 Load->getMemOperand());
5344 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5345 if (Load->getNumValues() == 3) {
5346 // PRE/POST_INC loads have 3 values.
5347 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5348 NewLoad.getValue(2) };
5349 CombineTo(Load, To, 3, true);
5350 } else {
5351 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5352 }
5353 }
5354
5355 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5356 }
5357 }
5358
5359 // fold (and (load x), 255) -> (zextload x, i8)
5360 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5361 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5362 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5363 (N0.getOpcode() == ISD::ANY_EXTEND &&
5364 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5365 if (SDValue Res = ReduceLoadWidth(N)) {
5366 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5367 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5368 AddToWorklist(N);
5369 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5370 return SDValue(N, 0);
5371 }
5372 }
5373
5374 if (LegalTypes) {
5375 // Attempt to propagate the AND back up to the leaves which, if they're
5376 // loads, can be combined to narrow loads and the AND node can be removed.
5377 // Perform after legalization so that extend nodes will already be
5378 // combined into the loads.
5379 if (BackwardsPropagateMask(N))
5380 return SDValue(N, 0);
5381 }
5382
5383 if (SDValue Combined = visitANDLike(N0, N1, N))
5384 return Combined;
5385
5386 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5387 if (N0.getOpcode() == N1.getOpcode())
5388 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5389 return V;
5390
5391 // Masking the negated extension of a boolean is just the zero-extended
5392 // boolean:
5393 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5394 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5395 //
5396 // Note: the SimplifyDemandedBits fold below can make an information-losing
5397 // transform, and then we have no way to find this better fold.
5398 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5399 if (isNullOrNullSplat(N0.getOperand(0))) {
5400 SDValue SubRHS = N0.getOperand(1);
5401 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5402 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5403 return SubRHS;
5404 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5405 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5406 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5407 }
5408 }
5409
5410 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5411 // fold (and (sra)) -> (and (srl)) when possible.
5412 if (SimplifyDemandedBits(SDValue(N, 0)))
5413 return SDValue(N, 0);
5414
5415 // fold (zext_inreg (extload x)) -> (zextload x)
5416 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5417 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5418 (ISD::isEXTLoad(N0.getNode()) ||
5419 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5420 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5421 EVT MemVT = LN0->getMemoryVT();
5422 // If we zero all the possible extended bits, then we can turn this into
5423 // a zextload if we are running before legalize or the operation is legal.
5424 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5425 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5426 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5427 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5428 ((!LegalOperations && LN0->isSimple()) ||
5429 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5430 SDValue ExtLoad =
5431 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5432 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5433 AddToWorklist(N);
5434 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5435 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5436 }
5437 }
5438
5439 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5440 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5441 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5442 N0.getOperand(1), false))
5443 return BSwap;
5444 }
5445
5446 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5447 return Shifts;
5448
5449 if (TLI.hasBitTest(N0, N1))
5450 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5451 return V;
5452
5453 return SDValue();
5454}
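// [Illustrative sketch -- not part of DAGCombiner.cpp] The zext_inreg fold in
// visitAND above turns (and (sextload i8, x), 0xFF) into (zextload i8, x). A
// minimal standalone scalar analogue of that identity for an i8 value widened
// to i32; all names below are invented for illustration:
#include <cassert>
#include <cstdint>

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t Mem = static_cast<int8_t>(V); // the loaded i8 value
    uint32_t SextThenMask =
        static_cast<uint32_t>(static_cast<int32_t>(Mem)) & 0xFFu;
    uint32_t Zext = static_cast<uint32_t>(static_cast<uint8_t>(Mem));
    assert(SextThenMask == Zext); // (and (sextload), 0xFF) == (zextload)
  }
  return 0;
}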
5455
5456/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5457SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5458 bool DemandHighBits) {
5459 if (!LegalOperations)
5460 return SDValue();
5461
5462 EVT VT = N->getValueType(0);
5463 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5464 return SDValue();
5465 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5466 return SDValue();
5467
5468 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5469 bool LookPassAnd0 = false;
5470 bool LookPassAnd1 = false;
5471 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5472 std::swap(N0, N1);
5473 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5474 std::swap(N0, N1);
5475 if (N0.getOpcode() == ISD::AND) {
5476 if (!N0.getNode()->hasOneUse())
5477 return SDValue();
5478 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5479 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5480 // This is needed for X86.
5481 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5482 N01C->getZExtValue() != 0xFFFF))
5483 return SDValue();
5484 N0 = N0.getOperand(0);
5485 LookPassAnd0 = true;
5486 }
5487
5488 if (N1.getOpcode() == ISD::AND) {
5489 if (!N1.getNode()->hasOneUse())
5490 return SDValue();
5491 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5492 if (!N11C || N11C->getZExtValue() != 0xFF)
5493 return SDValue();
5494 N1 = N1.getOperand(0);
5495 LookPassAnd1 = true;
5496 }
5497
5498 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5499 std::swap(N0, N1);
5500 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5501 return SDValue();
5502 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5503 return SDValue();
5504
5505 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5506 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5507 if (!N01C || !N11C)
5508 return SDValue();
5509 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5510 return SDValue();
5511
5512 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5513 SDValue N00 = N0->getOperand(0);
5514 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5515 if (!N00.getNode()->hasOneUse())
5516 return SDValue();
5517 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5518 if (!N001C || N001C->getZExtValue() != 0xFF)
5519 return SDValue();
5520 N00 = N00.getOperand(0);
5521 LookPassAnd0 = true;
5522 }
5523
5524 SDValue N10 = N1->getOperand(0);
5525 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5526 if (!N10.getNode()->hasOneUse())
5527 return SDValue();
5528 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5529 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5530 // for X86.
5531 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5532 N101C->getZExtValue() != 0xFFFF))
5533 return SDValue();
5534 N10 = N10.getOperand(0);
5535 LookPassAnd1 = true;
5536 }
5537
5538 if (N00 != N10)
5539 return SDValue();
5540
5541 // Make sure everything beyond the low halfword gets set to zero since the SRL
5542 // 16 will clear the top bits.
5543 unsigned OpSizeInBits = VT.getSizeInBits();
5544 if (DemandHighBits && OpSizeInBits > 16) {
5545 // If the left-shift isn't masked out then the only way this is a bswap is
5546 // if all bits beyond the low 8 are 0. In that case the entire pattern
5547 // reduces to a left shift anyway: leave it for other parts of the combiner.
5548 if (!LookPassAnd0)
5549 return SDValue();
5550
5551 // However, if the right shift isn't masked out then it might be because
5552 // it's not needed. See if we can spot that too.
5553 if (!LookPassAnd1 &&
5554 !DAG.MaskedValueIsZero(
5555 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5556 return SDValue();
5557 }
5558
5559 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5560 if (OpSizeInBits > 16) {
5561 SDLoc DL(N);
5562 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5563 DAG.getConstant(OpSizeInBits - 16, DL,
5564 getShiftAmountTy(VT)));
5565 }
5566 return Res;
5567}
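// [Illustrative sketch -- not part of DAGCombiner.cpp] A standalone check of
// the identity MatchBSwapHWordLow relies on for i32: the masked
// (a << 8) | (a >> 8) halfword pattern equals (bswap a) >> 16. The helper
// name and the use of the GCC/Clang __builtin_bswap32 intrinsic are
// assumptions made only for illustration.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t hwordLowPattern(uint32_t A) {
  return ((A << 8) & 0xFF00u) | ((A >> 8) & 0x00FFu);
}

int main() {
  for (uint32_t A : {0x12345678u, 0xAABBCCDDu, 0x0u, 0xFFFFFFFFu})
    assert(hwordLowPattern(A) == __builtin_bswap32(A) >> 16);
  return 0;
}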
5568
5569/// Return true if the specified node is an element that makes up a 32-bit
5570/// packed halfword byteswap.
5571/// ((x & 0x000000ff) << 8) |
5572/// ((x & 0x0000ff00) >> 8) |
5573/// ((x & 0x00ff0000) << 8) |
5574/// ((x & 0xff000000) >> 8)
5575static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5576 if (!N.getNode()->hasOneUse())
5577 return false;
5578
5579 unsigned Opc = N.getOpcode();
5580 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5581 return false;
5582
5583 SDValue N0 = N.getOperand(0);
5584 unsigned Opc0 = N0.getOpcode();
5585 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5586 return false;
5587
5588 ConstantSDNode *N1C = nullptr;
5589 // SHL or SRL: look upstream for AND mask operand
5590 if (Opc == ISD::AND)
5591 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5592 else if (Opc0 == ISD::AND)
5593 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5594 if (!N1C)
5595 return false;
5596
5597 unsigned MaskByteOffset;
5598 switch (N1C->getZExtValue()) {
5599 default:
5600 return false;
5601 case 0xFF: MaskByteOffset = 0; break;
5602 case 0xFF00: MaskByteOffset = 1; break;
5603 case 0xFFFF:
5604 // In case demanded bits didn't clear the bits that will be shifted out.
5605 // This is needed for X86.
5606 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5607 MaskByteOffset = 1;
5608 break;
5609 }
5610 return false;
5611 case 0xFF0000: MaskByteOffset = 2; break;
5612 case 0xFF000000: MaskByteOffset = 3; break;
5613 }
5614
5615 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5616 if (Opc == ISD::AND) {
5617 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5618 // (x >> 8) & 0xff
5619 // (x >> 8) & 0xff0000
5620 if (Opc0 != ISD::SRL)
5621 return false;
5622 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5623 if (!C || C->getZExtValue() != 8)
5624 return false;
5625 } else {
5626 // (x << 8) & 0xff00
5627 // (x << 8) & 0xff000000
5628 if (Opc0 != ISD::SHL)
5629 return false;
5630 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5631 if (!C || C->getZExtValue() != 8)
5632 return false;
5633 }
5634 } else if (Opc == ISD::SHL) {
5635 // (x & 0xff) << 8
5636 // (x & 0xff0000) << 8
5637 if (MaskByteOffset != 0 && MaskByteOffset != 2)
5638 return false;
5639 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5640 if (!C || C->getZExtValue() != 8)
5641 return false;
5642 } else { // Opc == ISD::SRL
5643 // (x & 0xff00) >> 8
5644 // (x & 0xff000000) >> 8
5645 if (MaskByteOffset != 1 && MaskByteOffset != 3)
5646 return false;
5647 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5648 if (!C || C->getZExtValue() != 8)
5649 return false;
5650 }
5651
5652 if (Parts[MaskByteOffset])
5653 return false;
5654
5655 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5656 return true;
5657}
5658
5659// Match 2 elements of a packed halfword bswap.
5660static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5661 if (N.getOpcode() == ISD::OR)
5662 return isBSwapHWordElement(N.getOperand(0), Parts) &&
5663 isBSwapHWordElement(N.getOperand(1), Parts);
5664
5665 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5666 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5667 if (!C || C->getAPIntValue() != 16)
5668 return false;
5669 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5670 return true;
5671 }
5672
5673 return false;
5674}
5675
5676// Match this pattern:
5677// (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
5678// And rewrite this to:
5679// (rotr (bswap A), 16)
5680static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
5681 SelectionDAG &DAG, SDNode *N, SDValue N0,
5682 SDValue N1, EVT VT, EVT ShiftAmountTy) {
5683 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
5684        "MatchBSwapHWordOrAndAnd: expecting i32");
5685 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5686 return SDValue();
5687 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
5688 return SDValue();
5689 // TODO: this is too restrictive; lifting this restriction requires more tests
5690 if (!N0->hasOneUse() || !N1->hasOneUse())
5691 return SDValue();
5692 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
5693 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
5694 if (!Mask0 || !Mask1)
5695 return SDValue();
5696 if (Mask0->getAPIntValue() != 0xff00ff00 ||
5697 Mask1->getAPIntValue() != 0x00ff00ff)
5698 return SDValue();
5699 SDValue Shift0 = N0.getOperand(0);
5700 SDValue Shift1 = N1.getOperand(0);
5701 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
5702 return SDValue();
5703 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
5704 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
5705 if (!ShiftAmt0 || !ShiftAmt1)
5706 return SDValue();
5707 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
5708 return SDValue();
5709 if (Shift0.getOperand(0) != Shift1.getOperand(0))
5710 return SDValue();
5711
5712 SDLoc DL(N);
5713 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
5714 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
5715 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5716}
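// [Illustrative sketch -- not part of DAGCombiner.cpp] A standalone check of
// the rewrite performed by matchBSwapHWordOrAndAnd for i32:
// (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
// == (rotr (bswap A), 16). Helper names and __builtin_bswap32 are
// illustration-only assumptions.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t orAndAndPattern(uint32_t A) {
  return ((A << 8) & 0xFF00FF00u) | ((A >> 8) & 0x00FF00FFu);
}

static uint32_t rotrBSwap16(uint32_t A) {
  uint32_t B = __builtin_bswap32(A);
  return (B >> 16) | (B << 16); // rotate right by 16
}

int main() {
  for (uint32_t A : {0x12345678u, 0xAABBCCDDu, 0x0u, 0xFFFFFFFFu})
    assert(orAndAndPattern(A) == rotrBSwap16(A));
  return 0;
}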
5717
5718/// Match a 32-bit packed halfword bswap. That is
5719/// ((x & 0x000000ff) << 8) |
5720/// ((x & 0x0000ff00) >> 8) |
5721/// ((x & 0x00ff0000) << 8) |
5722/// ((x & 0xff000000) >> 8)
5723/// => (rotl (bswap x), 16)
5724SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5725 if (!LegalOperations)
5726 return SDValue();
5727
5728 EVT VT = N->getValueType(0);
5729 if (VT != MVT::i32)
5730 return SDValue();
5731 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5732 return SDValue();
5733
5734 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
5735 getShiftAmountTy(VT)))
5736 return BSwap;
5737
5738 // Try again with commuted operands.
5739 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
5740 getShiftAmountTy(VT)))
5741 return BSwap;
5742
5743
5744 // Look for either
5745 // (or (bswaphpair), (bswaphpair))
5746 // (or (or (bswaphpair), (and)), (and))
5747 // (or (or (and), (bswaphpair)), (and))
5748 SDNode *Parts[4] = {};
5749
5750 if (isBSwapHWordPair(N0, Parts)) {
5751 // (or (or (and), (and)), (or (and), (and)))
5752 if (!isBSwapHWordPair(N1, Parts))
5753 return SDValue();
5754 } else if (N0.getOpcode() == ISD::OR) {
5755 // (or (or (or (and), (and)), (and)), (and))
5756 if (!isBSwapHWordElement(N1, Parts))
5757 return SDValue();
5758 SDValue N00 = N0.getOperand(0);
5759 SDValue N01 = N0.getOperand(1);
5760 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
5761 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
5762 return SDValue();
5763 } else
5764 return SDValue();
5765
5766 // Make sure the parts are all coming from the same node.
5767 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5768 return SDValue();
5769
5770 SDLoc DL(N);
5771 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5772 SDValue(Parts[0], 0));
5773
5774 // Result of the bswap should be rotated by 16. If it's not legal, then
5775 // do (x << 16) | (x >> 16).
5776 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5777 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5778 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5779 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5780 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5781 return DAG.getNode(ISD::OR, DL, VT,
5782 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5783 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5784}
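// [Illustrative sketch -- not part of DAGCombiner.cpp] A standalone check of
// the packed halfword bswap documented above MatchBSwapHWord:
// ((x & 0xff) << 8) | ((x & 0xff00) >> 8) | ((x & 0xff0000) << 8) |
// ((x & 0xff000000) >> 8) == rotl(bswap(x), 16). Helper names and
// __builtin_bswap32 are illustration-only assumptions.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t packedHWordBSwap(uint32_t X) {
  return ((X & 0x000000FFu) << 8) | ((X & 0x0000FF00u) >> 8) |
         ((X & 0x00FF0000u) << 8) | ((X & 0xFF000000u) >> 8);
}

int main() {
  for (uint32_t X : {0x12345678u, 0xAABBCCDDu, 0x0u, 0xFFFFFFFFu}) {
    uint32_t B = __builtin_bswap32(X);
    assert(packedHWordBSwap(X) == ((B << 16) | (B >> 16))); // rotl by 16
  }
  return 0;
}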
5785
5786/// This contains all DAGCombine rules which reduce two values combined by
5787/// an Or operation to a single value \see visitANDLike().
5788SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5789 EVT VT = N1.getValueType();
5790 SDLoc DL(N);
5791
5792 // fold (or x, undef) -> -1
5793 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5794 return DAG.getAllOnesConstant(DL, VT);
5795
5796 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5797 return V;
5798
5799 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
5800 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5801 // Don't increase # computations.
5802 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5803 // We can only do this xform if we know that bits from X that are set in C2
5804 // but not in C1 are already zero. Likewise for Y.
5805 if (const ConstantSDNode *N0O1C =
5806 getAsNonOpaqueConstant(N0.getOperand(1))) {
5807 if (const ConstantSDNode *N1O1C =
5808 getAsNonOpaqueConstant(N1.getOperand(1))) {
5809 // We can only do this xform if we know that bits from X that are set in
5810 // C2 but not in C1 are already zero. Likewise for Y.
5811 const APInt &LHSMask = N0O1C->getAPIntValue();
5812 const APInt &RHSMask = N1O1C->getAPIntValue();
5813
5814 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5815 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5816 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5817 N0.getOperand(0), N1.getOperand(0));
5818 return DAG.getNode(ISD::AND, DL, VT, X,
5819 DAG.getConstant(LHSMask | RHSMask, DL, VT));
5820 }
5821 }
5822 }
5823 }
5824
5825 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5826 if (N0.getOpcode() == ISD::AND &&
5827 N1.getOpcode() == ISD::AND &&
5828 N0.getOperand(0) == N1.getOperand(0) &&
5829 // Don't increase # computations.
5830 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5831 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5832 N0.getOperand(1), N1.getOperand(1));
5833 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5834 }
5835
5836 return SDValue();
5837}
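// [Illustrative sketch -- not part of DAGCombiner.cpp] The
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C1|C2) fold in visitORLike
// is only sound when X's bits inside C2-but-not-C1 are known zero, and
// likewise for Y. A standalone scalar instance with C1 = 0xF0, C2 = 0x0F
// (values chosen only for illustration):
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 0xF0, C2 = 0x0F;
  const uint32_t X = 0xA0; // X & (C2 & ~C1) == 0: low nibble already zero
  const uint32_t Y = 0x05; // Y & (C1 & ~C2) == 0: high nibble already zero
  assert(((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2)));
  return 0;
}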
5838
5839/// OR combines for which the commuted variant will be tried as well.
5840static SDValue visitORCommutative(
5841 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5842 EVT VT = N0.getValueType();
5843 if (N0.getOpcode() == ISD::AND) {
5844 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5845 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5846 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5847
5848 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5849 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5850 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5851 }
5852
5853 return SDValue();
5854}
5855
5856SDValue DAGCombiner::visitOR(SDNode *N) {
5857 SDValue N0 = N->getOperand(0);
5858 SDValue N1 = N->getOperand(1);
5859 EVT VT = N1.getValueType();
5860
5861 // x | x --> x
5862 if (N0 == N1)
5863 return N0;
5864
5865 // fold vector ops
5866 if (VT.isVector()) {
5867 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5868 return FoldedVOp;
5869
5870 // fold (or x, 0) -> x, vector edition
5871 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5872 return N1;
5873 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5874 return N0;
5875
5876 // fold (or x, -1) -> -1, vector edition
5877 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5878 // do not return N0, because undef node may exist in N0
5879 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5880 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5881 // do not return N1, because undef node may exist in N1
5882 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5883
5884 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5885 // Do this only if the resulting shuffle is legal.
5886 if (isa<ShuffleVectorSDNode>(N0) &&
5887 isa<ShuffleVectorSDNode>(N1) &&
5888 // Avoid folding a node with illegal type.
5889 TLI.isTypeLegal(VT)) {
5890 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5891 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5892 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5893 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5894 // Ensure both shuffles have a zero input.
5895 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5896 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5897 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5898 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5899 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5900 bool CanFold = true;
5901 int NumElts = VT.getVectorNumElements();
5902 SmallVector<int, 4> Mask(NumElts);
5903
5904 for (int i = 0; i != NumElts; ++i) {
5905 int M0 = SV0->getMaskElt(i);
5906 int M1 = SV1->getMaskElt(i);
5907
5908 // Determine if either index is pointing to a zero vector.
5909 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5910 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5911
5912 // If one element is zero and the other side is undef, keep undef.
5913 // This also handles the case that both are undef.
5914 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5915 Mask[i] = -1;
5916 continue;
5917 }
5918
5919 // Make sure only one of the elements is zero.
5920 if (M0Zero == M1Zero) {
5921 CanFold = false;
5922 break;
5923 }
5924
5925 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5926
5927 // We have a zero and non-zero element. If the non-zero came from
5928 // SV0 make the index a LHS index. If it came from SV1, make it
5929 // a RHS index. We need to mod by NumElts because we don't care
5930 // which operand it came from in the original shuffles.
5931 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5932 }
5933
5934 if (CanFold) {
5935 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5936 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5937
5938 SDValue LegalShuffle =
5939 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
5940 Mask, DAG);
5941 if (LegalShuffle)
5942 return LegalShuffle;
5943 }
5944 }
5945 }
5946 }
5947
5948 // fold (or c1, c2) -> c1|c2
5949 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5950 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
5951 return C;
5952
5953 // canonicalize constant to RHS
5954 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5955 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5956 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5957
5958 // fold (or x, 0) -> x
5959 if (isNullConstant(N1))
5960 return N0;
5961
5962 // fold (or x, -1) -> -1
5963 if (isAllOnesConstant(N1))
5964 return N1;
5965
5966 if (SDValue NewSel = foldBinOpIntoSelect(N))
5967 return NewSel;
5968
5969 // fold (or x, c) -> c iff (x & ~c) == 0
5970 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5971 return N1;
5972
5973 if (SDValue Combined = visitORLike(N0, N1, N))
5974 return Combined;
5975
5976 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5977 return Combined;
5978
5979 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5980 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5981 return BSwap;
5982 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5983 return BSwap;
5984
5985 // reassociate or
5986 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5987 return ROR;
5988
5989 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5990 // iff (c1 & c2) != 0 or c1/c2 are undef.
5991 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5992 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5993 };
5994 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5995 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5996 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
5997 {N1, N0.getOperand(1)})) {
5998 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5999 AddToWorklist(IOR.getNode());
6000 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6001 }
6002 }
6003
6004 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6005 return Combined;
6006 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6007 return Combined;
6008
6009 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6010 if (N0.getOpcode() == N1.getOpcode())
6011 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6012 return V;
6013
6014 // See if this is some rotate idiom.
6015 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6016 return Rot;
6017
6018 if (SDValue Load = MatchLoadCombine(N))
6019 return Load;
6020
6021 // Simplify the operands using demanded-bits information.
6022 if (SimplifyDemandedBits(SDValue(N, 0)))
6023 return SDValue(N, 0);
6024
6025 // If OR can be rewritten into ADD, try combines based on ADD.
6026 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6027 DAG.haveNoCommonBitsSet(N0, N1))
6028 if (SDValue Combined = visitADDLike(N))
6029 return Combined;
6030
6031 return SDValue();
6032}
6033
6034static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6035 if (Op.getOpcode() == ISD::AND &&
6036 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6037 Mask = Op.getOperand(1);
6038 return Op.getOperand(0);
6039 }
6040 return Op;
6041}
6042
6043/// Match "(X shl/srl V1) & V2" where V2 may not be present.
6044static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6045 SDValue &Mask) {
6046 Op = stripConstantMask(DAG, Op, Mask);
6047 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6048 Shift = Op;
6049 return true;
6050 }
6051 return false;
6052}
6053
6054/// Helper function for visitOR to extract the needed side of a rotate idiom
6055/// from a shl/srl/mul/udiv. This is meant to handle cases where
6056/// InstCombine merged some outside op with one of the shifts from
6057/// the rotate pattern.
6058/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6059/// Otherwise, returns an expansion of \p ExtractFrom based on the following
6060/// patterns:
6061///
6062/// (or (add v v) (shrl v bitwidth-1)):
6063/// expands (add v v) -> (shl v 1)
6064///
6065/// (or (mul v c0) (shrl (mul v c1) c2)):
6066/// expands (mul v c0) -> (shl (mul v c1) c3)
6067///
6068/// (or (udiv v c0) (shl (udiv v c1) c2)):
6069/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6070///
6071/// (or (shl v c0) (shrl (shl v c1) c2)):
6072/// expands (shl v c0) -> (shl (shl v c1) c3)
6073///
6074/// (or (shrl v c0) (shl (shrl v c1) c2)):
6075/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6076///
6077/// Such that in all cases, c3+c2==bitwidth(op v c1).
6078static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6079 SDValue ExtractFrom, SDValue &Mask,
6080 const SDLoc &DL) {
6081 assert(OppShift && ExtractFrom && "Empty SDValue");
6082 assert(
6083     (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6084     "Existing shift must be valid as a rotate half");
6085
6086 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6087
6088 // Value and Type of the shift.
6089 SDValue OppShiftLHS = OppShift.getOperand(0);
6090 EVT ShiftedVT = OppShiftLHS.getValueType();
6091
6092 // Amount of the existing shift.
6093 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6094
6095 // (add v v) -> (shl v 1)
6096 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6097 ExtractFrom.getOpcode() == ISD::ADD &&
6098 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6099 ExtractFrom.getOperand(0) == OppShiftLHS &&
6100 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6101 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6102 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6103
6104 // Preconditions:
6105 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6106 //
6107 // Find opcode of the needed shift to be extracted from (op0 v c0).
6108 unsigned Opcode = ISD::DELETED_NODE;
6109 bool IsMulOrDiv = false;
6110 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6111 // opcode or its arithmetic (mul or udiv) variant.
6112 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6113 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6114 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6115 return false;
6116 Opcode = NeededShift;
6117 return true;
6118 };
6119 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6120 // that the needed shift can be extracted from.
6121 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6122 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6123 return SDValue();
6124
6125 // op0 must be the same opcode on both sides, have the same LHS argument,
6126 // and produce the same value type.
6127 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6128 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6129 ShiftedVT != ExtractFrom.getValueType())
6130 return SDValue();
6131
6132 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6133 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6134 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6135 ConstantSDNode *ExtractFromCst =
6136 isConstOrConstSplat(ExtractFrom.getOperand(1));
6137 // TODO: We should be able to handle non-uniform constant vectors for these values
6138 // Check that we have constant values.
6139 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6140 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6141 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6142 return SDValue();
6143
6144 // Compute the shift amount we need to extract to complete the rotate.
6145 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6146 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6147 return SDValue();
6148 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6149 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6150 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6151 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6152 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6153
6154 // Now try extract the needed shift from the ExtractFrom op and see if the
6155 // result matches up with the existing shift's LHS op.
6156 if (IsMulOrDiv) {
6157 // Op to extract from is a mul or udiv by a constant.
6158 // Check:
6159 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6160 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6161 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6162 NeededShiftAmt.getZExtValue());
6163 APInt ResultAmt;
6164 APInt Rem;
6165 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6166 if (Rem != 0 || ResultAmt != OppLHSAmt)
6167 return SDValue();
6168 } else {
6169 // Op to extract from is a shift by a constant.
6170 // Check:
6171 // c2 - (bitwidth(op0 v c0) - c1) == c0
6172 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6173 ExtractFromAmt.getBitWidth()))
6174 return SDValue();
6175 }
6176
6177 // Return the expanded shift op that should allow a rotate to be formed.
6178 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6179 EVT ResVT = ExtractFrom.getValueType();
6180 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6181 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6182}
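// [Illustrative sketch -- not part of DAGCombiner.cpp] A standalone numeric
// instance of the mul case handled by extractShiftForRotate for i32: with
// c1 = 3, c0 = 48 and c2 = 28, (mul v, 48) can be treated as
// (shl (mul v, 3), 4) because 48 == 3 << 4 and 4 + 28 == 32, which completes
// a rotate of (mul v, 3) by 4. Constants are chosen only for illustration.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t rotl32(uint32_t X, unsigned S) { // S in (0, 32)
  return (X << S) | (X >> (32 - S));
}

int main() {
  for (uint32_t V : {1u, 0x12345678u, 0xDEADBEEFu}) {
    uint32_t Ored = (V * 48u) | ((V * 3u) >> 28); // the matched (or ...) form
    assert(Ored == rotl32(V * 3u, 4));
  }
  return 0;
}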
6183
6184// Return true if we can prove that, whenever Neg and Pos are both in the
6185// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6186// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6187//
6188// (or (shift1 X, Neg), (shift2 X, Pos))
6189//
6190// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6191// in direction shift1 by Neg. The range [0, EltSize) means that we only need
6192// to consider shift amounts with defined behavior.
6193static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6194 SelectionDAG &DAG) {
6195 // If EltSize is a power of 2 then:
6196 //
6197 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6198 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6199 //
6200 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6201 // for the stronger condition:
6202 //
6203 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6204 //
6205 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6206 // we can just replace Neg with Neg' for the rest of the function.
6207 //
6208 // In other cases we check for the even stronger condition:
6209 //
6210 // Neg == EltSize - Pos [B]
6211 //
6212 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6213 // behavior if Pos == 0 (and consequently Neg == EltSize).
6214 //
6215 // We could actually use [A] whenever EltSize is a power of 2, but the
6216 // only extra cases that it would match are those uninteresting ones
6217 // where Neg and Pos are never in range at the same time. E.g. for
6218 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6219 // as well as (sub 32, Pos), but:
6220 //
6221 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6222 //
6223 // always invokes undefined behavior for 32-bit X.
6224 //
6225 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6226 unsigned MaskLoBits = 0;
6227 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6228 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6229 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6230 unsigned Bits = Log2_64(EltSize);
6231 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6232 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6233 Neg = Neg.getOperand(0);
6234 MaskLoBits = Bits;
6235 }
6236 }
6237 }
6238
6239 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6240 if (Neg.getOpcode() != ISD::SUB)
6241 return false;
6242 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6243 if (!NegC)
6244 return false;
6245 SDValue NegOp1 = Neg.getOperand(1);
6246
6247 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6248 // Pos'. The truncation is redundant for the purpose of the equality.
6249 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6250 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6251 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6252 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6253 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6254 MaskLoBits))
6255 Pos = Pos.getOperand(0);
6256 }
6257 }
6258
6259 // The condition we need is now:
6260 //
6261 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6262 //
6263 // If NegOp1 == Pos then we need:
6264 //
6265 // EltSize & Mask == NegC & Mask
6266 //
6267 // (because "x & Mask" is a truncation and distributes through subtraction).
6268 APInt Width;
6269 if (Pos == NegOp1)
6270 Width = NegC->getAPIntValue();
6271
6272 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6273 // Then the condition we want to prove becomes:
6274 //
6275 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6276 //
6277 // which, again because "x & Mask" is a truncation, becomes:
6278 //
6279 // NegC & Mask == (EltSize - PosC) & Mask
6280 // EltSize & Mask == (NegC + PosC) & Mask
6281 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6282 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6283 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6284 else
6285 return false;
6286 } else
6287 return false;
6288
6289 // Now we just need to check that EltSize & Mask == Width & Mask.
6290 if (MaskLoBits)
6291 // EltSize & Mask is 0 since Mask is EltSize - 1.
6292 return Width.getLoBits(MaskLoBits) == 0;
6293 return Width == EltSize;
6294}
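// [Illustrative sketch -- not part of DAGCombiner.cpp] Condition [A] above in
// standalone scalar form for EltSize == 32: with Neg = (32 - Pos) & 31, both
// shift amounts stay in range for every Pos in [0, 32), including Pos == 0,
// and the shl/srl pair is exactly a rotate left by Pos.
#include <cassert>
#include <cstdint>

static uint32_t rotlViaMaskedSub(uint32_t X, unsigned Pos) {
  unsigned Neg = (32u - Pos) & 31u; // Neg == (Pos == 0 ? 0 : 32 - Pos)
  return (X << Pos) | (X >> Neg);
}

static uint32_t rotlRef(uint32_t X, unsigned Pos) {
  return Pos == 0 ? X : (X << Pos) | (X >> (32 - Pos));
}

int main() {
  for (unsigned Pos = 0; Pos < 32; ++Pos)
    assert(rotlViaMaskedSub(0x12345678u, Pos) == rotlRef(0x12345678u, Pos));
  return 0;
}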
6295
6296// A subroutine of MatchRotate used once we have found an OR of two opposite
6297// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6298// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6299// former being preferred if supported. InnerPos and InnerNeg are Pos and
6300// Neg with outer conversions stripped away.
6301SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6302 SDValue Neg, SDValue InnerPos,
6303 SDValue InnerNeg, unsigned PosOpcode,
6304 unsigned NegOpcode, const SDLoc &DL) {
6305 // fold (or (shl x, (*ext y)),
6306 // (srl x, (*ext (sub 32, y)))) ->
6307 // (rotl x, y) or (rotr x, (sub 32, y))
6308 //
6309 // fold (or (shl x, (*ext (sub 32, y))),
6310 // (srl x, (*ext y))) ->
6311 // (rotr x, y) or (rotl x, (sub 32, y))
6312 EVT VT = Shifted.getValueType();
6313 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6314 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6315 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6316 HasPos ? Pos : Neg);
6317 }
6318
6319 return SDValue();
6320}
6321
6322// MatchRotate - Handle an 'or' of two operands. If this is one of the many
6323// idioms for rotate, and if the target supports rotation instructions, generate
6324// a rot[lr].
6325SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6326 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6327 EVT VT = LHS.getValueType();
6328 if (!TLI.isTypeLegal(VT))
6329 return SDValue();
6330
6331 // The target must have at least one rotate flavor.
6332 bool HasROTL = hasOperation(ISD::ROTL, VT);
6333 bool HasROTR = hasOperation(ISD::ROTR, VT);
6334 if (!HasROTL && !HasROTR)
6335 return SDValue();
6336
6337 // Check for truncated rotate.
6338 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6339 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6340 assert(LHS.getValueType() == RHS.getValueType());
6341 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6342 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6343 }
6344 }
6345
6346 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6347 SDValue LHSShift; // The shift.
6348 SDValue LHSMask; // AND value if any.
6349 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6350
6351 SDValue RHSShift; // The shift.
6352 SDValue RHSMask; // AND value if any.
6353 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6354
6355 // If neither side matched a rotate half, bail
6356 if (!LHSShift && !RHSShift)
6357 return SDValue();
6358
6359 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6360 // side of the rotate, so try to handle that here. In all cases we need to
6361 // pass the matched shift from the opposite side to compute the opcode and
6362 // needed shift amount to extract. We still want to do this if both sides
6363 // matched a rotate half because one half may be a potential overshift that
6364 // can be broken down (ie if InstCombine merged two shl or srl ops into a
6365 // single one).
6366
6367 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6368 if (LHSShift)
6369 if (SDValue NewRHSShift =
6370 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6371 RHSShift = NewRHSShift;
6372 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6373 if (RHSShift)
6374 if (SDValue NewLHSShift =
6375 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6376 LHSShift = NewLHSShift;
6377
6378 // If a side is still missing, nothing else we can do.
6379 if (!RHSShift || !LHSShift)
6380 return SDValue();
6381
6382 // At this point we've matched or extracted a shift op on each side.
6383
6384 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6385 return SDValue(); // Not shifting the same value.
6386
6387 if (LHSShift.getOpcode() == RHSShift.getOpcode())
6388 return SDValue(); // Shifts must disagree.
6389
6390 // Canonicalize shl to left side in a shl/srl pair.
6391 if (RHSShift.getOpcode() == ISD::SHL) {
6392 std::swap(LHS, RHS);
6393 std::swap(LHSShift, RHSShift);
6394 std::swap(LHSMask, RHSMask);
6395 }
6396
6397 unsigned EltSizeInBits = VT.getScalarSizeInBits();
6398 SDValue LHSShiftArg = LHSShift.getOperand(0);
6399 SDValue LHSShiftAmt = LHSShift.getOperand(1);
6400 SDValue RHSShiftArg = RHSShift.getOperand(0);
6401 SDValue RHSShiftAmt = RHSShift.getOperand(1);
6402
6403 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6404 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6405 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6406 ConstantSDNode *RHS) {
6407 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6408 };
6409 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6410 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6411 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6412
6413 // If there is an AND of either shifted operand, apply it to the result.
6414 if (LHSMask.getNode() || RHSMask.getNode()) {
6415 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6416 SDValue Mask = AllOnes;
6417
6418 if (LHSMask.getNode()) {
6419 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6420 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6421 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6422 }
6423 if (RHSMask.getNode()) {
6424 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6425 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6426 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6427 }
6428
6429 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6430 }
6431
6432 return Rot;
6433 }
6434
6435 // If there is a mask here, and we have a variable shift, we can't be sure
6436 // that we're masking out the right stuff.
6437 if (LHSMask.getNode() || RHSMask.getNode())
6438 return SDValue();
6439
6440 // If the shift amount is sign/zext/any-extended just peel it off.
6441 SDValue LExtOp0 = LHSShiftAmt;
6442 SDValue RExtOp0 = RHSShiftAmt;
6443 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6444 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6445 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6446 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6447 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6448 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6449 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6450 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6451 LExtOp0 = LHSShiftAmt.getOperand(0);
6452 RExtOp0 = RHSShiftAmt.getOperand(0);
6453 }
6454
6455 SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6456 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6457 if (TryL)
6458 return TryL;
6459
6460 SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6461 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6462 if (TryR)
6463 return TryR;
6464
6465 return SDValue();
6466}
6467
6468namespace {
6469
6470/// Represents the known origin of an individual byte in a load combine pattern. The
6471/// value of the byte is either constant zero or comes from memory.
6472struct ByteProvider {
6473 // For constant zero providers Load is set to nullptr. For memory providers
6474 // Load represents the node which loads the byte from memory.
6475 // ByteOffset is the offset of the byte in the value produced by the load.
6476 LoadSDNode *Load = nullptr;
6477 unsigned ByteOffset = 0;
6478
6479 ByteProvider() = default;
6480
6481 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6482 return ByteProvider(Load, ByteOffset);
6483 }
6484
6485 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6486
6487 bool isConstantZero() const { return !Load; }
6488 bool isMemory() const { return Load; }
6489
6490 bool operator==(const ByteProvider &Other) const {
6491 return Other.Load == Load && Other.ByteOffset == ByteOffset;
6492 }
6493
6494private:
6495 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6496 : Load(Load), ByteOffset(ByteOffset) {}
6497};
6498
6499} // end anonymous namespace
6500
6501/// Recursively traverses the expression calculating the origin of the requested
6502/// byte of the given value. Returns None if the provider can't be calculated.
6503///
6504/// For every value except the root of the expression, this verifies that the
6505/// value has exactly one use; if that does not hold, it returns None. This way,
6506/// when the origin of a byte is returned, it is guaranteed that the values
6507/// which contribute to that byte are not used outside of this expression.
6508///
6509/// Because the parts of the expression are not allowed to have more than one
6510/// use this function iterates over trees, not DAGs. So it never visits the same
6511/// node more than once.
6512static const Optional<ByteProvider>
6513calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6514 bool Root = false) {
6515 // A typical i64-from-i8 pattern requires recursion up to a depth of 8 calls.
6516 if (Depth == 10)
6517 return None;
6518
6519 if (!Root && !Op.hasOneUse())
6520 return None;
6521
6522 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6523 unsigned BitWidth = Op.getValueSizeInBits();
6524 if (BitWidth % 8 != 0)
6525 return None;
6526 unsigned ByteWidth = BitWidth / 8;
6527 assert(Index < ByteWidth && "invalid index requested");
6528 (void) ByteWidth;
6529
6530 switch (Op.getOpcode()) {
6531 case ISD::OR: {
6532 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6533 if (!LHS)
6534 return None;
6535 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6536 if (!RHS)
6537 return None;
6538
6539 if (LHS->isConstantZero())
6540 return RHS;
6541 if (RHS->isConstantZero())
6542 return LHS;
6543 return None;
6544 }
6545 case ISD::SHL: {
6546 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6547 if (!ShiftOp)
6548 return None;
6549
6550 uint64_t BitShift = ShiftOp->getZExtValue();
6551 if (BitShift % 8 != 0)
6552 return None;
6553 uint64_t ByteShift = BitShift / 8;
6554
6555 return Index < ByteShift
6556 ? ByteProvider::getConstantZero()
6557 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6558 Depth + 1);
6559 }
6560 case ISD::ANY_EXTEND:
6561 case ISD::SIGN_EXTEND:
6562 case ISD::ZERO_EXTEND: {
6563 SDValue NarrowOp = Op->getOperand(0);
6564 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6565 if (NarrowBitWidth % 8 != 0)
6566 return None;
6567 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6568
6569 if (Index >= NarrowByteWidth)
6570 return Op.getOpcode() == ISD::ZERO_EXTEND
6571 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6572 : None;
6573 return calculateByteProvider(NarrowOp, Index, Depth + 1);
6574 }
6575 case ISD::BSWAP:
6576 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6577 Depth + 1);
6578 case ISD::LOAD: {
6579 auto L = cast<LoadSDNode>(Op.getNode());
6580 if (!L->isSimple() || L->isIndexed())
6581 return None;
6582
6583 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6584 if (NarrowBitWidth % 8 != 0)
6585 return None;
6586 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6587
6588 if (Index >= NarrowByteWidth)
6589 return L->getExtensionType() == ISD::ZEXTLOAD
6590 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6591 : None;
6592 return ByteProvider::getMemory(L, Index);
6593 }
6594 }
6595
6596 return None;
6597}
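// [Illustrative sketch -- not part of DAGCombiner.cpp] The byte-provider
// analysis above is what lets MatchLoadCombine replace a tree of byte loads,
// shifts and ors with one wide load. A standalone scalar picture of the
// pattern it recognizes (little-endian assembly of four adjacent bytes):
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint8_t P[4] = {0x78, 0x56, 0x34, 0x12};
  // Byte i of the result is provided by the 1-byte load at P + i.
  uint32_t ByBytes = (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
                     ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
  assert(ByBytes == 0x12345678u);
  uint32_t Wide;
  std::memcpy(&Wide, P, sizeof(Wide)); // the single i32 load
  // On a little-endian host, Wide == ByBytes; that equality is the combine.
  (void)Wide;
  return 0;
}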
6598
6599static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
6600 return i;
6601}
6602
6603static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6604 return BW - i - 1;
6605}
6606
6607// Check if the byte offsets we are looking at match either a big or a
6608// little endian value being loaded. Return true for big endian, false for
6609// little endian, and None if the match failed.
6610static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6611 int64_t FirstOffset) {
6612 // Endianness can be decided only when there are at least 2 bytes.
6613 unsigned Width = ByteOffsets.size();
6614 if (Width < 2)
6615 return None;
6616
6617 bool BigEndian = true, LittleEndian = true;
6618 for (unsigned i = 0; i < Width; i++) {
6619 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6620 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6621 BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6622 if (!BigEndian && !LittleEndian)
6623 return None;
6624 }
6625
6626 assert((BigEndian != LittleEndian) && "It should be either big endian or"
6627                                       "little endian");
6628 return BigEndian;
6629}
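// Editor's note (illustrative, not part of the original source): with
// FirstOffset = 8, ByteOffsets = {8, 9, 10, 11} normalizes to {0, 1, 2, 3}
// and isBigEndian returns false (little endian); {11, 10, 9, 8} normalizes
// to {3, 2, 1, 0} and returns true; a shuffled order such as {9, 8, 11, 10}
// matches neither layout and returns None.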
6630
6631static SDValue stripTruncAndExt(SDValue Value) {
6632 switch (Value.getOpcode()) {
6633 case ISD::TRUNCATE:
6634 case ISD::ZERO_EXTEND:
6635 case ISD::SIGN_EXTEND:
6636 case ISD::ANY_EXTEND:
6637 return stripTruncAndExt(Value.getOperand(0));
6638 }
6639 return Value;
6640}
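// Editor's note (illustrative, not part of the original source):
// stripTruncAndExt peels any chain of truncates and extensions, e.g.
// trunc (zext (trunc V)) yields V. This lets the store combine below treat
// two narrow stores as extracting bytes of the same underlying value even
// when the extractions went through different intermediate widths.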
6641
6642/// Match a pattern where a wide type scalar value is stored by several narrow
6643/// stores. Fold it into a single store or a BSWAP and a store if the target
6644/// supports it.
6645///
6646/// Assuming little endian target:
6647/// i8 *p = ...
6648/// i32 val = ...
6649/// p[0] = (val >> 0) & 0xFF;
6650/// p[1] = (val >> 8) & 0xFF;
6651/// p[2] = (val >> 16) & 0xFF;
6652/// p[3] = (val >> 24) & 0xFF;
6653/// =>
6654/// *((i32)p) = val;
6655///
6656/// i8 *p = ...
6657/// i32 val = ...
6658/// p[0] = (val >> 24) & 0xFF;
6659/// p[1] = (val >> 16) & 0xFF;
6660/// p[2] = (val >> 8) & 0xFF;
6661/// p[3] = (val >> 0) & 0xFF;
6662/// =>
6663/// *((i32)p) = BSWAP(val);
6664SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6665 // Collect all the stores in the chain.
6666 SDValue Chain;
6667 SmallVector<StoreSDNode *, 8> Stores;
6668 for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6669 // TODO: Allow unordered atomics when wider type is legal (see D66309)
6670 if (Store->getMemoryVT() != MVT::i8 ||
6671 !Store->isSimple() || Store->isIndexed())
6672 return SDValue();
6673 Stores.push_back(Store);
6674 Chain = Store->getChain();
6675 }
6676 // Handle simple types only.
6677 unsigned Width = Stores.size();
6678 EVT VT = EVT::getIntegerVT(
6679 *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6680 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6681 return SDValue();
6682
6683 if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6684 return SDValue();
6685
6686 // Check if all the bytes of the combined value we are looking at are stored
6687 // to the same base address. Collect byte offsets from the Base address into
6688 // ByteOffsets.
6689 SDValue CombinedValue;
6690 SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
6691 int64_t FirstOffset = INT64_MAX;
6692 StoreSDNode *FirstStore = nullptr;
6693 Optional<BaseIndexOffset> Base;
6694 for (auto Store : Stores) {
6695 // All the stores store a different byte of the CombinedValue. A truncate is
6696 // required to get that byte value.
6697 SDValue Trunc = Store->getValue();
6698 if (Trunc.getOpcode() != ISD::TRUNCATE)
6699 return SDValue();
6700 // A shift operation is required to get the right byte offset, except for
6701 // the first byte.
6702 int64_t Offset = 0;
6703 SDValue Value = Trunc.getOperand(0);
6704 if (Value.getOpcode() == ISD::SRL ||
6705 Value.getOpcode() == ISD::SRA) {
6706 auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
6707 // Trying to match the following pattern. The shift offset must be
6708 // a constant and a multiple of 8. It is the byte offset in "y".
6709 //
6710 // x = srl y, offset
6711 // i8 z = trunc x
6712 // store z, ...
6713 if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6714 return SDValue();
6715
6716 Offset = ShiftOffset->getSExtValue()/8;
6717 Value = Value.getOperand(0);
6718 }
6719
6720 // Stores must share the same combined value with different offsets.
6721 if (!CombinedValue)
6722 CombinedValue = Value;
6723 else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6724 return SDValue();
6725
6726 // The trunc and all the extend operations should be stripped to get the
6727 // real value being stored.
6728 else if (CombinedValue.getValueType() != VT) {
6729 if (Value.getValueType() == VT ||
6730 Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6731 CombinedValue = Value;
6732 // Give up if the combined value type is smaller than the store size.
6733 if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6734 return SDValue();
6735 }
6736
6737 // Stores must share the same base address
6738 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6739 int64_t ByteOffsetFromBase = 0;
6740 if (!Base)
6741 Base = Ptr;
6742 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6743 return SDValue();
6744
6745 // Remember the first byte store
6746 if (ByteOffsetFromBase < FirstOffset) {
6747 FirstStore = Store;
6748 FirstOffset = ByteOffsetFromBase;
6749 }
6750 // Map the offset in the store and the offset in the combined value, and
6751 // return early if it has been set before.
6752 if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6753 return SDValue();
6754 ByteOffsets[Offset] = ByteOffsetFromBase;
6755 }
6756
6757 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6758 assert(FirstStore && "First store must be set");
6759
6760 // Check if the bytes of the combined value we are looking at match either
6761 // a big or a little endian value store.
6762 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6763 if (!IsBigEndian.hasValue())
6764 return SDValue();
6765
6766 // The node we are looking at matches with the pattern, check if we can
6767 // replace it with a single bswap if needed and store.
6768
6769 // If the store needs byte swap check if the target supports it
6770 bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6771
6772 // Before legalize we can introduce illegal bswaps which will later be
6773 // converted to an explicit bswap sequence. This way we end up with a single
6774 // store and byte shuffling instead of several stores and byte shuffling.
6775 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6776 return SDValue();
6777
6778 // Check that a store of the wide type is both allowed and fast on the target
6779 bool Fast = false;
6780 bool Allowed =
6781 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6782 *FirstStore->getMemOperand(), &Fast);
6783 if (!Allowed || !Fast)
6784 return SDValue();
6785
6786 if (VT != CombinedValue.getValueType()) {
6787 assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6788        "Get unexpected store value to combine");
6789 CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6790 CombinedValue);
6791 }
6792
6793 if (NeedsBswap)
6794 CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6795
6796 SDValue NewStore =
6797 DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
6798 FirstStore->getPointerInfo(), FirstStore->getAlignment());
6799
6800 // Rely on other DAG combine rules to remove the other individual stores.
6801 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6802 return NewStore;
6803}
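// Editor's note: a hypothetical source-level illustration (not part of this
// file) of the fold MatchStoreCombine performs. Assuming a little-endian
// target where an unaligned i32 store is legal and fast, the four i8 stores
// in
//
//   void write_le32(uint8_t *p, uint32_t val) {
//     p[0] = val & 0xFF;
//     p[1] = (val >> 8) & 0xFF;
//     p[2] = (val >> 16) & 0xFF;
//     p[3] = (val >> 24) & 0xFF;
//   }
//
// are expected to collapse into a single i32 store of val.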
6804
6805/// Match a pattern where a wide type scalar value is loaded by several narrow
6806/// loads and combined by shifts and ors. Fold it into a single load or a load
6807/// and a BSWAP if the target supports it.
6808///
6809/// Assuming little endian target:
6810/// i8 *a = ...
6811/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6812/// =>
6813/// i32 val = *((i32)a)
6814///
6815/// i8 *a = ...
6816/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6817/// =>
6818/// i32 val = BSWAP(*((i32)a))
6819///
6820/// TODO: This rule matches complex patterns with OR node roots and doesn't
6821/// interact well with the worklist mechanism. When a part of the pattern is
6822/// updated (e.g. one of the loads) its direct users are put into the worklist,
6823/// but the root node of the pattern which triggers the load combine is not
6824/// necessarily a direct user of the changed node. For example, once the address
6825/// of t28 load is reassociated load combine won't be triggered:
6826/// t25: i32 = add t4, Constant:i32<2>
6827/// t26: i64 = sign_extend t25
6828/// t27: i64 = add t2, t26
6829/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6830/// t29: i32 = zero_extend t28
6831/// t32: i32 = shl t29, Constant:i8<8>
6832/// t33: i32 = or t23, t32
6833/// As a possible fix visitLoad can check if the load can be a part of a load
6834/// combine pattern and add corresponding OR roots to the worklist.
6835SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6836 assert(N->getOpcode() == ISD::OR &&
6837        "Can only match load combining against OR nodes");
6838
6839 // Handles simple types only
6840 EVT VT = N->getValueType(0);
6841 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6842 return SDValue();
6843 unsigned ByteWidth = VT.getSizeInBits() / 8;
6844
6845 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6846 auto MemoryByteOffset = [&] (ByteProvider P) {
6847 assert(P.isMemory() && "Must be a memory byte provider");
6848 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6849 assert(LoadBitWidth % 8 == 0 &&
6850        "can only analyze providers for individual bytes not bit");
6851 unsigned LoadByteWidth = LoadBitWidth / 8;
6852 return IsBigEndianTarget
6853 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6854 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6855 };
6856
6857 Optional<BaseIndexOffset> Base;
6858 SDValue Chain;
6859
6860 SmallPtrSet<LoadSDNode *, 8> Loads;
6861 Optional<ByteProvider> FirstByteProvider;
6862 int64_t FirstOffset = INT64_MAX;
6863
6864 // Check if all the bytes of the OR we are looking at are loaded from the same
6865 // base address. Collect byte offsets from the Base address in ByteOffsets.
6866 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
6867 unsigned ZeroExtendedBytes = 0;
6868 for (int i = ByteWidth - 1; i >= 0; --i) {
6869 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6870 if (!P)
6871 return SDValue();
6872
6873 if (P->isConstantZero()) {
6874 // It's OK for the N most significant bytes to be 0; we can just
6875 // zero-extend the load.
6876 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
6877 return SDValue();
6878 continue;
6879 }
6880 assert(P->isMemory() && "provenance should either be memory or zero");
6881
6882 LoadSDNode *L = P->Load;
6883 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
6884        !L->isIndexed() &&
6885        "Must be enforced by calculateByteProvider");
6886 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6887
6888 // All loads must share the same chain
6889 SDValue LChain = L->getChain();
6890 if (!Chain)
6891 Chain = LChain;
6892 else if (Chain != LChain)
6893 return SDValue();
6894
6895 // Loads must share the same base address
6896 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6897 int64_t ByteOffsetFromBase = 0;
6898 if (!Base)
6899 Base = Ptr;
6900 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6901 return SDValue();
6902
6903 // Calculate the offset of the current byte from the base address
6904 ByteOffsetFromBase += MemoryByteOffset(*P);
6905 ByteOffsets[i] = ByteOffsetFromBase;
6906
6907 // Remember the first byte load
6908 if (ByteOffsetFromBase < FirstOffset) {
6909 FirstByteProvider = P;
6910 FirstOffset = ByteOffsetFromBase;
6911 }
6912
6913 Loads.insert(L);
6914 }
6915 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6916        "memory, so there must be at least one load which produces the value");
6917 assert(Base && "Base address of the accessed memory location must be set");
6918 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6919
6920 bool NeedsZext = ZeroExtendedBytes > 0;
6921
6922 EVT MemVT =
6923 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
6924
6925 if (!MemVT.isSimple())
6926 return SDValue();
6927
6928 // Before legalize we can introduce too-wide illegal loads which will later
6929 // be split into legal-sized loads. This enables us to combine i64-load-by-i8
6930 // patterns into a couple of i32 loads on 32-bit targets.
6931 if (LegalOperations &&
6932 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
6933 MemVT))
6934 return SDValue();
6935
6936 // Check if the bytes of the OR we are looking at match either a big or a
6937 // little endian value load.
6938 Optional<bool> IsBigEndian = isBigEndian(
6939 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
6940 if (!IsBigEndian.hasValue())
6941 return SDValue();
6942
6943 assert(FirstByteProvider && "must be set");
6944
6945 // Ensure that the first byte is loaded from the zero offset of the first
6946 // load, so the combined value can be loaded from the first load's address.
6947 if (MemoryByteOffset(*FirstByteProvider) != 0)
6948 return SDValue();
6949 LoadSDNode *FirstLoad = FirstByteProvider->Load;
6950
6951 // The node we are looking at matches with the pattern, check if we can
6952 // replace it with a single (possibly zero-extended) load and bswap + shift if
6953 // needed.
6954
6955 // If the load needs byte swap check if the target supports it
6956 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6957
6958 // Before legalize we can introduce illegal bswaps which will later be
6959 // converted to an explicit bswap sequence. This way we end up with a single
6960 // load and byte shuffling instead of several loads and byte shuffling.
6961 // We do not introduce illegal bswaps when zero-extending as this tends to
6962 // introduce too many arithmetic instructions.
6963 if (NeedsBswap && (LegalOperations || NeedsZext) &&
6964 !TLI.isOperationLegal(ISD::BSWAP, VT))
6965 return SDValue();
6966
6967 // If we need to bswap and zero extend, we have to insert a shift. Check that
6968 // it is legal.
6969 if (NeedsBswap && NeedsZext && LegalOperations &&
6970 !TLI.isOperationLegal(ISD::SHL, VT))
6971 return SDValue();
6972
6973 // Check that a load of the wide type is both allowed and fast on the target
6974 bool Fast = false;
6975 bool Allowed =
6976 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6977 *FirstLoad->getMemOperand(), &Fast);
6978 if (!Allowed || !Fast)
6979 return SDValue();
6980
6981 SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
6982 SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
6983 FirstLoad->getPointerInfo(), MemVT,
6984 FirstLoad->getAlignment());
6985
6986 // Transfer chain users from old loads to the new load.
6987 for (LoadSDNode *L : Loads)
6988 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6989
6990 if (!NeedsBswap)
6991 return NewLoad;
6992
6993 SDValue ShiftedLoad =
6994 NeedsZext
6995 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
6996 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
6997 SDLoc(N), LegalOperations))
6998 : NewLoad;
6999 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7000}
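// Editor's note: a hypothetical source-level illustration (not part of this
// file) of the fold MatchLoadCombine performs. Assuming a little-endian
// target where an unaligned i32 load is legal and fast,
//
//   uint32_t read_le32(const uint8_t *a) {
//     return (uint32_t)a[0] | ((uint32_t)a[1] << 8) |
//            ((uint32_t)a[2] << 16) | ((uint32_t)a[3] << 24);
//   }
//
// is expected to become a single i32 load; on a big-endian target the same
// pattern becomes an i32 load followed by a BSWAP.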
7001
7002// If the target has andn, bsl, or a similar bit-select instruction,
7003// we want to unfold masked merge, with canonical pattern of:
7004// | A | |B|
7005// ((x ^ y) & m) ^ y
7006// | D |
7007// Into:
7008// (x & m) | (y & ~m)
7009// If y is a constant, and the 'andn' does not work with immediates,
7010// we unfold into a different pattern:
7011// ~(~x & m) & (m | y)
7012// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7013// the very least that breaks andnpd / andnps patterns, and because those
7014// patterns are simplified in IR and shouldn't be created in the DAG
7015SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7016 assert(N->getOpcode() == ISD::XOR);
7017
7018 // Don't touch 'not' (i.e. where y = -1).
7019 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7020 return SDValue();
7021
7022 EVT VT = N->getValueType(0);
7023
7024 // There are 3 commutable operators in the pattern,
7025 // so we have to deal with 8 possible variants of the basic pattern.
7026 SDValue X, Y, M;
7027 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7028 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7029 return false;
7030 SDValue Xor = And.getOperand(XorIdx);
7031 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7032 return false;
7033 SDValue Xor0 = Xor.getOperand(0);
7034 SDValue Xor1 = Xor.getOperand(1);
7035 // Don't touch 'not' (i.e. where y = -1).
7036 if (isAllOnesOrAllOnesSplat(Xor1))
7037 return false;
7038 if (Other == Xor0)
7039 std::swap(Xor0, Xor1);
7040 if (Other != Xor1)
7041 return false;
7042 X = Xor0;
7043 Y = Xor1;
7044 M = And.getOperand(XorIdx ? 0 : 1);
7045 return true;
7046 };
7047
7048 SDValue N0 = N->getOperand(0);
7049 SDValue N1 = N->getOperand(1);
7050 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7051 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7052 return SDValue();
7053
7054 // Don't do anything if the mask is constant. This should not be reachable.
7055 // InstCombine should have already unfolded this pattern, and DAGCombiner
7056 // probably shouldn't produce it either.
7057 if (isa<ConstantSDNode>(M.getNode()))
7058 return SDValue();
7059
7060 // We can transform if the target has AndNot
7061 if (!TLI.hasAndNot(M))
7062 return SDValue();
7063
7064 SDLoc DL(N);
7065
7066 // If Y is a constant, check that 'andn' works with immediates.
7067 if (!TLI.hasAndNot(Y)) {
7068 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7069 // If not, we need to do a bit more work to make sure andn is still used.
7070 SDValue NotX = DAG.getNOT(DL, X, VT);
7071 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7072 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7073 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7074 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7075 }
7076
7077 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7078 SDValue NotM = DAG.getNOT(DL, M, VT);
7079 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7080
7081 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7082}
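// Editor's note (illustrative, not part of the original source): with
// x = 0b1100, y = 0b1010, m = 0b0110, the masked-merge form
// ((x ^ y) & m) ^ y = (0b0110 & 0b0110) ^ 0b1010 = 0b1100, and the unfolded
// form (x & m) | (y & ~m) = 0b0100 | 0b1000 = 0b1100 as well: the result
// takes bits of x where m is set and bits of y where it is clear.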
7083
7084SDValue DAGCombiner::visitXOR(SDNode *N) {
7085 SDValue N0 = N->getOperand(0);
7086 SDValue N1 = N->getOperand(1);
7087 EVT VT = N0.getValueType();
7088
7089 // fold vector ops
7090 if (VT.isVector()) {
7091 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7092 return FoldedVOp;
7093
7094 // fold (xor x, 0) -> x, vector edition
7095 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7096 return N1;
7097 if (ISD::isBuildVectorAllZeros(N1.getNode()))
7098 return N0;
7099 }
7100
7101 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7102 SDLoc DL(N);
7103 if (N0.isUndef() && N1.isUndef())
7104 return DAG.getConstant(0, DL, VT);
7105
7106 // fold (xor x, undef) -> undef
7107 if (N0.isUndef())
7108 return N0;
7109 if (N1.isUndef())
7110 return N1;
7111
7112 // fold (xor c1, c2) -> c1^c2
7113 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7114 return C;
7115
7116 // canonicalize constant to RHS
7117 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7118 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7119 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7120
7121 // fold (xor x, 0) -> x
7122 if (isNullConstant(N1))
7123 return N0;
7124
7125 if (SDValue NewSel = foldBinOpIntoSelect(N))
7126 return NewSel;
7127
7128 // reassociate xor
7129 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7130 return RXOR;
7131
7132 // fold !(x cc y) -> (x !cc y)
7133 unsigned N0Opcode = N0.getOpcode();
7134 SDValue LHS, RHS, CC;
7135 if (TLI.isConstTrueVal(N1.getNode()) &&
7136 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7137 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7138 LHS.getValueType());
7139 if (!LegalOperations ||
7140 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7141 switch (N0Opcode) {
7142 default:
7143 llvm_unreachable("Unhandled SetCC Equivalent!");
7144 case ISD::SETCC:
7145 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7146 case ISD::SELECT_CC:
7147 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7148 N0.getOperand(3), NotCC);
7149 case ISD::STRICT_FSETCC:
7150 case ISD::STRICT_FSETCCS: {
7151 if (N0.hasOneUse()) {
7152 // FIXME Can we handle multiple uses? Could we token factor the chain
7153 // results from the new/old setcc?
7154 SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7155 N0.getOperand(0),
7156 N0Opcode == ISD::STRICT_FSETCCS);
7157 CombineTo(N, SetCC);
7158 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7159 recursivelyDeleteUnusedNodes(N0.getNode());
7160 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7161 }
7162 break;
7163 }
7164 }
7165 }
7166 }
7167
7168 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7169 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7170 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7171 SDValue V = N0.getOperand(0);
7172 SDLoc DL0(N0);
7173 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7174 DAG.getConstant(1, DL0, V.getValueType()));
7175 AddToWorklist(V.getNode());
7176 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7177 }
7178
7179 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7180 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7181 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7182 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7183 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7184 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7185 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7186 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7187 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7188 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7189 }
7190 }
7191 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7192 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7193 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7194 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7195 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7196 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7197 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7198 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7199 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7200 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7201 }
7202 }
7203
7204 // fold (not (neg x)) -> (add X, -1)
7205 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7206 // Y is a constant or the subtract has a single use.
7207 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7208 isNullConstant(N0.getOperand(0))) {
7209 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7210 DAG.getAllOnesConstant(DL, VT));
7211 }
7212
7213 // fold (not (add X, -1)) -> (neg X)
7214 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7215 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7216 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7217 N0.getOperand(0));
7218 }
7219
7220 // fold (xor (and x, y), y) -> (and (not x), y)
7221 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7222 SDValue X = N0.getOperand(0);
7223 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7224 AddToWorklist(NotX.getNode());
7225 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7226 }
7227
7228 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7229 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7230 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7231 unsigned BitWidth = VT.getScalarSizeInBits();
7232 if (XorC && ShiftC) {
7233 // Don't crash on an oversized shift. We cannot guarantee that a bogus
7234 // shift has been simplified to undef.
7235 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7236 if (ShiftAmt < BitWidth) {
7237 APInt Ones = APInt::getAllOnesValue(BitWidth);
7238 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7239 if (XorC->getAPIntValue() == Ones) {
7240 // If the xor constant is a shifted -1, do a 'not' before the shift:
7241 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7242 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7243 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7244 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7245 }
7246 }
7247 }
7248 }
7249
7250 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7251 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7252 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7253 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7254 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7255 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7256 SDValue S0 = S.getOperand(0);
7257 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7258 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7259 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7260 if (C->getAPIntValue() == (OpSizeInBits - 1))
7261 return DAG.getNode(ISD::ABS, DL, VT, S0);
7262 }
7263 }
7264 }
7265
7266 // fold (xor x, x) -> 0
7267 if (N0 == N1)
7268 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7269
7270 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7271 // Here is a concrete example of this equivalence:
7272 // i16 x == 14
7273 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7274 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7275 //
7276 // =>
7277 //
7278 // i16 ~1 == 0b1111111111111110
7279 // i16 rol(~1, 14) == 0b1011111111111111
7280 //
7281 // Some additional tips to help conceptualize this transform:
7282 // - Try to see the operation as placing a single zero in a value of all ones.
7283 // - There exists no value for x which would allow the result to contain zero.
7284 // - Values of x larger than the bitwidth are undefined and do not require a
7285 // consistent result.
7286 // - Pushing the zero left requires shifting one-bits in from the right.
7287 // A rotate left of ~1 is a nice way of achieving the desired result.
7288 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7289 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7290 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7291 N0.getOperand(1));
7292 }
7293
7294 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7295 if (N0Opcode == N1.getOpcode())
7296 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7297 return V;
7298
7299 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7300 if (SDValue MM = unfoldMaskedMerge(N))
7301 return MM;
7302
7303 // Simplify the expression using non-local knowledge.
7304 if (SimplifyDemandedBits(SDValue(N, 0)))
7305 return SDValue(N, 0);
7306
7307 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7308 return Combined;
7309
7310 return SDValue();
7311}
7312
7313/// If we have a shift-by-constant of a bitwise logic op that itself has a
7314/// shift-by-constant operand with identical opcode, we may be able to convert
7315/// that into 2 independent shifts followed by the logic op. This is a
7316/// throughput improvement.
7317static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7318 // Match a one-use bitwise logic op.
7319 SDValue LogicOp = Shift->getOperand(0);
7320 if (!LogicOp.hasOneUse())
7321 return SDValue();
7322
7323 unsigned LogicOpcode = LogicOp.getOpcode();
7324 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7325 LogicOpcode != ISD::XOR)
7326 return SDValue();
7327
7328 // Find a matching one-use shift by constant.
7329 unsigned ShiftOpcode = Shift->getOpcode();
7330 SDValue C1 = Shift->getOperand(1);
7331 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7332 assert(C1Node && "Expected a shift with constant operand");
7333 const APInt &C1Val = C1Node->getAPIntValue();
7334 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7335 const APInt *&ShiftAmtVal) {
7336 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7337 return false;
7338
7339 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7340 if (!ShiftCNode)
7341 return false;
7342
7343 // Capture the shifted operand and shift amount value.
7344 ShiftOp = V.getOperand(0);
7345 ShiftAmtVal = &ShiftCNode->getAPIntValue();
7346
7347 // Shift amount types do not have to match their operand type, so check that
7348 // the constants are the same width.
7349 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7350 return false;
7351
7352 // The fold is not valid if the sum of the shift values exceeds bitwidth.
7353 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7354 return false;
7355
7356 return true;
7357 };
7358
7359 // Logic ops are commutative, so check each operand for a match.
7360 SDValue X, Y;
7361 const APInt *C0Val;
7362 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7363 Y = LogicOp.getOperand(1);
7364 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7365 Y = LogicOp.getOperand(0);
7366 else
7367 return SDValue();
7368
7369 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7370 SDLoc DL(Shift);
7371 EVT VT = Shift->getValueType(0);
7372 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7373 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7374 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7375 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7376 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7377}
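// Editor's note (illustrative, not part of the original source): since a
// shift distributes over AND/OR/XOR, shl (xor (shl X, 2), Y), 3 becomes
// xor (shl X, 5), (shl Y, 3). The two new shifts no longer depend on each
// other and can issue in parallel, which is the throughput improvement the
// comment above refers to (valid because 2 + 3 stays below the bit width).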
7378
7379/// Handle transforms common to the three shifts, when the shift amount is a
7380/// constant.
7381/// We are looking for: (shift being one of shl/sra/srl)
7382/// shift (binop X, C0), C1
7383/// And want to transform into:
7384/// binop (shift X, C1), (shift C0, C1)
7385SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7386 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7387
7388 // Do not turn a 'not' into a regular xor.
7389 if (isBitwiseNot(N->getOperand(0)))
7390 return SDValue();
7391
7392 // The inner binop must be one-use, since we want to replace it.
7393 SDValue LHS = N->getOperand(0);
7394 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7395 return SDValue();
7396
7397 // TODO: This is limited to early combining because it may reveal regressions
7398 // otherwise. But since we just checked a target hook to see if this is
7399 // desirable, that should have filtered out cases where this interferes
7400 // with some other pattern matching.
7401 if (!LegalTypes)
7402 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7403 return R;
7404
7405 // We want to pull some binops through shifts, so that we have (and (shift))
7406 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
7407 // thing happens with address calculations, so it's important to canonicalize
7408 // it.
7409 switch (LHS.getOpcode()) {
7410 default:
7411 return SDValue();
7412 case ISD::OR:
7413 case ISD::XOR:
7414 case ISD::AND:
7415 break;
7416 case ISD::ADD:
7417 if (N->getOpcode() != ISD::SHL)
7418 return SDValue(); // only shl(add) not sr[al](add).
7419 break;
7420 }
7421
7422 // We require the RHS of the binop to be a constant and not opaque as well.
7423 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7424 if (!BinOpCst)
7425 return SDValue();
7426
7427 // FIXME: disable this unless the input to the binop is a shift by a constant
7428 // or is copy/select. Enable this in other cases when figure out it's exactly
7429 // profitable.
7430 SDValue BinOpLHSVal = LHS.getOperand(0);
7431 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7432 BinOpLHSVal.getOpcode() == ISD::SRA ||
7433 BinOpLHSVal.getOpcode() == ISD::SRL) &&
7434 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7435 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7436 BinOpLHSVal.getOpcode() == ISD::SELECT;
7437
7438 if (!IsShiftByConstant && !IsCopyOrSelect)
7439 return SDValue();
7440
7441 if (IsCopyOrSelect && N->hasOneUse())
7442 return SDValue();
7443
7444 // Fold the constants, shifting the binop RHS by the shift amount.
7445 SDLoc DL(N);
7446 EVT VT = N->getValueType(0);
7447 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7448 N->getOperand(1));
7449 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7450
7451 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7452 N->getOperand(1));
7453 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7454}
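// Editor's note (illustrative, not part of the original source): when the
// profitability checks above pass, shl (or X, 0xF0), 8 becomes
// or (shl X, 8), 0xF000: the binop constant is folded at compile time and
// the logic op ends up outside the shift, matching the address-calculation
// canonicalization described in the comment above.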
7455
7456SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7457 assert(N->getOpcode() == ISD::TRUNCATE);
7458 assert(N->getOperand(0).getOpcode() == ISD::AND);
7459
7460 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7461 EVT TruncVT = N->getValueType(0);
7462 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7463 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7464 SDValue N01 = N->getOperand(0).getOperand(1);
7465 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7466 SDLoc DL(N);
7467 SDValue N00 = N->getOperand(0).getOperand(0);
7468 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7469 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7470 AddToWorklist(Trunc00.getNode());
7471 AddToWorklist(Trunc01.getNode());
7472 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7473 }
7474 }
7475
7476 return SDValue();
7477}
7478
7479SDValue DAGCombiner::visitRotate(SDNode *N) {
7480 SDLoc dl(N);
7481 SDValue N0 = N->getOperand(0);
7482 SDValue N1 = N->getOperand(1);
7483 EVT VT = N->getValueType(0);
7484 unsigned Bitsize = VT.getScalarSizeInBits();
7485
7486 // fold (rot x, 0) -> x
7487 if (isNullOrNullSplat(N1))
7488 return N0;
7489
7490 // fold (rot x, c) -> x iff (c % BitSize) == 0
7491 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7492 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7493 if (DAG.MaskedValueIsZero(N1, ModuloMask))
7494 return N0;
7495 }
7496
7497 // fold (rot x, c) -> (rot x, c % BitSize)
7498 bool OutOfRange = false;
7499 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
7500 OutOfRange |= C->getAPIntValue().uge(Bitsize);
7501 return true;
7502 };
7503 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
7504 EVT AmtVT = N1.getValueType();
7505 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
7506 if (SDValue Amt =
7507 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
7508 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
7509 }
7510
7511 // Simplify the operands using demanded-bits information.
7512 if (SimplifyDemandedBits(SDValue(N, 0)))
7513 return SDValue(N, 0);
7514
7515 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7516 if (N1.getOpcode() == ISD::TRUNCATE &&
7517 N1.getOperand(0).getOpcode() == ISD::AND) {
7518 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7519 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7520 }
7521
7522 unsigned NextOp = N0.getOpcode();
7523 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7524 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7525 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7526 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7527 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7528 EVT ShiftVT = C1->getValueType(0);
7529 bool SameSide = (N->getOpcode() == NextOp);
7530 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7531 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
7532 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
7533 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7534 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7535 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
7536 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7537 CombinedShiftNorm);
7538 }
7539 }
7540 }
7541 return SDValue();
7542}
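// Editor's note (illustrative, not part of the original source): for an i8
// value, rotl x, 11 is normalized to rotl x, 3 (11 urem 8),
// rotl (rotl x, 3), 2 folds to rotl x, 5, and a rotate amount that is a
// multiple of the bit width folds away entirely.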
7543
7544SDValue DAGCombiner::visitSHL(SDNode *N) {
7545 SDValue N0 = N->getOperand(0);
7546 SDValue N1 = N->getOperand(1);
7547 if (SDValue V = DAG.simplifyShift(N0, N1))
7548 return V;
7549
7550 EVT VT = N0.getValueType();
7551 EVT ShiftVT = N1.getValueType();
7552 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7553
7554 // fold vector ops
7555 if (VT.isVector()) {
7556 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7557 return FoldedVOp;
7558
7559 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7560 // If setcc produces all-one true value then:
7561 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7562 if (N1CV && N1CV->isConstant()) {
7563 if (N0.getOpcode() == ISD::AND) {
7564 SDValue N00 = N0->getOperand(0);
7565 SDValue N01 = N0->getOperand(1);
7566 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7567
7568 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7569 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7570 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7571 if (SDValue C =
7572 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
7573 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7574 }
7575 }
7576 }
7577 }
7578
7579 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7580
7581 // fold (shl c1, c2) -> c1<<c2
7582 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
7583 return C;
7584
7585 if (SDValue NewSel = foldBinOpIntoSelect(N))
7586 return NewSel;
7587
7588 // if (shl x, c) is known to be zero, return 0
7589 if (DAG.MaskedValueIsZero(SDValue(N, 0),
7590 APInt::getAllOnesValue(OpSizeInBits)))
7591 return DAG.getConstant(0, SDLoc(N), VT);
7592
7593 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7594 if (N1.getOpcode() == ISD::TRUNCATE &&
7595 N1.getOperand(0).getOpcode() == ISD::AND) {
7596 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7597 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7598 }
7599
7600 if (SimplifyDemandedBits(SDValue(N, 0)))
7601 return SDValue(N, 0);
7602
7603 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7604 if (N0.getOpcode() == ISD::SHL) {
7605 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7606 ConstantSDNode *RHS) {
7607 APInt c1 = LHS->getAPIntValue();
7608 APInt c2 = RHS->getAPIntValue();
7609 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7610 return (c1 + c2).uge(OpSizeInBits);
7611 };
7612 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7613 return DAG.getConstant(0, SDLoc(N), VT);
7614
7615 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7616 ConstantSDNode *RHS) {
7617 APInt c1 = LHS->getAPIntValue();
7618 APInt c2 = RHS->getAPIntValue();
7619 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7620 return (c1 + c2).ult(OpSizeInBits);
7621 };
7622 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7623 SDLoc DL(N);
7624 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7625 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7626 }
7627 }
7628
7629 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7630 // For this to be valid, the second form must not preserve any of the bits
7631 // that are shifted out by the inner shift in the first form. This means
7632 // the outer shift size must be >= the number of bits added by the ext.
7633 // As a corollary, we don't care what kind of ext it is.
7634 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7635 N0.getOpcode() == ISD::ANY_EXTEND ||
7636 N0.getOpcode() == ISD::SIGN_EXTEND) &&
7637 N0.getOperand(0).getOpcode() == ISD::SHL) {
7638 SDValue N0Op0 = N0.getOperand(0);
7639 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7640 EVT InnerVT = N0Op0.getValueType();
7641 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7642
7643 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7644 ConstantSDNode *RHS) {
7645 APInt c1 = LHS->getAPIntValue();
7646 APInt c2 = RHS->getAPIntValue();
7647 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7648 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7649 (c1 + c2).uge(OpSizeInBits);
7650 };
7651 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7652 /*AllowUndefs*/ false,
7653 /*AllowTypeMismatch*/ true))
7654 return DAG.getConstant(0, SDLoc(N), VT);
7655
7656 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7657 ConstantSDNode *RHS) {
7658 APInt c1 = LHS->getAPIntValue();
7659 APInt c2 = RHS->getAPIntValue();
7660 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7661 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7662 (c1 + c2).ult(OpSizeInBits);
7663 };
7664 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7665 /*AllowUndefs*/ false,
7666 /*AllowTypeMismatch*/ true)) {
7667 SDLoc DL(N);
7668 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7669 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7670 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7671 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7672 }
7673 }
7674
7675 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7676 // Only fold this if the inner zext has no other uses to avoid increasing
7677 // the total number of instructions.
7678 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7679 N0.getOperand(0).getOpcode() == ISD::SRL) {
7680 SDValue N0Op0 = N0.getOperand(0);
7681 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7682
7683 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7684 APInt c1 = LHS->getAPIntValue();
7685 APInt c2 = RHS->getAPIntValue();
7686 zeroExtendToMatch(c1, c2);
7687 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7688 };
7689 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7690 /*AllowUndefs*/ false,
7691 /*AllowTypeMismatch*/ true)) {
7692 SDLoc DL(N);
7693 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7694 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7695 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7696 AddToWorklist(NewSHL.getNode());
7697 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7698 }
7699 }
7700
7701 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
7702 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
7703 // TODO - support non-uniform vector shift amounts.
7704 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7705 N0->getFlags().hasExact()) {
7706 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7707 uint64_t C1 = N0C1->getZExtValue();
7708 uint64_t C2 = N1C->getZExtValue();
7709 SDLoc DL(N);
7710 if (C1 <= C2)
7711 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7712 DAG.getConstant(C2 - C1, DL, ShiftVT));
7713 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7714 DAG.getConstant(C1 - C2, DL, ShiftVT));
7715 }
7716 }
7717
7718 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
7719 // (and (srl x, (sub c1, c2), MASK)
7720 // Only fold this if the inner shift has no other uses -- if it does, folding
7721 // this will increase the total number of instructions.
7722 // TODO - drop hasOneUse requirement if c1 == c2?
7723 // TODO - support non-uniform vector shift amounts.
7724 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7725 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7726 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7727 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7728 uint64_t c1 = N0C1->getZExtValue();
7729 uint64_t c2 = N1C->getZExtValue();
7730 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7731 SDValue Shift;
7732 if (c2 > c1) {
7733 Mask <<= c2 - c1;
7734 SDLoc DL(N);
7735 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7736 DAG.getConstant(c2 - c1, DL, ShiftVT));
7737 } else {
7738 Mask.lshrInPlace(c1 - c2);
7739 SDLoc DL(N);
7740 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7741 DAG.getConstant(c1 - c2, DL, ShiftVT));
7742 }
7743 SDLoc DL(N0);
7744 return DAG.getNode(ISD::AND, DL, VT, Shift,
7745 DAG.getConstant(Mask, DL, VT));
7746 }
7747 }
7748 }
7749
7750 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7751 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7752 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7753 SDLoc DL(N);
7754 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7755 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7756 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7757 }
7758
7759 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7760 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7761 // Variant of version done on multiply, except mul by a power of 2 is turned
7762 // into a shift.
7763 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7764 N0.getNode()->hasOneUse() &&
7765 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7766 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7767 TLI.isDesirableToCommuteWithShift(N, Level)) {
7768 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7769 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7770 AddToWorklist(Shl0.getNode());
7771 AddToWorklist(Shl1.getNode());
7772 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7773 }
7774
7775 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7776 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7777 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7778 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7779 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7780 if (isConstantOrConstantVector(Shl))
7781 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7782 }
7783
7784 if (N1C && !N1C->isOpaque())
7785 if (SDValue NewSHL = visitShiftByConstant(N))
7786 return NewSHL;
7787
7788 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
7789 if (N0.getOpcode() == ISD::VSCALE)
7790 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
7791 auto DL = SDLoc(N);
7792 APInt C0 = N0.getConstantOperandAPInt(0);
7793 APInt C1 = NC1->getAPIntValue();
7794 return DAG.getVScale(DL, VT, C0 << C1);
7795 }
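// The vscale fold above relies on multiplication distributing over a
// left-shift of the constant. A standalone sketch with vscale stood in by an
// arbitrary value (hypothetical; not part of this file):
static_assert(((7u * 3u) << 2) == (7u * (3u << 2)),
              "(vscale * C0) << C1 == vscale * (C0 << C1), here with vscale = 7");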
7796
7797 return SDValue();
7798}
7799
7800SDValue DAGCombiner::visitSRA(SDNode *N) {
7801 SDValue N0 = N->getOperand(0);
7802 SDValue N1 = N->getOperand(1);
7803 if (SDValue V = DAG.simplifyShift(N0, N1))
7804 return V;
7805
7806 EVT VT = N0.getValueType();
7807 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7808
7809 // Arithmetic shifting an all-sign-bit value is a no-op.
7810 // fold (sra 0, x) -> 0
7811 // fold (sra -1, x) -> -1
7812 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7813 return N0;
7814
7815 // fold vector ops
7816 if (VT.isVector())
7817 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7818 return FoldedVOp;
7819
7820 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7821
7822 // fold (sra c1, c2) -> c1 >>s c2
7823 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
7824 return C;
7825
7826 if (SDValue NewSel = foldBinOpIntoSelect(N))
7827 return NewSel;
7828
7829 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
7830 // supports sext_inreg.
7831 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7832 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7833 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7834 if (VT.isVector())
7835 ExtVT = EVT::getVectorVT(*DAG.getContext(),
7836 ExtVT, VT.getVectorNumElements());
7837 if (!LegalOperations ||
7838 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
7839 TargetLowering::Legal)
7840 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7841 N0.getOperand(0), DAG.getValueType(ExtVT));
7842 }
7843
7844 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7845 // clamp (add c1, c2) to max shift.
7846 if (N0.getOpcode() == ISD::SRA) {
7847 SDLoc DL(N);
7848 EVT ShiftVT = N1.getValueType();
7849 EVT ShiftSVT = ShiftVT.getScalarType();
7850 SmallVector<SDValue, 16> ShiftValues;
7851
7852 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7853 APInt c1 = LHS->getAPIntValue();
7854 APInt c2 = RHS->getAPIntValue();
7855 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7856 APInt Sum = c1 + c2;
7857 unsigned ShiftSum =
7858 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7859 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7860 return true;
7861 };
7862 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7863 SDValue ShiftValue;
7864 if (VT.isVector())
7865 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7866 else
7867 ShiftValue = ShiftValues[0];
7868 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7869 }
7870 }
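// A standalone sketch of the clamping rule above (hypothetical; not part of
// this file): sra amounts add, and any sum >= bitwidth is equivalent to
// shifting by bitwidth - 1. sra8 is a portable arithmetic shift on an 8-bit
// lane, avoiding implementation-defined signed right-shift.
constexpr int sra8(int X, unsigned Amt) {
  return X < 0 ? ~((~X) >> Amt) : (X >> Amt);
}
static_assert(sra8(sra8(-100, 5), 6) == sra8(-100, 7),
              "sra(sra(x, 5), 6) == sra(x, 7) in 8 bits, since 5 + 6 >= 8");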
7871
7872 // fold (sra (shl X, m), (sub result_size, n))
7873 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7874 // result_size - n != m.
7875 // If truncate is free for the target, sext(shl) is likely to result in better
7876 // code.
7877 if (N0.getOpcode() == ISD::SHL && N1C) {
7878 // Get the two constants of the shifts, CN0 = m, CN = n.
7879 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7880 if (N01C) {
7881 LLVMContext &Ctx = *DAG.getContext();
7882 // Determine what the truncate's result bitsize and type would be.
7883 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7884
7885 if (VT.isVector())
7886 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7887
7888 // Determine the residual right-shift amount.
7889 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7890
7891 // If the shift is not a no-op (in which case this should be just a sign
7892 // extend already), the truncate-to type is legal, sign_extend is legal
7893 // on that type, and the truncate to that type is both legal and free,
7894 // perform the transform.
7895 if ((ShiftAmt > 0) &&
7896 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7897 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7898 TLI.isTruncateFree(VT, TruncVT)) {
7899 SDLoc DL(N);
7900 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7901 getShiftAmountTy(N0.getOperand(0).getValueType()));
7902 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7903 N0.getOperand(0), Amt);
7904 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7905 Shift);
7906 return DAG.getNode(ISD::SIGN_EXTEND, DL,
7907 N->getValueType(0), Trunc);
7908 }
7909 }
7910 }
7911
7912 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
7913 // sra (add (shl X, N1C), AddC), N1C -->
7914 // sext (add (trunc X to (width - N1C)), AddC')
7915 if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
7916 N0.getOperand(0).getOpcode() == ISD::SHL &&
7917 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
7918 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
7919 SDValue Shl = N0.getOperand(0);
7920 // Determine what the truncate's type would be and ask the target if that
7921 // is a free operation.
7922 LLVMContext &Ctx = *DAG.getContext();
7923 unsigned ShiftAmt = N1C->getZExtValue();
7924 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
7925 if (VT.isVector())
7926 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7927
7928 // TODO: The simple type check probably belongs in the default hook
7929 // implementation and/or target-specific overrides (because
7930 // non-simple types likely require masking when legalized), but that
7931 // restriction may conflict with other transforms.
7932 if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
7933 SDLoc DL(N);
7934 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
7935 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
7936 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
7937 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
7938 return DAG.getSExtOrTrunc(Add, DL, VT);
7939 }
7940 }
7941 }
7942
7943 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7944 if (N1.getOpcode() == ISD::TRUNCATE &&
7945 N1.getOperand(0).getOpcode() == ISD::AND) {
7946 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7947 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7948 }
7949
7950 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
7951 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7952 // if c1 is equal to the number of bits the trunc removes
7953 // TODO - support non-uniform vector shift amounts.
7954 if (N0.getOpcode() == ISD::TRUNCATE &&
7955 (N0.getOperand(0).getOpcode() == ISD::SRL ||
7956 N0.getOperand(0).getOpcode() == ISD::SRA) &&
7957 N0.getOperand(0).hasOneUse() &&
7958 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
7959 SDValue N0Op0 = N0.getOperand(0);
7960 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7961 EVT LargeVT = N0Op0.getValueType();
7962 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
7963 if (LargeShift->getAPIntValue() == TruncBits) {
7964 SDLoc DL(N);
7965 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
7966 getShiftAmountTy(LargeVT));
7967 SDValue SRA =
7968 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
7969 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7970 }
7971 }
7972 }
7973
7974 // Simplify, based on bits shifted out of the LHS.
7975 if (SimplifyDemandedBits(SDValue(N, 0)))
7976 return SDValue(N, 0);
7977
7978 // If the sign bit is known to be zero, switch this to a SRL.
7979 if (DAG.SignBitIsZero(N0))
7980 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7981
7982 if (N1C && !N1C->isOpaque())
7983 if (SDValue NewSRA = visitShiftByConstant(N))
7984 return NewSRA;
7985
7986 return SDValue();
7987}
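// One of the folds above (sra -> srl when the sign bit is known zero), in a
// standalone form (hypothetical; not part of this file): for a value known
// non-negative, arithmetic and logical right shifts agree.
static_assert((0x37 >> 3) == int(0x37u >> 3),
              "sra == srl when the shifted value is known non-negative");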
7988
7989SDValue DAGCombiner::visitSRL(SDNode *N) {
7990 SDValue N0 = N->getOperand(0);
7991 SDValue N1 = N->getOperand(1);
7992 if (SDValue V = DAG.simplifyShift(N0, N1))
7993 return V;
7994
7995 EVT VT = N0.getValueType();
7996 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7997
7998 // fold vector ops
7999 if (VT.isVector())
8000 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8001 return FoldedVOp;
8002
8003 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8004
8005 // fold (srl c1, c2) -> c1 >>u c2
8006 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8007 return C;
8008
8009 if (SDValue NewSel = foldBinOpIntoSelect(N))
8010 return NewSel;
8011
8012 // if (srl x, c) is known to be zero, return 0
8013 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8014 APInt::getAllOnesValue(OpSizeInBits)))
8015 return DAG.getConstant(0, SDLoc(N), VT);
8016
8017 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8018 if (N0.getOpcode() == ISD::SRL) {
8019 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8020 ConstantSDNode *RHS) {
8021 APInt c1 = LHS->getAPIntValue();
8022 APInt c2 = RHS->getAPIntValue();
8023 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8024 return (c1 + c2).uge(OpSizeInBits);
8025 };
8026 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8027 return DAG.getConstant(0, SDLoc(N), VT);
8028
8029 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8030 ConstantSDNode *RHS) {
8031 APInt c1 = LHS->getAPIntValue();
8032 APInt c2 = RHS->getAPIntValue();
8033 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8034 return (c1 + c2).ult(OpSizeInBits);
8035 };
8036 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8037 SDLoc DL(N);
8038 EVT ShiftVT = N1.getValueType();
8039 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8040 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8041 }
8042 }
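// Standalone checks of both cases above on an 8-bit lane (hypothetical; not
// part of this file). 0xB7 fits in 8 bits, so right shifts need no masking.
static_assert(((0xB7u >> 3) >> 6) == 0u,
              "srl(srl(x, 3), 6) == 0 in 8 bits, since 3 + 6 >= 8");
static_assert(((0xB7u >> 3) >> 4) == (0xB7u >> 7),
              "srl(srl(x, 3), 4) == srl(x, 7) in 8 bits, since 3 + 4 < 8");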
8043
8044 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8045 N0.getOperand(0).getOpcode() == ISD::SRL) {
8046 SDValue InnerShift = N0.getOperand(0);
8047 // TODO - support non-uniform vector shift amounts.
8048 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8049 uint64_t c1 = N001C->getZExtValue();
8050 uint64_t c2 = N1C->getZExtValue();
8051 EVT InnerShiftVT = InnerShift.getValueType();
8052 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8053 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8054 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8055 // This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
8056 if (c1 + OpSizeInBits == InnerShiftSize) {
8057 SDLoc DL(N);
8058 if (c1 + c2 >= InnerShiftSize)
8059 return DAG.getConstant(0, DL, VT);
8060 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8061 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8062 InnerShift.getOperand(0), NewShiftAmt);
8063 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8064 }
8065 // In the more general case, we can clear the high bits after the shift:
8066 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8067 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8068 c1 + c2 < InnerShiftSize) {
8069 SDLoc DL(N);
8070 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8071 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8072 InnerShift.getOperand(0), NewShiftAmt);
8073 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8074 OpSizeInBits - c2),
8075 DL, InnerShiftVT);
8076 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8077 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8078 }
8079 }
8080 }
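// A standalone sketch of the first case above (hypothetical; not part of
// this file), with a 16-bit inner value truncated to 8 bits, so c1 = 8
// equals the number of bits removed by the trunc:
static_assert((((0xABCDu >> 8) & 0xFFu) >> 3) == ((0xABCDu >> 11) & 0xFFu),
              "srl(trunc16to8(srl(x, 8)), 3) == trunc16to8(srl(x, 11))");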
8081
8082 // fold (srl (shl x, c), c) -> (and x, cst2)
8083 // TODO - (srl (shl x, c1), c2).
8084 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8085 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8086 SDLoc DL(N);
8087 SDValue Mask =
8088 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8089 AddToWorklist(Mask.getNode());
8090 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8091 }
8092
8093 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8094 // TODO - support non-uniform vector shift amounts.
8095 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8096 // Shifting in all undef bits?
8097 EVT SmallVT = N0.getOperand(0).getValueType();
8098 unsigned BitSize = SmallVT.getScalarSizeInBits();
8099 if (N1C->getAPIntValue().uge(BitSize))
8100 return DAG.getUNDEF(VT);
8101
8102 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8103 uint64_t ShiftAmt = N1C->getZExtValue();
8104 SDLoc DL0(N0);
8105 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8106 N0.getOperand(0),
8107 DAG.getConstant(ShiftAmt, DL0,
8108 getShiftAmountTy(SmallVT)));
8109 AddToWorklist(SmallShift.getNode());
8110 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8111 SDLoc DL(N);
8112 return DAG.getNode(ISD::AND, DL, VT,
8113 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8114 DAG.getConstant(Mask, DL, VT));
8115 }
8116 }
8117
8118 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8119 // bit, which is unmodified by sra.
8120 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8121 if (N0.getOpcode() == ISD::SRA)
8122 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8123 }
8124
8125 // fold (srl (ctlz x), log2(bw)) -> (x == 0 ? 1 : 0) iff at most one bit of x can be set.
8126 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8127 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8128 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8129
8130 // If any of the input bits are KnownOne, then the input couldn't be all
8131 // zeros, thus the result of the srl will always be zero.
8132 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8133
8134 // If all of the bits input to the ctlz node are known to be zero, then
8135 // the result of the ctlz is "32" and the result of the shift is one.
8136 APInt UnknownBits = ~Known.Zero;
8137 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8138
8139 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8140 if (UnknownBits.isPowerOf2()) {
8141 // Okay, we know that only the single bit specified by UnknownBits
8142 // could be set on input to the CTLZ node. If this bit is set, the SRL
8143 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8144 // to an SRL/XOR pair, which is likely to simplify more.
8145 unsigned ShAmt = UnknownBits.countTrailingZeros();
8146 SDValue Op = N0.getOperand(0);
8147
8148 if (ShAmt) {
8149 SDLoc DL(N0);
8150 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8151 DAG.getConstant(ShAmt, DL,
8152 getShiftAmountTy(Op.getValueType())));
8153 AddToWorklist(Op.getNode());
8154 }
8155
8156 SDLoc DL(N);
8157 return DAG.getNode(ISD::XOR, DL, VT,
8158 Op, DAG.getConstant(1, DL, VT));
8159 }
8160 }
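// A standalone sketch of the CTLZ/SRL -> SRL/XOR rewrite above (hypothetical;
// not part of this file), for a 32-bit value known to be either 0 or 1 << 4:
constexpr unsigned ctlz32(unsigned X) {
  unsigned N = 0;
  for (unsigned Bit = 1u << 31; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N; // yields 32 for X == 0
}
static_assert((ctlz32(0u) >> 5) == ((0u >> 4) ^ 1u),
              "X == 0: both forms yield 1");
static_assert((ctlz32(1u << 4) >> 5) == (((1u << 4) >> 4) ^ 1u),
              "X == 1 << 4: both forms yield 0");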
8161
8162 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8163 if (N1.getOpcode() == ISD::TRUNCATE &&
8164 N1.getOperand(0).getOpcode() == ISD::AND) {
8165 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8166 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8167 }
8168
8169 // fold operands of srl based on knowledge that the low bits are not
8170 // demanded.
8171 if (SimplifyDemandedBits(SDValue(N, 0)))
8172 return SDValue(N, 0);
8173
8174 if (N1C && !N1C->isOpaque())
8175 if (SDValue NewSRL = visitShiftByConstant(N))
8176 return NewSRL;
8177
8178 // Attempt to convert a srl of a load into a narrower zero-extending load.
8179 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8180 return NarrowLoad;
8181
8182 // Here is a common situation. We want to optimize:
8183 //
8184 // %a = ...
8185 // %b = and i32 %a, 2
8186 // %c = srl i32 %b, 1
8187 // brcond i32 %c ...
8188 //
8189 // into
8190 //
8191 // %a = ...
8192 // %b = and %a, 2
8193 // %c = setcc eq %b, 0
8194 // brcond %c ...
8195 //
8196 // However, after the source operand of the SRL is optimized into an AND, the SRL
8197 // itself may not be optimized further. Look for it and add the BRCOND into
8198 // the worklist.
8199 if (N->hasOneUse()) {
8200 SDNode *Use = *N->use_begin();
8201 if (Use->getOpcode() == ISD::BRCOND)
8202 AddToWorklist(Use);
8203 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8204 // Also look past the truncate.
8205 Use = *Use->use_begin();
8206 if (Use->getOpcode() == ISD::BRCOND)
8207 AddToWorklist(Use);
8208 }
8209 }
8210
8211 return SDValue();
8212}
8213
8214SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8215 EVT VT = N->getValueType(0);
8216 SDValue N0 = N->getOperand(0);
8217 SDValue N1 = N->getOperand(1);
8218 SDValue N2 = N->getOperand(2);
8219 bool IsFSHL = N->getOpcode() == ISD::FSHL;
8220 unsigned BitWidth = VT.getScalarSizeInBits();
8221
8222 // fold (fshl N0, N1, 0) -> N0
8223 // fold (fshr N0, N1, 0) -> N1
8224 if (isPowerOf2_32(BitWidth))
8225 if (DAG.MaskedValueIsZero(
8226 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8227 return IsFSHL ? N0 : N1;
8228
8229 auto IsUndefOrZero = [](SDValue V) {
8230 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8231 };
8232
8233 // TODO - support non-uniform vector shift amounts.
8234 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8235 EVT ShAmtTy = N2.getValueType();
8236
8237 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8238 if (Cst->getAPIntValue().uge(BitWidth)) {
8239 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8240 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8241 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8242 }
8243
8244 unsigned ShAmt = Cst->getZExtValue();
8245 if (ShAmt == 0)
8246 return IsFSHL ? N0 : N1;
8247
8248 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8249 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8250 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8251 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
8252 if (IsUndefOrZero(N0))
8253 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8254 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8255 SDLoc(N), ShAmtTy));
8256 if (IsUndefOrZero(N1))
8257 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8258 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8259 SDLoc(N), ShAmtTy));
8260
8261 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8262 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8263 // TODO - bigendian support once we have test coverage.
8264 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
8265 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8266 !DAG.getDataLayout().isBigEndian()) {
8267 auto *LHS = dyn_cast<LoadSDNode>(N0);
8268 auto *RHS = dyn_cast<LoadSDNode>(N1);
8269 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8270 LHS->getAddressSpace() == RHS->getAddressSpace() &&
8271 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS)) {
8272 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8273 SDLoc DL(RHS);
8274 uint64_t PtrOff =
8275 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8276 unsigned NewAlign = MinAlign(RHS->getAlignment(), PtrOff);
8277 bool Fast = false;
8278 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8279 RHS->getAddressSpace(), NewAlign,
8280 RHS->getMemOperand()->getFlags(), &Fast) &&
8281 Fast) {
8282 SDValue NewPtr =
8283 DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
8284 AddToWorklist(NewPtr.getNode());
8285 SDValue Load = DAG.getLoad(
8286 VT, DL, RHS->getChain(), NewPtr,
8287 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8288 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8289 // Replace the old load's chain with the new load's chain.
8290 WorklistRemover DeadNodes(*this);
8291 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8292 return Load;
8293 }
8294 }
8295 }
8296 }
8297 }
8298
8299 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8300 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8301 // iff we know the shift amount is in range.
8302 // TODO: when is it worth doing SUB(BW, N2) as well?
8303 if (isPowerOf2_32(BitWidth)) {
8304 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8305 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8306 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8307 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8308 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8309 }
8310
8311 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8312 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8313 // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
8314 // is legal as well, we might be better off avoiding a non-constant (BW - N2).
8315 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8316 if (N0 == N1 && hasOperation(RotOpc, VT))
8317 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8318
8319 // Simplify, based on bits shifted out of N0/N1.
8320 if (SimplifyDemandedBits(SDValue(N, 0)))
8321 return SDValue(N, 0);
8322
8323 return SDValue();
8324}
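// A reference model of the funnel-shift folds above (hypothetical; not part
// of this file), assuming the usual fshl semantics on a 32-bit lane:
constexpr unsigned fshl32(unsigned Hi, unsigned Lo, unsigned Amt) {
  return (Amt % 32) == 0
             ? Hi
             : ((Hi << (Amt % 32)) | (Lo >> (32 - (Amt % 32)))) & 0xFFFFFFFFu;
}
static_assert(fshl32(0u, 0xDEADBEEFu, 8) == (0xDEADBEEFu >> 24),
              "fshl(zero, N1, C) == srl(N1, BW - C)");
static_assert(fshl32(0xDEADBEEFu, 0u, 8) == ((0xDEADBEEFu << 8) & 0xFFFFFFFFu),
              "fshl(N0, zero, C) == shl(N0, C)");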
8325
8326SDValue DAGCombiner::visitABS(SDNode *N) {
8327 SDValue N0 = N->getOperand(0);
8328 EVT VT = N->getValueType(0);
8329
8330 // fold (abs c1) -> c2
8331 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8332 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8333 // fold (abs (abs x)) -> (abs x)
8334 if (N0.getOpcode() == ISD::ABS)
8335 return N0;
8336 // fold (abs x) -> x iff not-negative
8337 if (DAG.SignBitIsZero(N0))
8338 return N0;
8339 return SDValue();
8340}
8341
8342SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8343 SDValue N0 = N->getOperand(0);
8344 EVT VT = N->getValueType(0);
8345
8346 // fold (bswap c1) -> c2
8347 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8348 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8349 // fold (bswap (bswap x)) -> x
8350 if (N0.getOpcode() == ISD::BSWAP)
8351 return N0->getOperand(0);
8352 return SDValue();
8353}
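// Standalone check that bswap is an involution (hypothetical; not part of
// this file), using a constexpr 32-bit byte swap:
constexpr unsigned bswap32(unsigned X) {
  return ((X << 24) | ((X & 0xFF00u) << 8) | ((X >> 8) & 0xFF00u) |
          ((X >> 24) & 0xFFu)) &
         0xFFFFFFFFu;
}
static_assert(bswap32(bswap32(0xDEADBEEFu)) == 0xDEADBEEFu,
              "bswap(bswap(x)) == x");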
8354
8355SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8356 SDValue N0 = N->getOperand(0);
8357 EVT VT = N->getValueType(0);
8358
8359 // fold (bitreverse c1) -> c2
8360 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8361 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8362 // fold (bitreverse (bitreverse x)) -> x
8363 if (N0.getOpcode() == ISD::BITREVERSE)
8364 return N0.getOperand(0);
8365 return SDValue();
8366}
8367
8368SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8369 SDValue N0 = N->getOperand(0);
8370 EVT VT = N->getValueType(0);
8371
8372 // fold (ctlz c1) -> c2
8373 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8374 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8375
8376 // If the value is known never to be zero, switch to the undef version.
8377 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8378 if (DAG.isKnownNeverZero(N0))
8379 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8380 }
8381
8382 return SDValue();
8383}
8384
8385SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8386 SDValue N0 = N->getOperand(0);
8387 EVT VT = N->getValueType(0);
8388
8389 // fold (ctlz_zero_undef c1) -> c2
8390 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8391 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8392 return SDValue();
8393}
8394
8395SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8396 SDValue N0 = N->getOperand(0);
8397 EVT VT = N->getValueType(0);
8398
8399 // fold (cttz c1) -> c2
8400 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8401 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8402
8403 // If the value is known never to be zero, switch to the undef version.
8404 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8405 if (DAG.isKnownNeverZero(N0))
8406 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8407 }
8408
8409 return SDValue();
8410}
8411
8412SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8413 SDValue N0 = N->getOperand(0);
8414 EVT VT = N->getValueType(0);
8415
8416 // fold (cttz_zero_undef c1) -> c2
8417 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8418 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8419 return SDValue();
8420}
8421
8422SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8423 SDValue N0 = N->getOperand(0);
8424 EVT VT = N->getValueType(0);
8425
8426 // fold (ctpop c1) -> c2
8427 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8428 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8429 return SDValue();
8430}
8431
8432// FIXME: This should be checking for no signed zeros on individual operands, as
8433// well as no nans.
8434static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8435 SDValue RHS,
8436 const TargetLowering &TLI) {
8437 const TargetOptions &Options = DAG.getTarget().Options;
8438 EVT VT = LHS.getValueType();
8439
8440 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8441 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8442 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8443}
8444
8445/// Generate Min/Max node
8446static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8447 SDValue RHS, SDValue True, SDValue False,
8448 ISD::CondCode CC, const TargetLowering &TLI,
8449 SelectionDAG &DAG) {
8450 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8451 return SDValue();
8452
8453 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8454 switch (CC) {
8455 case ISD::SETOLT:
8456 case ISD::SETOLE:
8457 case ISD::SETLT:
8458 case ISD::SETLE:
8459 case ISD::SETULT:
8460 case ISD::SETULE: {
8461 // Since the operands are already known never to be NaN here, either fminnum
8462 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
8463 // expanded in terms of it.
8464 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8465 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8466 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8467
8468 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8469 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8470 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8471 return SDValue();
8472 }
8473 case ISD::SETOGT:
8474 case ISD::SETOGE:
8475 case ISD::SETGT:
8476 case ISD::SETGE:
8477 case ISD::SETUGT:
8478 case ISD::SETUGE: {
8479 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8480 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8481 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8482
8483 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8484 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8485 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8486 return SDValue();
8487 }
8488 default:
8489 return SDValue();
8490 }
8491}
8492
8493/// If a (v)select has a condition value that is a sign-bit test, try to smear
8494/// the condition operand sign-bit across the value width and use it as a mask.
8495static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8496 SDValue Cond = N->getOperand(0);
8497 SDValue C1 = N->getOperand(1);
8498 SDValue C2 = N->getOperand(2);
8499 assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8500        "Expected select-of-constants");
8501
8502 EVT VT = N->getValueType(0);
8503 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8504 VT != Cond.getOperand(0).getValueType())
8505 return SDValue();
8506
8507 // The inverted-condition + commuted-select variants of these patterns are
8508 // canonicalized to these forms in IR.
8509 SDValue X = Cond.getOperand(0);
8510 SDValue CondC = Cond.getOperand(1);
8511 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8512 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8513 isAllOnesOrAllOnesSplat(C2)) {
8514 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8515 SDLoc DL(N);
8516 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8517 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8518 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8519 }
8520 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8521 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8522 SDLoc DL(N);
8523 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8524 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8525 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8526 }
8527 return SDValue();
8528}
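// A standalone sketch of the first pattern above (hypothetical; not part of
// this file): smearing the sign bit produces all-ones for negative X and
// zero otherwise, so "X > -1 ? C1 : -1" is "(X >>s 31) | C1".
constexpr int signSmear32(int X) { return X < 0 ? -1 : 0; } // models X >>s 31
static_assert((42 > -1 ? 5 : -1) == (signSmear32(42) | 5),
              "non-negative X selects C1");
static_assert((-7 > -1 ? 5 : -1) == (signSmear32(-7) | 5),
              "negative X selects -1");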
8529
8530SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8531 SDValue Cond = N->getOperand(0);
8532 SDValue N1 = N->getOperand(1);
8533 SDValue N2 = N->getOperand(2);
8534 EVT VT = N->getValueType(0);
8535 EVT CondVT = Cond.getValueType();
8536 SDLoc DL(N);
8537
8538 if (!VT.isInteger())
8539 return SDValue();
8540
8541 auto *C1 = dyn_cast<ConstantSDNode>(N1);
8542 auto *C2 = dyn_cast<ConstantSDNode>(N2);
8543 if (!C1 || !C2)
8544 return SDValue();
8545
8546 // Only do this before legalization to avoid conflicting with target-specific
8547 // transforms in the other direction (create a select from a zext/sext). There
8548 // is also a target-independent combine here in DAGCombiner in the other
8549 // direction for (select Cond, -1, 0) when the condition is not i1.
8550 if (CondVT == MVT::i1 && !LegalOperations) {
8551 if (C1->isNullValue() && C2->isOne()) {
8552 // select Cond, 0, 1 --> zext (!Cond)
8553 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8554 if (VT != MVT::i1)
8555 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8556 return NotCond;
8557 }
8558 if (C1->isNullValue() && C2->isAllOnesValue()) {
8559 // select Cond, 0, -1 --> sext (!Cond)
8560 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8561 if (VT != MVT::i1)
8562 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8563 return NotCond;
8564 }
8565 if (C1->isOne() && C2->isNullValue()) {
8566 // select Cond, 1, 0 --> zext (Cond)
8567 if (VT != MVT::i1)
8568 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8569 return Cond;
8570 }
8571 if (C1->isAllOnesValue() && C2->isNullValue()) {
8572 // select Cond, -1, 0 --> sext (Cond)
8573 if (VT != MVT::i1)
8574 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8575 return Cond;
8576 }
8577
8578 // Use a target hook because some targets may prefer to transform in the
8579 // other direction.
8580 if (TLI.convertSelectOfConstantsToMath(VT)) {
8581 // For any constants that differ by 1, we can transform the select into an
8582 // extend and add.
8583 const APInt &C1Val = C1->getAPIntValue();
8584 const APInt &C2Val = C2->getAPIntValue();
8585 if (C1Val - 1 == C2Val) {
8586 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8587 if (VT != MVT::i1)
8588 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8589 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8590 }
8591 if (C1Val + 1 == C2Val) {
8592 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8593 if (VT != MVT::i1)
8594 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8595 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8596 }
8597
8598 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
8599 if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
8600 if (VT != MVT::i1)
8601 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8602 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
8603 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
8604 }
8605
8606 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8607 return V;
8608 }
8609
8610 return SDValue();
8611 }
8612
8613 // fold (select Cond, 0, 1) -> (xor Cond, 1)
8614 // We can't do this reliably if integer-based booleans have different contents
8615 // from floating-point-based booleans. This is because we can't tell whether we
8616 // have an integer-based boolean or a floating-point-based boolean unless we
8617 // can find the SETCC that produced it and inspect its operands. This is
8618 // fairly easy if C is the SETCC node, but it can potentially be
8619 // undiscoverable (or not reasonably discoverable). For example, it could be
8620 // in another basic block or it could require searching a complicated
8621 // expression.
8622 if (CondVT.isInteger() &&
8623 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8624 TargetLowering::ZeroOrOneBooleanContent &&
8625 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8626 TargetLowering::ZeroOrOneBooleanContent &&
8627 C1->isNullValue() && C2->isOne()) {
8628 SDValue NotCond =
8629 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8630 if (VT.bitsEq(CondVT))
8631 return NotCond;
8632 return DAG.getZExtOrTrunc(NotCond, DL, VT);
8633 }
8634
8635 return SDValue();
8636}
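// The differ-by-one rewrite above, checked standalone for both condition
// values (hypothetical; not part of this file): zext(true) == 1 and
// zext(false) == 0, so the select becomes an add.
static_assert((true ? 10u : 9u) == (unsigned(true) + 9u),
              "select Cond, C1, C1-1 == add(zext Cond, C1-1), Cond = 1");
static_assert((false ? 10u : 9u) == (unsigned(false) + 9u),
              "select Cond, C1, C1-1 == add(zext Cond, C1-1), Cond = 0");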
8637
8638SDValue DAGCombiner::visitSELECT(SDNode *N) {
8639 SDValue N0 = N->getOperand(0);
8640 SDValue N1 = N->getOperand(1);
8641 SDValue N2 = N->getOperand(2);
8642 EVT VT = N->getValueType(0);
8643 EVT VT0 = N0.getValueType();
8644 SDLoc DL(N);
8645 SDNodeFlags Flags = N->getFlags();
8646
8647 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8648 return V;
8649
8650 // fold (select X, X, Y) -> (or X, Y)
8651 // fold (select X, 1, Y) -> (or C, Y)
8652 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8653 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8654
8655 if (SDValue V = foldSelectOfConstants(N))
8656 return V;
8657
8658 // fold (select C, 0, X) -> (and (not C), X)
8659 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8660 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8661 AddToWorklist(NOTNode.getNode());
8662 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8663 }
8664 // fold (select C, X, 1) -> (or (not C), X)
8665 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8666 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8667 AddToWorklist(NOTNode.getNode());
8668 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8669 }
8670 // fold (select X, Y, X) -> (and X, Y)
8671 // fold (select X, Y, 0) -> (and X, Y)
8672 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8673 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8674
8675 // If we can fold this based on the true/false value, do so.
8676 if (SimplifySelectOps(N, N1, N2))
8677 return SDValue(N, 0); // Don't revisit N.
8678
8679 if (VT0 == MVT::i1) {
8680 // The code in this block deals with the following 2 equivalences:
8681 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8682 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8683 // The target can specify its preferred form with the
8684 // shouldNormalizeToSelectSequence() callback. However, we always transform
8685 // to the right-hand form if the inner select already exists in the DAG,
8686 // and we always transform to the left-hand form if we know that we can
8687 // further optimize the combination of the conditions.
8688 bool normalizeToSequence =
8689 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8690 // select (and Cond0, Cond1), X, Y
8691 // -> select Cond0, (select Cond1, X, Y), Y
8692 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8693 SDValue Cond0 = N0->getOperand(0);
8694 SDValue Cond1 = N0->getOperand(1);
8695 SDValue InnerSelect =
8696 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8697 if (normalizeToSequence || !InnerSelect.use_empty())
8698 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8699 InnerSelect, N2, Flags);
8700 // Cleanup on failure.
8701 if (InnerSelect.use_empty())
8702 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8703 }
8704 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8705 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8706 SDValue Cond0 = N0->getOperand(0);
8707 SDValue Cond1 = N0->getOperand(1);
8708 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8709 Cond1, N1, N2, Flags);
8710 if (normalizeToSequence || !InnerSelect.use_empty())
8711 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8712 InnerSelect, Flags);
8713 // Cleanup on failure.
8714 if (InnerSelect.use_empty())
8715 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8716 }
8717
8718 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8719 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8720 SDValue N1_0 = N1->getOperand(0);
8721 SDValue N1_1 = N1->getOperand(1);
8722 SDValue N1_2 = N1->getOperand(2);
8723 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
8724 // Create the actual and node if we can generate good code for it.
8725 if (!normalizeToSequence) {
8726 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8727 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8728 N2, Flags);
8729 }
8730 // Otherwise see if we can optimize the "and" to a better pattern.
8731 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8732 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8733 N2, Flags);
8734 }
8735 }
8736 }
8737 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8738 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
8739 SDValue N2_0 = N2->getOperand(0);
8740 SDValue N2_1 = N2->getOperand(1);
8741 SDValue N2_2 = N2->getOperand(2);
8742 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
8743 // Create the actual or node if we can generate good code for it.
8744 if (!normalizeToSequence) {
8745 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8746 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
8747 N2_2, Flags);
8748 }
8749 // Otherwise see if we can optimize to a better pattern.
8750 if (SDValue Combined = visitORLike(N0, N2_0, N))
8751 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8752 N2_2, Flags);
8753 }
8754 }
8755 }
8756
8757 // select (not Cond), N1, N2 -> select Cond, N2, N1
8758 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8759 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8760 SelectOp->setFlags(Flags);
8761 return SelectOp;
8762 }
8763
8764 // Fold selects based on a setcc into other things, such as min/max/abs.
8765 if (N0.getOpcode() == ISD::SETCC) {
8766 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8767 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8768
8769 // select (fcmp lt x, y), x, y -> fminnum x, y
8770 // select (fcmp gt x, y), x, y -> fmaxnum x, y
8771 //
8772 // This is OK if we don't care what happens if either operand is a NaN.
8773 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
8774 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8775 CC, TLI, DAG))
8776 return FMinMax;
8777
8778 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8779 // This is conservatively limited to pre-legal-operations to give targets
8780 // a chance to reverse the transform if they want to do that. Also, it is
8781 // unlikely that the pattern would be formed late, so it's probably not
8782 // worth going through the other checks.
8783 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
8784 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
8785 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
8786 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8787 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8788 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
8789 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8790 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8791 //
8792 // The IR equivalent of this transform would have this form:
8793 // %a = add %x, C
8794 // %c = icmp ugt %x, ~C
8795 // %r = select %c, -1, %a
8796 // =>
8797 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8798 // %u0 = extractvalue %u, 0
8799 // %u1 = extractvalue %u, 1
8800 // %r = select %u1, -1, %u0
8801 SDVTList VTs = DAG.getVTList(VT, VT0);
8802 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8803 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8804 }
8805 }
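// A standalone model of the saturating-add pattern above (hypothetical; not
// part of this file), on a 32-bit lane: comparing X against ~C detects
// exactly the cases where X + C wraps.
constexpr unsigned satAddU32(unsigned X, unsigned C) {
  return X > (0xFFFFFFFFu ^ C) ? 0xFFFFFFFFu : (X + C) & 0xFFFFFFFFu;
}
static_assert(satAddU32(0xFFFFFFF0u, 0x20u) == 0xFFFFFFFFu,
              "overflowing add saturates to all-ones");
static_assert(satAddU32(0x10u, 0x20u) == 0x30u,
              "non-overflowing add is unchanged");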
8806
8807 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8808 (!LegalOperations &&
8809 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
8810 // Any flags available in a select/setcc fold will be on the setcc as they
8811 // migrated from the fcmp.
8812 Flags = N0.getNode()->getFlags();
8813 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8814 N2, N0.getOperand(2));
8815 SelectNode->setFlags(Flags);
8816 return SelectNode;
8817 }
8818
8819 return SimplifySelect(DL, N0, N1, N2);
8820 }
8821
8822 return SDValue();
8823}
8824
8825// This function assumes all the vselect's arguments are CONCAT_VECTOR
8826// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8827static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8828 SDLoc DL(N);
8829 SDValue Cond = N->getOperand(0);
8830 SDValue LHS = N->getOperand(1);
8831 SDValue RHS = N->getOperand(2);
8832 EVT VT = N->getValueType(0);
8833 int NumElems = VT.getVectorNumElements();
8834 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8835        RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8836        Cond.getOpcode() == ISD::BUILD_VECTOR);
8837
8838 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
8839 // binary ones here.
8840 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8841 return SDValue();
8842
8843 // We're sure we have an even number of elements due to the
8844 // concat_vectors we have as arguments to vselect.
8845 // Skip BV elements until we find one that's not an UNDEF.
8846 // After we find a non-UNDEF element, keep looping until we get to half the
8847 // length of the BV and check that all the non-undef nodes are the same.
8848 ConstantSDNode *BottomHalf = nullptr;
8849 for (int i = 0; i < NumElems / 2; ++i) {
8850 if (Cond->getOperand(i)->isUndef())
8851 continue;
8852
8853 if (BottomHalf == nullptr)
8854 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8855 else if (Cond->getOperand(i).getNode() != BottomHalf)
8856 return SDValue();
8857 }
8858
8859 // Do the same for the second half of the BuildVector
8860 ConstantSDNode *TopHalf = nullptr;
8861 for (int i = NumElems / 2; i < NumElems; ++i) {
8862 if (Cond->getOperand(i)->isUndef())
8863 continue;
8864
8865 if (TopHalf == nullptr)
8866 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8867 else if (Cond->getOperand(i).getNode() != TopHalf)
8868 return SDValue();
8869 }
8870
8871 assert(TopHalf && BottomHalf &&
8872        "One half of the selector was all UNDEFs and the other was all the "
8873        "same value. This should have been addressed before this function.");
8874 return DAG.getNode(
8875 ISD::CONCAT_VECTORS, DL, VT,
8876 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8877 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8878}
8879
8880SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8881 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8882 SDValue Mask = MSC->getMask();
8883 SDValue Chain = MSC->getChain();
8884 SDLoc DL(N);
8885
8886 // Zap scatters with a zero mask.
8887 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8888 return Chain;
8889
8890 return SDValue();
8891}
8892
8893SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8894 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8895 SDValue Mask = MST->getMask();
8896 SDValue Chain = MST->getChain();
8897 SDLoc DL(N);
8898
8899 // Zap masked stores with a zero mask.
8900 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8901 return Chain;
8902
8903 // Try transforming N to an indexed store.
8904 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
8905 return SDValue(N, 0);
8906
8907 return SDValue();
8908}
8909
8910SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8911 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8912 SDValue Mask = MGT->getMask();
8913 SDLoc DL(N);
8914
8915 // Zap gathers with a zero mask.
8916 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8917 return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8918
8919 return SDValue();
8920}
8921
8922SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8923 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8924 SDValue Mask = MLD->getMask();
8925 SDLoc DL(N);
8926
8927 // Zap masked loads with a zero mask.
8928 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8929 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8930
8931 // Try transforming N to an indexed load.
8932 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
8933 return SDValue(N, 0);
8934
8935 return SDValue();
8936}
8937
8938/// A vector select of 2 constant vectors can be simplified to math/logic to
8939/// avoid a variable select instruction and possibly avoid constant loads.
8940SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8941 SDValue Cond = N->getOperand(0);
8942 SDValue N1 = N->getOperand(1);
8943 SDValue N2 = N->getOperand(2);
8944 EVT VT = N->getValueType(0);
8945 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8946 !TLI.convertSelectOfConstantsToMath(VT) ||
8947 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8948 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8949 return SDValue();
8950
8951 // Check if we can use the condition value to increment/decrement a single
8952 // constant value. This simplifies a select to an add and removes a constant
8953 // load/materialization from the general case.
8954 bool AllAddOne = true;
8955 bool AllSubOne = true;
8956 unsigned Elts = VT.getVectorNumElements();
8957 for (unsigned i = 0; i != Elts; ++i) {
8958 SDValue N1Elt = N1.getOperand(i);
8959 SDValue N2Elt = N2.getOperand(i);
8960 if (N1Elt.isUndef() || N2Elt.isUndef())
8961 continue;
8962
8963 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8964 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8965 if (C1 != C2 + 1)
8966 AllAddOne = false;
8967 if (C1 != C2 - 1)
8968 AllSubOne = false;
8969 }
8970
8971 // Further simplifications for the extra-special cases where the constants are
8972 // all 0 or all -1 should be implemented as folds of these patterns.
8973 SDLoc DL(N);
8974 if (AllAddOne || AllSubOne) {
8975 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8976 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8977 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8978 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8979 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8980 }
8981
8982 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
8983 APInt Pow2C;
8984 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
8985 isNullOrNullSplat(N2)) {
8986 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
8987 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
8988 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
8989 }
8990
8991 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8992 return V;
8993
8994 // The general case for select-of-constants:
8995 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8996 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8997 // leave that to a machine-specific pass.
8998 return SDValue();
8999}
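// The scalar analogue of the power-of-two case above, checked standalone
// (hypothetical; not part of this file): Pow2C = 16, log2(Pow2C) = 4.
static_assert((true ? 16u : 0u) == (unsigned(true) << 4),
              "select Cond, Pow2C, 0 == shl(zext Cond, log2(Pow2C)), Cond = 1");
static_assert((false ? 16u : 0u) == (unsigned(false) << 4),
              "select Cond, Pow2C, 0 == shl(zext Cond, log2(Pow2C)), Cond = 0");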
9000
9001SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9002 SDValue N0 = N->getOperand(0);
9003 SDValue N1 = N->getOperand(1);
9004 SDValue N2 = N->getOperand(2);
9005 EVT VT = N->getValueType(0);
9006 SDLoc DL(N);
9007
9008 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9009 return V;
9010
9011 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9012 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9013 return DAG.getSelect(DL, VT, F, N2, N1);
9014
9015 // Canonicalize integer abs.
9016 // vselect (setg[te] X, 0), X, -X ->
9017 // vselect (setgt X, -1), X, -X ->
9018 // vselect (setl[te] X, 0), -X, X ->
9019 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9020 if (N0.getOpcode() == ISD::SETCC) {
9021 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9022 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9023 bool isAbs = false;
9024 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9025
9026 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9027 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9028 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9029 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9030 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9031 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9032 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9033
9034 if (isAbs) {
9035 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9036 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9037
9038 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9039 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9040 DL, getShiftAmountTy(VT)));
9041 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9042 AddToWorklist(Shift.getNode());
9043 AddToWorklist(Add.getNode());
9044 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9045 }
9046
9047 // vselect x, y (fcmp lt x, y) -> fminnum x, y
9048 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9049 //
9050 // This is OK if we don't care about what happens if either operand is a
9051 // NaN.
9052 //
9053 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9054 if (SDValue FMinMax =
9055 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9056 return FMinMax;
9057 }
9058
9059 // If this select has a condition (setcc) with narrower operands than the
9060 // select, try to widen the compare to match the select width.
9061 // TODO: This should be extended to handle any constant.
9062 // TODO: This could be extended to handle non-loading patterns, but that
9063 // requires thorough testing to avoid regressions.
9064 if (isNullOrNullSplat(RHS)) {
9065 EVT NarrowVT = LHS.getValueType();
9066 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9067 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9068 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9069 unsigned WideWidth = WideVT.getScalarSizeInBits();
9070 bool IsSigned = isSignedIntSetCC(CC);
9071 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9072 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9073 SetCCWidth != 1 && SetCCWidth < WideWidth &&
9074 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9075 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9076 // Both compare operands can be widened for free. The LHS can use an
9077 // extended load, and the RHS is a constant:
9078 // vselect (ext (setcc load(X), C)), N1, N2 -->
9079 // vselect (setcc extload(X), C'), N1, N2
9080 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9081 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9082 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9083 EVT WideSetCCVT = getSetCCResultType(WideVT);
9084 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9085 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9086 }
9087 }
9088 }
9089
9090 if (SimplifySelectOps(N, N1, N2))
9091 return SDValue(N, 0); // Don't revisit N.
9092
9093 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
9094 if (ISD::isBuildVectorAllOnes(N0.getNode()))
9095 return N1;
9096 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9097 if (ISD::isBuildVectorAllZeros(N0.getNode()))
9098 return N2;
9099
9100 // The ConvertSelectToConcatVector function assumes both the above
9101 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
9102 // and addressed.
9103 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9104 N2.getOpcode() == ISD::CONCAT_VECTORS &&
9105 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9106 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9107 return CV;
9108 }
9109
9110 if (SDValue V = foldVSelectOfConstants(N))
9111 return V;
9112
9113 return SDValue();
9114}
9115
9116SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9117 SDValue N0 = N->getOperand(0);
9118 SDValue N1 = N->getOperand(1);
9119 SDValue N2 = N->getOperand(2);
9120 SDValue N3 = N->getOperand(3);
9121 SDValue N4 = N->getOperand(4);
9122 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9123
9124 // fold select_cc lhs, rhs, x, x, cc -> x
9125 if (N2 == N3)
9126 return N2;
9127
9128 // Determine if the condition we're dealing with is constant
9129 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9130 CC, SDLoc(N), false)) {
9131 AddToWorklist(SCC.getNode());
9132
9133 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9134 if (!SCCC->isNullValue())
9135 return N2; // cond always true -> true val
9136 else
9137 return N3; // cond always false -> false val
9138 } else if (SCC->isUndef()) {
9139 // When the condition is UNDEF, just return the first operand. This is
9140 // coherent with DAG creation; no setcc node is created in this case.
9141 return N2;
9142 } else if (SCC.getOpcode() == ISD::SETCC) {
9143 // Fold to a simpler select_cc
9144 SDValue SelectOp = DAG.getNode(
9145 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9146 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
9147 SelectOp->setFlags(SCC->getFlags());
9148 return SelectOp;
9149 }
9150 }
9151
9152 // If we can fold this based on the true/false value, do so.
9153 if (SimplifySelectOps(N, N2, N3))
9154 return SDValue(N, 0); // Don't revisit N.
9155
9156 // fold select_cc into other things, such as min/max/abs
9157 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
9158}
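// For reference, select_cc is the fused form of a compare feeding a select,
// i.e. (a sketch of the equivalence the folds above rely on):
//   select_cc lhs, rhs, tval, fval, cc
//     == select (setcc lhs, rhs, cc), tval, fval
// so once SimplifySetCC proves the condition constant, the whole node
// collapses to tval or fval directly.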
9159
9160SDValue DAGCombiner::visitSETCC(SDNode *N) {
9161 // setcc is very commonly used as an argument to brcond. This pattern
9162 // also lends itself to numerous combines and, as a result, it is
9163 // desirable to keep the argument to a brcond as a setcc as much as possible.
9164 bool PreferSetCC =
9165 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9166
9167 SDValue Combined = SimplifySetCC(
9168 N->getValueType(0), N->getOperand(0), N->getOperand(1),
9169 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9170
9171 if (!Combined)
9172 return SDValue();
9173
9174 // If we prefer to have a setcc, and we don't, we'll try our best to
9175 // recreate one using rebuildSetCC.
9176 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9177 SDValue NewSetCC = rebuildSetCC(Combined);
9178
9179 // We don't have anything interesting to combine to.
9180 if (NewSetCC.getNode() == N)
9181 return SDValue();
9182
9183 if (NewSetCC)
9184 return NewSetCC;
9185 }
9186
9187 return Combined;
9188}
9189
9190SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9191 SDValue LHS = N->getOperand(0);
9192 SDValue RHS = N->getOperand(1);
9193 SDValue Carry = N->getOperand(2);
9194 SDValue Cond = N->getOperand(3);
9195
9196 // If Carry is false, fold to a regular SETCC.
9197 if (isNullConstant(Carry))
9198 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9199
9200 return SDValue();
9201}
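// setcccarry compares LHS and RHS with an incoming carry (borrow) from a
// lower-half subtraction; when that carry is known to be zero, it behaves as
// an ordinary compare, which is the fold above:
//   setcccarry LHS, RHS, 0, cc  -->  setcc LHS, RHS, cc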
9202
9203/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9204/// a build_vector of constants.
9205/// This function is called by the DAGCombiner when visiting sext/zext/aext
9206/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9207/// Vector extends are not folded if operations are legal; this is to
9208/// avoid introducing illegal build_vector dag nodes.
9209static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9210 SelectionDAG &DAG, bool LegalTypes) {
9211 unsigned Opcode = N->getOpcode();
9212 SDValue N0 = N->getOperand(0);
9213 EVT VT = N->getValueType(0);
9214 SDLoc DL(N);
9215
9216 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9217 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9218 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9219 "Expected EXTEND dag node in input!");
9220
9221 // fold (sext c1) -> c1
9222 // fold (zext c1) -> c1
9223 // fold (aext c1) -> c1
9224 if (isa<ConstantSDNode>(N0))
9225 return DAG.getNode(Opcode, DL, VT, N0);
9226
9227 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9228 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9229 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9230 if (N0->getOpcode() == ISD::SELECT) {
9231 SDValue Op1 = N0->getOperand(1);
9232 SDValue Op2 = N0->getOperand(2);
9233 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9234 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9235 // For any_extend, choose sign extension of the constants to allow a
9236 // possible further transform to sign_extend_inreg, i.e.:
9237 //
9238 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9239 // t2: i64 = any_extend t1
9240 // -->
9241 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9242 // -->
9243 // t4: i64 = sign_extend_inreg t3
9244 unsigned FoldOpc = Opcode;
9245 if (FoldOpc == ISD::ANY_EXTEND)
9246 FoldOpc = ISD::SIGN_EXTEND;
9247 return DAG.getSelect(DL, VT, N0->getOperand(0),
9248 DAG.getNode(FoldOpc, DL, VT, Op1),
9249 DAG.getNode(FoldOpc, DL, VT, Op2));
9250 }
9251 }
9252
9253 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
9254 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
9255 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
9256 EVT SVT = VT.getScalarType();
9257 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9258 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9259 return SDValue();
9260
9261 // We can fold this node into a build_vector.
9262 unsigned VTBits = SVT.getSizeInBits();
9263 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9264 SmallVector<SDValue, 8> Elts;
9265 unsigned NumElts = VT.getVectorNumElements();
9266
9267 // For zero-extensions, UNDEF elements are still guaranteed to have their
9268 // upper bits set to zero.
9269 bool IsZext =
9270 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9271
9272 for (unsigned i = 0; i != NumElts; ++i) {
9273 SDValue Op = N0.getOperand(i);
9274 if (Op.isUndef()) {
9275 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9276 continue;
9277 }
9278
9279 SDLoc DL(Op);
9280 // Get the constant value and, if needed, truncate it to the size of the type.
9281 // Nodes like build_vector might have constants wider than the scalar type.
9282 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9283 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9284 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9285 else
9286 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9287 }
9288
9289 return DAG.getBuildVector(VT, DL, Elts);
9290}
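// A worked instance of the build_vector fold above, assuming a hypothetical
// v2i8 -> v2i32 extend:
//   zero_extend (build_vector <i8 255, i8 undef>)
//     --> build_vector <i32 255, i32 0>     ; undef lanes become 0 under zext
//   sign_extend (build_vector <i8 255, i8 undef>)
//     --> build_vector <i32 -1, i32 undef>  ; 0xFF sign-extends to -1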
9291
9292 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
9293 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9294 // transformation. Returns true if the extensions are possible and the
9295 // above-mentioned transformation is profitable.
9296static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9297 unsigned ExtOpc,
9298 SmallVectorImpl<SDNode *> &ExtendNodes,
9299 const TargetLowering &TLI) {
9300 bool HasCopyToRegUses = false;
9301 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9302 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9303 UE = N0.getNode()->use_end();
9304 UI != UE; ++UI) {
9305 SDNode *User = *UI;
9306 if (User == N)
9307 continue;
9308 if (UI.getUse().getResNo() != N0.getResNo())
9309 continue;
9310 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9311 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9312 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9313 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9314 // Sign bits will be lost after a zext.
9315 return false;
9316 bool Add = false;
9317 for (unsigned i = 0; i != 2; ++i) {
9318 SDValue UseOp = User->getOperand(i);
9319 if (UseOp == N0)
9320 continue;
9321 if (!isa<ConstantSDNode>(UseOp))
9322 return false;
9323 Add = true;
9324 }
9325 if (Add)
9326 ExtendNodes.push_back(User);
9327 continue;
9328 }
9329 // If truncates aren't free and there are users we can't
9330 // extend, it isn't worthwhile.
9331 if (!isTruncFree)
9332 return false;
9333 // Remember if this value is live-out.
9334 if (User->getOpcode() == ISD::CopyToReg)
9335 HasCopyToRegUses = true;
9336 }
9337
9338 if (HasCopyToRegUses) {
9339 bool BothLiveOut = false;
9340 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9341 UI != UE; ++UI) {
9342 SDUse &Use = UI.getUse();
9343 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9344 BothLiveOut = true;
9345 break;
9346 }
9347 }
9348 if (BothLiveOut)
9349 // Both unextended and extended values are live out. There had better be
9350 // a good reason for the transformation.
9351 return ExtendNodes.size();
9352 }
9353 return true;
9354}
9355
9356void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9357 SDValue OrigLoad, SDValue ExtLoad,
9358 ISD::NodeType ExtType) {
9359 // Extend SetCC uses if necessary.
9360 SDLoc DL(ExtLoad);
9361 for (SDNode *SetCC : SetCCs) {
9362 SmallVector<SDValue, 4> Ops;
9363
9364 for (unsigned j = 0; j != 2; ++j) {
9365 SDValue SOp = SetCC->getOperand(j);
9366 if (SOp == OrigLoad)
9367 Ops.push_back(ExtLoad);
9368 else
9369 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9370 }
9371
9372 Ops.push_back(SetCC->getOperand(2));
9373 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9374 }
9375}
9376
9377// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
9378SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9379 SDValue N0 = N->getOperand(0);
9380 EVT DstVT = N->getValueType(0);
9381 EVT SrcVT = N0.getValueType();
9382
9383 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9384 N->getOpcode() == ISD::ZERO_EXTEND) &&
9385 "Unexpected node type (not an extend)!");
9386
9387 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9388 // For example, on a target with legal v4i32, but illegal v8i32, turn:
9389 // (v8i32 (sext (v8i16 (load x))))
9390 // into:
9391 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9392 // (v4i32 (sextload (x + 16)))))
9393 // Where uses of the original load, i.e.:
9394 // (v8i16 (load x))
9395 // are replaced with:
9396 // (v8i16 (truncate
9397 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9398 // (v4i32 (sextload (x + 16)))))))
9399 //
9400 // This combine is only applicable to illegal, but splittable, vectors.
9401 // All legal types, and illegal non-vector types, are handled elsewhere.
9402 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9403 //
9404 if (N0->getOpcode() != ISD::LOAD)
9405 return SDValue();
9406
9407 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9408
9409 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9410 !N0.hasOneUse() || !LN0->isSimple() ||
9411 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
9412 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9413 return SDValue();
9414
9415 SmallVector<SDNode *, 4> SetCCs;
9416 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9417 return SDValue();
9418
9419 ISD::LoadExtType ExtType =
9420 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9421
9422 // Try to split the vector types to get down to legal types.
9423 EVT SplitSrcVT = SrcVT;
9424 EVT SplitDstVT = DstVT;
9425 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9426 SplitSrcVT.getVectorNumElements() > 1) {
9427 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9428 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9429 }
9430
9431 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9432 return SDValue();
9433
9434 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
9435
9436 SDLoc DL(N);
9437 const unsigned NumSplits =
9438 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9439 const unsigned Stride = SplitSrcVT.getStoreSize();
9440 SmallVector<SDValue, 4> Loads;
9441 SmallVector<SDValue, 4> Chains;
9442
9443 SDValue BasePtr = LN0->getBasePtr();
9444 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9445 const unsigned Offset = Idx * Stride;
9446 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9447
9448 SDValue SplitLoad = DAG.getExtLoad(
9449 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9450 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9451 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9452
9453 BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
9454
9455 Loads.push_back(SplitLoad.getValue(0));
9456 Chains.push_back(SplitLoad.getValue(1));
9457 }
9458
9459 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9460 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9461
9462 // Simplify TF.
9463 AddToWorklist(NewChain.getNode());
9464
9465 CombineTo(N, NewValue);
9466
9467 // Replace uses of the original load (before extension)
9468 // with a truncate of the concatenated sextloaded vectors.
9469 SDValue Trunc =
9470 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9471 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9472 CombineTo(N0.getNode(), Trunc, NewChain);
9473 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9474}
9475
9476// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9477// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9478SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9479 assert(N->getOpcode() == ISD::ZERO_EXTEND);
9480 EVT VT = N->getValueType(0);
9481 EVT OrigVT = N->getOperand(0).getValueType();
9482 if (TLI.isZExtFree(OrigVT, VT))
9483 return SDValue();
9484
9485 // and/or/xor
9486 SDValue N0 = N->getOperand(0);
9487 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9488 N0.getOpcode() == ISD::XOR) ||
9489 N0.getOperand(1).getOpcode() != ISD::Constant ||
9490 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9491 return SDValue();
9492
9493 // shl/shr
9494 SDValue N1 = N0->getOperand(0);
9495 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9496 N1.getOperand(1).getOpcode() != ISD::Constant ||
9497 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9498 return SDValue();
9499
9500 // load
9501 if (!isa<LoadSDNode>(N1.getOperand(0)))
9502 return SDValue();
9503 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9504 EVT MemVT = Load->getMemoryVT();
9505 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9506 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9507 return SDValue();
9508
9509
9510 // If the shift op is SHL, the logic op must be AND, otherwise the result
9511 // will be wrong.
9512 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9513 return SDValue();
9514
9515 if (!N0.hasOneUse() || !N1.hasOneUse())
9516 return SDValue();
9517
9518 SmallVector<SDNode*, 4> SetCCs;
9519 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9520 ISD::ZERO_EXTEND, SetCCs, TLI))
9521 return SDValue();
9522
9523 // Actually do the transformation.
9524 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9525 Load->getChain(), Load->getBasePtr(),
9526 Load->getMemoryVT(), Load->getMemOperand());
9527
9528 SDLoc DL1(N1);
9529 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9530 N1.getOperand(1));
9531
9532 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
9533 SDLoc DL0(N0);
9534 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9535 DAG.getConstant(Mask, DL0, VT));
9536
9537 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9538 CombineTo(N, And);
9539 if (SDValue(Load, 0).hasOneUse()) {
9540 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9541 } else {
9542 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9543 Load->getValueType(0), ExtLoad);
9544 CombineTo(Load, Trunc, ExtLoad.getValue(1));
9545 }
9546
9547 // N0 is dead at this point.
9548 recursivelyDeleteUnusedNodes(N0.getNode());
9549
9550 return SDValue(N,0); // Return N so it doesn't get rechecked!
9551}
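// Why the SHL case above insists on AND, as a worked i8 -> i16 sketch with
// hypothetical values (load x = 0xF1, shift = 4):
//   i8 path:  (0xF1 << 4) = 0x10 (high bits shifted out), so
//             zext ((0xF1 << 4) & 0x30) = 0x0010
//   i16 path: (zextload 0xF1) << 4 = 0x0F10 keeps those high bits, but
//             0x0F10 & zext(0x30) = 0x0010 because zext(mask) has zeros in
//             bits 8..15, so the AND masks the stray bits off again.
// With OR instead: 0x0F10 | zext(0x01) = 0x0F11, while the correct result is
// zext (0x10 | 0x01) = 0x0011 -- hence the bail-out for non-AND logic ops.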
9552
9553/// If we're narrowing or widening the result of a vector select and the final
9554/// size is the same size as a setcc (compare) feeding the select, then try to
9555/// apply the cast operation to the select's operands because matching vector
9556/// sizes for a select condition and other operands should be more efficient.
9557SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9558 unsigned CastOpcode = Cast->getOpcode();
9559 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9560 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9561 CastOpcode == ISD::FP_ROUND) &&
9562 "Unexpected opcode for vector select narrowing/widening");
9563
9564 // We only do this transform before legal ops because the pattern may be
9565 // obfuscated by target-specific operations after legalization. Do not create
9566 // an illegal select op, however, because that may be difficult to lower.
9567 EVT VT = Cast->getValueType(0);
9568 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9569 return SDValue();
9570
9571 SDValue VSel = Cast->getOperand(0);
9572 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9573 VSel.getOperand(0).getOpcode() != ISD::SETCC)
9574 return SDValue();
9575
9576 // Does the setcc have the same vector size as the casted select?
9577 SDValue SetCC = VSel.getOperand(0);
9578 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9579 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9580 return SDValue();
9581
9582 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9583 SDValue A = VSel.getOperand(1);
9584 SDValue B = VSel.getOperand(2);
9585 SDValue CastA, CastB;
9586 SDLoc DL(Cast);
9587 if (CastOpcode == ISD::FP_ROUND) {
9588 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9589 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9590 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9591 } else {
9592 CastA = DAG.getNode(CastOpcode, DL, VT, A);
9593 CastB = DAG.getNode(CastOpcode, DL, VT, B);
9594 }
9595 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9596}
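// Shape of the transform above, with hypothetical types where the setcc is
// v4i32 and the cast truncates v4i64 operands down to the same width:
//   trunc v4i32 (vselect (setcc v4i32 X, Y), A:v4i64, B:v4i64)
//     --> vselect (setcc v4i32 X, Y), (trunc A), (trunc B)
// leaving the select condition and its operands with matching vector sizes.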
9597
9598// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9599// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9600static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9601 const TargetLowering &TLI, EVT VT,
9602 bool LegalOperations, SDNode *N,
9603 SDValue N0, ISD::LoadExtType ExtLoadType) {
9604 SDNode *N0Node = N0.getNode();
9605 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9606 : ISD::isZEXTLoad(N0Node);
9607 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9608 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9609 return SDValue();
9610
9611 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9612 EVT MemVT = LN0->getMemoryVT();
9613 if ((LegalOperations || !LN0->isSimple() ||
9614 VT.isVector()) &&
9615 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9616 return SDValue();
9617
9618 SDValue ExtLoad =
9619 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9620 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9621 Combiner.CombineTo(N, ExtLoad);
9622 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9623 if (LN0->use_empty())
9624 Combiner.recursivelyDeleteUnusedNodes(LN0);
9625 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9626}
9627
9628// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9629// Only generate vector extloads when 1) they're legal, and 2) they are
9630// deemed desirable by the target.
9631static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9632 const TargetLowering &TLI, EVT VT,
9633 bool LegalOperations, SDNode *N, SDValue N0,
9634 ISD::LoadExtType ExtLoadType,
9635 ISD::NodeType ExtOpc) {
9636 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9637 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9638 ((LegalOperations || VT.isVector() ||
9639 !cast<LoadSDNode>(N0)->isSimple()) &&
9640 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9641 return {};
9642
9643 bool DoXform = true;
9644 SmallVector<SDNode *, 4> SetCCs;
9645 if (!N0.hasOneUse())
9646 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9647 if (VT.isVector())
9648 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9649 if (!DoXform)
9650 return {};
9651
9652 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9653 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9654 LN0->getBasePtr(), N0.getValueType(),
9655 LN0->getMemOperand());
9656 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9657 // If the load value is used only by N, replace it via CombineTo N.
9658 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9659 Combiner.CombineTo(N, ExtLoad);
9660 if (NoReplaceTrunc) {
9661 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9662 Combiner.recursivelyDeleteUnusedNodes(LN0);
9663 } else {
9664 SDValue Trunc =
9665 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9666 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9667 }
9668 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9669}
9670
9671static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
9672 const TargetLowering &TLI, EVT VT,
9673 SDNode *N, SDValue N0,
9674 ISD::LoadExtType ExtLoadType,
9675 ISD::NodeType ExtOpc) {
9676 if (!N0.hasOneUse())
9677 return SDValue();
9678
9679 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
9680 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
9681 return SDValue();
9682
9683 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
9684 return SDValue();
9685
9686 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9687 return SDValue();
9688
9689 SDLoc dl(Ld);
9690 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
9691 SDValue NewLoad = DAG.getMaskedLoad(
9692 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
9693 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
9694 ExtLoadType, Ld->isExpandingLoad());
9695 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
9696 return NewLoad;
9697}
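// Shape of the masked-load fold above (a sketch; the vector types are
// hypothetical):
//   zext v4i32 (masked_load v4i16 ptr, mask, passthru)
//     --> masked_load-with-zext v4i32 ptr, mask, (zext passthru)
// The pass-through operand is pre-extended because it supplies the result
// lanes where the mask is false.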
9698
9699static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9700 bool LegalOperations) {
9701 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9702 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9703
9704 SDValue SetCC = N->getOperand(0);
9705 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9706 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9707 return SDValue();
9708
9709 SDValue X = SetCC.getOperand(0);
9710 SDValue Ones = SetCC.getOperand(1);
9711 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9712 EVT VT = N->getValueType(0);
9713 EVT XVT = X.getValueType();
9714 // setge X, C is canonicalized to setgt, so we do not need to match that
9715 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9716 // not require the 'not' op.
9717 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9718 // Invert and smear/shift the sign bit:
9719 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9720 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9721 SDLoc DL(N);
9722 unsigned ShCt = VT.getSizeInBits() - 1;
9723 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9724 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
9725 SDValue NotX = DAG.getNOT(DL, X, VT);
9726 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
9727 auto ShiftOpcode =
9728 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9729 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9730 }
9731 }
9732 return SDValue();
9733}
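// Spot-check of the sign-bit smear above with i8 X (N = 8, shift count = 7):
//   X = 0x05: setgt 0x05, -1 is true  -> sext gives 0xFF;
//             sra (~0x05 = 0xFA), 7   ->            0xFF   ; matches
//   X = 0x80: setgt 0x80, -1 is false -> sext gives 0x00;
//             sra (~0x80 = 0x7F), 7   ->            0x00   ; matches
// With zext/srl the true case instead yields 0x01, as expected for zext i1.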
9734
9735SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
9736 SDValue N0 = N->getOperand(0);
9737 EVT VT = N->getValueType(0);
9738 SDLoc DL(N);
9739
9740 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9741 return Res;
9742
9743 // fold (sext (sext x)) -> (sext x)
9744 // fold (sext (aext x)) -> (sext x)
9745 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9746 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
9747
9748 if (N0.getOpcode() == ISD::TRUNCATE) {
9749 // fold (sext (truncate (load x))) -> (sext (smaller load x))
9750 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
9751 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9752 SDNode *oye = N0.getOperand(0).getNode();
9753 if (NarrowLoad.getNode() != N0.getNode()) {
9754 CombineTo(N0.getNode(), NarrowLoad);
9755 // CombineTo deleted the truncate, if needed, but not what's under it.
9756 AddToWorklist(oye);
9757 }
9758 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9759 }
9760
9761 // See if the value being truncated is already sign extended. If so, just
9762 // eliminate the trunc/sext pair.
9763 SDValue Op = N0.getOperand(0);
9764 unsigned OpBits = Op.getScalarValueSizeInBits();
9765 unsigned MidBits = N0.getScalarValueSizeInBits();
9766 unsigned DestBits = VT.getScalarSizeInBits();
9767 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
9768
9769 if (OpBits == DestBits) {
9770 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
9771 // bits, it is already the value we need.
9772 if (NumSignBits > DestBits-MidBits)
9773 return Op;
9774 } else if (OpBits < DestBits) {
9775 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
9776 // bits, just sext from i32.
9777 if (NumSignBits > OpBits-MidBits)
9778 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
9779 } else {
9780 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
9781 // bits, just truncate to i32.
9782 if (NumSignBits > OpBits-MidBits)
9783 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
9784 }
9785
9786 // fold (sext (truncate x)) -> (sextinreg x).
9787 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
9788 N0.getValueType())) {
9789 if (OpBits < DestBits)
9790 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
9791 else if (OpBits > DestBits)
9792 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
9793 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
9794 DAG.getValueType(N0.getValueType()));
9795 }
9796 }
9797
9798 // Try to simplify (sext (load x)).
9799 if (SDValue foldedExt =
9800 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9801 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9802 return foldedExt;
9803
9804 if (SDValue foldedExt =
9805 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
9806 ISD::SIGN_EXTEND))
9807 return foldedExt;
9808
9809 // fold (sext (load x)) to multiple smaller sextloads.
9810 // Only on illegal but splittable vectors.
9811 if (SDValue ExtLoad = CombineExtLoad(N))
9812 return ExtLoad;
9813
9814 // Try to simplify (sext (sextload x)).
9815 if (SDValue foldedExt = tryToFoldExtOfExtload(
9816 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9817 return foldedExt;
9818
9819 // fold (sext (and/or/xor (load x), cst)) ->
9820 // (and/or/xor (sextload x), (sext cst))
9821 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9822 N0.getOpcode() == ISD::XOR) &&
9823 isa<LoadSDNode>(N0.getOperand(0)) &&
9824 N0.getOperand(1).getOpcode() == ISD::Constant &&
9825 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9826 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9827 EVT MemVT = LN00->getMemoryVT();
9828 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9829 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9830 SmallVector<SDNode*, 4> SetCCs;
9831 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9832 ISD::SIGN_EXTEND, SetCCs, TLI);
9833 if (DoXform) {
9834 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9835 LN00->getChain(), LN00->getBasePtr(),
9836 LN00->getMemoryVT(),
9837 LN00->getMemOperand());
9838 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
9839 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9840 ExtLoad, DAG.getConstant(Mask, DL, VT));
9841 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9842 bool NoReplaceTruncAnd = !N0.hasOneUse();
9843 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9844 CombineTo(N, And);
9845 // If N0 has multiple uses, change other uses as well.
9846 if (NoReplaceTruncAnd) {
9847 SDValue TruncAnd =
9848 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9849 CombineTo(N0.getNode(), TruncAnd);
9850 }
9851 if (NoReplaceTrunc) {
9852 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9853 } else {
9854 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9855 LN00->getValueType(0), ExtLoad);
9856 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9857 }
9858 return SDValue(N,0); // Return N so it doesn't get rechecked!
9859 }
9860 }
9861 }
9862
9863 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9864 return V;
9865
9866 if (N0.getOpcode() == ISD::SETCC) {
9867 SDValue N00 = N0.getOperand(0);
9868 SDValue N01 = N0.getOperand(1);
9869 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9870 EVT N00VT = N0.getOperand(0).getValueType();
9871
9872 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9873 // Only do this before legalize for now.
9874 if (VT.isVector() && !LegalOperations &&
9875 TLI.getBooleanContents(N00VT) ==
9876 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9877 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9878 // of the same size as the compared operands. Only optimize sext(setcc())
9879 // if this is the case.
9880 EVT SVT = getSetCCResultType(N00VT);
9881
9882 // If we already have the desired type, don't change it.
9883 if (SVT != N0.getValueType()) {
9884 // We know that the # elements of the results is the same as the
9885 // # elements of the compare (and the # elements of the compare result
9886 // for that matter). Check to see that they are the same size. If so,
9887 // we know that the element size of the sext'd result matches the
9888 // element size of the compare operands.
9889 if (VT.getSizeInBits() == SVT.getSizeInBits())
9890 return DAG.getSetCC(DL, VT, N00, N01, CC);
9891
9892 // If the desired elements are smaller or larger than the source
9893 // elements, we can use a matching integer vector type and then
9894 // truncate/sign extend.
9895 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9896 if (SVT == MatchingVecType) {
9897 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9898 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9899 }
9900 }
9901 }
9902
9903 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9904 // Here, T can be 1 or -1, depending on the type of the setcc and
9905 // getBooleanContents().
9906 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9907
9908 // To determine the "true" side of the select, we need to know the high bit
9909 // of the value returned by the setcc if it evaluates to true.
9910 // If the type of the setcc is i1, then the true case of the select is just
9911 // sext(i1 1), that is, -1.
9912 // If the type of the setcc is larger (say, i8) then the value of the high
9913 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9914 // of the appropriate width.
9915 SDValue ExtTrueVal = (SetCCWidth == 1)
9916 ? DAG.getAllOnesConstant(DL, VT)
9917 : DAG.getBoolConstant(true, DL, VT, N00VT);
9918 SDValue Zero = DAG.getConstant(0, DL, VT);
9919 if (SDValue SCC =
9920 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9921 return SCC;
9922
9923 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9924 EVT SetCCVT = getSetCCResultType(N00VT);
9925 // Don't do this transform for i1 because there's a select transform
9926 // that would reverse it.
9927 // TODO: We should not do this transform at all without a target hook
9928 // because a sext is likely cheaper than a select?
9929 if (SetCCVT.getScalarSizeInBits() != 1 &&
9930 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9931 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9932 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9933 }
9934 }
9935 }
9936
9937 // fold (sext x) -> (zext x) if the sign bit is known zero.
9938 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9939 DAG.SignBitIsZero(N0))
9940 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9941
9942 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9943 return NewVSel;
9944
9945 // Eliminate this sign extend by doing a negation in the destination type:
9946 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9947 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9948 isNullOrNullSplat(N0.getOperand(0)) &&
9949 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9950 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9951 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9952 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9953 }
9954 // Eliminate this sign extend by doing a decrement in the destination type:
9955 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9956 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9957 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9958 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9959 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9960 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9961 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9962 }
9963
9964 return SDValue();
9965}
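// Quick check of the negation/decrement folds above with a hypothetical
// i8 X = 1:
//   sext i32 (0 - (zext i8 1 to i32)) to i64 = sext (-1) = -1
//   0 - (zext i8 1 to i64)                   =             -1   ; same value
// The inner zext keeps the narrow result within a range whose sign extension
// is exact, so the operation can be done directly in the destination type.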
9966
9967 // isTruncateOf - If N is a truncate of some other value, return true and record
9968// the value being truncated in Op and which of Op's bits are zero/one in Known.
9969// This function computes KnownBits to avoid a duplicated call to
9970// computeKnownBits in the caller.
9971static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9972 KnownBits &Known) {
9973 if (N->getOpcode() == ISD::TRUNCATE) {
9974 Op = N->getOperand(0);
9975 Known = DAG.computeKnownBits(Op);
9976 return true;
9977 }
9978
9979 if (N.getOpcode() != ISD::SETCC ||
9980 N.getValueType().getScalarType() != MVT::i1 ||
9981 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9982 return false;
9983
9984 SDValue Op0 = N->getOperand(0);
9985 SDValue Op1 = N->getOperand(1);
9986 assert(Op0.getValueType() == Op1.getValueType());
9987
9988 if (isNullOrNullSplat(Op0))
9989 Op = Op1;
9990 else if (isNullOrNullSplat(Op1))
9991 Op = Op0;
9992 else
9993 return false;
9994
9995 Known = DAG.computeKnownBits(Op);
9996
9997 return (Known.Zero | 1).isAllOnesValue();
9998}
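// The (Known.Zero | 1).isAllOnesValue() test above encodes: every bit of Op
// except possibly bit 0 is known zero, so "Op != 0" is exactly bit 0 of Op
// and the setcc is a disguised truncate to i1. For example, with
// Op = (and X, 1), computeKnownBits reports Known.Zero = ~1, and
// (setcc (and X, 1), 0, setne) == (trunc (and X, 1) to i1).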
9999
10000/// Given an extending node with a pop-count operand, if the target does not
10001/// support a pop-count in the narrow source type but does support it in the
10002/// destination type, widen the pop-count to the destination type.
10003static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10004 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10005 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10006
10007 SDValue CtPop = Extend->getOperand(0);
10008 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10009 return SDValue();
10010
10011 EVT VT = Extend->getValueType(0);
10012 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10013 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10014 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10015 return SDValue();
10016
10017 // zext (ctpop X) --> ctpop (zext X)
10018 SDLoc DL(Extend);
10019 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10020 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
10021}
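// The rewrite above is sound because zero extension only adds zero bits,
// which cannot change a population count. For example, for i16 -> i32:
//   zext i32 (ctpop i16 0x00FF) = 8  ==  ctpop i32 (zext i16 0x00FF) = 8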
10022
10023SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
10024 SDValue N0 = N->getOperand(0);
10025 EVT VT = N->getValueType(0);
10026
10027 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10028 return Res;
10029
10030 // fold (zext (zext x)) -> (zext x)
10031 // fold (zext (aext x)) -> (zext x)
10032 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10033 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
10034 N0.getOperand(0));
10035
10036 // fold (zext (truncate x)) -> (zext x) or
10037 // (zext (truncate x)) -> (truncate x)
10038 // This is valid when the truncated bits of x are already zero.
10039 SDValue Op;
10040 KnownBits Known;
10041 if (isTruncateOf(DAG, N0, Op, Known)) {
10042 APInt TruncatedBits =
10043 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
10044 APInt(Op.getScalarValueSizeInBits(), 0) :
10045 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
10046 N0.getScalarValueSizeInBits(),
10047 std::min(Op.getScalarValueSizeInBits(),
10048 VT.getScalarSizeInBits()));
10049 if (TruncatedBits.isSubsetOf(Known.Zero))
10050 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10051 }
10052
10053 // fold (zext (truncate x)) -> (and x, mask)
10054 if (N0.getOpcode() == ISD::TRUNCATE) {
10055 // fold (zext (truncate (load x))) -> (zext (smaller load x))
10056 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
10057 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10058 SDNode *oye = N0.getOperand(0).getNode();
10059 if (NarrowLoad.getNode() != N0.getNode()) {
10060 CombineTo(N0.getNode(), NarrowLoad);
10061 // CombineTo deleted the truncate, if needed, but not what's under it.
10062 AddToWorklist(oye);
10063 }
10064 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10065 }
10066
10067 EVT SrcVT = N0.getOperand(0).getValueType();
10068 EVT MinVT = N0.getValueType();
10069
10070 // Try to mask before the extension to avoid having to generate a larger mask,
10071 // possibly over several sub-vectors.
10072 if (SrcVT.bitsLT(VT) && VT.isVector()) {
10073 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
10074 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
10075 SDValue Op = N0.getOperand(0);
10076 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
10077 AddToWorklist(Op.getNode());
10078 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10079 // Transfer the debug info; the new node is equivalent to N0.
10080 DAG.transferDbgValues(N0, ZExtOrTrunc);
10081 return ZExtOrTrunc;
10082 }
10083 }
10084
10085 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
10086 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10087 AddToWorklist(Op.getNode());
10088 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
10089 // We may safely transfer the debug info describing the truncate node over
10090 // to the equivalent and operation.
10091 DAG.transferDbgValues(N0, And);
10092 return And;
10093 }
10094 }
10095
10096 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
10097 // if either of the casts is not free.
10098 if (N0.getOpcode() == ISD::AND &&
10099 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10100 N0.getOperand(1).getOpcode() == ISD::Constant &&
10101 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10102 N0.getValueType()) ||
10103 !TLI.isZExtFree(N0.getValueType(), VT))) {
10104 SDValue X = N0.getOperand(0).getOperand(0);
10105 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
10106 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10107 SDLoc DL(N);
10108 return DAG.getNode(ISD::AND, DL, VT,
10109 X, DAG.getConstant(Mask, DL, VT));
10110 }
10111
10112 // Try to simplify (zext (load x)).
10113 if (SDValue foldedExt =
10114 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10115 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
10116 return foldedExt;
10117
10118 if (SDValue foldedExt =
10119 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
10120 ISD::ZERO_EXTEND))
10121 return foldedExt;
10122
10123 // fold (zext (load x)) to multiple smaller zextloads.
10124 // Only on illegal but splittable vectors.
10125 if (SDValue ExtLoad = CombineExtLoad(N))
10126 return ExtLoad;
10127
10128 // fold (zext (and/or/xor (load x), cst)) ->
10129 // (and/or/xor (zextload x), (zext cst))
10130 // Unless (and (load x) cst) will match as a zextload already and has
10131 // additional users.
10132 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10133 N0.getOpcode() == ISD::XOR) &&
10134 isa<LoadSDNode>(N0.getOperand(0)) &&
10135 N0.getOperand(1).getOpcode() == ISD::Constant &&
10136 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10137 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10138 EVT MemVT = LN00->getMemoryVT();
10139 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
10140 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
10141 bool DoXform = true;
10142 SmallVector<SDNode*, 4> SetCCs;
10143 if (!N0.hasOneUse()) {
10144 if (N0.getOpcode() == ISD::AND) {
10145 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
10146 EVT LoadResultTy = AndC->getValueType(0);
10147 EVT ExtVT;
10148 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
10149 DoXform = false;
10150 }
10151 }
10152 if (DoXform)
10153 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10154 ISD::ZERO_EXTEND, SetCCs, TLI);
10155 if (DoXform) {
10156 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
10157 LN00->getChain(), LN00->getBasePtr(),
10158 LN00->getMemoryVT(),
10159 LN00->getMemOperand());
10160 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10161 SDLoc DL(N);
10162 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10163 ExtLoad, DAG.getConstant(Mask, DL, VT));
10164 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10165 bool NoReplaceTruncAnd = !N0.hasOneUse();
10166 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10167 CombineTo(N, And);
10168 // If N0 has multiple uses, change other uses as well.
10169 if (NoReplaceTruncAnd) {
10170 SDValue TruncAnd =
10171 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10172 CombineTo(N0.getNode(), TruncAnd);
10173 }
10174 if (NoReplaceTrunc) {
10175 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10176 } else {
10177 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10178 LN00->getValueType(0), ExtLoad);
10179 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10180 }
10181 return SDValue(N,0); // Return N so it doesn't get rechecked!
10182 }
10183 }
10184 }
10185
10186 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10187 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10188 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
10189 return ZExtLoad;
10190
10191 // Try to simplify (zext (zextload x)).
10192 if (SDValue foldedExt = tryToFoldExtOfExtload(
10193 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
10194 return foldedExt;
10195
10196 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10197 return V;
10198
10199 if (N0.getOpcode() == ISD::SETCC) {
10200 // Only do this before legalize for now.
10201 if (!LegalOperations && VT.isVector() &&
10202 N0.getValueType().getVectorElementType() == MVT::i1) {
10203 EVT N00VT = N0.getOperand(0).getValueType();
10204 if (getSetCCResultType(N00VT) == N0.getValueType())
10205 return SDValue();
10206
10207 // We know that the # elements of the results is the same as the #
10208 // elements of the compare (and the # elements of the compare result for
10209 // that matter). Check to see that they are the same size. If so, we know
10210 // that the element size of the sext'd result matches the element size of
10211 // the compare operands.
10212 SDLoc DL(N);
10213 SDValue VecOnes = DAG.getConstant(1, DL, VT);
10214 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
10215 // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
10216 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
10217 N0.getOperand(1), N0.getOperand(2));
10218 return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
10219 }
10220
10221 // If the desired elements are smaller or larger than the source
10222 // elements we can use a matching integer vector type and then
10223 // truncate/sign extend.
10224 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10225 SDValue VsetCC =
10226 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
10227 N0.getOperand(1), N0.getOperand(2));
10228 return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
10229 VecOnes);
10230 }
10231
10232 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10233 SDLoc DL(N);
10234 if (SDValue SCC = SimplifySelectCC(
10235 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10236 DAG.getConstant(0, DL, VT),
10237 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10238 return SCC;
10239 }
10240
10241 // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
10242 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10243 isa<ConstantSDNode>(N0.getOperand(1)) &&
10244 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10245 N0.hasOneUse()) {
10246 SDValue ShAmt = N0.getOperand(1);
10247 if (N0.getOpcode() == ISD::SHL) {
10248 SDValue InnerZExt = N0.getOperand(0);
10249 // If the original shl may be shifting out bits, do not perform this
10250 // transformation.
10251 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
10252 InnerZExt.getOperand(0).getValueSizeInBits();
10253 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
10254 return SDValue();
10255 }
10256
10257 SDLoc DL(N);
10258
10259 // Ensure that the shift amount is wide enough for the shifted value.
10260 if (VT.getSizeInBits() >= 256)
10261 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
10262
10263 return DAG.getNode(N0.getOpcode(), DL, VT,
10264 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10265 ShAmt);
10266 }
10267
10268 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10269 return NewVSel;
10270
10271 if (SDValue NewCtPop = widenCtPop(N, DAG))
10272 return NewCtPop;
10273
10274 return SDValue();
10275}
10276
10277SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10278 SDValue N0 = N->getOperand(0);
10279 EVT VT = N->getValueType(0);
10280
10281 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10282 return Res;
10283
10284 // fold (aext (aext x)) -> (aext x)
10285 // fold (aext (zext x)) -> (zext x)
10286 // fold (aext (sext x)) -> (sext x)
10287 if (N0.getOpcode() == ISD::ANY_EXTEND ||
10288 N0.getOpcode() == ISD::ZERO_EXTEND ||
10289 N0.getOpcode() == ISD::SIGN_EXTEND)
10290 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10291
10292 // fold (aext (truncate (load x))) -> (aext (smaller load x))
10293 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10294 if (N0.getOpcode() == ISD::TRUNCATE) {
10295 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10296 SDNode *oye = N0.getOperand(0).getNode();
10297 if (NarrowLoad.getNode() != N0.getNode()) {
10298 CombineTo(N0.getNode(), NarrowLoad);
10299 // CombineTo deleted the truncate, if needed, but not what's under it.
10300 AddToWorklist(oye);
10301 }
10302 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10303 }
10304 }
10305
10306 // fold (aext (truncate x))
10307 if (N0.getOpcode() == ISD::TRUNCATE)
10308 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10309
10310 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10311 // if the trunc is not free.
10312 if (N0.getOpcode() == ISD::AND &&
10313 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10314 N0.getOperand(1).getOpcode() == ISD::Constant &&
10315 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10316 N0.getValueType())) {
10317 SDLoc DL(N);
10318 SDValue X = N0.getOperand(0).getOperand(0);
10319 X = DAG.getAnyExtOrTrunc(X, DL, VT);
10320 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10321 return DAG.getNode(ISD::AND, DL, VT,
10322 X, DAG.getConstant(Mask, DL, VT));
10323 }
10324
10325 // fold (aext (load x)) -> (aext (truncate (extload x)))
10326 // None of the supported targets knows how to perform load and any_ext
10327 // on vectors in one instruction. We only perform this transformation on
10328 // scalars.
10329 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10330 ISD::isUNINDEXEDLoad(N0.getNode()) &&
10331 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10332 bool DoXform = true;
10333 SmallVector<SDNode*, 4> SetCCs;
10334 if (!N0.hasOneUse())
10335 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10336 TLI);
10337 if (DoXform) {
10338 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10339 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10340 LN0->getChain(),
10341 LN0->getBasePtr(), N0.getValueType(),
10342 LN0->getMemOperand());
10343 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10344 // If the load value is used only by N, replace it via CombineTo N.
10345 bool NoReplaceTrunc = N0.hasOneUse();
10346 CombineTo(N, ExtLoad);
10347 if (NoReplaceTrunc) {
10348 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10349 recursivelyDeleteUnusedNodes(LN0);
10350 } else {
10351 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10352 N0.getValueType(), ExtLoad);
10353 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10354 }
10355 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10356 }
10357 }
10358
10359 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10360 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10361 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
10362 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10363 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10364 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10365 ISD::LoadExtType ExtType = LN0->getExtensionType();
10366 EVT MemVT = LN0->getMemoryVT();
10367 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10368 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10369 VT, LN0->getChain(), LN0->getBasePtr(),
10370 MemVT, LN0->getMemOperand());
10371 CombineTo(N, ExtLoad);
10372 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10373 recursivelyDeleteUnusedNodes(LN0);
10374 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10375 }
10376 }
10377
10378 if (N0.getOpcode() == ISD::SETCC) {
10379 // For vectors:
10380 // aext(setcc) -> vsetcc
10381 // aext(setcc) -> truncate(vsetcc)
10382 // aext(setcc) -> aext(vsetcc)
10383 // Only do this before legalize for now.
10384 if (VT.isVector() && !LegalOperations) {
10385 EVT N00VT = N0.getOperand(0).getValueType();
10386 if (getSetCCResultType(N00VT) == N0.getValueType())
10387 return SDValue();
10388
10389      // We know that the number of elements of the result is the same as
10390      // the number of elements of the compare (and the number of elements of
10391      // the compare result for that matter). Check to see that they are the
10392      // same size. If so, we know that the element size of the extended
10393      // result matches the element size of the compare operands.
10394 if (VT.getSizeInBits() == N00VT.getSizeInBits())
10395 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10396 N0.getOperand(1),
10397 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10398
10399      // If the desired elements are smaller or larger than the source
10400      // elements, we can use a matching integer vector type and then
10401      // truncate/any-extend.
10402 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10403 SDValue VsetCC =
10404 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10405 N0.getOperand(1),
10406 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10407 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10408 }
10409
10410 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10411 SDLoc DL(N);
10412 if (SDValue SCC = SimplifySelectCC(
10413 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10414 DAG.getConstant(0, DL, VT),
10415 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10416 return SCC;
10417 }
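// For instance (illustrative; a and b are hypothetical i32 values):
//   (i64 aext (setcc a, b, setlt)) -> (i64 select_cc a, b, 1, 0, setlt)
// which is the form SimplifySelectCC produces when it succeeds.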
10418
10419 if (SDValue NewCtPop = widenCtPop(N, DAG))
10420 return NewCtPop;
10421
10422 return SDValue();
10423}
10424
10425SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10426 unsigned Opcode = N->getOpcode();
10427 SDValue N0 = N->getOperand(0);
10428 SDValue N1 = N->getOperand(1);
10429 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10430
10431 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10432 if (N0.getOpcode() == Opcode &&
10433 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10434 return N0;
10435
10436 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10437 N0.getOperand(0).getOpcode() == Opcode) {
10438    // We have an assert, truncate, assert sandwich. Make one stronger assert
10439    // by applying the smallest asserted type to the larger source value.
10440    // This eliminates the later assert:
10441 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10442 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10443 SDValue BigA = N0.getOperand(0);
10444 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10445    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10446           "Asserting zero/sign-extended bits to a type larger than the "
10447           "truncated destination does not provide information");
10448
10449 SDLoc DL(N);
10450 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10451 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10452 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10453 BigA.getOperand(0), MinAssertVTVal);
10454 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10455 }
10456
10457  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10458  // than X, just move the AssertZext in front of the truncate and drop the
10459  // AssertSext.
10460 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10461 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10462 Opcode == ISD::AssertZext) {
10463 SDValue BigA = N0.getOperand(0);
10464 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10465    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10466           "Asserting zero/sign-extended bits to a type larger than the "
10467           "truncated destination does not provide information");
10468
10469 if (AssertVT.bitsLT(BigA_AssertVT)) {
10470 SDLoc DL(N);
10471 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10472 BigA.getOperand(0), N1);
10473 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10474 }
10475 }
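// An illustrative instance of this fold, with a hypothetical X : i64:
//   (AssertZext (trunc (AssertSext X, i32) to i32), i8)
//     -> (trunc (AssertZext X, i8) to i32)
// The narrower zero-assertion carries all the information the truncated
// value needs, so the AssertSext can be dropped.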
10476
10477 return SDValue();
10478}
10479
10480/// If the result of a wider load is shifted right by N bits and then
10481/// truncated to a narrower type, where N is a multiple of the number of bits
10482/// of the narrower type, transform it to a narrower load from address + N /
10483/// num bits of new type. Also narrow the load if the result is masked with an
10484/// AND to effectively produce a smaller type. If the result is to be
10485/// extended, also fold the extension to form an extending load.
10486SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10487 unsigned Opc = N->getOpcode();
10488
10489 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10490 SDValue N0 = N->getOperand(0);
10491 EVT VT = N->getValueType(0);
10492 EVT ExtVT = VT;
10493
10494 // This transformation isn't valid for vector loads.
10495 if (VT.isVector())
10496 return SDValue();
10497
10498 unsigned ShAmt = 0;
10499 bool HasShiftedOffset = false;
10500  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10501  // extending to VT.
10502 if (Opc == ISD::SIGN_EXTEND_INREG) {
10503 ExtType = ISD::SEXTLOAD;
10504 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10505 } else if (Opc == ISD::SRL) {
10506    // Another special case: SRL is basically zero-extending a narrower value,
10507    // or it may be shifting a higher subword, half or byte into the lowest
10508    // bits.
10509 ExtType = ISD::ZEXTLOAD;
10510 N0 = SDValue(N, 0);
10511
10512 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10513 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10514 if (!N01 || !LN0)
10515 return SDValue();
10516
10517 uint64_t ShiftAmt = N01->getZExtValue();
10518 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10519 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10520 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10521 else
10522 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10523 VT.getSizeInBits() - ShiftAmt);
10524 } else if (Opc == ISD::AND) {
10525 // An AND with a constant mask is the same as a truncate + zero-extend.
10526 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10527 if (!AndC)
10528 return SDValue();
10529
10530 const APInt &Mask = AndC->getAPIntValue();
10531 unsigned ActiveBits = 0;
10532 if (Mask.isMask()) {
10533 ActiveBits = Mask.countTrailingOnes();
10534 } else if (Mask.isShiftedMask()) {
10535 ShAmt = Mask.countTrailingZeros();
10536 APInt ShiftedMask = Mask.lshr(ShAmt);
10537 ActiveBits = ShiftedMask.countTrailingOnes();
10538 HasShiftedOffset = true;
10539 } else
10540 return SDValue();
10541
10542 ExtType = ISD::ZEXTLOAD;
10543 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10544 }
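// A worked example for the shifted-mask case (illustrative):
//   (and (i32 load p), 0xFF00)
// has a shifted mask with ShAmt = 8 and ActiveBits = 8, so ExtVT becomes i8
// and the load can be narrowed to a zextload; HasShiftedOffset records that
// the result must be shifted left by 8 afterwards.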
10545
10546 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10547 SDValue SRL = N0;
10548 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10549 ShAmt = ConstShift->getZExtValue();
10550 unsigned EVTBits = ExtVT.getSizeInBits();
10551      // Is the shift amount a multiple of the size of ExtVT?
10552 if ((ShAmt & (EVTBits-1)) == 0) {
10553 N0 = N0.getOperand(0);
10554        // Is the load width a multiple of the size of ExtVT?
10555 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10556 return SDValue();
10557 }
10558
10559 // At this point, we must have a load or else we can't do the transform.
10560 auto *LN0 = dyn_cast<LoadSDNode>(N0);
10561 if (!LN0) return SDValue();
10562
10563 // Because a SRL must be assumed to *need* to zero-extend the high bits
10564 // (as opposed to anyext the high bits), we can't combine the zextload
10565 // lowering of SRL and an sextload.
10566 if (LN0->getExtensionType() == ISD::SEXTLOAD)
10567 return SDValue();
10568
10569 // If the shift amount is larger than the input type then we're not
10570 // accessing any of the loaded bytes. If the load was a zextload/extload
10571 // then the result of the shift+trunc is zero/undef (handled elsewhere).
10572 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10573 return SDValue();
10574
10575 // If the SRL is only used by a masking AND, we may be able to adjust
10576 // the ExtVT to make the AND redundant.
10577 SDNode *Mask = *(SRL->use_begin());
10578 if (Mask->getOpcode() == ISD::AND &&
10579 isa<ConstantSDNode>(Mask->getOperand(1))) {
10580 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
10581 if (ShiftMask.isMask()) {
10582 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10583 ShiftMask.countTrailingOnes());
10584 // If the mask is smaller, recompute the type.
10585 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10586 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10587 ExtVT = MaskedVT;
10588 }
10589 }
10590 }
10591 }
10592
10593 // If the load is shifted left (and the result isn't shifted back right),
10594 // we can fold the truncate through the shift.
10595 unsigned ShLeftAmt = 0;
10596 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10597 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10598 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10599 ShLeftAmt = N01->getZExtValue();
10600 N0 = N0.getOperand(0);
10601 }
10602 }
10603
10604 // If we haven't found a load, we can't narrow it.
10605 if (!isa<LoadSDNode>(N0))
10606 return SDValue();
10607
10608 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10609 // Reducing the width of a volatile load is illegal. For atomics, we may be
10610 // able to reduce the width provided we never widen again. (see D66309)
10611 if (!LN0->isSimple() ||
10612 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10613 return SDValue();
10614
10615 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10616 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10617 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10618 return LVTStoreBits - EVTStoreBits - ShAmt;
10619 };
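// A worked instance of this adjustment (illustrative): narrowing an i64 load
// to i16 with ShAmt = 16 gives LVTStoreBits = 64 and EVTStoreBits = 16, so
// the big-endian shift becomes 64 - 16 - 16 = 32, i.e. a byte offset of 4
// rather than the little-endian offset of 2.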
10620
10621 // For big endian targets, we need to adjust the offset to the pointer to
10622 // load the correct bytes.
10623 if (DAG.getDataLayout().isBigEndian())
10624 ShAmt = AdjustBigEndianShift(ShAmt);
10625
10626 uint64_t PtrOff = ShAmt / 8;
10627 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10628 SDLoc DL(LN0);
10629 // The original load itself didn't wrap, so an offset within it doesn't.
10630 SDNodeFlags Flags;
10631 Flags.setNoUnsignedWrap(true);
10632 SDValue NewPtr =
10633 DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
10634 AddToWorklist(NewPtr.getNode());
10635
10636 SDValue Load;
10637 if (ExtType == ISD::NON_EXTLOAD)
10638 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10639 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10640 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10641 else
10642 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10643 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10644 NewAlign, LN0->getMemOperand()->getFlags(),
10645 LN0->getAAInfo());
10646
10647 // Replace the old load's chain with the new load's chain.
10648 WorklistRemover DeadNodes(*this);
10649 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10650
10651 // Shift the result left, if we've swallowed a left shift.
10652 SDValue Result = Load;
10653 if (ShLeftAmt != 0) {
10654 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10655 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10656 ShImmTy = VT;
10657 // If the shift amount is as large as the result size (but, presumably,
10658 // no larger than the source) then the useful bits of the result are
10659 // zero; we can't simply return the shortened shift, because the result
10660 // of that operation is undefined.
10661 if (ShLeftAmt >= VT.getSizeInBits())
10662 Result = DAG.getConstant(0, DL, VT);
10663 else
10664 Result = DAG.getNode(ISD::SHL, DL, VT,
10665 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10666 }
10667
10668 if (HasShiftedOffset) {
10669    // The shift amount was adjusted above to compute the pointer offset; the
10670    // adjustment is involutive, so applying it again recovers the shift.
10671 if (DAG.getDataLayout().isBigEndian())
10672 ShAmt = AdjustBigEndianShift(ShAmt);
10673
10674    // We're using a shifted mask, so the load now has an offset. This means
10675    // the data has been loaded into lower bytes than it would have been
10676    // before, so we need to shl the loaded data into the correct position
10677    // in the register.
10678 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10679 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10680 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10681 }
10682
10683 // Return the new loaded value.
10684 return Result;
10685}
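// A representative end-to-end case (illustrative): on a little-endian target,
//   (i32 trunc (srl (i64 load p), 32)) -> (i32 load (p + 4))
// with ShAmt = 32 and PtrOff = 4, so the wide load, shift and truncate
// collapse into a single narrow load.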
10686
10687SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10688 SDValue N0 = N->getOperand(0);
10689 SDValue N1 = N->getOperand(1);
10690 EVT VT = N->getValueType(0);
10691 EVT EVT = cast<VTSDNode>(N1)->getVT();
10692 unsigned VTBits = VT.getScalarSizeInBits();
10693 unsigned EVTBits = EVT.getScalarSizeInBits();
10694
10695 if (N0.isUndef())
10696 return DAG.getUNDEF(VT);
10697
10698 // fold (sext_in_reg c1) -> c1
10699 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10700 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10701
10702 // If the input is already sign extended, just drop the extension.
10703 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10704 return N0;
10705
10706 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10707 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10708 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10709 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10710 N0.getOperand(0), N1);
10711
10712 // fold (sext_in_reg (sext x)) -> (sext x)
10713 // fold (sext_in_reg (aext x)) -> (sext x)
10714 // if x is small enough or if we know that x has more than 1 sign bit and the
10715 // sign_extend_inreg is extending from one of them.
10716 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10717 SDValue N00 = N0.getOperand(0);
10718 unsigned N00Bits = N00.getScalarValueSizeInBits();
10719 if ((N00Bits <= EVTBits ||
10720 (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10721 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10722 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10723 }
10724
10725 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10726 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10727 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
10728 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
10729 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10730 if (!LegalOperations ||
10731 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
10732 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10733 N0.getOperand(0));
10734 }
10735
10736 // fold (sext_in_reg (zext x)) -> (sext x)
10737 // iff we are extending the source sign bit.
10738 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10739 SDValue N00 = N0.getOperand(0);
10740 if (N00.getScalarValueSizeInBits() == EVTBits &&
10741 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10742 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10743 }
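// For example (illustrative, with a hypothetical x : i8):
//   (sext_in_reg (zext x to i32), i8) -> (sext x to i32)
// The in-register extension point coincides with the source sign bit, so the
// zero extension can be replaced by a sign extension outright.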
10744
10745 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10746 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10747 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10748
10749 // fold operands of sext_in_reg based on knowledge that the top bits are not
10750 // demanded.
10751 if (SimplifyDemandedBits(SDValue(N, 0)))
10752 return SDValue(N, 0);
10753
10754 // fold (sext_in_reg (load x)) -> (smaller sextload x)
10755 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10756 if (SDValue NarrowLoad = ReduceLoadWidth(N))
10757 return NarrowLoad;
10758
10759 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10760 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10761 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10762 if (N0.getOpcode() == ISD::SRL) {
10763 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10764 if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10765 // We can turn this into an SRA iff the input to the SRL is already sign
10766 // extended enough.
10767 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10768 if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10769 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10770 N0.getOperand(1));
10771 }
10772 }
10773
10774 // fold (sext_inreg (extload x)) -> (sextload x)
10775 // If sextload is not supported by target, we can only do the combine when
10776 // load has one use. Doing otherwise can block folding the extload with other
10777 // extends that the target does support.
10778 if (ISD::isEXTLoad(N0.getNode()) &&
10779 ISD::isUNINDEXEDLoad(N0.getNode()) &&
10780 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10781 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
10782 N0.hasOneUse()) ||
10783 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10784 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10785 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10786 LN0->getChain(),
10787 LN0->getBasePtr(), EVT,
10788 LN0->getMemOperand());
10789 CombineTo(N, ExtLoad);
10790 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10791 AddToWorklist(ExtLoad.getNode());
10792 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10793 }
10794 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10795 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10796 N0.hasOneUse() &&
10797 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10798 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
10799 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10800 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10801 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10802 LN0->getChain(),
10803 LN0->getBasePtr(), EVT,
10804 LN0->getMemOperand());
10805 CombineTo(N, ExtLoad);
10806 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10807 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10808 }
10809
10810 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10811 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10812 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10813 N0.getOperand(1), false))
10814 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10815 BSwap, N1);
10816 }
10817
10818 return SDValue();
10819}
10820
10821SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10822 SDValue N0 = N->getOperand(0);
10823 EVT VT = N->getValueType(0);
10824
10825 if (N0.isUndef())
10826 return DAG.getUNDEF(VT);
10827
10828 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10829 return Res;
10830
10831 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10832 return SDValue(N, 0);
10833
10834 return SDValue();
10835}
10836
10837SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10838 SDValue N0 = N->getOperand(0);
10839 EVT VT = N->getValueType(0);
10840
10841 if (N0.isUndef())
10842 return DAG.getUNDEF(VT);
10843
10844 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10845 return Res;
10846
10847 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10848 return SDValue(N, 0);
10849
10850 return SDValue();
10851}
10852
10853SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10854 SDValue N0 = N->getOperand(0);
10855 EVT VT = N->getValueType(0);
10856 EVT SrcVT = N0.getValueType();
10857 bool isLE = DAG.getDataLayout().isLittleEndian();
10858
10859 // noop truncate
10860 if (SrcVT == VT)
10861 return N0;
10862
10863 // fold (truncate (truncate x)) -> (truncate x)
10864 if (N0.getOpcode() == ISD::TRUNCATE)
10865 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10866
10867 // fold (truncate c1) -> c1
10868 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10869 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10870 if (C.getNode() != N)
10871 return C;
10872 }
10873
10874 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10875 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10876 N0.getOpcode() == ISD::SIGN_EXTEND ||
10877 N0.getOpcode() == ISD::ANY_EXTEND) {
10878 // if the source is smaller than the dest, we still need an extend.
10879 if (N0.getOperand(0).getValueType().bitsLT(VT))
10880 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10881    // if the source is larger than the dest, then we just need the truncate.
10882 if (N0.getOperand(0).getValueType().bitsGT(VT))
10883 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10884 // if the source and dest are the same type, we can drop both the extend
10885 // and the truncate.
10886 return N0.getOperand(0);
10887 }
10888
10889 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10890 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10891 return SDValue();
10892
10893 // Fold extract-and-trunc into a narrow extract. For example:
10894 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10895 // i32 y = TRUNCATE(i64 x)
10896 // -- becomes --
10897 // v16i8 b = BITCAST (v2i64 val)
10898 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10899 //
10900 // Note: We only run this optimization after type legalization (which often
10901 // creates this pattern) and before operation legalization after which
10902 // we need to be more careful about the vector instructions that we generate.
10903 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10904 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10905 EVT VecTy = N0.getOperand(0).getValueType();
10906 EVT ExTy = N0.getValueType();
10907 EVT TrTy = N->getValueType(0);
10908
10909 unsigned NumElem = VecTy.getVectorNumElements();
10910 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10911
10912 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10913    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10914
10915 SDValue EltNo = N0->getOperand(1);
10916 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10917 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10918 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10919
10920 SDLoc DL(N);
10921 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10922 DAG.getBitcast(NVT, N0.getOperand(0)),
10923 DAG.getVectorIdxConstant(Index, DL));
10924 }
10925 }
10926
10927 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10928 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10929 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10930 TLI.isTruncateFree(SrcVT, VT)) {
10931 SDLoc SL(N0);
10932 SDValue Cond = N0.getOperand(0);
10933 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10934 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10935 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10936 }
10937 }
10938
10939  // trunc (shl x, K) -> shl (trunc x), K, iff K < VT.getScalarSizeInBits()
10940 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10941 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10942 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10943 SDValue Amt = N0.getOperand(1);
10944 KnownBits Known = DAG.computeKnownBits(Amt);
10945 unsigned Size = VT.getScalarSizeInBits();
10946 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10947 SDLoc SL(N);
10948 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10949
10950 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10951 if (AmtVT != Amt.getValueType()) {
10952 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10953 AddToWorklist(Amt.getNode());
10954 }
10955 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10956 }
10957 }
10958
10959 // Attempt to pre-truncate BUILD_VECTOR sources.
10960 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10961 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10962 SDLoc DL(N);
10963 EVT SVT = VT.getScalarType();
10964 SmallVector<SDValue, 8> TruncOps;
10965 for (const SDValue &Op : N0->op_values()) {
10966 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10967 TruncOps.push_back(TruncOp);
10968 }
10969 return DAG.getBuildVector(VT, DL, TruncOps);
10970 }
10971
10972 // Fold a series of buildvector, bitcast, and truncate if possible.
10973 // For example fold
10974 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10975 // (2xi32 (buildvector x, y)).
10976 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10977 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10978 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10979 N0.getOperand(0).hasOneUse()) {
10980 SDValue BuildVect = N0.getOperand(0);
10981 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10982 EVT TruncVecEltTy = VT.getVectorElementType();
10983
10984 // Check that the element types match.
10985 if (BuildVectEltTy == TruncVecEltTy) {
10986 // Now we only need to compute the offset of the truncated elements.
10987 unsigned BuildVecNumElts = BuildVect.getNumOperands();
10988 unsigned TruncVecNumElts = VT.getVectorNumElements();
10989 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10990
10991      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10992             "Invalid number of elements");
10993
10994 SmallVector<SDValue, 8> Opnds;
10995 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10996 Opnds.push_back(BuildVect.getOperand(i));
10997
10998 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10999 }
11000 }
11001
11002 // See if we can simplify the input to this truncate through knowledge that
11003 // only the low bits are being used.
11004  // For example "trunc (or (shl x, 8), y)" -> trunc y
11005 // Currently we only perform this optimization on scalars because vectors
11006 // may have different active low bits.
11007 if (!VT.isVector()) {
11008 APInt Mask =
11009 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
11010 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
11011 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
11012 }
11013
11014 // fold (truncate (load x)) -> (smaller load x)
11015 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
11016 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
11017 if (SDValue Reduced = ReduceLoadWidth(N))
11018 return Reduced;
11019
11020 // Handle the case where the load remains an extending load even
11021 // after truncation.
11022 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
11023 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11024 if (LN0->isSimple() &&
11025 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
11026 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
11027 VT, LN0->getChain(), LN0->getBasePtr(),
11028 LN0->getMemoryVT(),
11029 LN0->getMemOperand());
11030 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
11031 return NewLoad;
11032 }
11033 }
11034 }
11035
11036 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
11037 // where ... are all 'undef'.
11038 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
11039 SmallVector<EVT, 8> VTs;
11040 SDValue V;
11041 unsigned Idx = 0;
11042 unsigned NumDefs = 0;
11043
11044 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
11045 SDValue X = N0.getOperand(i);
11046 if (!X.isUndef()) {
11047 V = X;
11048 Idx = i;
11049 NumDefs++;
11050 }
11051      // Stop if more than one member is non-undef.
11052 if (NumDefs > 1)
11053 break;
11054 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
11055 VT.getVectorElementType(),
11056 X.getValueType().getVectorNumElements()));
11057 }
11058
11059 if (NumDefs == 0)
11060 return DAG.getUNDEF(VT);
11061
11062 if (NumDefs == 1) {
11063      assert(V.getNode() && "The single defined operand is empty!");
11064 SmallVector<SDValue, 8> Opnds;
11065 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
11066 if (i != Idx) {
11067 Opnds.push_back(DAG.getUNDEF(VTs[i]));
11068 continue;
11069 }
11070 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
11071 AddToWorklist(NV.getNode());
11072 Opnds.push_back(NV);
11073 }
11074 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
11075 }
11076 }
11077
11078 // Fold truncate of a bitcast of a vector to an extract of the low vector
11079 // element.
11080 //
11081 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
11082 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
11083 SDValue VecSrc = N0.getOperand(0);
11084 EVT VecSrcVT = VecSrc.getValueType();
11085 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
11086 (!LegalOperations ||
11087 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
11088 SDLoc SL(N);
11089
11090 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
11091 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
11092 DAG.getVectorIdxConstant(Idx, SL));
11093 }
11094 }
11095
11096 // Simplify the operands using demanded-bits information.
11097 if (!VT.isVector() &&
11098 SimplifyDemandedBits(SDValue(N, 0)))
11099 return SDValue(N, 0);
11100
11101 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
11102 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
11103 // When the adde's carry is not used.
11104 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
11105 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
11106      // We only do this for addcarry before operation legalization.
11107 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
11108 TLI.isOperationLegal(N0.getOpcode(), VT))) {
11109 SDLoc SL(N);
11110 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11111 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11112 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
11113 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
11114 }
11115
11116 // fold (truncate (extract_subvector(ext x))) ->
11117 // (extract_subvector x)
11118 // TODO: This can be generalized to cover cases where the truncate and extract
11119 // do not fully cancel each other out.
11120 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
11121 SDValue N00 = N0.getOperand(0);
11122 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
11123 N00.getOpcode() == ISD::ZERO_EXTEND ||
11124 N00.getOpcode() == ISD::ANY_EXTEND) {
11125 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
11126 VT.getVectorElementType())
11127 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
11128 N00.getOperand(0), N0.getOperand(1));
11129 }
11130 }
11131
11132 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11133 return NewVSel;
11134
11135 // Narrow a suitable binary operation with a non-opaque constant operand by
11136 // moving it ahead of the truncate. This is limited to pre-legalization
11137 // because targets may prefer a wider type during later combines and invert
11138 // this transform.
11139 switch (N0.getOpcode()) {
11140 case ISD::ADD:
11141 case ISD::SUB:
11142 case ISD::MUL:
11143 case ISD::AND:
11144 case ISD::OR:
11145 case ISD::XOR:
11146 if (!LegalOperations && N0.hasOneUse() &&
11147 (isConstantOrConstantVector(N0.getOperand(0), true) ||
11148 isConstantOrConstantVector(N0.getOperand(1), true))) {
11149 // TODO: We already restricted this to pre-legalization, but for vectors
11150 // we are extra cautious to not create an unsupported operation.
11151 // Target-specific changes are likely needed to avoid regressions here.
11152 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
11153 SDLoc DL(N);
11154 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
11155 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
11156 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
11157 }
11158 }
11159 }
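// For instance (illustrative): before legalization,
//   (i16 trunc (add x, C)) -> (add (i16 trunc x), (i16 trunc C))
// which exposes the narrower add to further combines.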
11160
11161 return SDValue();
11162}
11163
11164static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11165 SDValue Elt = N->getOperand(i);
11166 if (Elt.getOpcode() != ISD::MERGE_VALUES)
11167 return Elt.getNode();
11168 return Elt.getOperand(Elt.getResNo()).getNode();
11169}
11170
11171/// build_pair (load, load) -> load
11172/// if load locations are consecutive.
11173SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
11174  assert(N->getOpcode() == ISD::BUILD_PAIR);
11175
11176 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
11177 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
11178
11179  // A BUILD_PAIR always has the least significant part in elt 0 and the
11180  // most significant part in elt 1. So when combining into one large load, we
11181 // need to consider the endianness.
11182 if (DAG.getDataLayout().isBigEndian())
11183 std::swap(LD1, LD2);
11184
11185 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
11186 LD1->getAddressSpace() != LD2->getAddressSpace())
11187 return SDValue();
11188 EVT LD1VT = LD1->getValueType(0);
11189 unsigned LD1Bytes = LD1VT.getStoreSize();
11190 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
11191 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
11192 unsigned Align = LD1->getAlignment();
11193 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
11194 VT.getTypeForEVT(*DAG.getContext()));
11195
11196 if (NewAlign <= Align &&
11197 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
11198 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
11199 LD1->getPointerInfo(), Align);
11200 }
11201
11202 return SDValue();
11203}
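// For example (illustrative): on a little-endian target,
//   (i64 build_pair (i32 load p), (i32 load (p + 4))) -> (i64 load p)
// provided both loads are non-extending, consecutive, and the wider load
// does not require a higher alignment than is available.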
11204
11205static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11206 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11207 // and Lo parts; on big-endian machines it doesn't.
11208 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
11209}
11210
11211static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
11212 const TargetLowering &TLI) {
11213 // If this is not a bitcast to an FP type or if the target doesn't have
11214 // IEEE754-compliant FP logic, we're done.
11215 EVT VT = N->getValueType(0);
11216 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
11217 return SDValue();
11218
11219 // TODO: Handle cases where the integer constant is a different scalar
11220 // bitwidth to the FP.
11221 SDValue N0 = N->getOperand(0);
11222 EVT SourceVT = N0.getValueType();
11223 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
11224 return SDValue();
11225
11226 unsigned FPOpcode;
11227 APInt SignMask;
11228 switch (N0.getOpcode()) {
11229 case ISD::AND:
11230 FPOpcode = ISD::FABS;
11231 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
11232 break;
11233 case ISD::XOR:
11234 FPOpcode = ISD::FNEG;
11235 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11236 break;
11237 case ISD::OR:
11238 FPOpcode = ISD::FABS;
11239 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11240 break;
11241 default:
11242 return SDValue();
11243 }
11244
11245 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11246 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11247 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11248 // fneg (fabs X)
11249 SDValue LogicOp0 = N0.getOperand(0);
11250 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11251 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11252 LogicOp0.getOpcode() == ISD::BITCAST &&
11253 LogicOp0.getOperand(0).getValueType() == VT) {
11254 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
11255 NumFPLogicOpsConv++;
11256 if (N0.getOpcode() == ISD::OR)
11257 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
11258 return FPOp;
11259 }
11260
11261 return SDValue();
11262}
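// Two illustrative instances of these folds, with a hypothetical x : f32:
//   (f32 bitcast (xor (i32 bitcast x), 0x80000000)) -> (fneg x)
//   (f32 bitcast (and (i32 bitcast x), 0x7FFFFFFF)) -> (fabs x)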
11263
11264SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11265 SDValue N0 = N->getOperand(0);
11266 EVT VT = N->getValueType(0);
11267
11268 if (N0.isUndef())
11269 return DAG.getUNDEF(VT);
11270
11271 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11272 // Only do this before legalize types, unless both types are integer and the
11273 // scalar type is legal. Only do this before legalize ops, since the target
11274  // may be depending on the bitcast.
11275 // First check to see if this is all constant.
11276 // TODO: Support FP bitcasts after legalize types.
11277 if (VT.isVector() &&
11278 (!LegalTypes ||
11279 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11280 TLI.isTypeLegal(VT.getVectorElementType()))) &&
11281 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11282 cast<BuildVectorSDNode>(N0)->isConstant())
11283 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11284 VT.getVectorElementType());
11285
11286 // If the input is a constant, let getNode fold it.
11287 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11288 // If we can't allow illegal operations, we need to check that this is just
11289    // an fp -> int or int -> fp conversion and that the resulting operation will
11290 // be legal.
11291 if (!LegalOperations ||
11292 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11293 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
11294 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11295 TLI.isOperationLegal(ISD::Constant, VT))) {
11296 SDValue C = DAG.getBitcast(VT, N0);
11297 if (C.getNode() != N)
11298 return C;
11299 }
11300 }
11301
11302 // (conv (conv x, t1), t2) -> (conv x, t2)
11303 if (N0.getOpcode() == ISD::BITCAST)
11304 return DAG.getBitcast(VT, N0.getOperand(0));
11305
11306 // fold (conv (load x)) -> (load (conv*)x)
11307 // If the resultant load doesn't need a higher alignment than the original!
11308 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11309 // Do not remove the cast if the types differ in endian layout.
11310 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
11311 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11312 // If the load is volatile, we only want to change the load type if the
11313 // resulting load is legal. Otherwise we might increase the number of
11314 // memory accesses. We don't care if the original type was legal or not
11315 // as we assume software couldn't rely on the number of accesses of an
11316 // illegal type.
11317 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
11318 TLI.isOperationLegal(ISD::LOAD, VT))) {
11319 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11320
11321 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11322 *LN0->getMemOperand())) {
11323 SDValue Load =
11324 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11325 LN0->getPointerInfo(), LN0->getAlignment(),
11326 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11327 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11328 return Load;
11329 }
11330 }
11331
11332 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11333 return V;
11334
11335 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11336 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11337 //
11338 // For ppc_fp128:
11339 // fold (bitcast (fneg x)) ->
11340 // flipbit = signbit
11341 // (xor (bitcast x) (build_pair flipbit, flipbit))
11342 //
11343 // fold (bitcast (fabs x)) ->
11344 // flipbit = (and (extract_element (bitcast x), 0), signbit)
11345 // (xor (bitcast x) (build_pair flipbit, flipbit))
11346 // This often reduces constant pool loads.
11347 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11348 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11349 N0.getNode()->hasOneUse() && VT.isInteger() &&
11350 !VT.isVector() && !N0.getValueType().isVector()) {
11351 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11352 AddToWorklist(NewConv.getNode());
11353
11354 SDLoc DL(N);
11355 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11356      assert(VT.getSizeInBits() == 128);
11357 SDValue SignBit = DAG.getConstant(
11358 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11359 SDValue FlipBit;
11360 if (N0.getOpcode() == ISD::FNEG) {
11361 FlipBit = SignBit;
11362 AddToWorklist(FlipBit.getNode());
11363 } else {
11364        assert(N0.getOpcode() == ISD::FABS);
11365 SDValue Hi =
11366 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11367 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11368 SDLoc(NewConv)));
11369 AddToWorklist(Hi.getNode());
11370 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11371 AddToWorklist(FlipBit.getNode());
11372 }
11373 SDValue FlipBits =
11374 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11375 AddToWorklist(FlipBits.getNode());
11376 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11377 }
11378 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11379 if (N0.getOpcode() == ISD::FNEG)
11380 return DAG.getNode(ISD::XOR, DL, VT,
11381 NewConv, DAG.getConstant(SignBit, DL, VT));
11382    assert(N0.getOpcode() == ISD::FABS);
11383 return DAG.getNode(ISD::AND, DL, VT,
11384 NewConv, DAG.getConstant(~SignBit, DL, VT));
11385 }
11386
11387 // fold (bitconvert (fcopysign cst, x)) ->
11388 // (or (and (bitconvert x), sign), (and cst, (not sign)))
11389 // Note that we don't handle (copysign x, cst) because this can always be
11390 // folded to an fneg or fabs.
11391 //
11392 // For ppc_fp128:
11393 // fold (bitcast (fcopysign cst, x)) ->
11394 // flipbit = (and (extract_element
11395 // (xor (bitcast cst), (bitcast x)), 0),
11396 // signbit)
11397 // (xor (bitcast cst) (build_pair flipbit, flipbit))
11398 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11399 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11400 VT.isInteger() && !VT.isVector()) {
11401 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11402 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11403 if (isTypeLegal(IntXVT)) {
11404 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11405 AddToWorklist(X.getNode());
11406
11407 // If X has a different width than the result/lhs, sext it or truncate it.
11408 unsigned VTWidth = VT.getSizeInBits();
11409 if (OrigXWidth < VTWidth) {
11410 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11411 AddToWorklist(X.getNode());
11412 } else if (OrigXWidth > VTWidth) {
11413 // To get the sign bit in the right place, we have to shift it right
11414 // before truncating.
11415 SDLoc DL(X);
11416 X = DAG.getNode(ISD::SRL, DL,
11417 X.getValueType(), X,
11418 DAG.getConstant(OrigXWidth-VTWidth, DL,
11419 X.getValueType()));
11420 AddToWorklist(X.getNode());
11421 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11422 AddToWorklist(X.getNode());
11423 }
11424
11425 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11426 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11427 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11428 AddToWorklist(Cst.getNode());
11429 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11430 AddToWorklist(X.getNode());
11431 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11432 AddToWorklist(XorResult.getNode());
11433 SDValue XorResult64 = DAG.getNode(
11434 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11435 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11436 SDLoc(XorResult)));
11437 AddToWorklist(XorResult64.getNode());
11438 SDValue FlipBit =
11439 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11440 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11441 AddToWorklist(FlipBit.getNode());
11442 SDValue FlipBits =
11443 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11444 AddToWorklist(FlipBits.getNode());
11445 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11446 }
11447 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11448 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11449 X, DAG.getConstant(SignBit, SDLoc(X), VT));
11450 AddToWorklist(X.getNode());
11451
11452 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11453 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11454 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11455 AddToWorklist(Cst.getNode());
11456
11457 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11458 }
11459 }
11460
11461 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11462 if (N0.getOpcode() == ISD::BUILD_PAIR)
11463 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11464 return CombineLD;
11465
11466 // Remove double bitcasts from shuffles - this is often a legacy of
11467 // XformToShuffleWithZero being used to combine bitmaskings (of
11468 // float vectors bitcast to integer vectors) into shuffles.
11469 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11470 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11471 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11472 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11473 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11474 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11475
11476 // If operands are a bitcast, peek through if it casts the original VT.
11477 // If operands are a constant, just bitcast back to original VT.
11478 auto PeekThroughBitcast = [&](SDValue Op) {
11479 if (Op.getOpcode() == ISD::BITCAST &&
11480 Op.getOperand(0).getValueType() == VT)
11481 return SDValue(Op.getOperand(0));
11482 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11483 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11484 return DAG.getBitcast(VT, Op);
11485 return SDValue();
11486 };
11487
11488 // FIXME: If either input vector is bitcast, try to convert the shuffle to
11489 // the result type of this bitcast. This would eliminate at least one
11490 // bitcast. See the transform in InstCombine.
11491 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11492 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11493 if (!(SV0 && SV1))
11494 return SDValue();
11495
11496 int MaskScale =
11497 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11498 SmallVector<int, 8> NewMask;
11499 for (int M : SVN->getMask())
11500 for (int i = 0; i != MaskScale; ++i)
11501 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11502
11503 SDValue LegalShuffle =
11504 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11505 if (LegalShuffle)
11506 return LegalShuffle;
11507 }
11508
11509 return SDValue();
11510}
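// An illustrative instance of the shuffle fold above, for v4i32 sources a
// and b (hypothetical):
//   (v4i32 bitcast (v2i64 shuffle (bitcast a), (bitcast b), <1,0>))
//     -> (v4i32 shuffle a, b, <2,3,0,1>)
// Each wide mask element is scaled by MaskScale = 2 into two narrow elements.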
11511
11512SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11513 EVT VT = N->getValueType(0);
11514 return CombineConsecutiveLoads(N, VT);
11515}
11516
11517/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11518/// operands. DstEltVT indicates the destination element value type.
11519SDValue DAGCombiner::
11520ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11521 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11522
11523 // If this is already the right type, we're done.
11524 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11525
11526 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11527 unsigned DstBitSize = DstEltVT.getSizeInBits();
11528
11529 // If this is a conversion of N elements of one type to N elements of another
11530 // type, convert each element. This handles FP<->INT cases.
11531 if (SrcBitSize == DstBitSize) {
11532 SmallVector<SDValue, 8> Ops;
11533 for (SDValue Op : BV->op_values()) {
11534 // If the vector element type is not legal, the BUILD_VECTOR operands
11535 // are promoted and implicitly truncated. Make that explicit here.
11536 if (Op.getValueType() != SrcEltVT)
11537 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11538 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11539 AddToWorklist(Ops.back().getNode());
11540 }
11541 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11542 BV->getValueType(0).getVectorNumElements());
11543 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11544 }
11545
11546 // Otherwise, we're growing or shrinking the elements. To avoid having to
11547 // handle annoying details of growing/shrinking FP values, we convert them to
11548 // int first.
11549 if (SrcEltVT.isFloatingPoint()) {
11550    // Convert the input float vector to an int vector whose elements are the
11551    // same size.
11552 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11553 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11554 SrcEltVT = IntVT;
11555 }
11556
11557 // Now we know the input is an integer vector. If the output is a FP type,
11558 // convert to integer first, then to FP of the right size.
11559 if (DstEltVT.isFloatingPoint()) {
11560 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11561 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11562
11563 // Next, convert to FP elements of the same size.
11564 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11565 }
11566
11567 SDLoc DL(BV);
11568
11569  // Okay, we know the src/dst types are both integers of differing sizes.
11570  // Handle growing first.
11571  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11572 if (SrcBitSize < DstBitSize) {
11573 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
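      // For example, growing v4i16 into v2i32 gives NumInputsPerOutput = 2;
      // on a little-endian target the constants {0x1111, 0x2222} pack into
      // 0x22221111, i.e. the lower-indexed element lands in the low bits.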
11574
11575 SmallVector<SDValue, 8> Ops;
11576 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11577 i += NumInputsPerOutput) {
11578 bool isLE = DAG.getDataLayout().isLittleEndian();
11579 APInt NewBits = APInt(DstBitSize, 0);
11580 bool EltIsUndef = true;
11581 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11582 // Shift the previously computed bits over.
11583 NewBits <<= SrcBitSize;
11584 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11585 if (Op.isUndef()) continue;
11586 EltIsUndef = false;
11587
11588 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11589 zextOrTrunc(SrcBitSize).zext(DstBitSize);
11590 }
11591
11592 if (EltIsUndef)
11593 Ops.push_back(DAG.getUNDEF(DstEltVT));
11594 else
11595 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11596 }
11597
11598 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11599 return DAG.getBuildVector(VT, DL, Ops);
11600 }
11601
11602 // Finally, this must be the case where we are shrinking elements: each input
11603 // turns into multiple outputs.
11604 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11605 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11606 NumOutputsPerInput*BV->getNumOperands());
11607 SmallVector<SDValue, 8> Ops;
11608
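  // For example, shrinking v2i32 into v4i16 splits 0x22221111 into
  // {0x1111, 0x2222} (low piece first); the std::reverse below swaps the
  // pieces of each element on big-endian targets.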
11609 for (const SDValue &Op : BV->op_values()) {
11610 if (Op.isUndef()) {
11611 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11612 continue;
11613 }
11614
11615 APInt OpVal = cast<ConstantSDNode>(Op)->
11616 getAPIntValue().zextOrTrunc(SrcBitSize);
11617
11618 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11619 APInt ThisVal = OpVal.trunc(DstBitSize);
11620 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11621 OpVal.lshrInPlace(DstBitSize);
11622 }
11623
11624 // For big endian targets, swap the order of the pieces of each element.
11625 if (DAG.getDataLayout().isBigEndian())
11626 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11627 }
11628
11629 return DAG.getBuildVector(VT, DL, Ops);
11630}
11631
11632static bool isContractable(SDNode *N) {
11633 SDNodeFlags F = N->getFlags();
11634 return F.hasAllowContract() || F.hasAllowReassociation();
11635}
11636
11637/// Try to perform FMA combining on a given FADD node.
11638SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11639 SDValue N0 = N->getOperand(0);
11640 SDValue N1 = N->getOperand(1);
11641 EVT VT = N->getValueType(0);
11642 SDLoc SL(N);
11643
11644 const TargetOptions &Options = DAG.getTarget().Options;
11645
11646 // Floating-point multiply-add with intermediate rounding.
11647 bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
11648
11649 // Floating-point multiply-add without intermediate rounding.
11650 bool HasFMA =
11651 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
11652 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11653
11654 // No valid opcode, do not combine.
11655 if (!HasFMAD && !HasFMA)
11656 return SDValue();
11657
11658 SDNodeFlags Flags = N->getFlags();
11659 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11660 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11661 CanFuse || HasFMAD);
11662 // If the addition is not contractable, do not combine.
11663 if (!AllowFusionGlobally && !isContractable(N))
11664 return SDValue();
11665
11666 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11667 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11668 return SDValue();
11669
11670 // Always prefer FMAD to FMA for precision.
11671 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11672 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11673
11674  // Is the node an FMUL and contractable either due to global flags or
11675  // SDNodeFlags?
11676 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11677 if (N.getOpcode() != ISD::FMUL)
11678 return false;
11679 return AllowFusionGlobally || isContractable(N.getNode());
11680 };
11681 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
11682 // prefer to fold the multiply with fewer uses.
11683 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
11684 if (N0.getNode()->use_size() > N1.getNode()->use_size())
11685 std::swap(N0, N1);
11686 }
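  // (Fusing the multiply with fewer uses is cheaper: a multi-use FMUL must
  // stay alive for its other users, so fusing it duplicates the multiply.)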
11687
11688 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
11689 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11690 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11691 N0.getOperand(0), N0.getOperand(1), N1, Flags);
11692 }
11693
11694 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
11695 // Note: Commutes FADD operands.
11696 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11697 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11698 N1.getOperand(0), N1.getOperand(1), N0, Flags);
11699 }
11700
11701 // Look through FP_EXTEND nodes to do more combining.
11702
11703 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
11704 if (N0.getOpcode() == ISD::FP_EXTEND) {
11705 SDValue N00 = N0.getOperand(0);
11706 if (isContractableFMUL(N00) &&
11707 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11708 N00.getValueType())) {
11709 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11710 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11711 N00.getOperand(0)),
11712 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11713 N00.getOperand(1)), N1, Flags);
11714 }
11715 }
11716
11717 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
11718 // Note: Commutes FADD operands.
11719 if (N1.getOpcode() == ISD::FP_EXTEND) {
11720 SDValue N10 = N1.getOperand(0);
11721 if (isContractableFMUL(N10) &&
11722 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11723 N10.getValueType())) {
11724 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11725 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11726 N10.getOperand(0)),
11727 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11728 N10.getOperand(1)), N0, Flags);
11729 }
11730 }
11731
11732 // More folding opportunities when target permits.
11733 if (Aggressive) {
11734    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
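    // This reassociates (x*y + u*v) + z into x*y + (u*v + z), which is why
    // the fold is guarded by CanFuse below.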
11735 if (CanFuse &&
11736 N0.getOpcode() == PreferredFusedOpcode &&
11737 N0.getOperand(2).getOpcode() == ISD::FMUL &&
11738 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11739 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11740 N0.getOperand(0), N0.getOperand(1),
11741 DAG.getNode(PreferredFusedOpcode, SL, VT,
11742 N0.getOperand(2).getOperand(0),
11743 N0.getOperand(2).getOperand(1),
11744 N1, Flags), Flags);
11745 }
11746
11747    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
11748 if (CanFuse &&
11749 N1->getOpcode() == PreferredFusedOpcode &&
11750 N1.getOperand(2).getOpcode() == ISD::FMUL &&
11751 N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11752 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11753 N1.getOperand(0), N1.getOperand(1),
11754 DAG.getNode(PreferredFusedOpcode, SL, VT,
11755 N1.getOperand(2).getOperand(0),
11756 N1.getOperand(2).getOperand(1),
11757 N0, Flags), Flags);
11758 }
11759
11760
11761 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11762 // -> (fma x, y, (fma (fpext u), (fpext v), z))
11763 auto FoldFAddFMAFPExtFMul = [&] (
11764 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11765 SDNodeFlags Flags) {
11766 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11767 DAG.getNode(PreferredFusedOpcode, SL, VT,
11768 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11769 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11770 Z, Flags), Flags);
11771 };
11772 if (N0.getOpcode() == PreferredFusedOpcode) {
11773 SDValue N02 = N0.getOperand(2);
11774 if (N02.getOpcode() == ISD::FP_EXTEND) {
11775 SDValue N020 = N02.getOperand(0);
11776 if (isContractableFMUL(N020) &&
11777 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11778 N020.getValueType())) {
11779 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11780 N020.getOperand(0), N020.getOperand(1),
11781 N1, Flags);
11782 }
11783 }
11784 }
11785
11786 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11787 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11788 // FIXME: This turns two single-precision and one double-precision
11789 // operation into two double-precision operations, which might not be
11790 // interesting for all targets, especially GPUs.
11791 auto FoldFAddFPExtFMAFMul = [&] (
11792 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11793 SDNodeFlags Flags) {
11794 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11795 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11796 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11797 DAG.getNode(PreferredFusedOpcode, SL, VT,
11798 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11799 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11800 Z, Flags), Flags);
11801 };
11802 if (N0.getOpcode() == ISD::FP_EXTEND) {
11803 SDValue N00 = N0.getOperand(0);
11804 if (N00.getOpcode() == PreferredFusedOpcode) {
11805 SDValue N002 = N00.getOperand(2);
11806 if (isContractableFMUL(N002) &&
11807 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11808 N00.getValueType())) {
11809 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11810 N002.getOperand(0), N002.getOperand(1),
11811 N1, Flags);
11812 }
11813 }
11814 }
11815
11816    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
11817 // -> (fma y, z, (fma (fpext u), (fpext v), x))
11818 if (N1.getOpcode() == PreferredFusedOpcode) {
11819 SDValue N12 = N1.getOperand(2);
11820 if (N12.getOpcode() == ISD::FP_EXTEND) {
11821 SDValue N120 = N12.getOperand(0);
11822 if (isContractableFMUL(N120) &&
11823 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11824 N120.getValueType())) {
11825 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11826 N120.getOperand(0), N120.getOperand(1),
11827 N0, Flags);
11828 }
11829 }
11830 }
11831
11832    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
11833 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11834 // FIXME: This turns two single-precision and one double-precision
11835 // operation into two double-precision operations, which might not be
11836 // interesting for all targets, especially GPUs.
11837 if (N1.getOpcode() == ISD::FP_EXTEND) {
11838 SDValue N10 = N1.getOperand(0);
11839 if (N10.getOpcode() == PreferredFusedOpcode) {
11840 SDValue N102 = N10.getOperand(2);
11841 if (isContractableFMUL(N102) &&
11842 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11843 N10.getValueType())) {
11844 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11845 N102.getOperand(0), N102.getOperand(1),
11846 N0, Flags);
11847 }
11848 }
11849 }
11850 }
11851
11852 return SDValue();
11853}
11854
11855/// Try to perform FMA combining on a given FSUB node.
11856SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11857 SDValue N0 = N->getOperand(0);
11858 SDValue N1 = N->getOperand(1);
11859 EVT VT = N->getValueType(0);
11860 SDLoc SL(N);
11861
11862 const TargetOptions &Options = DAG.getTarget().Options;
11863 // Floating-point multiply-add with intermediate rounding.
11864 bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
11865
11866 // Floating-point multiply-add without intermediate rounding.
11867 bool HasFMA =
11868 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
11869 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11870
11871 // No valid opcode, do not combine.
11872 if (!HasFMAD && !HasFMA)
11873 return SDValue();
11874
11875 const SDNodeFlags Flags = N->getFlags();
11876 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11877 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11878 CanFuse || HasFMAD);
11879
11880 // If the subtraction is not contractable, do not combine.
11881 if (!AllowFusionGlobally && !isContractable(N))
11882 return SDValue();
11883
11884 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11885 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11886 return SDValue();
11887
11888 // Always prefer FMAD to FMA for precision.
11889 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11890 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11891
11892  // Is the node an FMUL and contractable either due to global flags or
11893  // SDNodeFlags?
11894 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11895 if (N.getOpcode() != ISD::FMUL)
11896 return false;
11897 return AllowFusionGlobally || isContractable(N.getNode());
11898 };
11899
11900 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11901 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11902 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11903 N0.getOperand(0), N0.getOperand(1),
11904 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11905 }
11906
11907 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11908 // Note: Commutes FSUB operands.
11909 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11910 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11911 DAG.getNode(ISD::FNEG, SL, VT,
11912 N1.getOperand(0)),
11913 N1.getOperand(1), N0, Flags);
11914 }
11915
11916  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
11917 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11918 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11919 SDValue N00 = N0.getOperand(0).getOperand(0);
11920 SDValue N01 = N0.getOperand(0).getOperand(1);
11921 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11922 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11923 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11924 }
11925
11926 // Look through FP_EXTEND nodes to do more combining.
11927
11928 // fold (fsub (fpext (fmul x, y)), z)
11929 // -> (fma (fpext x), (fpext y), (fneg z))
11930 if (N0.getOpcode() == ISD::FP_EXTEND) {
11931 SDValue N00 = N0.getOperand(0);
11932 if (isContractableFMUL(N00) &&
11933 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11934 N00.getValueType())) {
11935 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11936 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11937 N00.getOperand(0)),
11938 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11939 N00.getOperand(1)),
11940 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11941 }
11942 }
11943
11944 // fold (fsub x, (fpext (fmul y, z)))
11945 // -> (fma (fneg (fpext y)), (fpext z), x)
11946 // Note: Commutes FSUB operands.
11947 if (N1.getOpcode() == ISD::FP_EXTEND) {
11948 SDValue N10 = N1.getOperand(0);
11949 if (isContractableFMUL(N10) &&
11950 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11951 N10.getValueType())) {
11952 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11953 DAG.getNode(ISD::FNEG, SL, VT,
11954 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11955 N10.getOperand(0))),
11956 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11957 N10.getOperand(1)),
11958 N0, Flags);
11959 }
11960 }
11961
11962  // fold (fsub (fpext (fneg (fmul x, y))), z)
11963  //   -> (fneg (fma (fpext x), (fpext y), z))
11964  // Note: This could be removed with appropriate canonicalization of the
11965  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
11966  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11967  // us from implementing the canonicalization in visitFSUB.
11968 if (N0.getOpcode() == ISD::FP_EXTEND) {
11969 SDValue N00 = N0.getOperand(0);
11970 if (N00.getOpcode() == ISD::FNEG) {
11971 SDValue N000 = N00.getOperand(0);
11972 if (isContractableFMUL(N000) &&
11973 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11974 N00.getValueType())) {
11975 return DAG.getNode(ISD::FNEG, SL, VT,
11976 DAG.getNode(PreferredFusedOpcode, SL, VT,
11977 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11978 N000.getOperand(0)),
11979 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11980 N000.getOperand(1)),
11981 N1, Flags));
11982 }
11983 }
11984 }
11985
11986  // fold (fsub (fneg (fpext (fmul x, y))), z)
11987  //   -> (fneg (fma (fpext x), (fpext y), z))
11988  // Note: This could be removed with appropriate canonicalization of the
11989  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
11990  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11991  // us from implementing the canonicalization in visitFSUB.
11992 if (N0.getOpcode() == ISD::FNEG) {
11993 SDValue N00 = N0.getOperand(0);
11994 if (N00.getOpcode() == ISD::FP_EXTEND) {
11995 SDValue N000 = N00.getOperand(0);
11996 if (isContractableFMUL(N000) &&
11997 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
11998 N000.getValueType())) {
11999 return DAG.getNode(ISD::FNEG, SL, VT,
12000 DAG.getNode(PreferredFusedOpcode, SL, VT,
12001 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12002 N000.getOperand(0)),
12003 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12004 N000.getOperand(1)),
12005 N1, Flags));
12006 }
12007 }
12008 }
12009
12010 // More folding opportunities when target permits.
12011 if (Aggressive) {
12012 // fold (fsub (fma x, y, (fmul u, v)), z)
12013    // -> (fma x, y, (fma u, v, (fneg z)))
12014 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
12015 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
12016 N0.getOperand(2)->hasOneUse()) {
12017 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12018 N0.getOperand(0), N0.getOperand(1),
12019 DAG.getNode(PreferredFusedOpcode, SL, VT,
12020 N0.getOperand(2).getOperand(0),
12021 N0.getOperand(2).getOperand(1),
12022 DAG.getNode(ISD::FNEG, SL, VT,
12023 N1), Flags), Flags);
12024 }
12025
12026 // fold (fsub x, (fma y, z, (fmul u, v)))
12027 // -> (fma (fneg y), z, (fma (fneg u), v, x))
12028 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
12029 isContractableFMUL(N1.getOperand(2)) &&
12030 N1->hasOneUse()) {
12031 SDValue N20 = N1.getOperand(2).getOperand(0);
12032 SDValue N21 = N1.getOperand(2).getOperand(1);
12033 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12034 DAG.getNode(ISD::FNEG, SL, VT,
12035 N1.getOperand(0)),
12036 N1.getOperand(1),
12037 DAG.getNode(PreferredFusedOpcode, SL, VT,
12038 DAG.getNode(ISD::FNEG, SL, VT, N20),
12039 N21, N0, Flags), Flags);
12040 }
12041
12042
12043 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
12044    // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
12045 if (N0.getOpcode() == PreferredFusedOpcode &&
12046 N0->hasOneUse()) {
12047 SDValue N02 = N0.getOperand(2);
12048 if (N02.getOpcode() == ISD::FP_EXTEND) {
12049 SDValue N020 = N02.getOperand(0);
12050 if (isContractableFMUL(N020) &&
12051 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12052 N020.getValueType())) {
12053 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12054 N0.getOperand(0), N0.getOperand(1),
12055 DAG.getNode(PreferredFusedOpcode, SL, VT,
12056 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12057 N020.getOperand(0)),
12058 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12059 N020.getOperand(1)),
12060 DAG.getNode(ISD::FNEG, SL, VT,
12061 N1), Flags), Flags);
12062 }
12063 }
12064 }
12065
12066 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
12067 // -> (fma (fpext x), (fpext y),
12068 // (fma (fpext u), (fpext v), (fneg z)))
12069 // FIXME: This turns two single-precision and one double-precision
12070 // operation into two double-precision operations, which might not be
12071 // interesting for all targets, especially GPUs.
12072 if (N0.getOpcode() == ISD::FP_EXTEND) {
12073 SDValue N00 = N0.getOperand(0);
12074 if (N00.getOpcode() == PreferredFusedOpcode) {
12075 SDValue N002 = N00.getOperand(2);
12076 if (isContractableFMUL(N002) &&
12077 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12078 N00.getValueType())) {
12079 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12080 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12081 N00.getOperand(0)),
12082 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12083 N00.getOperand(1)),
12084 DAG.getNode(PreferredFusedOpcode, SL, VT,
12085 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12086 N002.getOperand(0)),
12087 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12088 N002.getOperand(1)),
12089 DAG.getNode(ISD::FNEG, SL, VT,
12090 N1), Flags), Flags);
12091 }
12092 }
12093 }
12094
12095 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
12096 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
12097 if (N1.getOpcode() == PreferredFusedOpcode &&
12098 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
12099 N1->hasOneUse()) {
12100 SDValue N120 = N1.getOperand(2).getOperand(0);
12101 if (isContractableFMUL(N120) &&
12102 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12103 N120.getValueType())) {
12104 SDValue N1200 = N120.getOperand(0);
12105 SDValue N1201 = N120.getOperand(1);
12106 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12107 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
12108 N1.getOperand(1),
12109 DAG.getNode(PreferredFusedOpcode, SL, VT,
12110 DAG.getNode(ISD::FNEG, SL, VT,
12111 DAG.getNode(ISD::FP_EXTEND, SL,
12112 VT, N1200)),
12113 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12114 N1201),
12115 N0, Flags), Flags);
12116 }
12117 }
12118
12119 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
12120 // -> (fma (fneg (fpext y)), (fpext z),
12121 // (fma (fneg (fpext u)), (fpext v), x))
12122 // FIXME: This turns two single-precision and one double-precision
12123 // operation into two double-precision operations, which might not be
12124 // interesting for all targets, especially GPUs.
12125 if (N1.getOpcode() == ISD::FP_EXTEND &&
12126 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
12127 SDValue CvtSrc = N1.getOperand(0);
12128 SDValue N100 = CvtSrc.getOperand(0);
12129 SDValue N101 = CvtSrc.getOperand(1);
12130 SDValue N102 = CvtSrc.getOperand(2);
12131 if (isContractableFMUL(N102) &&
12132 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12133 CvtSrc.getValueType())) {
12134 SDValue N1020 = N102.getOperand(0);
12135 SDValue N1021 = N102.getOperand(1);
12136 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12137 DAG.getNode(ISD::FNEG, SL, VT,
12138 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12139 N100)),
12140 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
12141 DAG.getNode(PreferredFusedOpcode, SL, VT,
12142 DAG.getNode(ISD::FNEG, SL, VT,
12143 DAG.getNode(ISD::FP_EXTEND, SL,
12144 VT, N1020)),
12145 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12146 N1021),
12147 N0, Flags), Flags);
12148 }
12149 }
12150 }
12151
12152 return SDValue();
12153}
12154
12155/// Try to perform FMA combining on a given FMUL node based on the distributive
12156/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
12157/// subtraction instead of addition).
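/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y) and
/// (fmul (fsub 1.0, x), y) becomes (fma (fneg x), y, y).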
12158SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
12159 SDValue N0 = N->getOperand(0);
12160 SDValue N1 = N->getOperand(1);
12161 EVT VT = N->getValueType(0);
12162 SDLoc SL(N);
12163 const SDNodeFlags Flags = N->getFlags();
12164
12165  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
12166
12167 const TargetOptions &Options = DAG.getTarget().Options;
12168
12169 // The transforms below are incorrect when x == 0 and y == inf, because the
12170 // intermediate multiplication produces a nan.
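  // For example, (fmul (fadd x, 1.0), y) with x == 0.0 and y == inf yields
  // (0.0 + 1.0) * inf == inf, but the fused (fma x, y, y) computes
  // 0.0 * inf == NaN.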
12171 if (!Options.NoInfsFPMath)
12172 return SDValue();
12173
12174 // Floating-point multiply-add without intermediate rounding.
12175 bool HasFMA =
12176 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
12177 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12178 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12179
12180 // Floating-point multiply-add with intermediate rounding. This can result
12181 // in a less precise result due to the changed rounding order.
12182 bool HasFMAD = Options.UnsafeFPMath &&
12183 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
12184
12185 // No valid opcode, do not combine.
12186 if (!HasFMAD && !HasFMA)
12187 return SDValue();
12188
12189 // Always prefer FMAD to FMA for precision.
12190 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12191 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12192
12193 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
12194 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
12195 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12196 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
12197 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
12198 if (C->isExactlyValue(+1.0))
12199 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12200 Y, Flags);
12201 if (C->isExactlyValue(-1.0))
12202 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12203 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12204 }
12205 }
12206 return SDValue();
12207 };
12208
12209 if (SDValue FMA = FuseFADD(N0, N1, Flags))
12210 return FMA;
12211 if (SDValue FMA = FuseFADD(N1, N0, Flags))
12212 return FMA;
12213
12214 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
12215 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
12216 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
12217 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
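  // These follow from distributing the multiply, e.g.
  // (1.0 - x1) * y == y - x1*y == fma(-x1, y, y) and
  // (x0 - 1.0) * y == x0*y - y == fma(x0, y, -y).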
12218 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12219 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
12220 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
12221 if (C0->isExactlyValue(+1.0))
12222 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12223 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12224 Y, Flags);
12225 if (C0->isExactlyValue(-1.0))
12226 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12227 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12228 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12229 }
12230 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
12231 if (C1->isExactlyValue(+1.0))
12232 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12233 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12234 if (C1->isExactlyValue(-1.0))
12235 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12236 Y, Flags);
12237 }
12238 }
12239 return SDValue();
12240 };
12241
12242 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
12243 return FMA;
12244 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
12245 return FMA;
12246
12247 return SDValue();
12248}
12249
12250SDValue DAGCombiner::visitFADD(SDNode *N) {
12251 SDValue N0 = N->getOperand(0);
12252 SDValue N1 = N->getOperand(1);
12253 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12254 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12255 EVT VT = N->getValueType(0);
12256 SDLoc DL(N);
12257 const TargetOptions &Options = DAG.getTarget().Options;
12258 const SDNodeFlags Flags = N->getFlags();
12259
12260 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12261 return R;
12262
12263 // fold vector ops
12264 if (VT.isVector())
12265 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12266 return FoldedVOp;
12267
12268 // fold (fadd c1, c2) -> c1 + c2
12269 if (N0CFP && N1CFP)
12270 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
12271
12272 // canonicalize constant to RHS
12273 if (N0CFP && !N1CFP)
12274 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
12275
12276 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
12277 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12278 if (N1C && N1C->isZero())
12279 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
12280 return N0;
12281
12282 if (SDValue NewSel = foldBinOpIntoSelect(N))
12283 return NewSel;
12284
12285 // fold (fadd A, (fneg B)) -> (fsub A, B)
12286 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12287 TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) ==
12288 TargetLowering::NegatibleCost::Cheaper)
12289 return DAG.getNode(
12290 ISD::FSUB, DL, VT, N0,
12291 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12292
12293 // fold (fadd (fneg A), B) -> (fsub B, A)
12294 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12295 TLI.getNegatibleCost(N0, DAG, LegalOperations, ForCodeSize) ==
12296 TargetLowering::NegatibleCost::Cheaper)
12297 return DAG.getNode(
12298 ISD::FSUB, DL, VT, N1,
12299 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
12300
12301 auto isFMulNegTwo = [](SDValue FMul) {
12302 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12303 return false;
12304 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12305 return C && C->isExactlyValue(-2.0);
12306 };
12307
12308 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
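  // This holds because A + B*(-2.0) == A - (B + B), and it removes both the
  // multiply and the -2.0 constant.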
12309 if (isFMulNegTwo(N0)) {
12310 SDValue B = N0.getOperand(0);
12311 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12312 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12313 }
12314 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12315 if (isFMulNegTwo(N1)) {
12316 SDValue B = N1.getOperand(0);
12317 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12318 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12319 }
12320
12321  // No FP constant should be created after legalization as the Instruction
12322  // Selection pass has a hard time dealing with FP constants.
12323 bool AllowNewConst = (Level < AfterLegalizeDAG);
12324
12325 // If nnan is enabled, fold lots of things.
12326 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12327 // If allowed, fold (fadd (fneg x), x) -> 0.0
12328 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12329 return DAG.getConstantFP(0.0, DL, VT);
12330
12331 // If allowed, fold (fadd x, (fneg x)) -> 0.0
12332 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12333 return DAG.getConstantFP(0.0, DL, VT);
12334 }
12335
12336 // If 'unsafe math' or reassoc and nsz, fold lots of things.
12337 // TODO: break out portions of the transformations below for which Unsafe is
12338 // considered and which do not require both nsz and reassoc
12339 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12340 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12341 AllowNewConst) {
12342 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12343 if (N1CFP && N0.getOpcode() == ISD::FADD &&
12344 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12345 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12346 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12347 }
12348
12349 // We can fold chains of FADD's of the same value into multiplications.
12350 // This transform is not safe in general because we are reducing the number
12351 // of rounding steps.
12352 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12353 if (N0.getOpcode() == ISD::FMUL) {
12354 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12355 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
12356
12357 // (fadd (fmul x, c), x) -> (fmul x, c+1)
12358 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12359 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12360 DAG.getConstantFP(1.0, DL, VT), Flags);
12361 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12362 }
12363
12364 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12365 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12366 N1.getOperand(0) == N1.getOperand(1) &&
12367 N0.getOperand(0) == N1.getOperand(0)) {
12368 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12369 DAG.getConstantFP(2.0, DL, VT), Flags);
12370 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12371 }
12372 }
12373
12374 if (N1.getOpcode() == ISD::FMUL) {
12375 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12376 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12377
12378 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12379 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12380 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12381 DAG.getConstantFP(1.0, DL, VT), Flags);
12382 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12383 }
12384
12385 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12386 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12387 N0.getOperand(0) == N0.getOperand(1) &&
12388 N1.getOperand(0) == N0.getOperand(0)) {
12389 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12390 DAG.getConstantFP(2.0, DL, VT), Flags);
12391 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12392 }
12393 }
12394
12395 if (N0.getOpcode() == ISD::FADD) {
12396 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12397 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12398 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12399 (N0.getOperand(0) == N1)) {
12400 return DAG.getNode(ISD::FMUL, DL, VT,
12401 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12402 }
12403 }
12404
12405 if (N1.getOpcode() == ISD::FADD) {
12406 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12407 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12408 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12409 N1.getOperand(0) == N0) {
12410 return DAG.getNode(ISD::FMUL, DL, VT,
12411 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12412 }
12413 }
12414
12415 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12416 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12417 N0.getOperand(0) == N0.getOperand(1) &&
12418 N1.getOperand(0) == N1.getOperand(1) &&
12419 N0.getOperand(0) == N1.getOperand(0)) {
12420 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12421 DAG.getConstantFP(4.0, DL, VT), Flags);
12422 }
12423 }
12424 } // enable-unsafe-fp-math
12425
12426 // FADD -> FMA combines:
12427 if (SDValue Fused = visitFADDForFMACombine(N)) {
12428 AddToWorklist(Fused.getNode());
12429 return Fused;
12430 }
12431 return SDValue();
12432}
12433
12434SDValue DAGCombiner::visitFSUB(SDNode *N) {
12435 SDValue N0 = N->getOperand(0);
12436 SDValue N1 = N->getOperand(1);
12437 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12438 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12439 EVT VT = N->getValueType(0);
12440 SDLoc DL(N);
12441 const TargetOptions &Options = DAG.getTarget().Options;
12442 const SDNodeFlags Flags = N->getFlags();
12443
12444 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12445 return R;
12446
12447 // fold vector ops
12448 if (VT.isVector())
12449 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12450 return FoldedVOp;
12451
12452 // fold (fsub c1, c2) -> c1-c2
12453 if (N0CFP && N1CFP)
12454 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12455
12456 if (SDValue NewSel = foldBinOpIntoSelect(N))
12457 return NewSel;
12458
12459 // (fsub A, 0) -> A
12460 if (N1CFP && N1CFP->isZero()) {
12461 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12462 Flags.hasNoSignedZeros()) {
12463 return N0;
12464 }
12465 }
12466
12467 if (N0 == N1) {
12468 // (fsub x, x) -> 0.0
12469 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12470 return DAG.getConstantFP(0.0f, DL, VT);
12471 }
12472
12473 // (fsub -0.0, N1) -> -N1
12474 // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12475 // FSUB does not specify the sign bit of a NaN. Also note that for
12476 // the same reason, the inverse transform is not safe, unless fast math
12477 // flags are in play.
12478 if (N0CFP && N0CFP->isZero()) {
12479 if (N0CFP->isNegative() ||
12480 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12481 if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) !=
12482 TargetLowering::NegatibleCost::Expensive)
12483 return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12484 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12485 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12486 }
12487 }
12488
12489 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12490 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12491 N1.getOpcode() == ISD::FADD) {
12492 // X - (X + Y) -> -Y
12493 if (N0 == N1->getOperand(0))
12494 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12495 // X - (Y + X) -> -Y
12496 if (N0 == N1->getOperand(1))
12497 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12498 }
12499
12500 // fold (fsub A, (fneg B)) -> (fadd A, B)
12501 if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) !=
12502 TargetLowering::NegatibleCost::Expensive)
12503 return DAG.getNode(
12504 ISD::FADD, DL, VT, N0,
12505 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12506
12507 // FSUB -> FMA combines:
12508 if (SDValue Fused = visitFSUBForFMACombine(N)) {
12509 AddToWorklist(Fused.getNode());
12510 return Fused;
12511 }
12512
12513 return SDValue();
12514}
12515
12516/// Return true if both inputs are at least as cheap in negated form and at
12517/// least one input is strictly cheaper in negated form.
12518bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
12519 TargetLowering::NegatibleCost LHSNeg =
12520 TLI.getNegatibleCost(X, DAG, LegalOperations, ForCodeSize);
12521 if (TargetLowering::NegatibleCost::Expensive == LHSNeg)
12522 return false;
12523
12524 TargetLowering::NegatibleCost RHSNeg =
12525 TLI.getNegatibleCost(Y, DAG, LegalOperations, ForCodeSize);
12526 if (TargetLowering::NegatibleCost::Expensive == RHSNeg)
12527 return false;
12528
12529 // Both negated operands are at least as cheap as their counterparts.
12530 // Check to see if at least one is cheaper negated.
12531 return (TargetLowering::NegatibleCost::Cheaper == LHSNeg ||
12532 TargetLowering::NegatibleCost::Cheaper == RHSNeg);
12533}
12534
12535SDValue DAGCombiner::visitFMUL(SDNode *N) {
12536 SDValue N0 = N->getOperand(0);
12537 SDValue N1 = N->getOperand(1);
12538 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12539 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12540 EVT VT = N->getValueType(0);
12541 SDLoc DL(N);
12542 const TargetOptions &Options = DAG.getTarget().Options;
12543 const SDNodeFlags Flags = N->getFlags();
12544
12545 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12546 return R;
12547
12548 // fold vector ops
12549 if (VT.isVector()) {
12550 // This just handles C1 * C2 for vectors. Other vector folds are below.
12551 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12552 return FoldedVOp;
12553 }
12554
12555 // fold (fmul c1, c2) -> c1*c2
12556 if (N0CFP && N1CFP)
12557 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12558
12559 // canonicalize constant to RHS
12560 if (isConstantFPBuildVectorOrConstantFP(N0) &&
12561 !isConstantFPBuildVectorOrConstantFP(N1))
12562 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12563
12564 if (SDValue NewSel = foldBinOpIntoSelect(N))
12565 return NewSel;
12566
12567 if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12568 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12569 // fold (fmul A, 0) -> 0
12570 if (N1CFP && N1CFP->isZero())
12571 return N1;
12572 }
12573
12574 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12575 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12576 if (isConstantFPBuildVectorOrConstantFP(N1) &&
12577 N0.getOpcode() == ISD::FMUL) {
12578 SDValue N00 = N0.getOperand(0);
12579 SDValue N01 = N0.getOperand(1);
12580 // Avoid an infinite loop by making sure that N00 is not a constant
12581 // (the inner multiply has not been constant folded yet).
12582 if (isConstantFPBuildVectorOrConstantFP(N01) &&
12583 !isConstantFPBuildVectorOrConstantFP(N00)) {
12584 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12585 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12586 }
12587 }
12588
12589 // Match a special-case: we convert X * 2.0 into fadd.
12590 // fmul (fadd X, X), C -> fmul X, 2.0 * C
12591 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12592 N0.getOperand(0) == N0.getOperand(1)) {
12593 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12594 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12595 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12596 }
12597 }
12598
12599 // fold (fmul X, 2.0) -> (fadd X, X)
12600 if (N1CFP && N1CFP->isExactlyValue(+2.0))
12601 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12602
12603 // fold (fmul X, -1.0) -> (fneg X)
12604 if (N1CFP && N1CFP->isExactlyValue(-1.0))
12605 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12606 return DAG.getNode(ISD::FNEG, DL, VT, N0);
12607
12608 // -N0 * -N1 --> N0 * N1
12609 if (isCheaperToUseNegatedFPOps(N0, N1)) {
12610 SDValue NegN0 =
12611 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12612 SDValue NegN1 =
12613 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12614 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12615 }
12616
12617 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12618 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12619 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12620 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12621 TLI.isOperationLegal(ISD::FABS, VT)) {
12622 SDValue Select = N0, X = N1;
12623 if (Select.getOpcode() != ISD::SELECT)
12624 std::swap(Select, X);
12625
12626 SDValue Cond = Select.getOperand(0);
12627 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12628 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12629
12630 if (TrueOpnd && FalseOpnd &&
12631 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12632 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12633 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12634 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12635 switch (CC) {
12636 default: break;
12637 case ISD::SETOLT:
12638 case ISD::SETULT:
12639 case ISD::SETOLE:
12640 case ISD::SETULE:
12641 case ISD::SETLT:
12642 case ISD::SETLE:
12643 std::swap(TrueOpnd, FalseOpnd);
12644       LLVM_FALLTHROUGH;
12645 case ISD::SETOGT:
12646 case ISD::SETUGT:
12647 case ISD::SETOGE:
12648 case ISD::SETUGE:
12649 case ISD::SETGT:
12650 case ISD::SETGE:
12651 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12652 TLI.isOperationLegal(ISD::FNEG, VT))
12653 return DAG.getNode(ISD::FNEG, DL, VT,
12654 DAG.getNode(ISD::FABS, DL, VT, X));
12655 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12656 return DAG.getNode(ISD::FABS, DL, VT, X);
12657
12658 break;
12659 }
12660 }
12661 }
12662
12663 // FMUL -> FMA combines:
12664 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
12665 AddToWorklist(Fused.getNode());
12666 return Fused;
12667 }
12668
12669 return SDValue();
12670}
12671
12672SDValue DAGCombiner::visitFMA(SDNode *N) {
12673 SDValue N0 = N->getOperand(0);
12674 SDValue N1 = N->getOperand(1);
12675 SDValue N2 = N->getOperand(2);
12676 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12677 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12678 EVT VT = N->getValueType(0);
12679 SDLoc DL(N);
12680 const TargetOptions &Options = DAG.getTarget().Options;
12681
12682 // FMA nodes have flags that propagate to the created nodes.
12683 const SDNodeFlags Flags = N->getFlags();
12684 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
12685
12686 // Constant fold FMA.
12687 if (isa<ConstantFPSDNode>(N0) &&
12688 isa<ConstantFPSDNode>(N1) &&
12689 isa<ConstantFPSDNode>(N2)) {
12690 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
12691 }
12692
12693 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
12694 if (isCheaperToUseNegatedFPOps(N0, N1)) {
12695 SDValue NegN0 =
12696 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12697 SDValue NegN1 =
12698 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12699 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
12700 }
12701
12702 if (UnsafeFPMath) {
12703 if (N0CFP && N0CFP->isZero())
12704 return N2;
12705 if (N1CFP && N1CFP->isZero())
12706 return N2;
12707 }
12708 // TODO: The FMA node should have flags that propagate to these nodes.
12709 if (N0CFP && N0CFP->isExactlyValue(1.0))
12710 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
12711 if (N1CFP && N1CFP->isExactlyValue(1.0))
12712 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
12713
12714 // Canonicalize (fma c, x, y) -> (fma x, c, y)
12715 if (isConstantFPBuildVectorOrConstantFP(N0) &&
12716 !isConstantFPBuildVectorOrConstantFP(N1))
12717 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
12718
12719 if (UnsafeFPMath) {
12720 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
12721 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
12722 isConstantFPBuildVectorOrConstantFP(N1) &&
12723 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
12724 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12725 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
12726 Flags), Flags);
12727 }
12728
12729 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
12730 if (N0.getOpcode() == ISD::FMUL &&
12731 isConstantFPBuildVectorOrConstantFP(N1) &&
12732 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12733 return DAG.getNode(ISD::FMA, DL, VT,
12734 N0.getOperand(0),
12735 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
12736 Flags),
12737 N2);
12738 }
12739 }
12740
12741 // (fma x, 1, y) -> (fadd x, y)
12742 // (fma x, -1, y) -> (fadd (fneg x), y)
12743 if (N1CFP) {
12744 if (N1CFP->isExactlyValue(1.0))
12745 // TODO: The FMA node should have flags that propagate to this node.
12746 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
12747
12748 if (N1CFP->isExactlyValue(-1.0) &&
12749 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
12750 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
12751 AddToWorklist(RHSNeg.getNode());
12752 // TODO: The FMA node should have flags that propagate to this node.
12753 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
12754 }
12755
12756    // fma (fneg x), K, y -> fma x, -K, y
12757 if (N0.getOpcode() == ISD::FNEG &&
12758 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12759 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
12760 ForCodeSize)))) {
12761 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
12762 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
12763 }
12764 }
12765
12766 if (UnsafeFPMath) {
12767 // (fma x, c, x) -> (fmul x, (c+1))
12768 if (N1CFP && N0 == N2) {
12769 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12770 DAG.getNode(ISD::FADD, DL, VT, N1,
12771 DAG.getConstantFP(1.0, DL, VT), Flags),
12772 Flags);
12773 }
12774
12775 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12776 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12777 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12778 DAG.getNode(ISD::FADD, DL, VT, N1,
12779 DAG.getConstantFP(-1.0, DL, VT), Flags),
12780 Flags);
12781 }
12782 }
12783
12784 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
12785 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
12786 if (!TLI.isFNegFree(VT) &&
12787 TLI.getNegatibleCost(SDValue(N, 0), DAG, LegalOperations, ForCodeSize) ==
12788 TargetLowering::NegatibleCost::Cheaper)
12789 return DAG.getNode(ISD::FNEG, DL, VT,
12790 TLI.getNegatedExpression(SDValue(N, 0), DAG,
12791 LegalOperations, ForCodeSize),
12792 Flags);
12793 return SDValue();
12794}
12795
12796// Combine multiple FDIVs with the same divisor into multiple FMULs by the
12797// reciprocal.
12798// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12799// Notice that this is not always beneficial. One reason is different targets
12800// may have different costs for FDIV and FMUL, so sometimes the cost of two
12801// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12802// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
12803SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
12804 // TODO: Limit this transform based on optsize/minsize - it always creates at
12805 // least 1 extra instruction. But the perf win may be substantial enough
12806 // that only minsize should restrict this.
12807 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12808 const SDNodeFlags Flags = N->getFlags();
12809 if (!UnsafeMath && !Flags.hasAllowReciprocal())
12810 return SDValue();
12811
12812 // Skip if current node is a reciprocal/fneg-reciprocal.
12813 SDValue N0 = N->getOperand(0);
12814 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12815 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
12816 return SDValue();
12817
12818 // Exit early if the target does not want this transform or if there can't
12819 // possibly be enough uses of the divisor to make the transform worthwhile.
12820 SDValue N1 = N->getOperand(1);
12821 unsigned MinUses = TLI.combineRepeatedFPDivisors();
12822
12823 // For splat vectors, scale the number of uses by the splat factor. If we can
12824 // convert the division into a scalar op, that will likely be much faster.
12825 unsigned NumElts = 1;
12826 EVT VT = N->getValueType(0);
12827 if (VT.isVector() && DAG.isSplatValue(N1))
12828 NumElts = VT.getVectorNumElements();
12829
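  // For example, if the target's minimum is 2, a single v4f32 FDIV by a
  // splat divisor counts as 4 scalar uses and passes this first check.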
12830 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12831 return SDValue();
12832
12833 // Find all FDIV users of the same divisor.
12834 // Use a set because duplicates may be present in the user list.
12835 SetVector<SDNode *> Users;
12836 for (auto *U : N1->uses()) {
12837 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12838 // This division is eligible for optimization only if global unsafe math
12839 // is enabled or if this division allows reciprocal formation.
12840 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12841 Users.insert(U);
12842 }
12843 }
12844
12845 // Now that we have the actual number of divisor uses, make sure it meets
12846 // the minimum threshold specified by the target.
12847 if ((Users.size() * NumElts) < MinUses)
12848 return SDValue();
12849
12850 SDLoc DL(N);
12851 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
12852 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12853
12854 // Dividend / Divisor -> Dividend * Reciprocal
12855 for (auto *U : Users) {
12856 SDValue Dividend = U->getOperand(0);
12857 if (Dividend != FPOne) {
12858 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12859 Reciprocal, Flags);
12860 CombineTo(U, NewNode);
12861 } else if (U != Reciprocal.getNode()) {
12862 // In the absence of fast-math-flags, this user node is always the
12863 // same node as Reciprocal, but with FMF they may be different nodes.
12864 CombineTo(U, Reciprocal);
12865 }
12866 }
12867 return SDValue(N, 0); // N was replaced.
12868}
12869
12870SDValue DAGCombiner::visitFDIV(SDNode *N) {
12871 SDValue N0 = N->getOperand(0);
12872 SDValue N1 = N->getOperand(1);
12873 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12874 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12875 EVT VT = N->getValueType(0);
12876 SDLoc DL(N);
12877 const TargetOptions &Options = DAG.getTarget().Options;
12878 SDNodeFlags Flags = N->getFlags();
12879
12880 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12881 return R;
12882
12883 // fold vector ops
12884 if (VT.isVector())
12885 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12886 return FoldedVOp;
12887
12888 // fold (fdiv c1, c2) -> c1/c2
12889 if (N0CFP && N1CFP)
12890 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12891
12892 if (SDValue NewSel = foldBinOpIntoSelect(N))
12893 return NewSel;
12894
12895 if (SDValue V = combineRepeatedFPDivisors(N))
12896 return V;
12897
12898 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12899 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
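    // For example, (fdiv X, 4.0) becomes (fmul X, 0.25): the reciprocal 0.25
    // is computed exactly (opOK), while something like 1.0/3.0 is merely
    // inexact (opInexact) and is still accepted below.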
12900 if (N1CFP) {
12901 // Compute the reciprocal 1.0 / c2.
12902 const APFloat &N1APF = N1CFP->getValueAPF();
12903 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12904 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12905 // Only do the transform if the reciprocal is a legal fp immediate that
12906      // isn't too nasty (e.g., NaN, denormal, ...).
12907 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12908 (!LegalOperations ||
12909 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12910 // backend)... we should handle this gracefully after Legalize.
12911 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12912 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12913 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12914 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12915 DAG.getConstantFP(Recip, DL, VT), Flags);
12916 }
12917
12918 // If this FDIV is part of a reciprocal square root, it may be folded
12919 // into a target-specific square root estimate instruction.
12920 if (N1.getOpcode() == ISD::FSQRT) {
12921 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
12922 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12923 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12924 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12925 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12926 Flags)) {
12927 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12928 AddToWorklist(RV.getNode());
12929 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12930 }
12931 } else if (N1.getOpcode() == ISD::FP_ROUND &&
12932 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12933 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12934 Flags)) {
12935 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12936 AddToWorklist(RV.getNode());
12937 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12938 }
12939 } else if (N1.getOpcode() == ISD::FMUL) {
12940 // Look through an FMUL. Even though this won't remove the FDIV directly,
12941 // it's still worthwhile to get rid of the FSQRT if possible.
12942 SDValue SqrtOp;
12943 SDValue OtherOp;
12944 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12945 SqrtOp = N1.getOperand(0);
12946 OtherOp = N1.getOperand(1);
12947 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12948 SqrtOp = N1.getOperand(1);
12949 OtherOp = N1.getOperand(0);
12950 }
12951 if (SqrtOp.getNode()) {
12952 // We found a FSQRT, so try to make this fold:
12953 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12954 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12955 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12956 AddToWorklist(RV.getNode());
12957 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12958 }
12959 }
12960 }
12961
12962 // Fold into a reciprocal estimate and multiply instead of a real divide.
12963 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
12964 return RV;
12965 }
12966
12967 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12968 if (isCheaperToUseNegatedFPOps(N0, N1))
12969 return DAG.getNode(
12970 ISD::FDIV, SDLoc(N), VT,
12971 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
12972 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12973
12974 return SDValue();
12975}
12976
12977SDValue DAGCombiner::visitFREM(SDNode *N) {
12978 SDValue N0 = N->getOperand(0);
12979 SDValue N1 = N->getOperand(1);
12980 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12981 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12982 EVT VT = N->getValueType(0);
12983 SDNodeFlags Flags = N->getFlags();
12984
12985 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12986 return R;
12987
12988 // fold (frem c1, c2) -> fmod(c1,c2)
12989 if (N0CFP && N1CFP)
12990 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12991
12992 if (SDValue NewSel = foldBinOpIntoSelect(N))
12993 return NewSel;
12994
12995 return SDValue();
12996}
12997
12998SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12999 SDNodeFlags Flags = N->getFlags();
13000 if (!DAG.getTarget().Options.UnsafeFPMath &&
13001 !Flags.hasApproximateFuncs())
13002 return SDValue();
13003
13004 SDValue N0 = N->getOperand(0);
13005 if (TLI.isFsqrtCheap(N0, DAG))
13006 return SDValue();
13007
13008 // FSQRT nodes have flags that propagate to the created nodes.
13009 return buildSqrtEstimate(N0, Flags);
13010}
13011
13012/// copysign(x, fp_extend(y)) -> copysign(x, y)
13013/// copysign(x, fp_round(y)) -> copysign(x, y)
13014static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13015 SDValue N1 = N->getOperand(1);
13016 if ((N1.getOpcode() == ISD::FP_EXTEND ||
13017 N1.getOpcode() == ISD::FP_ROUND)) {
13018 // Do not optimize out type conversion of f128 type yet.
13019 // For some targets like x86_64, configuration is changed to keep one f128
13020 // value in one SSE register, but instruction selection cannot handle
13021 // FCOPYSIGN on SSE registers yet.
13022 EVT N1VT = N1->getValueType(0);
13023 EVT N1Op0VT = N1->getOperand(0).getValueType();
13024 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
13025 }
13026 return false;
13027}
13028
13029SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13030 SDValue N0 = N->getOperand(0);
13031 SDValue N1 = N->getOperand(1);
13032 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13033 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13034 EVT VT = N->getValueType(0);
13035
13036 if (N0CFP && N1CFP) // Constant fold
13037 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13038
13039 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13040 const APFloat &V = N1C->getValueAPF();
13041 // copysign(x, c1) -> fabs(x) iff ispos(c1)
13042 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13043 if (!V.isNegative()) {
13044 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13045 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13046 } else {
13047 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13048 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13049 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
13050 }
13051 }
13052
13053 // copysign(fabs(x), y) -> copysign(x, y)
13054 // copysign(fneg(x), y) -> copysign(x, y)
13055 // copysign(copysign(x,z), y) -> copysign(x, y)
13056 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13057 N0.getOpcode() == ISD::FCOPYSIGN)
13058 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13059
13060 // copysign(x, abs(y)) -> abs(x)
13061 if (N1.getOpcode() == ISD::FABS)
13062 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13063
13064 // copysign(x, copysign(y,z)) -> copysign(x, z)
13065 if (N1.getOpcode() == ISD::FCOPYSIGN)
13066 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13067
13068 // copysign(x, fp_extend(y)) -> copysign(x, y)
13069 // copysign(x, fp_round(y)) -> copysign(x, y)
13070 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13071 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
13072
13073 return SDValue();
13074}
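// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the copysign identities
// used above, demonstrated with std::copysign. Only the magnitude of the
// first operand and the sign of the second ever matter, which is what makes
// each of the folds above safe.
#include <cassert>
#include <cmath>
static void sketchCopysignFolds() {
  double X = -2.5, Y = -4.0, Z = 9.0;
  // copysign(x, c) with a positive constant c behaves like fabs(x).
  assert(std::copysign(X, 1.0) == std::fabs(X));
  // copysign(fneg(x), y) == copysign(x, y): x's sign is discarded.
  assert(std::copysign(-X, Y) == std::copysign(X, Y));
  // copysign(x, copysign(y, z)) == copysign(x, z): y's magnitude is unused.
  assert(std::copysign(X, std::copysign(Y, Z)) == std::copysign(X, Z));
}
// ---------------------------------------------------------------------------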
13075
13076SDValue DAGCombiner::visitFPOW(SDNode *N) {
13077 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
13078 if (!ExponentC)
13079 return SDValue();
13080
13081 // Try to convert x ** (1/3) into cube root.
13082 // TODO: Handle the various flavors of long double.
13083 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
13084 // Some range near 1/3 should be fine.
13085 EVT VT = N->getValueType(0);
13086 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
13087 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
13088 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
13089 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
13090 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
13091 // For regular numbers, rounding may cause the results to differ.
13092 // Therefore, we require { nsz ninf nnan afn } for this transform.
13093 // TODO: We could select out the special cases if we don't have nsz/ninf.
13094 SDNodeFlags Flags = N->getFlags();
13095 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
13096 !Flags.hasApproximateFuncs())
13097 return SDValue();
13098
13099 // Do not create a cbrt() libcall if the target does not have it, and do not
13100 // turn a pow that has lowering support into a cbrt() libcall.
13101 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
13102 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
13103 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
13104 return SDValue();
13105
13106 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
13107 }
13108
13109 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
13110 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
13111 // TODO: This could be extended (using a target hook) to handle smaller
13112 // power-of-2 fractional exponents.
13113 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
13114 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
13115 if (ExponentIs025 || ExponentIs075) {
13116 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
13117 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
13118 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
13119 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
13120 // For regular numbers, rounding may cause the results to differ.
13121 // Therefore, we require { nsz ninf afn } for this transform.
13122 // TODO: We could select out the special cases if we don't have nsz/ninf.
13123 SDNodeFlags Flags = N->getFlags();
13124
13125 // We only need no signed zeros for the 0.25 case.
13126 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
13127 !Flags.hasApproximateFuncs())
13128 return SDValue();
13129
13130 // Don't double the number of libcalls. We are trying to inline fast code.
13131 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
13132 return SDValue();
13133
13134 // Assume that libcalls are the smallest code.
13135 // TODO: This restriction should probably be lifted for vectors.
13136 if (ForCodeSize)
13137 return SDValue();
13138
13139 // pow(X, 0.25) --> sqrt(sqrt(X))
13140 SDLoc DL(N);
13141 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
13142 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
13143 if (ExponentIs025)
13144 return SqrtSqrt;
13145 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
13146 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
13147 }
13148
13149 return SDValue();
13150}
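// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the exponent rewrites
// above, checked numerically. Exact equality is not guaranteed -- that is
// precisely why the transform requires the afn/ninf/nsz flags -- so the
// comparison allows a small rounding tolerance.
#include <cassert>
#include <cmath>
static void sketchPowFolds() {
  double X = 19.0;
  double S = std::sqrt(X), SS = std::sqrt(S);
  assert(std::fabs(std::pow(X, 0.25) - SS) < 1e-12);     // sqrt(sqrt(X))
  assert(std::fabs(std::pow(X, 0.75) - S * SS) < 1e-12); // sqrt(X)*sqrt(sqrt(X))
  assert(std::fabs(std::pow(X, 1.0 / 3.0) - std::cbrt(X)) < 1e-12);
}
// ---------------------------------------------------------------------------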
13151
13152static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13153 const TargetLowering &TLI) {
13154 // This optimization is guarded by a function attribute because it may produce
13155 // unexpected results. I.e., programs may be relying on the platform-specific
13156 // undefined behavior when the float-to-int conversion overflows.
13157 const Function &F = DAG.getMachineFunction().getFunction();
13158 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13159 if (StrictOverflow.getValueAsString().equals("false"))
13160 return SDValue();
13161
13162 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13163 // replacing casts with a libcall. We also must be allowed to ignore -0.0
13164 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13165 // conversions would return +0.0.
13166 // FIXME: We should be able to use node-level FMF here.
13167 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13168 EVT VT = N->getValueType(0);
13169 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13170 !DAG.getTarget().Options.NoSignedZerosFPMath)
13171 return SDValue();
13172
13173 // fptosi/fptoui round towards zero, so converting from FP to integer and
13174 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
13175 SDValue N0 = N->getOperand(0);
13176 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13177 N0.getOperand(0).getValueType() == VT)
13178 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13179
13180 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13181 N0.getOperand(0).getValueType() == VT)
13182 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13183
13184 return SDValue();
13185}
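// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): why the round trip above
// is an ftrunc. fptosi/fptoui round toward zero, so whenever the truncated
// value fits in the integer type the cast pair equals std::trunc; for
// out-of-range inputs the conversion is UB, which is exactly what the
// "strict-float-cast-overflow" guard is about.
#include <cassert>
#include <cmath>
static void sketchCastPairIsTrunc() {
  double A = 42.9, B = -7.3; // both truncate into the long long range
  assert(static_cast<double>(static_cast<long long>(A)) == std::trunc(A));
  assert(static_cast<double>(static_cast<long long>(B)) == std::trunc(B));
}
// ---------------------------------------------------------------------------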
13186
13187SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
13188 SDValue N0 = N->getOperand(0);
13189 EVT VT = N->getValueType(0);
13190 EVT OpVT = N0.getValueType();
13191
13192 // [us]itofp(undef) = 0, because the result value is bounded.
13193 if (N0.isUndef())
13194 return DAG.getConstantFP(0.0, SDLoc(N), VT);
13195
13196 // fold (sint_to_fp c1) -> c1fp
13197 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13198 // ...but only if the target supports immediate floating-point values
13199 (!LegalOperations ||
13200 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13201 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13202
13203 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
13204 // but UINT_TO_FP is legal on this target, try to convert.
13205 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
13206 hasOperation(ISD::UINT_TO_FP, OpVT)) {
13207 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
13208 if (DAG.SignBitIsZero(N0))
13209 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13210 }
13211
13212 // The next optimizations are desirable only if SELECT_CC can be lowered.
13213 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
13214 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
13215 !VT.isVector() &&
13216 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13217 SDLoc DL(N);
13218 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
13219 DAG.getConstantFP(0.0, DL, VT));
13220 }
13221
13222 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
13223 // (select (setcc x, y, cc), 1.0, 0.0)
13224 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
13225 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
13226 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13227 SDLoc DL(N);
13228 return DAG.getSelect(DL, VT, N0.getOperand(0),
13229 DAG.getConstantFP(1.0, DL, VT),
13230 DAG.getConstantFP(0.0, DL, VT));
13231 }
13232
13233 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13234 return FTrunc;
13235
13236 return SDValue();
13237}
13238
13239SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
13240 SDValue N0 = N->getOperand(0);
13241 EVT VT = N->getValueType(0);
13242 EVT OpVT = N0.getValueType();
13243
13244 // [us]itofp(undef) = 0, because the result value is bounded.
13245 if (N0.isUndef())
13246 return DAG.getConstantFP(0.0, SDLoc(N), VT);
13247
13248 // fold (uint_to_fp c1) -> c1fp
13249 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13250 // ...but only if the target supports immediate floating-point values
13251 (!LegalOperations ||
13252 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13253 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13254
13255 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
13256 // but SINT_TO_FP is legal on this target, try to convert.
13257 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
13258 hasOperation(ISD::SINT_TO_FP, OpVT)) {
13259 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
13260 if (DAG.SignBitIsZero(N0))
13261 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13262 }
13263
13264 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
13265 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
13266 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13267 SDLoc DL(N);
13268 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
13269 DAG.getConstantFP(0.0, DL, VT));
13270 }
13271
13272 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13273 return FTrunc;
13274
13275 return SDValue();
13276}
13277
13278 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
13279static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
13280 SDValue N0 = N->getOperand(0);
13281 EVT VT = N->getValueType(0);
13282
13283 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
13284 return SDValue();
13285
13286 SDValue Src = N0.getOperand(0);
13287 EVT SrcVT = Src.getValueType();
13288 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
13289 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
13290
13291 // We can safely assume the conversion won't overflow the output range,
13292 // because (for example) (uint8_t)18293.f is undefined behavior.
13293
13294 // Since we can assume the conversion won't overflow, our decision as to
13295 // whether the input will fit in the float should depend on the minimum
13296 // of the input range and output range.
13297
13298 // This means this is also safe for a signed input and unsigned output, since
13299 // a negative input would lead to undefined behavior.
13300 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13301 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13302 unsigned ActualSize = std::min(InputSize, OutputSize);
13303 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13304
13305 // We can only fold away the float conversion if the input range can be
13306 // represented exactly in the float range.
13307 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
13308 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
13309 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
13310 : ISD::ZERO_EXTEND;
13311 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13312 }
13313 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13314 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
13315 return DAG.getBitcast(VT, Src);
13316 }
13317 return SDValue();
13318}
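// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the precision check above
// in concrete terms. f32 carries a 24-bit significand, so every 16-bit
// integer survives an int16 -> float -> int16 round trip unchanged, and
// both conversions can be dropped.
#include <cassert>
#include <cstdint>
static void sketchIntFPIntRoundTrip() {
  for (int32_t I = INT16_MIN; I <= INT16_MAX; ++I) {
    float F = static_cast<float>(static_cast<int16_t>(I));
    assert(static_cast<int16_t>(F) == static_cast<int16_t>(I));
  }
}
// ---------------------------------------------------------------------------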
13319
13320SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13321 SDValue N0 = N->getOperand(0);
13322 EVT VT = N->getValueType(0);
13323
13324 // fold (fp_to_sint undef) -> undef
13325 if (N0.isUndef())
13326 return DAG.getUNDEF(VT);
13327
13328 // fold (fp_to_sint c1fp) -> c1
13329 if (isConstantFPBuildVectorOrConstantFP(N0))
13330 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13331
13332 return FoldIntToFPToInt(N, DAG);
13333}
13334
13335SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13336 SDValue N0 = N->getOperand(0);
13337 EVT VT = N->getValueType(0);
13338
13339 // fold (fp_to_uint undef) -> undef
13340 if (N0.isUndef())
13341 return DAG.getUNDEF(VT);
13342
13343 // fold (fp_to_uint c1fp) -> c1
13344 if (isConstantFPBuildVectorOrConstantFP(N0))
13345 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13346
13347 return FoldIntToFPToInt(N, DAG);
13348}
13349
13350SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
13351 SDValue N0 = N->getOperand(0);
13352 SDValue N1 = N->getOperand(1);
13353 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13354 EVT VT = N->getValueType(0);
13355
13356 // fold (fp_round c1fp) -> c1fp
13357 if (N0CFP)
13358 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13359
13360 // fold (fp_round (fp_extend x)) -> x
13361 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13362 return N0.getOperand(0);
13363
13364 // fold (fp_round (fp_round x)) -> (fp_round x)
13365 if (N0.getOpcode() == ISD::FP_ROUND) {
13366 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13367 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13368
13369 // Skip this folding if it results in an fp_round from f80 to f16.
13370 //
13371 // f80 to f16 always generates an expensive (and as yet, unimplemented)
13372 // libcall to __truncxfhf2 instead of selecting native f16 conversion
13373 // instructions from f32 or f64. Moreover, the first (value-preserving)
13374 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13375 // x86.
13376 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13377 return SDValue();
13378
13379 // If the first fp_round isn't a value preserving truncation, it might
13380 // introduce a tie in the second fp_round, that wouldn't occur in the
13381 // single-step fp_round we want to fold to.
13382 // In other words, double rounding isn't the same as rounding.
13383 // Also, this is a value preserving truncation iff both fp_round's are.
13384 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13385 SDLoc DL(N);
13386 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13387 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13388 }
13389 }
13390
13391 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13392 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13393 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13394 N0.getOperand(0), N1);
13395 AddToWorklist(Tmp.getNode());
13396 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13397 Tmp, N0.getOperand(1));
13398 }
13399
13400 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13401 return NewVSel;
13402
13403 return SDValue();
13404}
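// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the value-preservation
// reasoning behind fp_round(fp_extend x) -> x. Widening a float to double
// is exact, so narrowing straight back recovers the original value.
#include <cassert>
static void sketchRoundOfExtendIsIdentity() {
  float F = 0.1f;                     // any finite float works here
  double D = static_cast<double>(F);  // fp_extend: exact
  assert(static_cast<float>(D) == F); // fp_round undoes it
}
// ---------------------------------------------------------------------------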
13405
13406SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13407 SDValue N0 = N->getOperand(0);
13408 EVT VT = N->getValueType(0);
13409
13410 // If this is fp_round(fp_extend), don't fold it; allow ourselves to be folded.
13411 if (N->hasOneUse() &&
13412 N->use_begin()->getOpcode() == ISD::FP_ROUND)
13413 return SDValue();
13414
13415 // fold (fp_extend c1fp) -> c1fp
13416 if (isConstantFPBuildVectorOrConstantFP(N0))
13417 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13418
13419 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13420 if (N0.getOpcode() == ISD::FP16_TO_FP &&
13421 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13422 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13423
13424 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
13425 // value of X.
13426 if (N0.getOpcode() == ISD::FP_ROUND
13427 && N0.getConstantOperandVal(1) == 1) {
13428 SDValue In = N0.getOperand(0);
13429 if (In.getValueType() == VT) return In;
13430 if (VT.bitsLT(In.getValueType()))
13431 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13432 In, N0.getOperand(1));
13433 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13434 }
13435
13436 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
13437 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13438 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13439 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13440 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13441 LN0->getChain(),
13442 LN0->getBasePtr(), N0.getValueType(),
13443 LN0->getMemOperand());
13444 CombineTo(N, ExtLoad);
13445 CombineTo(N0.getNode(),
13446 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13447 N0.getValueType(), ExtLoad,
13448 DAG.getIntPtrConstant(1, SDLoc(N0))),
13449 ExtLoad.getValue(1));
13450 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13451 }
13452
13453 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13454 return NewVSel;
13455
13456 return SDValue();
13457}
13458
13459SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13460 SDValue N0 = N->getOperand(0);
13461 EVT VT = N->getValueType(0);
13462
13463 // fold (fceil c1) -> fceil(c1)
13464 if (isConstantFPBuildVectorOrConstantFP(N0))
13465 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13466
13467 return SDValue();
13468}
13469
13470SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13471 SDValue N0 = N->getOperand(0);
13472 EVT VT = N->getValueType(0);
13473
13474 // fold (ftrunc c1) -> ftrunc(c1)
13475 if (isConstantFPBuildVectorOrConstantFP(N0))
13476 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13477
13478 // fold ftrunc (known rounded int x) -> x
13479 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13480 // likely to be generated to extract an integer from a rounded floating value.
13481 switch (N0.getOpcode()) {
13482 default: break;
13483 case ISD::FRINT:
13484 case ISD::FTRUNC:
13485 case ISD::FNEARBYINT:
13486 case ISD::FFLOOR:
13487 case ISD::FCEIL:
13488 return N0;
13489 }
13490
13491 return SDValue();
13492}
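// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): ftrunc of an already
// rounded value, as in the switch above, is a no-op for every rounding
// function listed there.
#include <cassert>
#include <cmath>
static void sketchTruncOfRounded() {
  double X = 2.6;
  assert(std::trunc(std::ceil(X)) == std::ceil(X));
  assert(std::trunc(std::floor(X)) == std::floor(X));
  assert(std::trunc(std::rint(X)) == std::rint(X));
}
// ---------------------------------------------------------------------------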
13493
13494SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13495 SDValue N0 = N->getOperand(0);
13496 EVT VT = N->getValueType(0);
13497
13498 // fold (ffloor c1) -> ffloor(c1)
13499 if (isConstantFPBuildVectorOrConstantFP(N0))
13500 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13501
13502 return SDValue();
13503}
13504
13505// FIXME: FNEG and FABS have a lot in common; refactor.
13506SDValue DAGCombiner::visitFNEG(SDNode *N) {
13507 SDValue N0 = N->getOperand(0);
13508 EVT VT = N->getValueType(0);
13509
13510 // Constant fold FNEG.
13511 if (isConstantFPBuildVectorOrConstantFP(N0))
13512 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13513
13514 if (TLI.getNegatibleCost(N0, DAG, LegalOperations, ForCodeSize) !=
13515 TargetLowering::NegatibleCost::Expensive)
13516 return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
13517
13518 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
13519 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
13520 // know it was called from a context with a nsz flag if the input fsub does
13521 // not.
13522 if (N0.getOpcode() == ISD::FSUB &&
13523 (DAG.getTarget().Options.NoSignedZerosFPMath ||
13524 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
13525 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
13526 N0.getOperand(0), N->getFlags());
13527 }
13528
13529 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13530 // constant pool values.
13531 if (!TLI.isFNegFree(VT) &&
13532 N0.getOpcode() == ISD::BITCAST &&
13533 N0.getNode()->hasOneUse()) {
13534 SDValue Int = N0.getOperand(0);
13535 EVT IntVT = Int.getValueType();
13536 if (IntVT.isInteger() && !IntVT.isVector()) {
13537 APInt SignMask;
13538 if (N0.getValueType().isVector()) {
13539 // For a vector, get a mask such as 0x80... per scalar element
13540 // and splat it.
13541 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13542 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13543 } else {
13544 // For a scalar, just generate 0x80...
13545 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13546 }
13547 SDLoc DL0(N0);
13548 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13549 DAG.getConstant(SignMask, DL0, IntVT));
13550 AddToWorklist(Int.getNode());
13551 return DAG.getBitcast(VT, Int);
13552 }
13553 }
13554
13555 // (fneg (fmul c, x)) -> (fmul -c, x)
13556 if (N0.getOpcode() == ISD::FMUL &&
13557 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13558 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13559 if (CFP1) {
13560 APFloat CVal = CFP1->getValueAPF();
13561 CVal.changeSign();
13562 if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13563 TLI.isOperationLegal(ISD::ConstantFP, VT)))
13564 return DAG.getNode(
13565 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13566 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13567 N0->getFlags());
13568 }
13569 }
13570
13571 return SDValue();
13572}
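// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the bitcast form of FNEG
// built above. XOR-ing the IEEE-754 sign bit of the integer image negates
// the float without any FP instruction or constant-pool load.
#include <cassert>
#include <cstdint>
#include <cstring>
static void sketchFnegViaXor() {
  float F = 13.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(F)); // the ISD::BITCAST
  Bits ^= UINT32_C(0x80000000);      // XOR with the sign mask
  float Neg;
  std::memcpy(&Neg, &Bits, sizeof(F));
  assert(Neg == -F);
}
// ---------------------------------------------------------------------------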
13573
13574static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13575 APFloat (*Op)(const APFloat &, const APFloat &)) {
13576 SDValue N0 = N->getOperand(0);
13577 SDValue N1 = N->getOperand(1);
13578 EVT VT = N->getValueType(0);
13579 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13580 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13581
13582 if (N0CFP && N1CFP) {
13583 const APFloat &C0 = N0CFP->getValueAPF();
13584 const APFloat &C1 = N1CFP->getValueAPF();
13585 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13586 }
13587
13588 // Canonicalize to constant on RHS.
13589 if (isConstantFPBuildVectorOrConstantFP(N0) &&
13590 !isConstantFPBuildVectorOrConstantFP(N1))
13591 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13592
13593 return SDValue();
13594}
13595
13596SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13597 return visitFMinMax(DAG, N, minnum);
13598}
13599
13600SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13601 return visitFMinMax(DAG, N, maxnum);
13602}
13603
13604SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13605 return visitFMinMax(DAG, N, minimum);
13606}
13607
13608SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13609 return visitFMinMax(DAG, N, maximum);
13610}
13611
13612SDValue DAGCombiner::visitFABS(SDNode *N) {
13613 SDValue N0 = N->getOperand(0);
13614 EVT VT = N->getValueType(0);
13615
13616 // fold (fabs c1) -> fabs(c1)
13617 if (isConstantFPBuildVectorOrConstantFP(N0))
13618 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13619
13620 // fold (fabs (fabs x)) -> (fabs x)
13621 if (N0.getOpcode() == ISD::FABS)
13622 return N->getOperand(0);
13623
13624 // fold (fabs (fneg x)) -> (fabs x)
13625 // fold (fabs (fcopysign x, y)) -> (fabs x)
13626 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13627 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13628
13629 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13630 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13631 SDValue Int = N0.getOperand(0);
13632 EVT IntVT = Int.getValueType();
13633 if (IntVT.isInteger() && !IntVT.isVector()) {
13634 APInt SignMask;
13635 if (N0.getValueType().isVector()) {
13636 // For a vector, get a mask such as 0x7f... per scalar element
13637 // and splat it.
13638 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
13639 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13640 } else {
13641 // For a scalar, just generate 0x7f...
13642 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
13643 }
13644 SDLoc DL(N0);
13645 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
13646 DAG.getConstant(SignMask, DL, IntVT));
13647 AddToWorklist(Int.getNode());
13648 return DAG.getBitcast(N->getValueType(0), Int);
13649 }
13650 }
13651
13652 return SDValue();
13653}
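// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the companion FABS form
// built above -- AND-ing with the complement of the sign mask clears the
// sign bit instead of flipping it.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
static void sketchFabsViaAnd() {
  float F = -13.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(F));
  Bits &= UINT32_C(0x7fffffff); // clear the sign bit
  float Abs;
  std::memcpy(&Abs, &Bits, sizeof(F));
  assert(Abs == std::fabs(F));
}
// ---------------------------------------------------------------------------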
13654
13655SDValue DAGCombiner::visitBRCOND(SDNode *N) {
13656 SDValue Chain = N->getOperand(0);
13657 SDValue N1 = N->getOperand(1);
13658 SDValue N2 = N->getOperand(2);
13659
13660 // If N is a constant we could fold this into a fallthrough or unconditional
13661 // branch. However that doesn't happen very often in normal code, because
13662 // Instcombine/SimplifyCFG should have handled the available opportunities.
13663 // If we did this folding here, it would be necessary to update the
13664 // MachineBasicBlock CFG, which is awkward.
13665
13666 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
13667 // on the target.
13668 if (N1.getOpcode() == ISD::SETCC &&
13669 TLI.isOperationLegalOrCustom(ISD::BR_CC,
13670 N1.getOperand(0).getValueType())) {
13671 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13672 Chain, N1.getOperand(2),
13673 N1.getOperand(0), N1.getOperand(1), N2);
13674 }
13675
13676 if (N1.hasOneUse()) {
13677 // rebuildSetCC calls visitXor which may change the Chain when there is a
13678 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
13679 HandleSDNode ChainHandle(Chain);
13680 if (SDValue NewN1 = rebuildSetCC(N1))
13681 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
13682 ChainHandle.getValue(), NewN1, N2);
13683 }
13684
13685 return SDValue();
13686}
13687
13688SDValue DAGCombiner::rebuildSetCC(SDValue N) {
13689 if (N.getOpcode() == ISD::SRL ||
13690 (N.getOpcode() == ISD::TRUNCATE &&
13691 (N.getOperand(0).hasOneUse() &&
13692 N.getOperand(0).getOpcode() == ISD::SRL))) {
13693 // Look past the truncate.
13694 if (N.getOpcode() == ISD::TRUNCATE)
13695 N = N.getOperand(0);
13696
13697 // Match this pattern so that we can generate simpler code:
13698 //
13699 // %a = ...
13700 // %b = and i32 %a, 2
13701 // %c = srl i32 %b, 1
13702 // brcond i32 %c ...
13703 //
13704 // into
13705 //
13706 // %a = ...
13707 // %b = and i32 %a, 2
13708 // %c = setcc eq %b, 0
13709 // brcond %c ...
13710 //
13711 // This applies only when the AND constant value has one bit set and the
13712 // SRL constant is equal to the log2 of the AND constant. The back-end is
13713 // smart enough to convert the result into a TEST/JMP sequence.
13714 SDValue Op0 = N.getOperand(0);
13715 SDValue Op1 = N.getOperand(1);
13716
13717 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13718 SDValue AndOp1 = Op0.getOperand(1);
13719
13720 if (AndOp1.getOpcode() == ISD::Constant) {
13721 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13722
13723 if (AndConst.isPowerOf2() &&
13724 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13725 SDLoc DL(N);
13726 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13727 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13728 ISD::SETNE);
13729 }
13730 }
13731 }
13732 }
13733
13734 // Transform br(xor(x, y)) -> br(x != y)
13735 // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13736 if (N.getOpcode() == ISD::XOR) {
13737 // Because we may call this on a speculatively constructed
13738 // SimplifiedSetCC Node, we need to simplify this node first.
13739 // Ideally this should be folded into SimplifySetCC and not
13740 // here. For now, grab a handle to N so we don't lose it from
13741 // replacements internal to the visit.
13742 HandleSDNode XORHandle(N);
13743 while (N.getOpcode() == ISD::XOR) {
13744 SDValue Tmp = visitXOR(N.getNode());
13745 // No simplification done.
13746 if (!Tmp.getNode())
13747 break;
13748 // Returning N is a form of in-visit replacement that may invalidate
13749 // N. Grab the value from the handle.
13750 if (Tmp.getNode() == N.getNode())
13751 N = XORHandle.getValue();
13752 else // Node simplified. Try simplifying again.
13753 N = Tmp;
13754 }
13755
13756 if (N.getOpcode() != ISD::XOR)
13757 return N;
13758
13759 SDNode *TheXor = N.getNode();
13760
13761 SDValue Op0 = TheXor->getOperand(0);
13762 SDValue Op1 = TheXor->getOperand(1);
13763
13764 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13765 bool Equal = false;
13766 if (isOneConstant(Op0) && Op0.hasOneUse() &&
13767 Op0.getOpcode() == ISD::XOR) {
13768 TheXor = Op0.getNode();
13769 Equal = true;
13770 }
13771
13772 EVT SetCCVT = N.getValueType();
13773 if (LegalTypes)
13774 SetCCVT = getSetCCResultType(SetCCVT);
13775 // Replace the uses of XOR with SETCC
13776 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13777 Equal ? ISD::SETEQ : ISD::SETNE);
13778 }
13779 }
13780
13781 return SDValue();
13782}
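// ---------------------------------------------------------------------------
// Editorial sketch (not part of DAGCombiner.cpp): the srl-of-and pattern
// matched above. (a & 2) >> 1 is nonzero exactly when (a & 2) != 0, since
// the shift amount equals log2 of the single-bit AND mask.
#include <cassert>
static void sketchSrlAndToSetcc() {
  for (unsigned A = 0; A < 16; ++A)
    assert((((A & 2u) >> 1) != 0) == ((A & 2u) != 0));
}
// ---------------------------------------------------------------------------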
13783
13784// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13785//
13786SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13787 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13788 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13789
13790 // If N is a constant we could fold this into a fallthrough or unconditional
13791 // branch. However that doesn't happen very often in normal code, because
13792 // Instcombine/SimplifyCFG should have handled the available opportunities.
13793 // If we did this folding here, it would be necessary to update the
13794 // MachineBasicBlock CFG, which is awkward.
13795
13796 // Use SimplifySetCC to simplify SETCC's.
13797 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13798 CondLHS, CondRHS, CC->get(), SDLoc(N),
13799 false);
13800 if (Simp.getNode()) AddToWorklist(Simp.getNode());
13801
13802 // fold to a simpler setcc
13803 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13804 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13805 N->getOperand(0), Simp.getOperand(2),
13806 Simp.getOperand(0), Simp.getOperand(1),
13807 N->getOperand(4));
13808
13809 return SDValue();
13810}
13811
13812/// Return true if 'Use' is a load or a store that uses N as its base pointer
13813/// and that N may be folded in the load / store addressing mode.
13814static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13815 SelectionDAG &DAG,
13816 const TargetLowering &TLI) {
13817 EVT VT;
13818 unsigned AS;
13819
13820 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
13821 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13822 return false;
13823 VT = LD->getMemoryVT();
13824 AS = LD->getAddressSpace();
13825 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
13826 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13827 return false;
13828 VT = ST->getMemoryVT();
13829 AS = ST->getAddressSpace();
13830 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
13831 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13832 return false;
13833 VT = LD->getMemoryVT();
13834 AS = LD->getAddressSpace();
13835 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
13836 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13837 return false;
13838 VT = ST->getMemoryVT();
13839 AS = ST->getAddressSpace();
13840 } else
13841 return false;
13842
13843 TargetLowering::AddrMode AM;
13844 if (N->getOpcode() == ISD::ADD) {
13845 AM.HasBaseReg = true;
13846 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13847 if (Offset)
13848 // [reg +/- imm]
13849 AM.BaseOffs = Offset->getSExtValue();
13850 else
13851 // [reg +/- reg]
13852 AM.Scale = 1;
13853 } else if (N->getOpcode() == ISD::SUB) {
13854 AM.HasBaseReg = true;
13855 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13856 if (Offset)
13857 // [reg +/- imm]
13858 AM.BaseOffs = -Offset->getSExtValue();
13859 else
13860 // [reg +/- reg]
13861 AM.Scale = 1;
13862 } else
13863 return false;
13864
13865 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13866 VT.getTypeForEVT(*DAG.getContext()), AS);
13867}
13868
13869static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
13870 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
13871 const TargetLowering &TLI) {
13872 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13873 if (LD->isIndexed())
13874 return false;
13875 EVT VT = LD->getMemoryVT();
13876 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
13877 return false;
13878 Ptr = LD->getBasePtr();
13879 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13880 if (ST->isIndexed())
13881 return false;
13882 EVT VT = ST->getMemoryVT();
13883 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
13884 return false;
13885 Ptr = ST->getBasePtr();
13886 IsLoad = false;
13887 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
13888 if (LD->isIndexed())
13889 return false;
13890 EVT VT = LD->getMemoryVT();
13891 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
13892 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
13893 return false;
13894 Ptr = LD->getBasePtr();
13895 IsMasked = true;
13896 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
13897 if (ST->isIndexed())
13898 return false;
13899 EVT VT = ST->getMemoryVT();
13900 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
13901 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
13902 return false;
13903 Ptr = ST->getBasePtr();
13904 IsLoad = false;
13905 IsMasked = true;
13906 } else {
13907 return false;
13908 }
13909 return true;
13910}
13911
13912/// Try turning a load/store into a pre-indexed load/store when the base
13913/// pointer is an add or subtract and it has other uses besides the load/store.
13914/// After the transformation, the new indexed load/store has effectively folded
13915/// the add/subtract in and all of its other uses are redirected to the
13916/// new load/store.
13917bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13918 if (Level < AfterLegalizeDAG)
13919 return false;
13920
13921 bool IsLoad = true;
13922 bool IsMasked = false;
13923 SDValue Ptr;
13924 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
13925 Ptr, TLI))
13926 return false;
13927
13928 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13929 // out. There is no reason to make this a preinc/predec.
13930 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13931 Ptr.getNode()->hasOneUse())
13932 return false;
13933
13934 // Ask the target to do addressing mode selection.
13935 SDValue BasePtr;
13936 SDValue Offset;
13937 ISD::MemIndexedMode AM = ISD::UNINDEXED;
13938 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13939 return false;
13940
13941 // Backends without true r+i pre-indexed forms may need to pass a
13942 // constant base with a variable offset so that constant coercion
13943 // will work with the patterns in canonical form.
13944 bool Swapped = false;
13945 if (isa<ConstantSDNode>(BasePtr)) {
13946 std::swap(BasePtr, Offset);
13947 Swapped = true;
13948 }
13949
13950 // Don't create an indexed load / store with zero offset.
13951 if (isNullConstant(Offset))
13952 return false;
13953
13954 // Try turning it into a pre-indexed load / store except when:
13955 // 1) The new base ptr is a frame index.
13956 // 2) If N is a store and the new base ptr is either the same as or is a
13957 // predecessor of the value being stored.
13958 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13959 // that would create a cycle.
13960 // 4) All uses are load / store ops that use it as old base ptr.
13961
13962 // Check #1. Preinc'ing a frame index would require copying the stack pointer
13963 // (plus the implicit offset) to a register to preinc anyway.
13964 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13965 return false;
13966
13967 // Check #2.
13968 if (!IsLoad) {
13969 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
13970 : cast<StoreSDNode>(N)->getValue();
13971
13972 // Would require a copy.
13973 if (Val == BasePtr)
13974 return false;
13975
13976 // Would create a cycle.
13977 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13978 return false;
13979 }
13980
13981 // Caches for hasPredecessorHelper.
13982 SmallPtrSet<const SDNode *, 32> Visited;
13983 SmallVector<const SDNode *, 16> Worklist;
13984 Worklist.push_back(N);
13985
13986 // If the offset is a constant, there may be other adds of constants that
13987 // can be folded with this one. We should do this to avoid having to keep
13988 // a copy of the original base pointer.
13989 SmallVector<SDNode *, 16> OtherUses;
13990 if (isa<ConstantSDNode>(Offset))
13991 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13992 UE = BasePtr.getNode()->use_end();
13993 UI != UE; ++UI) {
13994 SDUse &Use = UI.getUse();
13995 // Skip the use that is Ptr and uses of other results from BasePtr's
13996 // node (important for nodes that return multiple results).
13997 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13998 continue;
13999
14000 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
14001 continue;
14002
14003 if (Use.getUser()->getOpcode() != ISD::ADD &&
14004 Use.getUser()->getOpcode() != ISD::SUB) {
14005 OtherUses.clear();
14006 break;
14007 }
14008
14009 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
14010 if (!isa<ConstantSDNode>(Op1)) {
14011 OtherUses.clear();
14012 break;
14013 }
14014
14015 // FIXME: In some cases, we can be smarter about this.
14016 if (Op1.getValueType() != Offset.getValueType()) {
14017 OtherUses.clear();
14018 break;
14019 }
14020
14021 OtherUses.push_back(Use.getUser());
14022 }
14023
14024 if (Swapped)
14025 std::swap(BasePtr, Offset);
14026
14027 // Now check for #3 and #4.
14028 bool RealUse = false;
14029
14030 for (SDNode *Use : Ptr.getNode()->uses()) {
14031 if (Use == N)
14032 continue;
14033 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
14034 return false;
14035
14036 // If Ptr may be folded in addressing mode of other use, then it's
14037 // not profitable to do this transformation.
14038 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
14039 RealUse = true;
14040 }
14041
14042 if (!RealUse)
14043 return false;
14044
14045 SDValue Result;
14046 if (!IsMasked) {
14047 if (IsLoad)
14048 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14049 else
14050 Result =
14051 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14052 } else {
14053 if (IsLoad)
14054 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14055 Offset, AM);
14056 else
14057 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
14058 Offset, AM);
14059 }
14060 ++PreIndexedNodes;
14061 ++NodesCombined;
14062 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
14063 Result.getNode()->dump(&DAG); dbgs() << '\n');
14064 WorklistRemover DeadNodes(*this);
14065 if (IsLoad) {
14066 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14067 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14068 } else {
14069 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14070 }
14071
14072 // Finally, since the node is now dead, remove it from the graph.
14073 deleteAndRecombine(N);
14074
14075 if (Swapped)
14076 std::swap(BasePtr, Offset);
14077
14078 // Replace other uses of BasePtr that can be updated to use Ptr
14079 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
14080 unsigned OffsetIdx = 1;
14081 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
14082 OffsetIdx = 0;
14083 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
14084 BasePtr.getNode() && "Expected BasePtr operand");
14086 // We need to replace ptr0 in the following expression:
14087 // x0 * offset0 + y0 * ptr0 = t0
14088 // knowing that
14089 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
14090 //
14091 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
14092 // indexed load/store and the expression that needs to be re-written.
14093 //
14094 // Therefore, we have:
14095 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
14096
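// Worked instance (editorial, hypothetical values): let the combined access
// be pre-incrementing, t1 = ptr0 + 4 (so x1 = y1 = 1), and the other use be
// t0 = ptr0 + 8 (so x0 = y0 = 1). The formula gives
// t0 = (8 - 4) + t1 = t1 + 4, and substituting t1 = ptr0 + 4 back in
// indeed recovers t0 = ptr0 + 8.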
14097 ConstantSDNode *CN =
14098 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
14099 int X0, X1, Y0, Y1;
14100 const APInt &Offset0 = CN->getAPIntValue();
14101 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
14102
14103 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
14104 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
14105 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
14106 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
14107
14108 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
14109
14110 APInt CNV = Offset0;
14111 if (X0 < 0) CNV = -CNV;
14112 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
14113 else CNV = CNV - Offset1;
14114
14115 SDLoc DL(OtherUses[i]);
14116
14117 // We can now generate the new expression.
14118 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
14119 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
14120
14121 SDValue NewUse = DAG.getNode(Opcode,
14122 DL,
14123 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
14124 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
14125 deleteAndRecombine(OtherUses[i]);
14126 }
14127
14128 // Replace the uses of Ptr with uses of the updated base value.
14129 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
14130 deleteAndRecombine(Ptr.getNode());
14131 AddToWorklist(Result.getNode());
14132
14133 return true;
14134}
14135
14136 /// Try to combine a load/store with an add/sub of the base pointer node into a
14137 /// post-indexed load/store. The transformation effectively folds the add/subtract
14138 /// into the new indexed load/store, and all of its uses are redirected to the
14139 /// new load/store.
14140bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
14141 if (Level < AfterLegalizeDAG)
14142 return false;
14143
14144 bool IsLoad = true;
14145 bool IsMasked = false;
14146 SDValue Ptr;
14147 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
14148 Ptr, TLI))
14149 return false;
14150
14151 if (Ptr.getNode()->hasOneUse())
14152 return false;
14153
14154 for (SDNode *Op : Ptr.getNode()->uses()) {
14155 if (Op == N ||
14156 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
14157 continue;
14158
14159 SDValue BasePtr;
14160 SDValue Offset;
14161 ISD::MemIndexedMode AM = ISD::UNINDEXED;
14162 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
14163 // Don't create an indexed load / store with zero offset.
14164 if (isNullConstant(Offset))
14165 continue;
14166
14167 // Try turning it into a post-indexed load / store except when
14168 // 1) All uses are load / store ops that use it as base ptr (and
14169 // it may be folded into the addressing mode).
14170 // 2) Op must be independent of N, i.e. Op is neither a predecessor
14171 // nor a successor of N. Otherwise, if Op is folded that would
14172 // create a cycle.
14173
14174 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14175 continue;
14176
14177 // Check for #1.
14178 bool TryNext = false;
14179 for (SDNode *Use : BasePtr.getNode()->uses()) {
14180 if (Use == Ptr.getNode())
14181 continue;
14182
14183 // If all the uses are load / store addresses, then don't do the
14184 // transformation.
14185 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
14186 bool RealUse = false;
14187 for (SDNode *UseUse : Use->uses()) {
14188 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
14189 RealUse = true;
14190 }
14191
14192 if (!RealUse) {
14193 TryNext = true;
14194 break;
14195 }
14196 }
14197 }
14198
14199 if (TryNext)
14200 continue;
14201
14202 // Check for #2.
14203 SmallPtrSet<const SDNode *, 32> Visited;
14204 SmallVector<const SDNode *, 8> Worklist;
14205 // Ptr is predecessor to both N and Op.
14206 Visited.insert(Ptr.getNode());
14207 Worklist.push_back(N);
14208 Worklist.push_back(Op);
14209 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
14210 !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
14211 SDValue Result;
14212 if (!IsMasked)
14213 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14214 Offset, AM)
14215 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
14216 BasePtr, Offset, AM);
14217 else
14218 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
14219 BasePtr, Offset, AM)
14220 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
14221 BasePtr, Offset, AM);
14222 ++PostIndexedNodes;
14223 ++NodesCombined;
14224 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
14225 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
14226 dbgs() << '\n');
14227 WorklistRemover DeadNodes(*this);
14228 if (IsLoad) {
14229 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14230 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14231 } else {
14232 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14233 }
14234
14235 // Finally, since the node is now dead, remove it from the graph.
14236 deleteAndRecombine(N);
14237
14238 // Replace the uses of Use with uses of the updated base value.
14239 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
14240 Result.getValue(IsLoad ? 1 : 0));
14241 deleteAndRecombine(Op);
14242 return true;
14243 }
14244 }
14245 }
14246
14247 return false;
14248}
14249
14250/// Return the base-pointer arithmetic from an indexed \p LD.
14251SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14252 ISD::MemIndexedMode AM = LD->getAddressingMode();
14253 assert(AM != ISD::UNINDEXED);
14254 SDValue BP = LD->getOperand(1);
14255 SDValue Inc = LD->getOperand(2);
14256
14257 // Some backends use TargetConstants for load offsets, but don't expect
14258 // TargetConstants in general ADD nodes. We can convert these constants into
14259 // regular Constants (if the constant is not opaque).
14260 assert((Inc.getOpcode() != ISD::TargetConstant ||
14261 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14262 "Cannot split out indexing using opaque target constants");
14263 if (Inc.getOpcode() == ISD::TargetConstant) {
14264 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14265 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14266 ConstInc->getValueType(0));
14267 }
14268
14269 unsigned Opc =
14270 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14271 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14272}
14273
14274static inline int numVectorEltsOrZero(EVT T) {
14275 return T.isVector() ? T.getVectorNumElements() : 0;
14276}
14277
14278bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
14279 Val = ST->getValue();
14280 EVT STType = Val.getValueType();
14281 EVT STMemType = ST->getMemoryVT();
14282 if (STType == STMemType)
14283 return true;
14284 if (isTypeLegal(STMemType))
14285 return false; // fail.
14286 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
14287 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
14288 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
14289 return true;
14290 }
14291 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
14292 STType.isInteger() && STMemType.isInteger()) {
14293 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
14294 return true;
14295 }
14296 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
14297 Val = DAG.getBitcast(STMemType, Val);
14298 return true;
14299 }
14300 return false; // fail.
14301}
14302
14303bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14304 EVT LDMemType = LD->getMemoryVT();
14305 EVT LDType = LD->getValueType(0);
14306 assert(Val.getValueType() == LDMemType &&
14307 "Attempting to extend value of non-matching type");
14308 if (LDType == LDMemType)
14309 return true;
14310 if (LDMemType.isInteger() && LDType.isInteger()) {
14311 switch (LD->getExtensionType()) {
14312 case ISD::NON_EXTLOAD:
14313 Val = DAG.getBitcast(LDType, Val);
14314 return true;
14315 case ISD::EXTLOAD:
14316 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14317 return true;
14318 case ISD::SEXTLOAD:
14319 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14320 return true;
14321 case ISD::ZEXTLOAD:
14322 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14323 return true;
14324 }
14325 }
14326 return false;
14327}
14328
14329SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14330 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
14331 return SDValue();
14332 SDValue Chain = LD->getOperand(0);
14333 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14334 // TODO: Relax this restriction for unordered atomics (see D66309)
14335 if (!ST || !ST->isSimple())
14336 return SDValue();
14337
14338 EVT LDType = LD->getValueType(0);
14339 EVT LDMemType = LD->getMemoryVT();
14340 EVT STMemType = ST->getMemoryVT();
14341 EVT STType = ST->getValue().getValueType();
14342
14343 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14344 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14345 int64_t Offset;
14346 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14347 return SDValue();
14348
14349 // Normalize for endianness. After this, Offset=0 denotes that the least
14350 // significant bit in the loaded value maps to the least significant bit in
14351 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
14352 // n:th least significant byte of the stored value.
14353 if (DAG.getDataLayout().isBigEndian())
14354 Offset = ((int64_t)STMemType.getStoreSizeInBits() -
14355 (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
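// Worked instance (editorial, hypothetical sizes): a 4-byte store and a
// 1-byte load with equal base pointers start with Offset = 0. On a
// big-endian target the line above yields (32 - 8) / 8 - 0 = 3, i.e. the
// loaded byte is the 3rd least significant byte of the stored value,
// matching the convention just described.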
14356
14357 // Check that the stored value covers all bits that are loaded.
14358 bool STCoversLD =
14359 (Offset >= 0) &&
14360 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
14361
14362 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14363 if (LD->isIndexed()) {
14364 bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
14365 LD->getAddressingMode() == ISD::POST_DEC);
14366 unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
14367 SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
14368 LD->getOperand(1), LD->getOperand(2));
14369 SDValue Ops[] = {Val, Idx, Chain};
14370 return CombineTo(LD, Ops, 3);
14371 }
14372 return CombineTo(LD, Val, Chain);
14373 };
14374
14375 if (!STCoversLD)
14376 return SDValue();
14377
14378 // Memory as copy space (potentially masked).
14379 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14380 // Simple case: Direct non-truncating forwarding
14381 if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14382 return ReplaceLd(LD, ST->getValue(), Chain);
14383 // Can we model the truncate and extension with an and mask?
14384 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14385 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14386 // Mask to size of LDMemType
14387 auto Mask =
14388 DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
14389 STMemType.getSizeInBits()),
14390 SDLoc(ST), STType);
14391 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14392 return ReplaceLd(LD, Val, Chain);
14393 }
14394 }
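// Illustrative case (annotation, not part of the original source): for a
// truncating store of an i32 value as i16 followed by an i16->i32 zextload
// of the same address, the mask is the low 16 bits of the store type, so
// the load is replaced by (and StoredVal, 0xFFFF) with no memory round trip.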
14395
14396 // TODO: Deal with nonzero offset.
14397 if (LD->getBasePtr().isUndef() || Offset != 0)
14398 return SDValue();
14399 // Model necessary truncations / extensions.
14400 SDValue Val;
14401 // Truncate the value to the stored memory size.
14402 do {
14403 if (!getTruncatedStoreValue(ST, Val))
14404 continue;
14405 if (!isTypeLegal(LDMemType))
14406 continue;
14407 if (STMemType != LDMemType) {
14408 // TODO: Support vectors? This requires extract_subvector/bitcast.
14409 if (!STMemType.isVector() && !LDMemType.isVector() &&
14410 STMemType.isInteger() && LDMemType.isInteger())
14411 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
14412 else
14413 continue;
14414 }
14415 if (!extendLoadedValueToExtension(LD, Val))
14416 continue;
14417 return ReplaceLd(LD, Val, Chain);
14418 } while (false);
14419
14420 // On failure, cleanup dead nodes we may have created.
14421 if (Val->use_empty())
14422 deleteAndRecombine(Val.getNode());
14423 return SDValue();
14424}
14425
14426SDValue DAGCombiner::visitLOAD(SDNode *N) {
14427 LoadSDNode *LD = cast<LoadSDNode>(N);
14428 SDValue Chain = LD->getChain();
14429 SDValue Ptr = LD->getBasePtr();
14430
14431 // If load is not volatile and there are no uses of the loaded value (and
14432 // the updated indexed value in case of indexed loads), change uses of the
14433 // chain value into uses of the chain input (i.e. delete the dead load).
14434 // TODO: Allow this for unordered atomics (see D66309)
14435 if (LD->isSimple()) {
14436 if (N->getValueType(1) == MVT::Other) {
14437 // Unindexed loads.
14438 if (!N->hasAnyUseOfValue(0)) {
14439 // It's not safe to use the two-value CombineTo variant here, e.g.:
14440 // v1, chain2 = load chain1, loc
14441 // v2, chain3 = load chain2, loc
14442 // v3 = add v2, c
14443 // Now we replace use of chain2 with chain1. This makes the second load
14444 // isomorphic to the one we are deleting, and thus makes this load live.
14445 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14446 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14447 dbgs() << "\n");
14448 WorklistRemover DeadNodes(*this);
14449 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14450 AddUsersToWorklist(Chain.getNode());
14451 if (N->use_empty())
14452 deleteAndRecombine(N);
14453
14454 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14455 }
14456 } else {
14457 // Indexed loads.
14458 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14459
14460 // If this load has an opaque TargetConstant offset, then we cannot split
14461 // the indexing into an add/sub directly (that TargetConstant may not be
14462 // valid for a different type of node, and we cannot convert an opaque
14463 // target constant into a regular constant).
14464 bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
14465 cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
14466
14467 if (!N->hasAnyUseOfValue(0) &&
14468 ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
14469 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14470 SDValue Index;
14471 if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
14472 Index = SplitIndexingFromLoad(LD);
14473 // Try to fold the base pointer arithmetic into subsequent loads and
14474 // stores.
14475 AddUsersToWorklist(N);
14476 } else
14477 Index = DAG.getUNDEF(N->getValueType(1));
14478 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14479 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14480 dbgs() << " and 2 other values\n");
14481 WorklistRemover DeadNodes(*this);
14482 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14483 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14484 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14485 deleteAndRecombine(N);
14486 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14487 }
14488 }
14489 }
14490
14491 // If this load is directly stored, replace the load value with the stored
14492 // value.
14493 if (auto V = ForwardStoreValueToDirectLoad(LD))
14494 return V;
14495
14496 // Try to infer better alignment information than the load already has.
14497 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
14498 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14499 if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
14500 SDValue NewLoad = DAG.getExtLoad(
14501 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14502 LD->getPointerInfo(), LD->getMemoryVT(), Align,
14503 LD->getMemOperand()->getFlags(), LD->getAAInfo());
14504 // NewLoad will always be N as we are only refining the alignment
14505 assert(NewLoad.getNode() == N);
14506 (void)NewLoad;
14507 }
14508 }
14509 }
14510
14511 if (LD->isUnindexed()) {
14512 // Walk up chain skipping non-aliasing memory nodes.
14513 SDValue BetterChain = FindBetterChain(LD, Chain);
14514
14515 // If there is a better chain.
14516 if (Chain != BetterChain) {
14517 SDValue ReplLoad;
14518
14519 // Replace the chain to avoid the dependency.
14520 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14521 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14522 BetterChain, Ptr, LD->getMemOperand());
14523 } else {
14524 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14525 LD->getValueType(0),
14526 BetterChain, Ptr, LD->getMemoryVT(),
14527 LD->getMemOperand());
14528 }
14529
14530 // Create token factor to keep old chain connected.
14531 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14532 MVT::Other, Chain, ReplLoad.getValue(1));
14533
14534 // Replace uses with load result and token factor
14535 return CombineTo(N, ReplLoad.getValue(0), Token);
14536 }
14537 }
14538
14539 // Try transforming N to an indexed load.
14540 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14541 return SDValue(N, 0);
14542
14543 // Try to slice up N to more direct loads if the slices are mapped to
14544 // different register banks or pairing can take place.
14545 if (SliceUpLoad(N))
14546 return SDValue(N, 0);
14547
14548 return SDValue();
14549}
14550
14551namespace {
14552
14553/// Helper structure used to slice a load in smaller loads.
14554/// Basically a slice is obtained from the following sequence:
14555/// Origin = load Ty1, Base
14556/// Shift = srl Ty1 Origin, CstTy Amount
14557/// Inst = trunc Shift to Ty2
14558///
14559/// Then, it will be rewritten into:
14560/// Slice = load SliceTy, Base + SliceOffset
14561/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14562///
14563/// SliceTy is deduced from the number of bits that are actually used to
14564/// build Inst.
14565struct LoadedSlice {
14566 /// Helper structure used to compute the cost of a slice.
14567 struct Cost {
14568 /// Are we optimizing for code size.
14569 bool ForCodeSize = false;
14570
14571 /// Various costs.
14572 unsigned Loads = 0;
14573 unsigned Truncates = 0;
14574 unsigned CrossRegisterBanksCopies = 0;
14575 unsigned ZExts = 0;
14576 unsigned Shift = 0;
14577
14578 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14579
14580 /// Get the cost of one isolated slice.
14581 Cost(const LoadedSlice &LS, bool ForCodeSize)
14582 : ForCodeSize(ForCodeSize), Loads(1) {
14583 EVT TruncType = LS.Inst->getValueType(0);
14584 EVT LoadedType = LS.getLoadedType();
14585 if (TruncType != LoadedType &&
14586 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14587 ZExts = 1;
14588 }
14589
14590 /// Account for slicing gain in the current cost.
14591 /// Slicing provides a few gains, like removing a shift or a
14592 /// truncate. This method grows the cost of the original
14593 /// load by the gain from this slice.
14594 void addSliceGain(const LoadedSlice &LS) {
14595 // Each slice saves a truncate.
14596 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14597 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14598 LS.Inst->getValueType(0)))
14599 ++Truncates;
14600 // If there is a shift amount, this slice gets rid of it.
14601 if (LS.Shift)
14602 ++Shift;
14603 // If this slice can merge a cross register bank copy, account for it.
14604 if (LS.canMergeExpensiveCrossRegisterBankCopy())
14605 ++CrossRegisterBanksCopies;
14606 }
14607
14608 Cost &operator+=(const Cost &RHS) {
14609 Loads += RHS.Loads;
14610 Truncates += RHS.Truncates;
14611 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14612 ZExts += RHS.ZExts;
14613 Shift += RHS.Shift;
14614 return *this;
14615 }
14616
14617 bool operator==(const Cost &RHS) const {
14618 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14619 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14620 ZExts == RHS.ZExts && Shift == RHS.Shift;
14621 }
14622
14623 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14624
14625 bool operator<(const Cost &RHS) const {
14626 // Assume cross register banks copies are as expensive as loads.
14627 // FIXME: Do we want some more target hooks?
14628 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14629 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14630 // Unless we are optimizing for code size, consider the
14631 // expensive operation first.
14632 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14633 return ExpensiveOpsLHS < ExpensiveOpsRHS;
14634 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14635 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14636 }
14637
14638 bool operator>(const Cost &RHS) const { return RHS < *this; }
14639
14640 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14641
14642 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14643 };
14644
14645 // The last instruction that represents the slice. This should be a
14646 // truncate instruction.
14647 SDNode *Inst;
14648
14649 // The original load instruction.
14650 LoadSDNode *Origin;
14651
14652 // The right shift amount in bits from the original load.
14653 unsigned Shift;
14654
14655 // The DAG from which Origin came.
14656 // This is used to get some contextual information about legal types, etc.
14657 SelectionDAG *DAG;
14658
14659 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
14660 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
14661 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14662
14663 /// Get the bits used in a chunk of bits \p BitWidth large.
14664 /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
14665 /// unused bits set to 0.
14666 APInt getUsedBits() const {
14667 // Reproduce the trunc(lshr) sequence:
14668 // - Start from the truncated value.
14669 // - Zero extend to the desired bit width.
14670 // - Shift left.
14671 assert(Origin && "No original load to compare against.");
14672 unsigned BitWidth = Origin->getValueSizeInBits(0);
14673 assert(Inst && "This slice is not bound to an instruction");
14674 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14675 "Extracted slice is bigger than the whole type!");
14676 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14677 UsedBits.setAllBits();
14678 UsedBits = UsedBits.zext(BitWidth);
14679 UsedBits <<= Shift;
14680 return UsedBits;
14681 }
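// Worked example (annotation, not part of the original source): for an i32
// origin load (BitWidth = 32), an i8 truncate (Inst width 8), and
// Shift = 16: UsedBits = zext(0xFF, 32) << 16 = 0x00FF0000.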
14682
14683 /// Get the size of the slice to be loaded in bytes.
14684 unsigned getLoadedSize() const {
14685 unsigned SliceSize = getUsedBits().countPopulation();
14686 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14687 return SliceSize / 8;
14688 }
14689
14690 /// Get the type that will be loaded for this slice.
14691 /// Note: This may not be the final type for the slice.
14692 EVT getLoadedType() const {
14693 assert(DAG && "Missing context");
14694 LLVMContext &Ctxt = *DAG->getContext();
14695 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14696 }
14697
14698 /// Get the alignment of the load used for this slice.
14699 unsigned getAlignment() const {
14700 unsigned Alignment = Origin->getAlignment();
14701 uint64_t Offset = getOffsetFromBase();
14702 if (Offset != 0)
14703 Alignment = MinAlign(Alignment, Alignment + Offset);
14704 return Alignment;
14705 }
14706
14707 /// Check if this slice can be rewritten with legal operations.
14708 bool isLegal() const {
14709 // An invalid slice is not legal.
14710 if (!Origin || !Inst || !DAG)
14711 return false;
14712
14713 // Offsets apply to indexed loads only; we do not handle those.
14714 if (!Origin->getOffset().isUndef())
14715 return false;
14716
14717 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14718
14719 // Check that the type is legal.
14720 EVT SliceType = getLoadedType();
14721 if (!TLI.isTypeLegal(SliceType))
14722 return false;
14723
14724 // Check that the load is legal for this type.
14725 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
14726 return false;
14727
14728 // Check that the offset can be computed.
14729 // 1. Check its type.
14730 EVT PtrType = Origin->getBasePtr().getValueType();
14731 if (PtrType == MVT::Untyped || PtrType.isExtended())
14732 return false;
14733
14734 // 2. Check that it fits in the immediate.
14735 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
14736 return false;
14737
14738 // 3. Check that the computation is legal.
14739 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
14740 return false;
14741
14742 // Check that the zext is legal if it needs one.
14743 EVT TruncateType = Inst->getValueType(0);
14744 if (TruncateType != SliceType &&
14745 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
14746 return false;
14747
14748 return true;
14749 }
14750
14751 /// Get the offset in bytes of this slice in the original chunk of
14752 /// bits.
14753 /// \pre DAG != nullptr.
14754 uint64_t getOffsetFromBase() const {
14755 assert(DAG && "Missing context.");
14756 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
14757 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
14758 uint64_t Offset = Shift / 8;
14759 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
14760 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
14761 "The size of the original loaded type is not a multiple of a"
14762 " byte.");
14763 // If Offset is bigger than TySizeInBytes, it means we are loading all
14764 // zeros. That should have been optimized away earlier in the process.
14765 assert(TySizeInBytes > Offset &&
14766 "Invalid shift amount for given loaded size");
14767 if (IsBigEndian)
14768 Offset = TySizeInBytes - Offset - getLoadedSize();
14769 return Offset;
14770 }
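// Worked example (annotation, not part of the original source): for an i32
// origin (4 bytes), Shift = 16, and a 1-byte slice, the little-endian
// offset is 16 / 8 = 2; on a big-endian target it becomes 4 - 2 - 1 = 1.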
14771
14772 /// Generate the sequence of instructions to load the slice
14773 /// represented by this object and redirect the uses of this slice to
14774 /// this new sequence of instructions.
14775 /// \pre this->Inst && this->Origin are valid Instructions and this
14776 /// object passed the legal check: LoadedSlice::isLegal returned true.
14777 /// \return The last instruction of the sequence used to load the slice.
14778 SDValue loadSlice() const {
14779 assert(Inst && Origin && "Unable to replace a non-existing slice.");
14780 const SDValue &OldBaseAddr = Origin->getBasePtr();
14781 SDValue BaseAddr = OldBaseAddr;
14782 // Get the offset in that chunk of bytes w.r.t. the endianness.
14783 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
14784 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
14785 if (Offset) {
14786 // BaseAddr = BaseAddr + Offset.
14787 EVT ArithType = BaseAddr.getValueType();
14788 SDLoc DL(Origin);
14789 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
14790 DAG->getConstant(Offset, DL, ArithType));
14791 }
14792
14793 // Create the type of the loaded slice according to its size.
14794 EVT SliceType = getLoadedType();
14795
14796 // Create the load for the slice.
14797 SDValue LastInst =
14798 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
14799 Origin->getPointerInfo().getWithOffset(Offset),
14800 getAlignment(), Origin->getMemOperand()->getFlags());
14801 // If the final type is not the same as the loaded type, this means that
14802 // we have to pad with zero. Create a zero extend for that.
14803 EVT FinalType = Inst->getValueType(0);
14804 if (SliceType != FinalType)
14805 LastInst =
14806 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
14807 return LastInst;
14808 }
14809
14810 /// Check if this slice can be merged with an expensive cross register
14811 /// bank copy. E.g.,
14812 /// i = load i32
14813 /// f = bitcast i32 i to float
14814 bool canMergeExpensiveCrossRegisterBankCopy() const {
14815 if (!Inst || !Inst->hasOneUse())
14816 return false;
14817 SDNode *Use = *Inst->use_begin();
14818 if (Use->getOpcode() != ISD::BITCAST)
14819 return false;
14820 assert(DAG && "Missing context");
14821 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14822 EVT ResVT = Use->getValueType(0);
14823 const TargetRegisterClass *ResRC =
14824 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14825 const TargetRegisterClass *ArgRC =
14826 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14827 Use->getOperand(0)->isDivergent());
14828 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14829 return false;
14830
14831 // At this point, we know that we perform a cross-register-bank copy.
14832 // Check if it is expensive.
14833 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
14834 // Assume bitcasts are cheap, unless the two register classes do not
14835 // explicitly share a common subclass.
14836 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14837 return false;
14838
14839 // Check if it will be merged with the load.
14840 // 1. Check the alignment constraint.
14841 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14842 ResVT.getTypeForEVT(*DAG->getContext()));
14843
14844 if (RequiredAlignment > getAlignment())
14845 return false;
14846
14847 // 2. Check that the load is a legal operation for that type.
14848 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14849 return false;
14850
14851 // 3. Check that we do not have a zext in the way.
14852 if (Inst->getValueType(0) != getLoadedType())
14853 return false;
14854
14855 return true;
14856 }
14857};
14858
14859} // end anonymous namespace
14860
14861/// Check that all bits set in \p UsedBits form a dense region, i.e.,
14862/// \p UsedBits looks like 0..0 1..1 0..0.
14863static bool areUsedBitsDense(const APInt &UsedBits) {
14864 // If all the bits are one, this is dense!
14865 if (UsedBits.isAllOnesValue())
14866 return true;
14867
14868 // Get rid of the unused bits on the right.
14869 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14870 // Get rid of the unused bits on the left.
14871 if (NarrowedUsedBits.countLeadingZeros())
14872 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14873 // Check that the chunk of bits is completely used.
14874 return NarrowedUsedBits.isAllOnesValue();
14875}
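// Worked examples (annotation, not part of the original source):
// 0b00111100 -> lshr(2) = 0b00001111 -> trunc(4) = 0b1111, all ones: dense.
// 0b00110100 -> lshr(2) = 0b00001101 -> trunc(4) = 0b1101: not dense.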
14876
14877/// Check whether or not \p First and \p Second are next to each other
14878/// in memory. This means that there is no hole between the bits loaded
14879/// by \p First and the bits loaded by \p Second.
14880static bool areSlicesNextToEachOther(const LoadedSlice &First,
14881 const LoadedSlice &Second) {
14882 assert(First.Origin == Second.Origin && First.Origin &&
14883 "Unable to match different memory origins.");
14884 APInt UsedBits = First.getUsedBits();
14885 assert((UsedBits & Second.getUsedBits()) == 0 &&
14886 "Slices are not supposed to overlap.");
14887 UsedBits |= Second.getUsedBits();
14888 return areUsedBitsDense(UsedBits);
14889}
14890
14891/// Adjust the \p GlobalLSCost according to the target
14892/// pairing capabilities and the layout of the slices.
14893/// \pre \p GlobalLSCost should account for at least as many loads as
14894/// there are in the slices in \p LoadedSlices.
14895static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14896 LoadedSlice::Cost &GlobalLSCost) {
14897 unsigned NumberOfSlices = LoadedSlices.size();
14898 // If there are fewer than 2 elements, no pairing is possible.
14899 if (NumberOfSlices < 2)
14900 return;
14901
14902 // Sort the slices so that elements that are likely to be next to each
14903 // other in memory are next to each other in the list.
14904 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14905 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14906 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14907 });
14908 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14909 // First (resp. Second) is the first (resp. second) potential candidate
14910 // to be placed in a paired load.
14911 const LoadedSlice *First = nullptr;
14912 const LoadedSlice *Second = nullptr;
14913 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14914 // Set the beginning of the pair.
14915 First = Second) {
14916 Second = &LoadedSlices[CurrSlice];
14917
14918 // If First is NULL, it means we start a new pair.
14919 // Get to the next slice.
14920 if (!First)
14921 continue;
14922
14923 EVT LoadedType = First->getLoadedType();
14924
14925 // If the types of the slices are different, we cannot pair them.
14926 if (LoadedType != Second->getLoadedType())
14927 continue;
14928
14929 // Check if the target supplies paired loads for this type.
14930 unsigned RequiredAlignment = 0;
14931 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14932 // Move to the next pair; this type is hopeless.
14933 Second = nullptr;
14934 continue;
14935 }
14936 // Check if we meet the alignment requirement.
14937 if (RequiredAlignment > First->getAlignment())
14938 continue;
14939
14940 // Check that both loads are next to each other in memory.
14941 if (!areSlicesNextToEachOther(*First, *Second))
14942 continue;
14943
14944 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14945 --GlobalLSCost.Loads;
14946 // Move to the next pair.
14947 Second = nullptr;
14948 }
14949}
14950
14951/// Check the profitability of all involved LoadedSlice.
14952/// Currently, it is considered profitable if there are exactly two
14953/// involved slices (1) which are (2) next to each other in memory, and
14954/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14955///
14956/// Note: The order of the elements in \p LoadedSlices may be modified, but not
14957/// the elements themselves.
14958///
14959/// FIXME: When the cost model will be mature enough, we can relax
14960/// constraints (1) and (2).
14961static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14962 const APInt &UsedBits, bool ForCodeSize) {
14963 unsigned NumberOfSlices = LoadedSlices.size();
14964 if (StressLoadSlicing)
14965 return NumberOfSlices > 1;
14966
14967 // Check (1).
14968 if (NumberOfSlices != 2)
14969 return false;
14970
14971 // Check (2).
14972 if (!areUsedBitsDense(UsedBits))
14973 return false;
14974
14975 // Check (3).
14976 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14977 // The original code has one big load.
14978 OrigCost.Loads = 1;
14979 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14980 const LoadedSlice &LS = LoadedSlices[CurrSlice];
14981 // Accumulate the cost of all the slices.
14982 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14983 GlobalSlicingCost += SliceCost;
14984
14985 // Account as cost in the original configuration the gain obtained
14986 // with the current slices.
14987 OrigCost.addSliceGain(LS);
14988 }
14989
14990 // If the target supports paired load, adjust the cost accordingly.
14991 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14992 return OrigCost > GlobalSlicingCost;
14993}
14994
14995/// If the given load, \p LI, is used only by trunc or trunc(lshr)
14996/// operations, split it in the various pieces being extracted.
14997///
14998/// This sort of thing is introduced by SROA.
14999/// This slicing takes care not to insert overlapping loads.
15000/// \pre LI is a simple load (i.e., not an atomic or volatile load).
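/// For example (illustrative sketch, not part of the original source), on a
/// little-endian target:
///   Val = load i32, Base
///   Lo = trunc Val to i16
///   Hi = trunc (srl Val, 16) to i16
/// may be rewritten, when legal and profitable, into:
///   Lo = load i16, Base
///   Hi = load i16, Base + 2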
15001bool DAGCombiner::SliceUpLoad(SDNode *N) {
15002 if (Level < AfterLegalizeDAG)
15003 return false;
15004
15005 LoadSDNode *LD = cast<LoadSDNode>(N);
15006 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
15007 !LD->getValueType(0).isInteger())
15008 return false;
15009
15010 // The algorithm to split up a load of a scalable vector into individual
15011 // elements currently requires knowing the length of the loaded type,
15012 // so will need adjusting to work on scalable vectors.
15013 if (LD->getValueType(0).isScalableVector())
15014 return false;
15015
15016 // Keep track of already used bits to detect overlapping values.
15017 // In that case, we will just abort the transformation.
15018 APInt UsedBits(LD->getValueSizeInBits(0), 0);
15019
15020 SmallVector<LoadedSlice, 4> LoadedSlices;
15021
15022 // Check if this load is used as several smaller chunks of bits.
15023 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
15024 // of computation for each trunc.
15025 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
15026 UI != UIEnd; ++UI) {
15027 // Skip the uses of the chain.
15028 if (UI.getUse().getResNo() != 0)
15029 continue;
15030
15031 SDNode *User = *UI;
15032 unsigned Shift = 0;
15033
15034 // Check if this is a trunc(lshr).
15035 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
15036 isa<ConstantSDNode>(User->getOperand(1))) {
15037 Shift = User->getConstantOperandVal(1);
15038 User = *User->use_begin();
15039 }
15040
15041 // At this point, User is a truncate iff we encountered trunc or
15042 // trunc(lshr).
15043 if (User->getOpcode() != ISD::TRUNCATE)
15044 return false;
15045
15046 // The width of the type must be a power of 2 and at least 8 bits.
15047 // Otherwise the load cannot be represented in LLVM IR.
15048 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
15049 // the slice would straddle byte boundaries. We do not support that.
15050 unsigned Width = User->getValueSizeInBits(0);
15051 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
15052 return false;
15053
15054 // Build the slice for this chain of computations.
15055 LoadedSlice LS(User, LD, Shift, &DAG);
15056 APInt CurrentUsedBits = LS.getUsedBits();
15057
15058 // Check if this slice overlaps with another.
15059 if ((CurrentUsedBits & UsedBits) != 0)
15060 return false;
15061 // Update the bits used globally.
15062 UsedBits |= CurrentUsedBits;
15063
15064 // Check if the new slice would be legal.
15065 if (!LS.isLegal())
15066 return false;
15067
15068 // Record the slice.
15069 LoadedSlices.push_back(LS);
15070 }
15071
15072 // Abort slicing if it does not seem to be profitable.
15073 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
15074 return false;
15075
15076 ++SlicedLoads;
15077
15078 // Rewrite each chain to use an independent load.
15079 // By construction, each chain can be represented by a unique load.
15080
15081 // Prepare the argument for the new token factor for all the slices.
15082 SmallVector<SDValue, 8> ArgChains;
15083 for (SmallVectorImpl<LoadedSlice>::const_iterator
15084 LSIt = LoadedSlices.begin(),
15085 LSItEnd = LoadedSlices.end();
15086 LSIt != LSItEnd; ++LSIt) {
15087 SDValue SliceInst = LSIt->loadSlice();
15088 CombineTo(LSIt->Inst, SliceInst, true);
15089 if (SliceInst.getOpcode() != ISD::LOAD)
15090 SliceInst = SliceInst.getOperand(0);
15091 assert(SliceInst->getOpcode() == ISD::LOAD &&
15092 "It takes more than a zext to get to the loaded slice!!");
15093 ArgChains.push_back(SliceInst.getValue(1));
15094 }
15095
15096 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
15097 ArgChains);
15098 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15099 AddToWorklist(Chain.getNode());
15100 return true;
15101}
15102
15103/// Check to see if V is (and (load ptr), imm), where the loaded value has
15104/// specific bytes cleared out. If so, return the number of bytes being masked
15105/// out and the shift amount.
15106static std::pair<unsigned, unsigned>
15107CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
15108 std::pair<unsigned, unsigned> Result(0, 0);
15109
15110 // Check for the structure we're looking for.
15111 if (V->getOpcode() != ISD::AND ||
15112 !isa<ConstantSDNode>(V->getOperand(1)) ||
15113 !ISD::isNormalLoad(V->getOperand(0).getNode()))
15114 return Result;
15115
15116 // Check the chain and pointer.
15117 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
15118 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
15119
15120 // This only handles simple types.
15121 if (V.getValueType() != MVT::i16 &&
15122 V.getValueType() != MVT::i32 &&
15123 V.getValueType() != MVT::i64)
15124 return Result;
15125
15126 // Check the constant mask. Invert it so that the bits being masked out are
15127 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
15128 // follow the sign bit for uniformity.
15129 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
15130 unsigned NotMaskLZ = countLeadingZeros(NotMask);
15131 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
15132 unsigned NotMaskTZ = countTrailingZeros(NotMask);
15133 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
15134 if (NotMaskLZ == 64) return Result; // All zero mask.
15135
15136 // See if we have a continuous run of bits. If so, we have 0*1+0*
15137 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
15138 return Result;
15139
15140 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
15141 if (V.getValueType() != MVT::i64 && NotMaskLZ)
15142 NotMaskLZ -= 64-V.getValueSizeInBits();
15143
15144 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
15145 switch (MaskedBytes) {
15146 case 1:
15147 case 2:
15148 case 4: break;
15149 default: return Result; // All one mask, or 5-byte mask.
15150 }
15151
15152 // Verify that the masked region starts at a multiple of its width so that
15153 // the access is aligned the same as the access width.
15154 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
15155
15156 // For narrowing to be valid, it must be the case that the load is the
15157 // memory operation immediately preceding the store.
15158 if (LD == Chain.getNode())
15159 ; // ok.
15160 else if (Chain->getOpcode() == ISD::TokenFactor &&
15161 SDValue(LD, 1).hasOneUse()) {
15162 // LD has only 1 chain use, so there are no indirect dependencies.
15163 if (!LD->isOperandOf(Chain.getNode()))
15164 return Result;
15165 } else
15166 return Result; // Fail.
15167
15168 Result.first = MaskedBytes;
15169 Result.second = NotMaskTZ/8;
15170 return Result;
15171}
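// Worked example (annotation, not part of the original source): for
// V = (and (load i32 P), 0xFFFF00FF), the sign-extended inverted mask is
// NotMask = 0x000000000000FF00, so NotMaskLZ = 48 (adjusted to 16 for i32)
// and NotMaskTZ = 8; both are byte-aligned and the run of ones is
// continuous, so MaskedBytes = (32 - 16 - 8) / 8 = 1 and the result is
// (1, 1): one byte is masked out, one byte up from the least significant
// byte.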
15172
15173/// Check to see if IVal is something that provides a value as specified by
15174/// MaskInfo. If so, replace the specified store with a narrower store of
15175/// truncated IVal.
15176static SDValue
15177ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
15178 SDValue IVal, StoreSDNode *St,
15179 DAGCombiner *DC) {
15180 unsigned NumBytes = MaskInfo.first;
15181 unsigned ByteShift = MaskInfo.second;
15182 SelectionDAG &DAG = DC->getDAG();
15183
15184 // Check to see if IVal is all zeros in the part being masked in by the 'or'
15185 // that uses this. If not, this is not a replacement.
15186 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
15187 ByteShift*8, (ByteShift+NumBytes)*8);
15188 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
1. Assuming the condition is false
2. Taking false branch
15189
15190 // Check that it is legal on the target to do this. It is legal if the new
15191 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
15192 // legalization (and the target doesn't explicitly think this is a bad idea).
15193 MVT VT = MVT::getIntegerVT(NumBytes * 8);
15194 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15195 if (!DC->isTypeLegal(VT))
3. Taking false branch
15196 return SDValue();
15197 if (St->getMemOperand() &&
4. Assuming pointer value is null
5. Taking false branch
15198 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15199 *St->getMemOperand()))
15200 return SDValue();
15201
15202 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
15203 // shifted by ByteShift and truncated down to NumBytes.
15204 if (ByteShift) {
6. Assuming 'ByteShift' is 0
7. Taking false branch
15205 SDLoc DL(IVal);
15206 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
15207 DAG.getConstant(ByteShift*8, DL,
15208 DC->getShiftAmountTy(IVal.getValueType())));
15209 }
15210
15211 // Figure out the offset for the store and the alignment of the access.
15212 unsigned StOffset;
15213 unsigned NewAlign = St->getAlignment();
8. Calling 'MemSDNode::getAlignment'
15214
15215 if (DAG.getDataLayout().isLittleEndian())
15216 StOffset = ByteShift;
15217 else
15218 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
15219
15220 SDValue Ptr = St->getBasePtr();
15221 if (StOffset) {
15222 SDLoc DL(IVal);
15223 Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
15224 NewAlign = MinAlign(NewAlign, StOffset);
15225 }
15226
15227 // Truncate down to the new size.
15228 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
15229
15230 ++OpsNarrowed;
15231 return DAG
15232 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
15233 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
15234}
15235
15236/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
15237/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
15238/// narrowing the load and store if it would end up being a win for performance
15239/// or code size.
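/// For example (illustrative sketch, not part of the original source):
///   X = load i32 P
///   store (or X, 0x00550000), P
/// only changes byte 2 of the value, so it may be narrowed into:
///   Y = load i8 (P + 2)          ; little-endian byte offset
///   store (or Y, 0x55), (P + 2)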
15240SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
15241 StoreSDNode *ST = cast<StoreSDNode>(N);
15242 if (!ST->isSimple())
15243 return SDValue();
15244
15245 SDValue Chain = ST->getChain();
15246 SDValue Value = ST->getValue();
15247 SDValue Ptr = ST->getBasePtr();
15248 EVT VT = Value.getValueType();
15249
15250 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
15251 return SDValue();
15252
15253 unsigned Opc = Value.getOpcode();
15254
15255 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
15256 // is a byte mask indicating a consecutive number of bytes, check to see if
15257 // Y is known to provide just those bytes. If so, we try to replace the
15258 // load / or / store sequence with a single (narrower) store, which makes
15259 // the load dead.
15260 if (Opc == ISD::OR) {
15261 std::pair<unsigned, unsigned> MaskedLoad;
15262 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
15263 if (MaskedLoad.first)
15264 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15265 Value.getOperand(1), ST, this))
15266 return NewST;
15267
15268 // Or is commutative, so try swapping X and Y.
15269 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
15270 if (MaskedLoad.first)
15271 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15272 Value.getOperand(0), ST, this))
15273 return NewST;
15274 }
15275
15276 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
15277 Value.getOperand(1).getOpcode() != ISD::Constant)
15278 return SDValue();
15279
15280 SDValue N0 = Value.getOperand(0);
15281 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15282 Chain == SDValue(N0.getNode(), 1)) {
15283 LoadSDNode *LD = cast<LoadSDNode>(N0);
15284 if (LD->getBasePtr() != Ptr ||
15285 LD->getPointerInfo().getAddrSpace() !=
15286 ST->getPointerInfo().getAddrSpace())
15287 return SDValue();
15288
15289 // Find the type to narrow it the load / op / store to.
15290 SDValue N1 = Value.getOperand(1);
15291 unsigned BitWidth = N1.getValueSizeInBits();
15292 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
15293 if (Opc == ISD::AND)
15294 Imm ^= APInt::getAllOnesValue(BitWidth);
15295 if (Imm == 0 || Imm.isAllOnesValue())
15296 return SDValue();
15297 unsigned ShAmt = Imm.countTrailingZeros();
15298 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
15299 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
15300 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15301 // The narrowing should be profitable, the load/store operation should be
15302 // legal (or custom) and the store size should be equal to the NewVT width.
15303 while (NewBW < BitWidth &&
15304 (NewVT.getStoreSizeInBits() != NewBW ||
15305 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
15306 !TLI.isNarrowingProfitable(VT, NewVT))) {
15307 NewBW = NextPowerOf2(NewBW);
15308 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15309 }
15310 if (NewBW >= BitWidth)
15311 return SDValue();
15312
15313 // If the changed lsb does not start at a type-bitwidth boundary,
15314 // start at the previous one.
15315 if (ShAmt % NewBW)
15316 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
15317 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
15318 std::min(BitWidth, ShAmt + NewBW));
15319 if ((Imm & Mask) == Imm) {
15320 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
15321 if (Opc == ISD::AND)
15322 NewImm ^= APInt::getAllOnesValue(NewBW);
15323 uint64_t PtrOff = ShAmt / 8;
15324 // For big endian targets, we need to adjust the offset to the pointer to
15325 // load the correct bytes.
15326 if (DAG.getDataLayout().isBigEndian())
15327 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15328
15329 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
15330 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15331 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
15332 return SDValue();
15333
15334 SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
15335 SDValue NewLD =
15336 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15337 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15338 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15339 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15340 DAG.getConstant(NewImm, SDLoc(Value),
15341 NewVT));
15342 SDValue NewST =
15343 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15344 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15345
15346 AddToWorklist(NewPtr.getNode());
15347 AddToWorklist(NewLD.getNode());
15348 AddToWorklist(NewVal.getNode());
15349 WorklistRemover DeadNodes(*this);
15350 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15351 ++OpsNarrowed;
15352 return NewST;
15353 }
15354 }
15355
15356 return SDValue();
15357}
15358
15359/// For a given floating point load / store pair, if the load value isn't used
15360/// by any other operations, then consider transforming the pair to integer
15361/// load / store operations if the target deems the transformation profitable.
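/// For example (illustrative sketch, not part of the original source), an
/// f32 value that is loaded and immediately stored back may instead be
/// moved through integer registers:
///   V = load f32, P1; store V, P2  -->  W = load i32, P1; store W, P2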
15362SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15363 StoreSDNode *ST = cast<StoreSDNode>(N);
15364 SDValue Value = ST->getValue();
15365 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15366 Value.hasOneUse()) {
15367 LoadSDNode *LD = cast<LoadSDNode>(Value);
15368 EVT VT = LD->getMemoryVT();
15369 if (!VT.isFloatingPoint() ||
15370 VT != ST->getMemoryVT() ||
15371 LD->isNonTemporal() ||
15372 ST->isNonTemporal() ||
15373 LD->getPointerInfo().getAddrSpace() != 0 ||
15374 ST->getPointerInfo().getAddrSpace() != 0)
15375 return SDValue();
15376
15377 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
15378 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15379 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15380 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15381 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15382 return SDValue();
15383
15384 unsigned LDAlign = LD->getAlignment();
15385 unsigned STAlign = ST->getAlignment();
15386 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15387 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
15388 if (LDAlign < ABIAlign || STAlign < ABIAlign)
15389 return SDValue();
15390
15391 SDValue NewLD =
15392 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15393 LD->getPointerInfo(), LDAlign);
15394
15395 SDValue NewST =
15396 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15397 ST->getPointerInfo(), STAlign);
15398
15399 AddToWorklist(NewLD.getNode());
15400 AddToWorklist(NewST.getNode());
15401 WorklistRemover DeadNodes(*this);
15402 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15403 ++LdStFP2Int;
15404 return NewST;
15405 }
15406
15407 return SDValue();
15408}
15409
15410// This is a helper function for visitMUL to check the profitability
15411// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15412// MulNode is the original multiply, AddNode is (add x, c1),
15413// and ConstNode is c2.
15414//
15415// If the (add x, c1) has multiple uses, we could increase
15416// the number of adds if we make this transformation.
15417// It would only be worth doing this if we can remove a
15418// multiply in the process. Check for that here.
15419// To illustrate:
15420// (A + c1) * c3
15421// (A + c2) * c3
15422// We're checking for cases where we have common "c3 * A" expressions.
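// For instance (illustrative numbers, not part of the original source),
// with c1 = 1, c2 = 2, c3 = 5:
//   (A + 1) * 5 --> (A * 5) + 5
//   (A + 2) * 5 --> (A * 5) + 10
// and the common "A * 5" multiply is computed only once.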
15423bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15424 SDValue &AddNode,
15425 SDValue &ConstNode) {
15426 APInt Val;
15427
15428 // If the add only has one use, this would be OK to do.
15429 if (AddNode.getNode()->hasOneUse())
15430 return true;
15431
15432 // Walk all the users of the constant with which we're multiplying.
15433 for (SDNode *Use : ConstNode->uses()) {
15434 if (Use == MulNode) // This use is the one we're on right now. Skip it.
15435 continue;
15436
15437 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15438 SDNode *OtherOp;
15439 SDNode *MulVar = AddNode.getOperand(0).getNode();
15440
15441 // OtherOp is what we're multiplying against the constant.
15442 if (Use->getOperand(0) == ConstNode)
15443 OtherOp = Use->getOperand(1).getNode();
15444 else
15445 OtherOp = Use->getOperand(0).getNode();
15446
15447 // Check to see if multiply is with the same operand of our "add".
15448 //
15449 // ConstNode = CONST
15450 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
15451 // ...
15452 // AddNode = (A + c1) <-- MulVar is A.
15453 // = AddNode * ConstNode <-- current visiting instruction.
15454 //
15455 // If we make this transformation, we will have a common
15456 // multiply (ConstNode * A) that we can save.
15457 if (OtherOp == MulVar)
15458 return true;
15459
15460 // Now check to see if a future expansion will give us a common
15461 // multiply.
15462 //
15463 // ConstNode = CONST
15464 // AddNode = (A + c1)
15465 // ... = AddNode * ConstNode <-- current visiting instruction.
15466 // ...
15467 // OtherOp = (A + c2)
15468 // Use = OtherOp * ConstNode <-- visiting Use.
15469 //
15470 // If we make this transformation, we will have a common
15471 // multiply (CONST * A) after we also do the same transformation
15472 // to the "t2" instruction.
15473 if (OtherOp->getOpcode() == ISD::ADD &&
15474 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15475 OtherOp->getOperand(0).getNode() == MulVar)
15476 return true;
15477 }
15478 }
15479
15480 // Didn't find a case where this would be profitable.
15481 return false;
15482}
15483
15484SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15485 unsigned NumStores) {
15486 SmallVector<SDValue, 8> Chains;
15487 SmallPtrSet<const SDNode *, 8> Visited;
15488 SDLoc StoreDL(StoreNodes[0].MemNode);
15489
15490 for (unsigned i = 0; i < NumStores; ++i) {
15491 Visited.insert(StoreNodes[i].MemNode);
15492 }
15493
15494 // Don't include nodes that are children or repeated nodes.
15495 for (unsigned i = 0; i < NumStores; ++i) {
15496 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15497 Chains.push_back(StoreNodes[i].MemNode->getChain());
15498 }
15499
15500 assert(Chains.size() > 0 && "Chain should have generated a chain");
15501 return DAG.getTokenFactor(StoreDL, Chains);
15502}
15503
15504bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
15505 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15506 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15507 // Make sure we have something to merge.
15508 if (NumStores < 2)
15509 return false;
15510
15511 // The latest Node in the DAG.
15512 SDLoc DL(StoreNodes[0].MemNode);
15513
15514 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
15515 unsigned SizeInBits = NumStores * ElementSizeBits;
15516 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15517
15518 EVT StoreTy;
15519 if (UseVector) {
15520 unsigned Elts = NumStores * NumMemElts;
15521 // Get the type for the merged vector store.
15522 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15523 } else
15524 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15525
15526 SDValue StoredVal;
15527 if (UseVector) {
15528 if (IsConstantSrc) {
15529 SmallVector<SDValue, 8> BuildVector;
15530 for (unsigned I = 0; I != NumStores; ++I) {
15531 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15532 SDValue Val = St->getValue();
15533 // If the constant is of the wrong type, convert it now.
15534 if (MemVT != Val.getValueType()) {
15535 Val = peekThroughBitcasts(Val);
15536 // Deal with constants of wrong size.
15537 if (ElementSizeBits != Val.getValueSizeInBits()) {
15538 EVT IntMemVT =
15539 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15540 if (isa<ConstantFPSDNode>(Val)) {
15541 // Not clear how to truncate FP values.
15542 return false;
15543 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15544 Val = DAG.getConstant(C->getAPIntValue()
15545 .zextOrTrunc(Val.getValueSizeInBits())
15546 .zextOrTrunc(ElementSizeBits),
15547 SDLoc(C), IntMemVT);
15548 }
15549 // Make sure the correctly sized value is bitcast to the correct type.
15550 Val = DAG.getBitcast(MemVT, Val);
15551 }
15552 BuildVector.push_back(Val);
15553 }
15554 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15555 : ISD::BUILD_VECTOR,
15556 DL, StoreTy, BuildVector);
15557 } else {
15558 SmallVector<SDValue, 8> Ops;
15559 for (unsigned i = 0; i < NumStores; ++i) {
15560 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15561 SDValue Val = peekThroughBitcasts(St->getValue());
15562 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15563 // type MemVT. If the underlying value is not the correct
15564 // type, but it is an extraction of an appropriate vector we
15565 // can recast Val to be of the correct type. This may require
15566 // converting between EXTRACT_VECTOR_ELT and
15567 // EXTRACT_SUBVECTOR.
15568 if ((MemVT != Val.getValueType()) &&
15569 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15570 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15571 EVT MemVTScalarTy = MemVT.getScalarType();
15572 // We may need to add a bitcast here to get types to line up.
15573 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15574 Val = DAG.getBitcast(MemVT, Val);
15575 } else {
15576 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15577 : ISD::EXTRACT_VECTOR_ELT;
15578 SDValue Vec = Val.getOperand(0);
15579 SDValue Idx = Val.getOperand(1);
15580 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15581 }
15582 }
15583 Ops.push_back(Val);
15584 }
15585
15586 // Build the extracted vector elements back into a vector.
15587 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15588 : ISD::BUILD_VECTOR,
15589 DL, StoreTy, Ops);
15590 }
15591 } else {
15592 // We should always use a vector store when merging extracted vector
15593 // elements, so this path implies a store of constants.
15594 assert(IsConstantSrc && "Merged vector elements should use vector store");
15595
15596 APInt StoreInt(SizeInBits, 0);
15597
15598 // Construct a single integer constant which is made of the smaller
15599 // constant inputs.
15600 bool IsLE = DAG.getDataLayout().isLittleEndian();
15601 for (unsigned i = 0; i < NumStores; ++i) {
15602 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
15603 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
15604
15605 SDValue Val = St->getValue();
15606 Val = peekThroughBitcasts(Val);
15607 StoreInt <<= ElementSizeBits;
15608 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
15609 StoreInt |= C->getAPIntValue()
15610 .zextOrTrunc(ElementSizeBits)
15611 .zextOrTrunc(SizeInBits);
15612 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
15613 StoreInt |= C->getValueAPF()
15614 .bitcastToAPInt()
15615 .zextOrTrunc(ElementSizeBits)
15616 .zextOrTrunc(SizeInBits);
15617 // If fp truncation is necessary give up for now.
15618 if (MemVT.getSizeInBits() != ElementSizeBits)
15619 return false;
15620 } else {
15621 llvm_unreachable("Invalid constant element type")::llvm::llvm_unreachable_internal("Invalid constant element type"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 15621)
;
15622 }
15623 }
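// Editorial illustration (not part of the original source): merging four i8
// constant stores of 0x11, 0x22, 0x33, 0x44 to p[0..3] on a little-endian
// target makes the loop above visit indices 3, 2, 1, 0, so the shifts and
// ORs accumulate StoreInt = 0x44332211; the merged i32 store then writes
// bytes 11 22 33 44, the same memory image as the four scalar stores. On a
// big-endian target the loop visits 0..3 and packs 0x11223344 instead.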
15624
15625 // Create the new Load and Store operations.
15626 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
15627 }
15628
15629 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15630 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
15631
15632 // Make sure we use a trunc store when that's necessary for legality.
15633 SDValue NewStore;
15634 if (!UseTrunc) {
15635 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
15636 FirstInChain->getPointerInfo(),
15637 FirstInChain->getAlignment());
15638 } else { // Must be realized as a trunc store
15639 EVT LegalizedStoredValTy =
15640 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
15641 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
15642 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
15643 SDValue ExtendedStoreVal =
15644 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
15645 LegalizedStoredValTy);
15646 NewStore = DAG.getTruncStore(
15647 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
15648 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
15649 FirstInChain->getAlignment(),
15650 FirstInChain->getMemOperand()->getFlags());
15651 }
15652
15653 // Replace all merged stores with the new store.
15654 for (unsigned i = 0; i < NumStores; ++i)
15655 CombineTo(StoreNodes[i].MemNode, NewStore);
15656
15657 AddToWorklist(NewChain.getNode());
15658 return true;
15659}
15660
15661void DAGCombiner::getStoreMergeCandidates(
15662 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
15663 SDNode *&RootNode) {
15664 // This holds the base pointer, index, and the offset in bytes from the base
15665 // pointer.
15666 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
15667 EVT MemVT = St->getMemoryVT();
15668
15669 SDValue Val = peekThroughBitcasts(St->getValue());
15670 // We must have a base and an offset.
15671 if (!BasePtr.getBase().getNode())
15672 return;
15673
15674 // Do not handle stores to undef base pointers.
15675 if (BasePtr.getBase().isUndef())
15676 return;
15677
15678 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
15679 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15680 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15681 bool IsLoadSrc = isa<LoadSDNode>(Val);
15682 BaseIndexOffset LBasePtr;
15683 // Match on loadbaseptr if relevant.
15684 EVT LoadVT;
15685 if (IsLoadSrc) {
15686 auto *Ld = cast<LoadSDNode>(Val);
15687 LBasePtr = BaseIndexOffset::match(Ld, DAG);
15688 LoadVT = Ld->getMemoryVT();
15689 // Load and store should be the same type.
15690 if (MemVT != LoadVT)
15691 return;
15692 // Loads must only have one use.
15693 if (!Ld->hasNUsesOfValue(1, 0))
15694 return;
15695 // The memory operands must not be volatile/indexed/atomic.
15696 // TODO: May be able to relax for unordered atomics (see D66309)
15697 if (!Ld->isSimple() || Ld->isIndexed())
15698 return;
15699 }
15700 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
15701 int64_t &Offset) -> bool {
15702 // The memory operands must not be volatile/indexed/atomic.
15703 // TODO: May be able to relax for unordered atomics (see D66309)
15704 if (!Other->isSimple() || Other->isIndexed())
15705 return false;
15706 // Don't mix temporal stores with non-temporal stores.
15707 if (St->isNonTemporal() != Other->isNonTemporal())
15708 return false;
15709 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
15710 // Allow merging constants of different types as integers.
15711 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
15712 : Other->getMemoryVT() != MemVT;
15713 if (IsLoadSrc) {
15714 if (NoTypeMatch)
15715 return false;
15716 // The Load's Base Ptr must also match
15717 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
15718 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
15719 if (LoadVT != OtherLd->getMemoryVT())
15720 return false;
15721 // Loads must only have one use.
15722 if (!OtherLd->hasNUsesOfValue(1, 0))
15723 return false;
15724 // The memory operands must not be volatile/indexed/atomic.
15725 // TODO: May be able to relax for unordered atomics (see D66309)
15726 if (!OtherLd->isSimple() ||
15727 OtherLd->isIndexed())
15728 return false;
15729 // Don't mix temporal loads with non-temporal loads.
15730 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
15731 return false;
15732 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
15733 return false;
15734 } else
15735 return false;
15736 }
15737 if (IsConstantSrc) {
15738 if (NoTypeMatch)
15739 return false;
15740 if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
15741 return false;
15742 }
15743 if (IsExtractVecSrc) {
15744 // Do not merge truncated stores here.
15745 if (Other->isTruncatingStore())
15746 return false;
15747 if (!MemVT.bitsEq(OtherBC.getValueType()))
15748 return false;
15749 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
15750 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15751 return false;
15752 }
15753 Ptr = BaseIndexOffset::match(Other, DAG);
15754 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
15755 };
15756
15757 // Check whether this pair of StoreNode and RootNode has already bailed out
15758 // of the dependence check more times than the limit allows.
15759 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
15760 SDNode *RootNode) -> bool {
15761 auto RootCount = StoreRootCountMap.find(StoreNode);
15762 if (RootCount != StoreRootCountMap.end() &&
15763 RootCount->second.first == RootNode &&
15764 RootCount->second.second > StoreMergeDependenceLimit)
15765 return true;
15766 return false;
15767 };
15768
15769 // We are looking for a root node which is an ancestor to all mergeable
15770 // stores. We search up through a load, to our root and then down
15771 // through all children. For instance we will find Store{1,2,3} if
15772 // St is Store1, Store2, or Store3 where the root is not a load,
15773 // which is always true for nonvolatile ops. TODO: Expand
15774 // the search to find all valid candidates through multiple layers of loads.
15775 //
15776 // Root
15777 // |-------|-------|
15778 // Load Load Store3
15779 // | |
15780 // Store1 Store2
15781 //
15782 // FIXME: We should be able to climb and
15783 // descend TokenFactors to find candidates as well.
15784
15785 RootNode = St->getChain().getNode();
15786
15787 unsigned NumNodesExplored = 0;
15788 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
15789 RootNode = Ldn->getChain().getNode();
15790 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15791 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15792 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
15793 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
15794 if (I2.getOperandNo() == 0)
15795 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
15796 BaseIndexOffset Ptr;
15797 int64_t PtrDiff;
15798 if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15799 !OverLimitInDependenceCheck(OtherST, RootNode))
15800 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15801 }
15802 } else
15803 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15804 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15805 if (I.getOperandNo() == 0)
15806 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
15807 BaseIndexOffset Ptr;
15808 int64_t PtrDiff;
15809 if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15810 !OverLimitInDependenceCheck(OtherST, RootNode))
15811 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15812 }
15813}
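// Editorial illustration (not part of the original source): for candidate
// stores to p, p+4 and p+8 that share base pointer p, BaseIndexOffset::match
// plus equalBaseIndex resolves each address to p plus a constant offset, so
// the search above collects
//
//   StoreNodes = { MemOpLink(St0, 0), MemOpLink(St1, 4), MemOpLink(St2, 8) }
//
// while a store to an unrelated base q fails equalBaseIndex and is skipped.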
15814
15815// We need to check that merging these stores does not cause a loop in
15816// the DAG. Any store candidate may depend on another candidate
15817// indirectly through its operand (we already consider dependencies
15818// through the chain). Check in parallel by searching up from
15819// non-chain operands of candidates.
15820bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
15821 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
15822 SDNode *RootNode) {
15823 // FIXME: We should be able to truncate a full search of
15824 // predecessors by doing a BFS and keeping tabs on the originating
15825 // stores from which worklist nodes come, in a similar way to
15826 // TokenFactor simplification.
15827
15828 SmallPtrSet<const SDNode *, 32> Visited;
15829 SmallVector<const SDNode *, 8> Worklist;
15830
15831 // RootNode is a predecessor to all candidates so we need not search
15832 // past it. Add RootNode (peeking through TokenFactors). Do not count
15833 // these towards size check.
15834
15835 Worklist.push_back(RootNode);
15836 while (!Worklist.empty()) {
15837 auto N = Worklist.pop_back_val();
15838 if (!Visited.insert(N).second)
15839 continue; // Already present in Visited.
15840 if (N->getOpcode() == ISD::TokenFactor) {
15841 for (SDValue Op : N->ops())
15842 Worklist.push_back(Op.getNode());
15843 }
15844 }
15845
15846 // Don't count pruning nodes towards max.
15847 unsigned int Max = 1024 + Visited.size();
15848 // Search Ops of store candidates.
15849 for (unsigned i = 0; i < NumStores; ++i) {
15850 SDNode *N = StoreNodes[i].MemNode;
15851 // Of the 4 Store Operands:
15852 // * Chain (Op 0) -> We have already considered these
15853 // in candidate selection and can be
15854 // safely ignored
15855 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
15856 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
15857 // but aren't necessarily from the same base node, so
15858 // cycles possible (e.g. via indexed store).
15859 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
15860 // non-indexed stores). Not constant on all targets (e.g. ARM)
15861 // and so can participate in a cycle.
15862 for (unsigned j = 1; j < N->getNumOperands(); ++j)
15863 Worklist.push_back(N->getOperand(j).getNode());
15864 }
15865 // Search through DAG. We can stop early if we find a store node.
15866 for (unsigned i = 0; i < NumStores; ++i)
15867 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
15868 Max)) {
15869 // If the search bails out, record the StoreNode and RootNode in
15870 // StoreRootCountMap. If we have seen the pair more times than the limit,
15871 // we won't add the StoreNode into the StoreNodes set again.
15872 if (Visited.size() >= Max) {
15873 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
15874 if (RootCount.first == RootNode)
15875 RootCount.second++;
15876 else
15877 RootCount = {RootNode, 1};
15878 }
15879 return false;
15880 }
15881 return true;
15882}
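// Editorial illustration (not part of the original source): a dependence
// this check rejects. Suppose candidate S1 stores a value produced by load L,
// and L is chained after candidate S0:
//
//   S0 --chain--> L --value--> S1
//
// Merging S0 and S1 into one node would make the merged store a predecessor
// of itself through L; hasPredecessorHelper finds S0 while walking up from
// S1's non-chain operands, so the merge is abandoned.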
15883
15884bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15885 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
15886 return false;
15887
15888 // TODO: Extend this function to merge stores of scalable vectors.
15889 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
15890 // store since we know <vscale x 16 x i8> is exactly twice as large as
15891 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
15892 EVT MemVT = St->getMemoryVT();
15893 if (MemVT.isScalableVector())
15894 return false;
15895
15896 int64_t ElementSizeBytes = MemVT.getStoreSize();
15897 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15898
15899 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15900 return false;
15901
15902 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15903 Attribute::NoImplicitFloat);
15904
15905 // This function cannot currently deal with non-byte-sized memory sizes.
15906 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
15907 return false;
15908
15909 if (!MemVT.isSimple())
15910 return false;
15911
15912 // Perform an early exit check. Do not bother looking at stored values that
15913 // are not constants, loads, or extracted vector elements.
15914 SDValue StoredVal = peekThroughBitcasts(St->getValue());
15915 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15916 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15917 isa<ConstantFPSDNode>(StoredVal);
15918 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15919 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15920 bool IsNonTemporalStore = St->isNonTemporal();
15921 bool IsNonTemporalLoad =
15922 IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15923
15924 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15925 return false;
15926
15927 SmallVector<MemOpLink, 8> StoreNodes;
15928 SDNode *RootNode;
15929 // Find potential store merge candidates by searching through chain sub-DAG
15930 getStoreMergeCandidates(St, StoreNodes, RootNode);
15931
15932 // Check if there is anything to merge.
15933 if (StoreNodes.size() < 2)
15934 return false;
15935
15936 // Sort the memory operands according to their distance from the
15937 // base pointer.
15938 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15939 return LHS.OffsetFromBase < RHS.OffsetFromBase;
15940 });
15941
15942 // Store Merge attempts to merge the lowest stores first. This generally
15943 // works out well when successful, as the remaining stores are checked
15944 // after the first collection of stores is merged. However, in the
15945 // case that a non-mergeable store is found first, e.g., {p[-2],
15946 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15947 // mergeable cases. To prevent this, we prune such stores from the
15948 // front of StoreNodes here.
15949
15950 bool RV = false;
15951 while (StoreNodes.size() > 1) {
15952 size_t StartIdx = 0;
15953 while ((StartIdx + 1 < StoreNodes.size()) &&
15954 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15955 StoreNodes[StartIdx + 1].OffsetFromBase)
15956 ++StartIdx;
15957
15958 // Bail if we don't have enough candidates to merge.
15959 if (StartIdx + 1 >= StoreNodes.size())
15960 return RV;
15961
15962 if (StartIdx)
15963 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15964
15965 // Scan the memory operations on the chain and find the first
15966 // non-consecutive store memory address.
15967 unsigned NumConsecutiveStores = 1;
15968 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15969 // Check that the addresses are consecutive starting from the second
15970 // element in the list of stores.
15971 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15972 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15973 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15974 break;
15975 NumConsecutiveStores = i + 1;
15976 }
15977
15978 if (NumConsecutiveStores < 2) {
15979 StoreNodes.erase(StoreNodes.begin(),
15980 StoreNodes.begin() + NumConsecutiveStores);
15981 continue;
15982 }
15983
15984 // The node with the lowest store address.
15985 LLVMContext &Context = *DAG.getContext();
15986 const DataLayout &DL = DAG.getDataLayout();
15987
15988 // Store the constants into memory as one consecutive store.
15989 if (IsConstantSrc) {
15990 while (NumConsecutiveStores >= 2) {
15991 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15992 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15993 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15994 unsigned LastLegalType = 1;
15995 unsigned LastLegalVectorType = 1;
15996 bool LastIntegerTrunc = false;
15997 bool NonZero = false;
15998 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15999 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16000 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
16001 SDValue StoredVal = ST->getValue();
16002 bool IsElementZero = false;
16003 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
16004 IsElementZero = C->isNullValue();
16005 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
16006 IsElementZero = C->getConstantFPValue()->isNullValue();
16007 if (IsElementZero) {
16008 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
16009 FirstZeroAfterNonZero = i;
16010 }
16011 NonZero |= !IsElementZero;
16012
16013 // Find a legal type for the constant store.
16014 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16015 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16016 bool IsFast = false;
16017
16018 // Break early when size is too large to be legal.
16019 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16020 break;
16021
16022 if (TLI.isTypeLegal(StoreTy) &&
16023 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16024 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16025 *FirstInChain->getMemOperand(), &IsFast) &&
16026 IsFast) {
16027 LastIntegerTrunc = false;
16028 LastLegalType = i + 1;
16029 // Or check whether a truncstore is legal.
16030 } else if (TLI.getTypeAction(Context, StoreTy) ==
16031 TargetLowering::TypePromoteInteger) {
16032 EVT LegalizedStoredValTy =
16033 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
16034 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16035 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16036 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16037 *FirstInChain->getMemOperand(),
16038 &IsFast) &&
16039 IsFast) {
16040 LastIntegerTrunc = true;
16041 LastLegalType = i + 1;
16042 }
16043 }
16044
16045 // We only use vectors if the constant is known to be zero or the
16046 // target allows it and the function is not marked with the
16047 // noimplicitfloat attribute.
16048 if ((!NonZero ||
16049 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
16050 !NoVectors) {
16051 // Find a legal type for the vector store.
16052 unsigned Elts = (i + 1) * NumMemElts;
16053 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16054 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
16055 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16056 TLI.allowsMemoryAccess(
16057 Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
16058 IsFast)
16059 LastLegalVectorType = i + 1;
16060 }
16061 }
16062
16063 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
16064 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
16065
16066 // Check if we found a legal integer type that creates a meaningful
16067 // merge.
16068 if (NumElem < 2) {
16069 // We know that candidate stores are in order and of correct
16070 // shape. While there is no mergeable sequence from the
16071 // beginning, one may start later in the sequence. The only
16072 // reason a merge of size N could have failed where another of
16073 // the same size would not have, is if the alignment has
16074 // improved or we've dropped a non-zero value. Drop as many
16075 // candidates as we can here.
16076 unsigned NumSkip = 1;
16077 while (
16078 (NumSkip < NumConsecutiveStores) &&
16079 (NumSkip < FirstZeroAfterNonZero) &&
16080 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16081 NumSkip++;
16082
16083 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16084 NumConsecutiveStores -= NumSkip;
16085 continue;
16086 }
16087
16088 // Check that we can merge these candidates without causing a cycle.
16089 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16090 RootNode)) {
16091 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16092 NumConsecutiveStores -= NumElem;
16093 continue;
16094 }
16095
16096 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
16097 UseVector, LastIntegerTrunc);
16098
16099 // Remove merged stores for next iteration.
16100 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16101 NumConsecutiveStores -= NumElem;
16102 }
16103 continue;
16104 }
16105
16106 // When extracting multiple vector elements, try to store them
16107 // in one vector store rather than a sequence of scalar stores.
16108 if (IsExtractVecSrc) {
16109 // Loop on Consecutive Stores on success.
16110 while (NumConsecutiveStores >= 2) {
16111 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16112 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16113 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16114 unsigned NumStoresToMerge = 1;
16115 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16116 // Find a legal type for the vector store.
16117 unsigned Elts = (i + 1) * NumMemElts;
16118 EVT Ty =
16119 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16120 bool IsFast;
16121
16122 // Break early when size is too large to be legal.
16123 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
16124 break;
16125
16126 if (TLI.isTypeLegal(Ty) &&
16127 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16128 TLI.allowsMemoryAccess(Context, DL, Ty,
16129 *FirstInChain->getMemOperand(), &IsFast) &&
16130 IsFast)
16131 NumStoresToMerge = i + 1;
16132 }
16133
16134 // Check if we found a legal vector type creating a meaningful
16135 // merge.
16136 if (NumStoresToMerge < 2) {
16137 // We know that candidate stores are in order and of correct
16138 // shape. While there is no mergeable sequence from the
16139 // beginning, one may start later in the sequence. The only
16140 // reason a merge of size N could have failed where another of
16141 // the same size would not have, is if the alignment has
16142 // improved. Drop as many candidates as we can here.
16143 unsigned NumSkip = 1;
16144 while (
16145 (NumSkip < NumConsecutiveStores) &&
16146 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16147 NumSkip++;
16148
16149 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16150 NumConsecutiveStores -= NumSkip;
16151 continue;
16152 }
16153
16154 // Check that we can merge these candidates without causing a cycle.
16155 if (!checkMergeStoreCandidatesForDependencies(
16156 StoreNodes, NumStoresToMerge, RootNode)) {
16157 StoreNodes.erase(StoreNodes.begin(),
16158 StoreNodes.begin() + NumStoresToMerge);
16159 NumConsecutiveStores -= NumStoresToMerge;
16160 continue;
16161 }
16162
16163 RV |= MergeStoresOfConstantsOrVecElts(
16164 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
16165
16166 StoreNodes.erase(StoreNodes.begin(),
16167 StoreNodes.begin() + NumStoresToMerge);
16168 NumConsecutiveStores -= NumStoresToMerge;
16169 }
16170 continue;
16171 }
16172
16173 // Below we handle the case of multiple consecutive stores that
16174 // come from multiple consecutive loads. We merge them into a single
16175 // wide load and a single wide store.
16176
16177 // Look for load nodes which are used by the stored values.
16178 SmallVector<MemOpLink, 8> LoadNodes;
16179
16180 // Find acceptable loads. Loads need to have the same chain (token factor),
16181 // must not be zext, volatile, or indexed, and they must be consecutive.
16182 BaseIndexOffset LdBasePtr;
16183
16184 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16185 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16186 SDValue Val = peekThroughBitcasts(St->getValue());
16187 LoadSDNode *Ld = cast<LoadSDNode>(Val);
16188
16189 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
16190 // If this is not the first ptr that we check.
16191 int64_t LdOffset = 0;
16192 if (LdBasePtr.getBase().getNode()) {
16193 // The base ptr must be the same.
16194 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
16195 break;
16196 } else {
16197 // Check that all other base pointers are the same as this one.
16198 LdBasePtr = LdPtr;
16199 }
16200
16201 // We found a potential memory operand to merge.
16202 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
16203 }
16204
16205 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
16206 // If we have load/store pair instructions and we only have two values,
16207 // don't bother merging.
16208 unsigned RequiredAlignment;
16209 if (LoadNodes.size() == 2 &&
16210 TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
16211 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
16212 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
16213 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
16214 break;
16215 }
16216 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16217 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16218 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16219 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
16220 unsigned FirstLoadAlign = FirstLoad->getAlignment();
16221
16222 // Scan the memory operations on the chain and find the first
16223 // non-consecutive load memory address. This variable holds the index in
16224 // the store node array.
16225
16226 unsigned LastConsecutiveLoad = 1;
16227
16228 // These variables refer to a size, not an index, in the array.
16229 unsigned LastLegalVectorType = 1;
16230 unsigned LastLegalIntegerType = 1;
16231 bool isDereferenceable = true;
16232 bool DoIntegerTruncate = false;
16233 StartAddress = LoadNodes[0].OffsetFromBase;
16234 SDValue FirstChain = FirstLoad->getChain();
16235 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
16236 // All loads must share the same chain.
16237 if (LoadNodes[i].MemNode->getChain() != FirstChain)
16238 break;
16239
16240 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
16241 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16242 break;
16243 LastConsecutiveLoad = i;
16244
16245 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
16246 isDereferenceable = false;
16247
16248 // Find a legal type for the vector store.
16249 unsigned Elts = (i + 1) * NumMemElts;
16250 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16251
16252 // Break early when size is too large to be legal.
16253 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16254 break;
16255
16256 bool IsFastSt, IsFastLd;
16257 if (TLI.isTypeLegal(StoreTy) &&
16258 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16259 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16260 *FirstInChain->getMemOperand(), &IsFastSt) &&
16261 IsFastSt &&
16262 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16263 *FirstLoad->getMemOperand(), &IsFastLd) &&
16264 IsFastLd) {
16265 LastLegalVectorType = i + 1;
16266 }
16267
16268 // Find a legal type for the integer store.
16269 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16270 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16271 if (TLI.isTypeLegal(StoreTy) &&
16272 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16273 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16274 *FirstInChain->getMemOperand(), &IsFastSt) &&
16275 IsFastSt &&
16276 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16277 *FirstLoad->getMemOperand(), &IsFastLd) &&
16278 IsFastLd) {
16279 LastLegalIntegerType = i + 1;
16280 DoIntegerTruncate = false;
16281 // Or check whether a truncstore and extload is legal.
16282 } else if (TLI.getTypeAction(Context, StoreTy) ==
16283 TargetLowering::TypePromoteInteger) {
16284 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
16285 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16286 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16287 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
16288 StoreTy) &&
16289 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
16290 StoreTy) &&
16291 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
16292 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16293 *FirstInChain->getMemOperand(),
16294 &IsFastSt) &&
16295 IsFastSt &&
16296 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16297 *FirstLoad->getMemOperand(), &IsFastLd) &&
16298 IsFastLd) {
16299 LastLegalIntegerType = i + 1;
16300 DoIntegerTruncate = true;
16301 }
16302 }
16303 }
16304
16305 // Only use vector types if the vector type is larger than the integer
16306 // type. If they are the same, use integers.
16307 bool UseVectorTy =
16308 LastLegalVectorType > LastLegalIntegerType && !NoVectors;
16309 unsigned LastLegalType =
16310 std::max(LastLegalVectorType, LastLegalIntegerType);
16311
16312 // We add +1 here because the LastXXX variables refer to a position (index)
16313 // while NumElem refers to a count of elements.
16314 unsigned NumElem =
16315 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
16316 NumElem = std::min(LastLegalType, NumElem);
16317
16318 if (NumElem < 2) {
16319 // We know that candidate stores are in order and of correct
16320 // shape. While there is no mergeable sequence from the
16321 // beginning, one may start later in the sequence. The only
16322 // reason a merge of size N could have failed where another of
16323 // the same size would not have is if the alignment of either
16324 // the load or store has improved. Drop as many candidates as we
16325 // can here.
16326 unsigned NumSkip = 1;
16327 while ((NumSkip < LoadNodes.size()) &&
16328 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16329 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16330 NumSkip++;
16331 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16332 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16333 NumConsecutiveStores -= NumSkip;
16334 continue;
16335 }
16336
16337 // Check that we can merge these candidates without causing a cycle.
16338 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16339 RootNode)) {
16340 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16341 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16342 NumConsecutiveStores -= NumElem;
16343 continue;
16344 }
16345
16346 // Find if it is better to use vectors or integers to load and store
16347 // to memory.
16348 EVT JointMemOpVT;
16349 if (UseVectorTy) {
16350 // Find a legal type for the vector store.
16351 unsigned Elts = NumElem * NumMemElts;
16352 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16353 } else {
16354 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16355 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16356 }
16357
16358 SDLoc LoadDL(LoadNodes[0].MemNode);
16359 SDLoc StoreDL(StoreNodes[0].MemNode);
16360
16361 // The merged loads are required to have the same incoming chain, so
16362 // using the first's chain is acceptable.
16363
16364 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16365 AddToWorklist(NewStoreChain.getNode());
16366
16367 MachineMemOperand::Flags LdMMOFlags =
16368 isDereferenceable ? MachineMemOperand::MODereferenceable
16369 : MachineMemOperand::MONone;
16370 if (IsNonTemporalLoad)
16371 LdMMOFlags |= MachineMemOperand::MONonTemporal;
16372
16373 MachineMemOperand::Flags StMMOFlags =
16374 IsNonTemporalStore ? MachineMemOperand::MONonTemporal
16375 : MachineMemOperand::MONone;
16376
16377 SDValue NewLoad, NewStore;
16378 if (UseVectorTy || !DoIntegerTruncate) {
16379 NewLoad =
16380 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
16381 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16382 FirstLoadAlign, LdMMOFlags);
16383 NewStore = DAG.getStore(
16384 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
16385 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16386 } else { // This must be the truncstore/extload case
16387 EVT ExtendedTy =
16388 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16389 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16390 FirstLoad->getChain(), FirstLoad->getBasePtr(),
16391 FirstLoad->getPointerInfo(), JointMemOpVT,
16392 FirstLoadAlign, LdMMOFlags);
16393 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16394 FirstInChain->getBasePtr(),
16395 FirstInChain->getPointerInfo(),
16396 JointMemOpVT, FirstInChain->getAlignment(),
16397 FirstInChain->getMemOperand()->getFlags());
16398 }
16399
16400 // Transfer chain users from old loads to the new load.
16401 for (unsigned i = 0; i < NumElem; ++i) {
16402 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16403 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16404 SDValue(NewLoad.getNode(), 1));
16405 }
16406
16407 // Replace all of the stores with the new store. Recursively remove the
16408 // corresponding value if it is no longer used.
16409 for (unsigned i = 0; i < NumElem; ++i) {
16410 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16411 CombineTo(StoreNodes[i].MemNode, NewStore);
16412 if (Val.getNode()->use_empty())
16413 recursivelyDeleteUnusedNodes(Val.getNode());
16414 }
16415
16416 RV = true;
16417 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16418 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16419 NumConsecutiveStores -= NumElem;
16420 }
16421 }
16422 return RV;
16423}
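// Editorial illustration (not part of the original source): assuming i64 (or
// v2i32) loads and stores are legal and fast for the target, the load/store
// merging path above rewrites a memcpy-like sequence such as
//
//   void copy2(int *d, const int *s) {
//     d[0] = s[0];   // i32 load feeding an i32 store
//     d[1] = s[1];   // consecutive i32 load feeding a consecutive store
//   }
//
// into one wide load and one wide store, transferring the old loads' chain
// users to the new load and replacing every old store with the new one.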
16424
16425SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16426 SDLoc SL(ST);
16427 SDValue ReplStore;
16428
16429 // Replace the chain to avoid dependency.
16430 if (ST->isTruncatingStore()) {
16431 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16432 ST->getBasePtr(), ST->getMemoryVT(),
16433 ST->getMemOperand());
16434 } else {
16435 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16436 ST->getMemOperand());
16437 }
16438
16439 // Create token to keep both nodes around.
16440 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16441 MVT::Other, ST->getChain(), ReplStore);
16442
16443 // Make sure the new and old chains are cleaned up.
16444 AddToWorklist(Token.getNode());
16445
16446 // Don't add users to work list.
16447 return CombineTo(ST, Token, false);
16448}
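// Editorial note (not part of the original source): the TokenFactor built
// above keeps both ordering edges alive,
//
//   Token = TokenFactor(ST->getChain(), ReplStore)
//
// so users of the old store's chain still observe the original ordering
// while the rewritten store itself hangs off BetterChain.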
16449
16450SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16451 SDValue Value = ST->getValue();
16452 if (Value.getOpcode() == ISD::TargetConstantFP)
16453 return SDValue();
16454
16455 if (!ISD::isNormalStore(ST))
16456 return SDValue();
16457
16458 SDLoc DL(ST);
16459
16460 SDValue Chain = ST->getChain();
16461 SDValue Ptr = ST->getBasePtr();
16462
16463 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16464
16465 // NOTE: If the original store is volatile, this transform must not increase
16466 // the number of stores. For example, on x86-32 an f64 can be stored in one
16467 // processor operation but an i64 (which is not legal) requires two. So the
16468 // transform should not be done in this case.
16469
16470 SDValue Tmp;
16471 switch (CFP->getSimpleValueType(0).SimpleTy) {
16472 default:
16473 llvm_unreachable("Unknown FP type")::llvm::llvm_unreachable_internal("Unknown FP type", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp"
, 16473)
;
16474 case MVT::f16: // We don't do this for these yet.
16475 case MVT::f80:
16476 case MVT::f128:
16477 case MVT::ppcf128:
16478 return SDValue();
16479 case MVT::f32:
16480 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16481 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16482
16483 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16484 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16485 MVT::i32);
16486 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16487 }
16488
16489 return SDValue();
16490 case MVT::f64:
16491 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16492 ST->isSimple()) ||
16493 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16494
16495 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16496 getZExtValue(), SDLoc(CFP), MVT::i64);
16497 return DAG.getStore(Chain, DL, Tmp,
16498 Ptr, ST->getMemOperand());
16499 }
16500
16501 if (ST->isSimple() &&
16502 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16503 // Many FP stores are not made apparent until after legalize, e.g. for
16504 // argument passing. Since this is so common, custom legalize the
16505 // 64-bit integer store into two 32-bit stores.
16506 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16507 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16508 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16509 if (DAG.getDataLayout().isBigEndian())
16510 std::swap(Lo, Hi);
16511
16512 unsigned Alignment = ST->getAlignment();
16513 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16514 AAMDNodes AAInfo = ST->getAAInfo();
16515
16516 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16517 ST->getAlignment(), MMOFlags, AAInfo);
16518 Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
16519 Alignment = MinAlign(Alignment, 4U);
16520 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16521 ST->getPointerInfo().getWithOffset(4),
16522 Alignment, MMOFlags, AAInfo);
16523 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16524 St0, St1);
16525 }
16526
16527 return SDValue();
16528 }
16529}
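// Editorial illustration (not part of the original source): worked bit
// patterns for the cases above. For f32, 1.0f bit-casts to 0x3F800000, so
// "store float 1.0, Ptr" becomes "store i32 0x3F800000, Ptr". For the f64
// two-store path, 1.0 bit-casts to 0x3FF0000000000000, giving
//
//   Lo = 0x00000000 at Ptr, Hi = 0x3FF00000 at Ptr+4
//
// (swapped on big-endian targets), with the second store's alignment
// reduced via MinAlign(Alignment, 4U).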
16530
16531SDValue DAGCombiner::visitSTORE(SDNode *N) {
16532 StoreSDNode *ST = cast<StoreSDNode>(N);
16533 SDValue Chain = ST->getChain();
16534 SDValue Value = ST->getValue();
16535 SDValue Ptr = ST->getBasePtr();
16536
16537 // If this is a store of a bit convert, store the input value if the
16538 // resultant store does not need a higher alignment than the original.
16539 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16540 ST->isUnindexed()) {
16541 EVT SVT = Value.getOperand(0).getValueType();
16542 // If the store is volatile, we only want to change the store type if the
16543 // resulting store is legal. Otherwise we might increase the number of
16544 // memory accesses. We don't care if the original type was legal or not
16545 // as we assume software couldn't rely on the number of accesses of an
16546 // illegal type.
16547 // TODO: May be able to relax for unordered atomics (see D66309)
16548 if (((!LegalOperations && ST->isSimple()) ||
16549 TLI.isOperationLegal(ISD::STORE, SVT)) &&
16550 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
16551 DAG, *ST->getMemOperand())) {
16552 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
16553 ST->getMemOperand());
16554 }
16555 }
16556
16557 // Turn 'store undef, Ptr' -> nothing.
16558 if (Value.isUndef() && ST->isUnindexed())
16559 return Chain;
16560
16561 // Try to infer better alignment information than the store already has.
16562 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
16563 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
16564 if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
16565 SDValue NewStore =
16566 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
16567 ST->getMemoryVT(), Align,
16568 ST->getMemOperand()->getFlags(), ST->getAAInfo());
16569 // NewStore will always be N as we are only refining the alignment
16570 assert(NewStore.getNode() == N);
16571 (void)NewStore;
16572 }
16573 }
16574 }
16575
16576 // Try transforming a pair floating point load / store ops to integer
16577 // load / store ops.
16578 if (SDValue NewST = TransformFPLoadStorePair(N))
16579 return NewST;
16580
16581 // Try transforming several stores into STORE (BSWAP).
16582 if (SDValue Store = MatchStoreCombine(ST))
16583 return Store;
16584
16585 if (ST->isUnindexed()) {
16586 // Walk up chain skipping non-aliasing memory nodes, on this store and any
16587 // adjacent stores.
16588 if (findBetterNeighborChains(ST)) {
16589 // replaceStoreChain uses CombineTo, which handled all of the worklist
16590 // manipulation. Return the original node to not do anything else.
16591 return SDValue(ST, 0);
16592 }
16593 Chain = ST->getChain();
16594 }
16595
16596 // FIXME: is there such a thing as a truncating indexed store?
16597 if (ST->isTruncatingStore() && ST->isUnindexed() &&
16598 Value.getValueType().isInteger() &&
16599 (!isa<ConstantSDNode>(Value) ||
16600 !cast<ConstantSDNode>(Value)->isOpaque())) {
16601 APInt TruncDemandedBits =
16602 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
16603 ST->getMemoryVT().getScalarSizeInBits());
16604
16605 // See if we can simplify the input to this truncstore with knowledge that
16606 // only the low bits are being used. For example:
16607 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
16608 AddToWorklist(Value.getNode());
16609 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
16610 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
16611 ST->getMemOperand());
16612
16613 // Otherwise, see if we can simplify the operation with
16614 // SimplifyDemandedBits, which only works if the value has a single use.
16615 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
16616 // Re-visit the store if anything changed and the store hasn't been merged
16617 // with another node (N is deleted). SimplifyDemandedBits will add Value's
16618 // node back to the worklist if necessary, but we also need to re-visit
16619 // the Store node itself.
16620 if (N->getOpcode() != ISD::DELETED_NODE)
16621 AddToWorklist(N);
16622 return SDValue(N, 0);
16623 }
16624 }
16625
16626 // If this is a load followed by a store to the same location, then the store
16627 // is dead/noop.
16628 // TODO: Can relax for unordered atomics (see D66309)
16629 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
16630 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
16631 ST->isUnindexed() && ST->isSimple() &&
16632 // There can't be any side effects between the load and store, such as
16633 // a call or store.
16634 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
16635 // The store is dead, remove it.
16636 return Chain;
16637 }
16638 }
16639
16640 // TODO: Can relax for unordered atomics (see D66309)
16641 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
16642 if (ST->isUnindexed() && ST->isSimple() &&
16643 ST1->isUnindexed() && ST1->isSimple()) {
16644 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
16645 ST->getMemoryVT() == ST1->getMemoryVT()) {
16646 // If this is a store followed by a store with the same value to the
16647 // same location, then the store is dead/noop.
16648 return Chain;
16649 }
16650
16651 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
16652 !ST1->getBasePtr().isUndef() &&
16653 // BaseIndexOffset and the code below requires knowing the size
16654 // of a vector, so bail out if MemoryVT is scalable.
16655 !ST1->getMemoryVT().isScalableVector()) {
16656 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
16657 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
16658 unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
16659 unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
16660 // If the preceding store writes to a subset of the current store's
16661 // location and no other node is chained to that store, we can
16662 // effectively drop the preceding store. Do not remove stores to undef as
16663 // they may be used as data sinks.
16664 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
16665 CombineTo(ST1, ST1->getChain());
16666 return SDValue();
16667 }
16668 }
16669 }
16670 }
16671
16672 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
16673 // truncating store. We can do this even if this is already a truncstore.
16674 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
16675 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
16676 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
16677 ST->getMemoryVT())) {
16678 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
16679 Ptr, ST->getMemoryVT(), ST->getMemOperand());
16680 }
16681
16682 // Always perform this optimization before types are legal. If the target
16683 // prefers, also try this after legalization to catch stores that were created
16684 // by intrinsics or other nodes.
16685 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
16686 while (true) {
16687 // There can be multiple store sequences on the same chain.
16688 // Keep trying to merge store sequences until we are unable to do so
16689 // or until we merge the last store on the chain.
16690 bool Changed = MergeConsecutiveStores(ST);
16691 if (!Changed) break;
16692 // Return N as merge only uses CombineTo and no worklist clean
16693 // up is necessary.
16694 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
16695 return SDValue(N, 0);
16696 }
16697 }
16698
16699 // Try transforming N to an indexed store.
16700 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16701 return SDValue(N, 0);
16702
16703 // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
16704 //
16705 // Make sure to do this only after attempting to merge stores in order to
16706 // avoid changing the types of some subset of stores due to visit order,
16707 // preventing their merging.
16708 if (isa<ConstantFPSDNode>(ST->getValue())) {
16709 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
16710 return NewSt;
16711 }
16712
16713 if (SDValue NewSt = splitMergedValStore(ST))
16714 return NewSt;
16715
16716 return ReduceLoadOpStoreWidth(N);
16717}
16718
16719SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16720 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16721 if (!LifetimeEnd->hasOffset())
16722 return SDValue();
16723
16724 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16725 LifetimeEnd->getOffset(), false);
16726
16727 // We walk up the chains to find stores.
16728 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16729 while (!Chains.empty()) {
16730 SDValue Chain = Chains.back();
16731 Chains.pop_back();
16732 if (!Chain.hasOneUse())
16733 continue;
16734 switch (Chain.getOpcode()) {
16735 case ISD::TokenFactor:
16736 for (unsigned Nops = Chain.getNumOperands(); Nops;)
16737 Chains.push_back(Chain.getOperand(--Nops));
16738 break;
16739 case ISD::LIFETIME_START:
16740 case ISD::LIFETIME_END:
16741 // We can forward past any lifetime start/end that can be proven not to
16742 // alias the node.
16743 if (!isAlias(Chain.getNode(), N))
16744 Chains.push_back(Chain.getOperand(0));
16745 break;
16746 case ISD::STORE: {
16747 StoreSDNode *ST = cast<StoreSDNode>(Chain);
16748 // TODO: Can relax for unordered atomics (see D66309)
16749 if (!ST->isSimple() || ST->isIndexed())
16750 continue;
16751 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16752 // If we store purely within object bounds just before its lifetime ends,
16753 // we can remove the store.
16754 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16755 ST->getMemoryVT().getStoreSizeInBits())) {
16756 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16757            dbgs() << "\nwithin LIFETIME_END of : ";
16758            LifetimeEndBase.dump(); dbgs() << "\n");
16759 CombineTo(ST, ST->getChain());
16760 return SDValue(N, 0);
16761 }
16762 }
16763 }
16764 }
16765 return SDValue();
16766}
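// Editorial illustration (not part of the original source): the elimination
// above fires on IR like
//
//   store i32 0, i32* %p                              ; within %p's object
//   call void @llvm.lifetime.end.p0i8(i64 4, i8* %q)  ; %q covers %p
//
// where the store lands entirely inside the object whose lifetime ends, so
// no later load can observe it and the store's chain is forwarded instead.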
16767
16768/// For the instruction sequence of store below, F and I values
16769/// are bundled together as an i64 value before being stored into memory.
16770 /// Sometimes it is more efficient to generate separate stores for F and I,
16771/// which can remove the bitwise instructions or sink them to colder places.
16772///
16773/// (store (or (zext (bitcast F to i32) to i64),
16774/// (shl (zext I to i64), 32)), addr) -->
16775/// (store F, addr) and (store I, addr+4)
16776///
16777/// Similarly, splitting for other merged store can also be beneficial, like:
16778/// For pair of {i32, i32}, i64 store --> two i32 stores.
16779/// For pair of {i32, i16}, i64 store --> two i32 stores.
16780/// For pair of {i16, i16}, i32 store --> two i16 stores.
16781/// For pair of {i16, i8}, i32 store --> two i16 stores.
16782/// For pair of {i8, i8}, i16 store --> two i8 stores.
16783///
16784/// We allow each target to determine specifically which kind of splitting is
16785/// supported.
16786///
16787/// The store patterns are commonly seen from the simple code snippet below
16788 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
16789/// void goo(const std::pair<int, float> &);
16790/// hoo() {
16791/// ...
16792/// goo(std::make_pair(tmp, ftmp));
16793/// ...
16794/// }
16795///
16796SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16797 if (OptLevel == CodeGenOpt::None)
16798 return SDValue();
16799
16800 // Can't change the number of memory accesses for a volatile store or break
16801 // atomicity for an atomic one.
16802 if (!ST->isSimple())
16803 return SDValue();
16804
16805 SDValue Val = ST->getValue();
16806 SDLoc DL(ST);
16807
16808 // Match OR operand.
16809 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16810 return SDValue();
16811
16812 // Match SHL operand and get Lower and Higher parts of Val.
16813 SDValue Op1 = Val.getOperand(0);
16814 SDValue Op2 = Val.getOperand(1);
16815 SDValue Lo, Hi;
16816 if (Op1.getOpcode() != ISD::SHL) {
16817 std::swap(Op1, Op2);
16818 if (Op1.getOpcode() != ISD::SHL)
16819 return SDValue();
16820 }
16821 Lo = Op2;
16822 Hi = Op1.getOperand(0);
16823 if (!Op1.hasOneUse())
16824 return SDValue();
16825
16826 // Match shift amount to HalfValBitSize.
16827 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16828 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16829 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16830 return SDValue();
16831
16832 // Lo and Hi are zero-extended to the full value type (e.g. i64) from
16833 // integers no wider than HalfValBitSize.
16834 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16835 !Lo.getOperand(0).getValueType().isScalarInteger() ||
16836 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16837 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16838 !Hi.getOperand(0).getValueType().isScalarInteger() ||
16839 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16840 return SDValue();
16841
16842 // Use the EVT of low and high parts before bitcast as the input
16843 // of target query.
16844 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16845 ? Lo.getOperand(0).getValueType()
16846 : Lo.getValueType();
16847 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16848 ? Hi.getOperand(0).getValueType()
16849 : Hi.getValueType();
16850 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16851 return SDValue();
16852
16853 // Start to split store.
16854 unsigned Alignment = ST->getAlignment();
16855 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16856 AAMDNodes AAInfo = ST->getAAInfo();
16857
16858 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16859 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16860 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16861 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16862
16863 SDValue Chain = ST->getChain();
16864 SDValue Ptr = ST->getBasePtr();
16865 // Lower value store.
16866 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16867 ST->getAlignment(), MMOFlags, AAInfo);
16868 Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
16869 // Higher value store.
16870 SDValue St1 =
16871 DAG.getStore(St0, DL, Hi, Ptr,
16872 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16873 Alignment / 2, MMOFlags, AAInfo);
16874 return St1;
16875}
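// Editorial illustration (not part of the original source): for an i64 store
// of Val = (or (zext %lo to i64), (shl (zext %hi to i64), 32)),
// HalfValBitSize is 32 and the shift amount matches it, so the split above
// emits
//
//   store i32 %lo, Ptr        ; original alignment
//   store i32 %hi, Ptr + 4    ; Alignment / 2, chained after the first
//
// provided the target reports isMultiStoresCheaperThanBitsMerge(LowTy,
// HighTy) as true.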
16876
16877/// Convert a disguised subvector insertion into a shuffle:
16878SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16879 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
16880        "Expected insert_vector_elt");
16881 SDValue InsertVal = N->getOperand(1);
16882 SDValue Vec = N->getOperand(0);
16883
16884 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
16885 // InsIndex)
16886 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
16887 // CONCAT_VECTORS.
16888 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
16889 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16890 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
16891 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
16892 ArrayRef<int> Mask = SVN->getMask();
16893
16894 SDValue X = Vec.getOperand(0);
16895 SDValue Y = Vec.getOperand(1);
16896
16897 // Vec's operand 0 uses indices from 0 to N-1 and
16898 // operand 1 uses indices from N to 2N-1, where N is the
16899 // number of elements in the vectors.
16900 SDValue InsertVal0 = InsertVal.getOperand(0);
16901 int ElementOffset = -1;
16902
16903 // We explore the inputs of the shuffle in order to see if we find the
16904 // source of the extract_vector_elt. If so, we can use it to modify the
16905 // shuffle rather than perform an insert_vector_elt.
16906 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
16907 ArgWorkList.emplace_back(Mask.size(), Y);
16908 ArgWorkList.emplace_back(0, X);
16909
16910 while (!ArgWorkList.empty()) {
16911 int ArgOffset;
16912 SDValue ArgVal;
16913 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
16914
16915 if (ArgVal == InsertVal0) {
16916 ElementOffset = ArgOffset;
16917 break;
16918 }
16919
16920 // Peek through concat_vector.
16921 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
16922 int CurrentArgOffset =
16923 ArgOffset + ArgVal.getValueType().getVectorNumElements();
16924 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
16925 for (SDValue Op : reverse(ArgVal->ops())) {
16926 CurrentArgOffset -= Step;
16927 ArgWorkList.emplace_back(CurrentArgOffset, Op);
16928 }
16929
16930 // Make sure we went through all the elements and did not screw up index
16931 // computation.
16932 assert(CurrentArgOffset == ArgOffset);
16933 }
16934 }
16935
16936 if (ElementOffset != -1) {
16937 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
16938
16939 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
16940 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
16941 assert(NewMask[InsIndex] <
16942 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
16943 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
16944
16945 SDValue LegalShuffle =
16946 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
16947 Y, NewMask, DAG);
16948 if (LegalShuffle)
16949 return LegalShuffle;
16950 }
16951 }
16952
16953 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
16954 // bitcast(shuffle (bitcast V), (extended X), Mask)
16955 // Note: We do not use an insert_subvector node because that requires a
16956 // legal subvector type.
16957 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16958 !InsertVal.getOperand(0).getValueType().isVector())
16959 return SDValue();
16960
16961 SDValue SubVec = InsertVal.getOperand(0);
16962 SDValue DestVec = N->getOperand(0);
16963 EVT SubVecVT = SubVec.getValueType();
16964 EVT VT = DestVec.getValueType();
16965 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16966 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16967 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16968
16969 // Step 1: Create a shuffle mask that implements this insert operation. The
16970 // vector that we are inserting into will be operand 0 of the shuffle, so
16971 // those elements are just 'i'. The inserted subvector is in the first
16972 // positions of operand 1 of the shuffle. Example:
16973 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16974 SmallVector<int, 16> Mask(NumMaskVals);
16975 for (unsigned i = 0; i != NumMaskVals; ++i) {
16976 if (i / NumSrcElts == InsIndex)
16977 Mask[i] = (i % NumSrcElts) + NumMaskVals;
16978 else
16979 Mask[i] = i;
16980 }
16981
16982 // Bail out if the target cannot handle the shuffle we want to create.
16983 EVT SubVecEltVT = SubVecVT.getVectorElementType();
16984 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16985 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16986 return SDValue();
16987
16988 // Step 2: Create a wide vector from the inserted source vector by appending
16989 // undefined elements. This is the same size as our destination vector.
16990 SDLoc DL(N);
16991 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16992 ConcatOps[0] = SubVec;
16993 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16994
16995 // Step 3: Shuffle in the padded subvector.
16996 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16997 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16998 AddToWorklist(PaddedSubV.getNode());
16999 AddToWorklist(DestVecBC.getNode());
17000 AddToWorklist(Shuf.getNode());
17001 return DAG.getBitcast(VT, Shuf);
17002}
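
// Editor's note: a minimal, self-contained sketch (guarded out of the build)
// of the Step 1 mask construction above, using the same example as the
// comment: insert v4i32 V, (v2i16 X), 2. All sizes are assumed values for
// illustration; this is not part of DAGCombiner.
#if 0
#include <cstdio>
#include <vector>
int main() {
  unsigned NumSrcElts = 2;   // v2i16 subvector
  unsigned ExtendRatio = 4;  // 128-bit destination / 32-bit subvector
  unsigned InsIndex = 2;     // insertion position in v4i32
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
  std::vector<int> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i)
    Mask[i] = (i / NumSrcElts == InsIndex) ? int(i % NumSrcElts + NumMaskVals)
                                           : int(i);
  for (int M : Mask)
    std::printf("%d ", M); // prints: 0 1 2 3 8 9 6 7
  return 0;
}
#endif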
17003
17004SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
17005 SDValue InVec = N->getOperand(0);
17006 SDValue InVal = N->getOperand(1);
17007 SDValue EltNo = N->getOperand(2);
17008 SDLoc DL(N);
17009
17010 EVT VT = InVec.getValueType();
17011 unsigned NumElts = VT.getVectorNumElements();
17012 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17013
17014 // Inserting into an out-of-bounds element is undefined.
17015 if (IndexC && IndexC->getZExtValue() >= VT.getVectorNumElements())
17016 return DAG.getUNDEF(VT);
17017
17018 // Remove redundant insertions:
17019 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
17020 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17021 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
17022 return InVec;
17023
17024 if (!IndexC) {
17025 // If this is a variable insert into an undef vector, it might be better to splat:
17026 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
17027 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
17028 SmallVector<SDValue, 8> Ops(NumElts, InVal);
17029 return DAG.getBuildVector(VT, DL, Ops);
17030 }
17031 return SDValue();
17032 }
17033
17034 // We must know which element is being inserted for folds below here.
17035 unsigned Elt = IndexC->getZExtValue();
17036 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
17037 return Shuf;
17038
17039 // Canonicalize insert_vector_elt dag nodes.
17040 // Example:
17041 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
17042 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
17043 //
17044 // Do this only if the child insert_vector node has one use; also
17045 // do this only if indices are both constants and Idx1 < Idx0.
17046 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
17047 && isa<ConstantSDNode>(InVec.getOperand(2))) {
17048 unsigned OtherElt = InVec.getConstantOperandVal(2);
17049 if (Elt < OtherElt) {
17050 // Swap nodes.
17051 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17052 InVec.getOperand(0), InVal, EltNo);
17053 AddToWorklist(NewOp.getNode());
17054 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
17055 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
17056 }
17057 }
17058
17059 // If we can't generate a legal BUILD_VECTOR, exit
17060 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
17061 return SDValue();
17062
17063 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
17064 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
17065 // vector elements.
17066 SmallVector<SDValue, 8> Ops;
17067 // Do not combine these two vectors if the output vector will not replace
17068 // the input vector.
17069 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
17070 Ops.append(InVec.getNode()->op_begin(),
17071 InVec.getNode()->op_end());
17072 } else if (InVec.isUndef()) {
17073 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
17074 } else {
17075 return SDValue();
17076 }
17077 assert(Ops.size() == NumElts && "Unexpected vector size");
17078
17079 // Insert the element
17080 if (Elt < Ops.size()) {
17081 // All the operands of BUILD_VECTOR must have the same type;
17082 // we enforce that here.
17083 EVT OpVT = Ops[0].getValueType();
17084 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
17085 }
17086
17087 // Return the new vector
17088 return DAG.getBuildVector(VT, DL, Ops);
17089}
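
// Editor's note (illustration with assumed indices): the canonicalization
// above reorders a chain of constant-index inserts so the smaller index is
// innermost, e.g.
//   (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
//   --> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3)
// which fires only when the inner insert has one use and 1 < 3.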
17090
17091SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
17092 SDValue EltNo,
17093 LoadSDNode *OriginalLoad) {
17094 assert(OriginalLoad->isSimple());
17095
17096 EVT ResultVT = EVE->getValueType(0);
17097 EVT VecEltVT = InVecVT.getVectorElementType();
17098 unsigned Align = OriginalLoad->getAlignment();
17099 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
17100 VecEltVT.getTypeForEVT(*DAG.getContext()));
17101
17102 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
17103 return SDValue();
17104
17105 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
17106 ISD::NON_EXTLOAD : ISD::EXTLOAD;
17107 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
17108 return SDValue();
17109
17110 Align = NewAlign;
17111
17112 SDValue NewPtr = OriginalLoad->getBasePtr();
17113 SDValue Offset;
17114 EVT PtrType = NewPtr.getValueType();
17115 MachinePointerInfo MPI;
17116 SDLoc DL(EVE);
17117 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
17118 int Elt = ConstEltNo->getZExtValue();
17119 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
17120 Offset = DAG.getConstant(PtrOff, DL, PtrType);
17121 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
17122 } else {
17123 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
17124 Offset = DAG.getNode(
17125 ISD::MUL, DL, PtrType, Offset,
17126 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
17127 // Discard the pointer info except the address space because the memory
17128 // operand can't represent this new access since the offset is variable.
17129 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
17130 }
17131 NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
17132
17133 // The replacement we need to do here is a little tricky: we need to
17134 // replace an extractelement of a load with a load.
17135 // Use ReplaceAllUsesOfValuesWith to do the replacement.
17136 // Note that this replacement assumes that the extractelement is the only
17137 // use of the load; that's okay because we don't want to perform this
17138 // transformation in other cases anyway.
17139 SDValue Load;
17140 SDValue Chain;
17141 if (ResultVT.bitsGT(VecEltVT)) {
17142 // If the result type of vextract is wider than the load, then issue an
17143 // extending load instead.
17144 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
17145 VecEltVT)
17146 ? ISD::ZEXTLOAD
17147 : ISD::EXTLOAD;
17148 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
17149 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
17150 Align, OriginalLoad->getMemOperand()->getFlags(),
17151 OriginalLoad->getAAInfo());
17152 Chain = Load.getValue(1);
17153 } else {
17154 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
17155 MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
17156 OriginalLoad->getAAInfo());
17157 Chain = Load.getValue(1);
17158 if (ResultVT.bitsLT(VecEltVT))
17159 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
17160 else
17161 Load = DAG.getBitcast(ResultVT, Load);
17162 }
17163 WorklistRemover DeadNodes(*this);
17164 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
17165 SDValue To[] = { Load, Chain };
17166 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
17167 // Make sure to revisit this node to clean it up; it will usually be dead.
17168 AddToWorklist(EVE);
17169 // Since we're explicitly calling ReplaceAllUses, add the new node to the
17170 // worklist explicitly as well.
17171 AddToWorklistWithUsers(Load.getNode());
17172 ++OpsNarrowed;
17173 return SDValue(EVE, 0);
17174}
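
// Editor's note (illustration, assumed values): for a constant index the
// scalarization above turns
//   extract_vector_elt (load v4f32, $addr), 2
// into a narrow load of $addr + 8, since PtrOff = 32 bits * 2 / 8 = 8 bytes,
// and all users of the original load's chain are rewired to the new load.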
17175
17176/// Transform a vector binary operation into a scalar binary operation by moving
17177/// the math/logic after an extract element of a vector.
17178static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17179 bool LegalOperations) {
17180 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17181 SDValue Vec = ExtElt->getOperand(0);
17182 SDValue Index = ExtElt->getOperand(1);
17183 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17184 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17185 Vec.getNode()->getNumValues() != 1)
17186 return SDValue();
17187
17188 // Targets may want to avoid this to prevent an expensive register transfer.
17189 if (!TLI.shouldScalarizeBinop(Vec))
17190 return SDValue();
17191
17192 // Extracting an element of a vector constant is constant-folded, so this
17193 // transform is just replacing a vector op with a scalar op while moving the
17194 // extract.
17195 SDValue Op0 = Vec.getOperand(0);
17196 SDValue Op1 = Vec.getOperand(1);
17197 if (isAnyConstantBuildVector(Op0, true) ||
17198 isAnyConstantBuildVector(Op1, true)) {
17199 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17200 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17201 SDLoc DL(ExtElt);
17202 EVT VT = ExtElt->getValueType(0);
17203 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17204 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17205 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
17206 }
17207
17208 return SDValue();
17209}
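
// Editor's note (illustration, assumed operands): with a constant-index
// extract of a one-use binop whose other operand is a constant build vector,
//   extractelt (add X, <1,2,3,4>), 2  -->  add (extractelt X, 2), 3
// where the scalar constant 3 results from constant-folding
// (extractelt <1,2,3,4>, 2), as the comment above describes.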
17210
17211SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
17212 SDValue VecOp = N->getOperand(0);
17213 SDValue Index = N->getOperand(1);
17214 EVT ScalarVT = N->getValueType(0);
17215 EVT VecVT = VecOp.getValueType();
17216 if (VecOp.isUndef())
17217 return DAG.getUNDEF(ScalarVT);
17218
17219 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
17220 //
17221 // This only really matters if the index is non-constant since other combines
17222 // on the constant elements already work.
17223 SDLoc DL(N);
17224 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
17225 Index == VecOp.getOperand(2)) {
17226 SDValue Elt = VecOp.getOperand(1);
17227 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
17228 }
17229
17230 // (vextract (scalar_to_vector val), 0) -> val
17231 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17232 // Only 0'th element of SCALAR_TO_VECTOR is defined.
17233 if (DAG.isKnownNeverZero(Index))
17234 return DAG.getUNDEF(ScalarVT);
17235
17236 // Check if the result type doesn't match the inserted element type. A
17237 // SCALAR_TO_VECTOR may truncate the inserted element and the
17238 // EXTRACT_VECTOR_ELT may widen the extracted vector.
17239 SDValue InOp = VecOp.getOperand(0);
17240 if (InOp.getValueType() != ScalarVT) {
17241 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17242 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17243 }
17244 return InOp;
17245 }
17246
17247 // extract_vector_elt of out-of-bounds element -> UNDEF
17248 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17249 unsigned NumElts = VecVT.getVectorNumElements();
17250 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
17251 if (IndexC && IndexC->getAPIntValue().uge(NumElts))
17252 return DAG.getUNDEF(ScalarVT);
17253
17254 // extract_vector_elt (build_vector x, y), 1 -> y
17255 if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
17256 TLI.isTypeLegal(VecVT) &&
17257 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
17258 SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
17259 EVT InEltVT = Elt.getValueType();
17260
17261 // Sometimes build_vector's scalar input types do not match result type.
17262 if (ScalarVT == InEltVT)
17263 return Elt;
17264
17265 // TODO: It may be useful to truncate if it is free and the build_vector
17266 // implicitly converts.
17267 }
17268
17269 // TODO: These transforms should not require the 'hasOneUse' restriction, but
17270 // there are regressions on multiple targets without it. We can end up with a
17271 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
17272 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
17273 VecOp.hasOneUse()) {
17274 // The vector index of the LSBs of the source depends on the endianness.
17275 bool IsLE = DAG.getDataLayout().isLittleEndian();
17276 unsigned ExtractIndex = IndexC->getZExtValue();
17277 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
17278 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
17279 SDValue BCSrc = VecOp.getOperand(0);
17280 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
17281 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
17282
17283 if (LegalTypes && BCSrc.getValueType().isInteger() &&
17284 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17285 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
17286 // trunc i64 X to i32
17287 SDValue X = BCSrc.getOperand(0);
17288 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
17289 "Extract element and scalar to vector can't change element type "
17290 "from FP to integer.");
17291 unsigned XBitWidth = X.getValueSizeInBits();
17292 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
17293
17294 // An extract element return value type can be wider than its vector
17295 // operand element type. In that case, the high bits are undefined, so
17296 // it's possible that we may need to extend rather than truncate.
17297 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
17298 assert(XBitWidth % VecEltBitWidth == 0 &&
17299 "Scalar bitwidth must be a multiple of vector element bitwidth");
17300 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
17301 }
17302 }
17303 }
17304
17305 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
17306 return BO;
17307
17308 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
17309 // We only perform this optimization before the op legalization phase because
17310 // we may introduce new vector instructions which are not backed by TD
17311 // patterns (for example, on AVX, extracting elements from a wide vector
17312 // without using extract_subvector). However, if we can find an underlying
17313 // scalar value, then we can always use that.
17314 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
17315 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
17316 // Find the new index to extract from.
17317 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
17318
17319 // Extracting an undef index is undef.
17320 if (OrigElt == -1)
17321 return DAG.getUNDEF(ScalarVT);
17322
17323 // Select the right vector half to extract from.
17324 SDValue SVInVec;
17325 if (OrigElt < (int)NumElts) {
17326 SVInVec = VecOp.getOperand(0);
17327 } else {
17328 SVInVec = VecOp.getOperand(1);
17329 OrigElt -= NumElts;
17330 }
17331
17332 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
17333 SDValue InOp = SVInVec.getOperand(OrigElt);
17334 if (InOp.getValueType() != ScalarVT) {
17335 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17336 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17337 }
17338
17339 return InOp;
17340 }
17341
17342 // FIXME: We should handle recursing on other vector shuffles and
17343 // scalar_to_vector here as well.
17344
17345 if (!LegalOperations ||
17346 // FIXME: Should really be just isOperationLegalOrCustom.
17347 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
17348 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
17349 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
17350 DAG.getVectorIdxConstant(OrigElt, DL));
17351 }
17352 }
17353
17354 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
17355 // simplify it based on the (valid) extraction indices.
17356 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
17357 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17358 Use->getOperand(0) == VecOp &&
17359 isa<ConstantSDNode>(Use->getOperand(1));
17360 })) {
17361 APInt DemandedElts = APInt::getNullValue(NumElts);
17362 for (SDNode *Use : VecOp->uses()) {
17363 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
17364 if (CstElt->getAPIntValue().ult(NumElts))
17365 DemandedElts.setBit(CstElt->getZExtValue());
17366 }
17367 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
17368 // We simplified the vector operand of this extract element. If this
17369 // extract is not dead, visit it again so it is folded properly.
17370 if (N->getOpcode() != ISD::DELETED_NODE)
17371 AddToWorklist(N);
17372 return SDValue(N, 0);
17373 }
17374 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
17375 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
17376 // We simplified the vector operand of this extract element. If this
17377 // extract is not dead, visit it again so it is folded properly.
17378 if (N->getOpcode() != ISD::DELETED_NODE)
17379 AddToWorklist(N);
17380 return SDValue(N, 0);
17381 }
17382 }
17383
17384 // Everything under here is trying to match an extract of a loaded value.
17385 // If the result of the load has to be truncated, then it's not necessarily
17386 // profitable.
17387 bool BCNumEltsChanged = false;
17388 EVT ExtVT = VecVT.getVectorElementType();
17389 EVT LVT = ExtVT;
17390 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
17391 return SDValue();
17392
17393 if (VecOp.getOpcode() == ISD::BITCAST) {
17394 // Don't duplicate a load with other uses.
17395 if (!VecOp.hasOneUse())
17396 return SDValue();
17397
17398 EVT BCVT = VecOp.getOperand(0).getValueType();
17399 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
17400 return SDValue();
17401 if (NumElts != BCVT.getVectorNumElements())
17402 BCNumEltsChanged = true;
17403 VecOp = VecOp.getOperand(0);
17404 ExtVT = BCVT.getVectorElementType();
17405 }
17406
17407 // extract (vector load $addr), i --> load $addr + i * size
17408 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
17409 ISD::isNormalLoad(VecOp.getNode()) &&
17410 !Index->hasPredecessor(VecOp.getNode())) {
17411 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
17412 if (VecLoad && VecLoad->isSimple())
17413 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
17414 }
17415
17416 // Perform only after legalization to ensure build_vector / vector_shuffle
17417 // optimizations have already been done.
17418 if (!LegalOperations || !IndexC)
17419 return SDValue();
17420
17421 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
17422 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
17423 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
17424 int Elt = IndexC->getZExtValue();
17425 LoadSDNode *LN0 = nullptr;
17426 if (ISD::isNormalLoad(VecOp.getNode())) {
17427 LN0 = cast<LoadSDNode>(VecOp);
17428 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17429 VecOp.getOperand(0).getValueType() == ExtVT &&
17430 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17431 // Don't duplicate a load with other uses.
17432 if (!VecOp.hasOneUse())
17433 return SDValue();
17434
17435 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17436 }
17437 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17438 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17439 // =>
17440 // (load $addr+1*size)
17441
17442 // Don't duplicate a load with other uses.
17443 if (!VecOp.hasOneUse())
17444 return SDValue();
17445
17446 // If the bit convert changed the number of elements, it is unsafe
17447 // to examine the mask.
17448 if (BCNumEltsChanged)
17449 return SDValue();
17450
17451 // Select the input vector, guarding against an out-of-range extract index.
17452 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17453 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17454
17455 if (VecOp.getOpcode() == ISD::BITCAST) {
17456 // Don't duplicate a load with other uses.
17457 if (!VecOp.hasOneUse())
17458 return SDValue();
17459
17460 VecOp = VecOp.getOperand(0);
17461 }
17462 if (ISD::isNormalLoad(VecOp.getNode())) {
17463 LN0 = cast<LoadSDNode>(VecOp);
17464 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17465 Index = DAG.getConstant(Elt, DL, Index.getValueType());
17466 }
17467 }
17468
17469 // Make sure we found a simple (non-volatile, non-atomic) load and that
17470 // the extractelement is its only use.
17471 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
17472 return SDValue();
17473
17474 // If Idx was -1 above, Elt is going to be -1, so just return undef.
17475 if (Elt == -1)
17476 return DAG.getUNDEF(LVT);
17477
17478 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17479}
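
// Editor's note (illustration, assumed values): the shuffle case above allows
//   extract_vector_elt (vector_shuffle<1,u,u,u> (load v4f32 $addr), V2), 0
// to become a scalar f32 load of $addr + 4: mask element 0 selects lane 1 of
// the loaded vector, so Elt is remapped to 1 before scalarizing the load.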
17480
17481// Simplify (build_vec (ext )) to (bitcast (build_vec ))
17482SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17483 // We perform this optimization post type-legalization because
17484 // the type-legalizer often scalarizes integer-promoted vectors.
17485 // Performing it earlier may create bit-casts which
17486 // will be type-legalized into complex code sequences.
17487 // We perform this optimization only before the operation legalizer because we
17488 // may introduce illegal operations.
17489 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
17490 return SDValue();
17491
17492 unsigned NumInScalars = N->getNumOperands();
17493 SDLoc DL(N);
17494 EVT VT = N->getValueType(0);
17495
17496 // Check to see if this is a BUILD_VECTOR of a bunch of values
17497 // which come from any_extend or zero_extend nodes. If so, we can create
17498 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17499 // optimizations. We do not handle sign-extend because we can't fill the sign
17500 // using shuffles.
17501 EVT SourceType = MVT::Other;
17502 bool AllAnyExt = true;
17503
17504 for (unsigned i = 0; i != NumInScalars; ++i) {
17505 SDValue In = N->getOperand(i);
17506 // Ignore undef inputs.
17507 if (In.isUndef()) continue;
17508
17509 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
17510 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17511
17512 // Abort if the element is not an extension.
17513 if (!ZeroExt && !AnyExt) {
17514 SourceType = MVT::Other;
17515 break;
17516 }
17517
17518 // The input is a ZeroExt or AnyExt. Check the original type.
17519 EVT InTy = In.getOperand(0).getValueType();
17520
17521 // Check that all of the widened source types are the same.
17522 if (SourceType == MVT::Other)
17523 // First time.
17524 SourceType = InTy;
17525 else if (InTy != SourceType) {
17526 // Multiple incoming types. Abort.
17527 SourceType = MVT::Other;
17528 break;
17529 }
17530
17531 // Check if all of the extends are ANY_EXTENDs.
17532 AllAnyExt &= AnyExt;
17533 }
17534
17535 // In order to have valid types, all of the inputs must be extended from the
17536 // same source type and all of the inputs must be any or zero extend.
17537 // Scalar sizes must be a power of two.
17538 EVT OutScalarTy = VT.getScalarType();
17539 bool ValidTypes = SourceType != MVT::Other &&
17540 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
17541 isPowerOf2_32(SourceType.getSizeInBits());
17542
17543 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17544 // turn into a single shuffle instruction.
17545 if (!ValidTypes)
17546 return SDValue();
17547
17548 bool isLE = DAG.getDataLayout().isLittleEndian();
17549 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17550 assert(ElemRatio > 1 && "Invalid element size ratio");
17551 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
17552 DAG.getConstant(0, DL, SourceType);
17553
17554 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17555 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17556
17557 // Populate the new build_vector
17558 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17559 SDValue Cast = N->getOperand(i);
17560 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17561 Cast.getOpcode() == ISD::ZERO_EXTEND ||
17562 Cast.isUndef()) && "Invalid cast opcode");
17563 SDValue In;
17564 if (Cast.isUndef())
17565 In = DAG.getUNDEF(SourceType);
17566 else
17567 In = Cast->getOperand(0);
17568 unsigned Index = isLE ? (i * ElemRatio) :
17569 (i * ElemRatio + (ElemRatio - 1));
17570
17571 assert(Index < Ops.size() && "Invalid index");
17572 Ops[Index] = In;
17573 }
17574
17575 // The type of the new BUILD_VECTOR node.
17576 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17577 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17578 "Invalid vector size");
17579 // Check if the new vector type is legal.
17580 if (!isTypeLegal(VecVT) ||
17581 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17582 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
17583 return SDValue();
17584
17585 // Make the new BUILD_VECTOR.
17586 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17587
17588 // The new BUILD_VECTOR node has the potential to be further optimized.
17589 AddToWorklist(BV.getNode());
17590 // Bitcast to the desired type.
17591 return DAG.getBitcast(VT, BV);
17592}
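
// Editor's note (illustration, assumed little-endian target and types): the
// transform above rewrites
//   v4i16 build_vector (zext i8 a), (zext i8 b), (zext i8 c), (zext i8 d)
// as
//   bitcast (v8i8 build_vector a, 0, b, 0, c, 0, d, 0) to v4i16
// with ElemRatio = 2 and the zero filler chosen because the inputs are
// ZERO_EXTENDs; all-ANY_EXTEND inputs would use an undef filler instead.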
17593
17594// Simplify (build_vec (trunc $1)
17595// (trunc (srl $1 half-width))
17596// (trunc (srl $1 (2 * half-width))) …)
17597// to (bitcast $1)
17598SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
17599 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17600
17601 // Only for little-endian targets.
17602 if (!DAG.getDataLayout().isLittleEndian())
17603 return SDValue();
17604
17605 SDLoc DL(N);
17606 EVT VT = N->getValueType(0);
17607 EVT OutScalarTy = VT.getScalarType();
17608 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
17609
17610 // Only handle power-of-two scalar types so the bitcast is well-defined.
17611 if (!isPowerOf2_64(ScalarTypeBitsize))
17612 return SDValue();
17613
17614 unsigned NumInScalars = N->getNumOperands();
17615
17616 // Look through bitcasts
17617 auto PeekThroughBitcast = [](SDValue Op) {
17618 if (Op.getOpcode() == ISD::BITCAST)
17619 return Op.getOperand(0);
17620 return Op;
17621 };
17622
17623 // The source value from which all the parts are extracted.
17624 SDValue Src;
17625 for (unsigned i = 0; i != NumInScalars; ++i) {
17626 SDValue In = PeekThroughBitcast(N->getOperand(i));
17627 // Ignore undef inputs.
17628 if (In.isUndef()) continue;
17629
17630 if (In.getOpcode() != ISD::TRUNCATE)
17631 return SDValue();
17632
17633 In = PeekThroughBitcast(In.getOperand(0));
17634
17635 if (In.getOpcode() != ISD::SRL) {
17636 // For now only the first element may be unshifted (no shuffling);
17637 // shifts of the first element may be handled here in the future.
17638 if (i != 0)
17639 return SDValue();
17640
17641 Src = In;
17642 } else {
17643 // In is SRL
17644 SDValue part = PeekThroughBitcast(In.getOperand(0));
17645
17646 if (!Src) {
17647 Src = part;
17648 } else if (Src != part) {
17649 // Vector parts do not stem from the same variable
17650 return SDValue();
17651 }
17652
17653 SDValue ShiftAmtVal = In.getOperand(1);
17654 if (!isa<ConstantSDNode>(ShiftAmtVal))
17655 return SDValue();
17656
17657 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
17658
17659 // The value is not extracted from the position matching its lane.
17660 if (ShiftAmt != i * ScalarTypeBitsize)
17661 return SDValue();
17662 }
17663 }
17664
17665 // Only cast if the size is the same
17666 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
17667 return SDValue();
17668
17669 return DAG.getBitcast(VT, Src);
17670}
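
// Editor's note (illustration, assumed little-endian target): the pattern in
// the header comment matches, e.g.,
//   v4i16 build_vector (trunc i64 X), (trunc (srl X, 16)),
//                      (trunc (srl X, 32)), (trunc (srl X, 48))
// and folds it to (bitcast i64 X to v4i16), since each element i is extracted
// at shift amount i * ScalarTypeBitsize and the total sizes match.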
17671
17672SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
17673 ArrayRef<int> VectorMask,
17674 SDValue VecIn1, SDValue VecIn2,
17675 unsigned LeftIdx, bool DidSplitVec) {
17676 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
17677
17678 EVT VT = N->getValueType(0);
17679 EVT InVT1 = VecIn1.getValueType();
17680 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
17681
17682 unsigned NumElems = VT.getVectorNumElements();
17683 unsigned ShuffleNumElems = NumElems;
17684
17685 // If we artificially split a vector in two already, then the offsets in the
17686 // operands will all be based off of VecIn1, even those in VecIn2.
17687 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
17688
17689 // We can't generate a shuffle node with mismatched input and output types.
17690 // Try to make the types match the type of the output.
17691 if (InVT1 != VT || InVT2 != VT) {
17692 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
17693 // If the output vector length is a multiple of both input lengths,
17694 // we can concatenate them and pad the rest with undefs.
17695 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
17696 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
17697 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
17698 ConcatOps[0] = VecIn1;
17699 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
17700 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17701 VecIn2 = SDValue();
17702 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
17703 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
17704 return SDValue();
17705
17706 if (!VecIn2.getNode()) {
17707 // If we only have one input vector, and it's twice the size of the
17708 // output, split it in two.
17709 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
17710 DAG.getVectorIdxConstant(NumElems, DL));
17711 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
17712 // Since we now have shorter input vectors, adjust the offset of the
17713 // second vector's start.
17714 Vec2Offset = NumElems;
17715 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
17716 // VecIn1 is wider than the output, and we have another, possibly
17717 // smaller input. Pad the smaller input with undefs, shuffle at the
17718 // input vector width, and extract the output.
17719 // The shuffle type is different than VT, so check legality again.
17720 if (LegalOperations &&
17721 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
17722 return SDValue();
17723
17724 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
17725 // lower it back into a BUILD_VECTOR. So if the inserted type is
17726 // illegal, don't even try.
17727 if (InVT1 != InVT2) {
17728 if (!TLI.isTypeLegal(InVT2))
17729 return SDValue();
17730 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
17731 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
17732 }
17733 ShuffleNumElems = NumElems * 2;
17734 } else {
17735 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
17736 // than VecIn1. We can't handle this for now - this case will disappear
17737 // when we start sorting the vectors by type.
17738 return SDValue();
17739 }
17740 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
17741 InVT1.getSizeInBits() == VT.getSizeInBits()) {
17742 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
17743 ConcatOps[0] = VecIn2;
17744 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17745 } else {
17746 // TODO: Support cases where the length mismatch isn't exactly by a
17747 // factor of 2.
17748 // TODO: Move this check upwards, so that if we have bad type
17749 // mismatches, we don't create any DAG nodes.
17750 return SDValue();
17751 }
17752 }
17753
17754 // Initialize mask to undef.
17755 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
17756
17757 // Only need to run up to the number of elements actually used, not the
17758 // total number of elements in the shuffle - if we are shuffling a wider
17759 // vector, the high lanes should be set to undef.
17760 for (unsigned i = 0; i != NumElems; ++i) {
17761 if (VectorMask[i] <= 0)
17762 continue;
17763
17764 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
17765 if (VectorMask[i] == (int)LeftIdx) {
17766 Mask[i] = ExtIndex;
17767 } else if (VectorMask[i] == (int)LeftIdx + 1) {
17768 Mask[i] = Vec2Offset + ExtIndex;
17769 }
17770 }
17771
17772 // The types of the input vectors may have changed above.
17773 InVT1 = VecIn1.getValueType();
17774
17775 // If we already have a VecIn2, it should have the same type as VecIn1.
17776 // If we don't, get an undef/zero vector of the appropriate type.
17777 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
17778 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
17779
17780 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
17781 if (ShuffleNumElems > NumElems)
17782 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
17783
17784 return Shuffle;
17785}
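
// Editor's note (illustration, assumed types): if the output is v4f32 and
// both inputs are v2f32, the first branch above builds
//   VecIn1' = concat_vectors (v2f32 VecIn1), (v2f32 VecIn2)
// padded with undefs as needed, so the shuffle operates at the output width;
// conversely, a single v8f32 input twice as wide as the output is split into
// two v4f32 halves via extract_subvector.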
17786
17787static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
17788 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17789
17790 // First, determine where the build vector is not undef.
17791 // TODO: We could extend this to handle zero elements as well as undefs.
17792 int NumBVOps = BV->getNumOperands();
17793 int ZextElt = -1;
17794 for (int i = 0; i != NumBVOps; ++i) {
17795 SDValue Op = BV->getOperand(i);
17796 if (Op.isUndef())
17797 continue;
17798 if (ZextElt == -1)
17799 ZextElt = i;
17800 else
17801 return SDValue();
17802 }
17803 // Bail out if there's no non-undef element.
17804 if (ZextElt == -1)
17805 return SDValue();
17806
17807 // The build vector contains some number of undef elements and exactly
17808 // one other element. That other element must be a zero-extended scalar
17809 // extracted from a vector at a constant index to turn this into a shuffle.
17810 // Also, require that the build vector does not implicitly truncate/extend
17811 // its elements.
17812 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17813 EVT VT = BV->getValueType(0);
17814 SDValue Zext = BV->getOperand(ZextElt);
17815 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17816 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17817 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17818 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
17819 return SDValue();
17820
17821 // The zero-extend width must be a multiple of the source size, and we must be
17822 // building a vector of the same size as the source of the extract element.
17823 SDValue Extract = Zext.getOperand(0);
17824 unsigned DestSize = Zext.getValueSizeInBits();
17825 unsigned SrcSize = Extract.getValueSizeInBits();
17826 if (DestSize % SrcSize != 0 ||
17827 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17828 return SDValue();
17829
17830 // Create a shuffle mask that will combine the extracted element with zeros
17831 // and undefs.
17832 int ZextRatio = DestSize / SrcSize;
17833 int NumMaskElts = NumBVOps * ZextRatio;
17834 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17835 for (int i = 0; i != NumMaskElts; ++i) {
17836 if (i / ZextRatio == ZextElt) {
17837 // The low bits of the (potentially translated) extracted element map to
17838 // the source vector. The high bits map to zero. We will use a zero vector
17839 // as the 2nd source operand of the shuffle, so use the 1st element of
17840 // that vector (mask value is number-of-elements) for the high bits.
17841 if (i % ZextRatio == 0)
17842 ShufMask[i] = Extract.getConstantOperandVal(1);
17843 else
17844 ShufMask[i] = NumMaskElts;
17845 }
17846
17847 // Undef elements of the build vector remain undef because we initialize
17848 // the shuffle mask with -1.
17849 }
17850
17851 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17852 // bitcast (shuffle V, ZeroVec, VectorMask)
17853 SDLoc DL(BV);
17854 EVT VecVT = Extract.getOperand(0).getValueType();
17855 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17856 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17857 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
17858 ZeroVec, ShufMask, DAG);
17859 if (!Shuf)
17860 return SDValue();
17861 return DAG.getBitcast(VT, Shuf);
17862}
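
// Editor's note (illustration, assumed values): for
//   v4i32 build_vector (zext (extractelt v8i16 V, 3) to i32), undef, undef, undef
// the code above computes ZextRatio = 2 and NumMaskElts = 8, producing
//   bitcast (vector_shuffle<3,8,u,u,u,u,u,u> V, zerovec) to v4i32
// where mask value 8 selects element 0 of the all-zero second operand.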
17863
17864// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17865// operations. If the types of the vectors we're extracting from allow it,
17866// turn this into a vector_shuffle node.
17867SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17868 SDLoc DL(N);
17869 EVT VT = N->getValueType(0);
17870
17871 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17872 if (!isTypeLegal(VT))
17873 return SDValue();
17874
17875 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
17876 return V;
17877
17878 // May only combine to shuffle after legalize if shuffle is legal.
17879 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
17880 return SDValue();
17881
17882 bool UsesZeroVector = false;
17883 unsigned NumElems = N->getNumOperands();
17884
17885 // Record, for each element of the newly built vector, which input vector
17886 // that element comes from. -1 stands for undef, 0 for the zero vector,
17887 // and positive values for the input vectors.
17888 // VectorMask maps each element to its vector number, and VecIn maps vector
17889 // numbers to their initial SDValues.
17890
17891 SmallVector<int, 8> VectorMask(NumElems, -1);
17892 SmallVector<SDValue, 8> VecIn;
17893 VecIn.push_back(SDValue());
17894
17895 for (unsigned i = 0; i != NumElems; ++i) {
17896 SDValue Op = N->getOperand(i);
17897
17898 if (Op.isUndef())
17899 continue;
17900
17901 // See if we can use a blend with a zero vector.
17902 // TODO: Should we generalize this to a blend with an arbitrary constant
17903 // vector?
17904 if (isNullConstant(Op) || isNullFPConstant(Op)) {
17905 UsesZeroVector = true;
17906 VectorMask[i] = 0;
17907 continue;
17908 }
17909
17910 // Not an undef or zero. If the input is something other than an
17911 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17912 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17913 !isa<ConstantSDNode>(Op.getOperand(1)))
17914 return SDValue();
17915 SDValue ExtractedFromVec = Op.getOperand(0);
17916
17917 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17918 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17919 return SDValue();
17920
17921 // All inputs must have the same element type as the output.
17922 if (VT.getVectorElementType() !=
17923 ExtractedFromVec.getValueType().getVectorElementType())
17924 return SDValue();
17925
17926 // Have we seen this input vector before?
17927 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17928 // a map back from SDValues to numbers isn't worth it.
17929 unsigned Idx = std::distance(
17930 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17931 if (Idx == VecIn.size())
17932 VecIn.push_back(ExtractedFromVec);
17933
17934 VectorMask[i] = Idx;
17935 }
17936
17937 // If we didn't find at least one input vector, bail out.
17938 if (VecIn.size() < 2)
17939 return SDValue();
17940
17941 // If all the operands of the BUILD_VECTOR extract from the same
17942 // vector, then split that vector efficiently based on the maximum
17943 // vector access index and adjust the VectorMask and
17944 // VecIn accordingly.
17945 bool DidSplitVec = false;
17946 if (VecIn.size() == 2) {
17947 unsigned MaxIndex = 0;
17948 unsigned NearestPow2 = 0;
17949 SDValue Vec = VecIn.back();
17950 EVT InVT = Vec.getValueType();
17951 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17952
17953 for (unsigned i = 0; i < NumElems; i++) {
17954 if (VectorMask[i] <= 0)
17955 continue;
17956 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17957 IndexVec[i] = Index;
17958 MaxIndex = std::max(MaxIndex, Index);
17959 }
17960
17961 NearestPow2 = PowerOf2Ceil(MaxIndex);
17962 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
17963 NumElems * 2 < NearestPow2) {
17964 unsigned SplitSize = NearestPow2 / 2;
17965 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17966 InVT.getVectorElementType(), SplitSize);
17967 if (TLI.isTypeLegal(SplitVT)) {
17968 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17969 DAG.getVectorIdxConstant(SplitSize, DL));
17970 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17971 DAG.getVectorIdxConstant(0, DL));
17972 VecIn.pop_back();
17973 VecIn.push_back(VecIn1);
17974 VecIn.push_back(VecIn2);
17975 DidSplitVec = true;
17976
17977 for (unsigned i = 0; i < NumElems; i++) {
17978 if (VectorMask[i] <= 0)
17979 continue;
17980 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
17981 }
17982 }
17983 }
17984 }
17985
17986 // TODO: We want to sort the vectors by descending length, so that adjacent
17987 // pairs have similar length, and the longer vector is always first in the
17988 // pair.
17989
17990 // TODO: Should this fire if some of the input vectors have illegal types (like
17991 // it does now), or should we let legalization run its course first?
17992
17993 // Shuffle phase:
17994 // Take pairs of vectors, and shuffle them so that the result has elements
17995 // from these vectors in the correct places.
17996 // For example, given:
17997 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17998 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17999 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
18000 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
18001 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
18002 // We will generate:
18003 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
18004 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
18005 SmallVector<SDValue, 4> Shuffles;
18006 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
18007 unsigned LeftIdx = 2 * In + 1;
18008 SDValue VecLeft = VecIn[LeftIdx];
18009 SDValue VecRight =
18010 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
18011
18012 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
18013 VecRight, LeftIdx, DidSplitVec))
18014 Shuffles.push_back(Shuffle);
18015 else
18016 return SDValue();
18017 }
18018
18019 // If we need the zero vector as an "ingredient" in the blend tree, add it
18020 // to the list of shuffles.
18021 if (UsesZeroVector)
18022 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
18023 : DAG.getConstantFP(0.0, DL, VT));
18024
18025 // If we only have one shuffle, we're done.
18026 if (Shuffles.size() == 1)
18027 return Shuffles[0];
18028
18029 // Update the vector mask to point to the post-shuffle vectors.
18030 for (int &Vec : VectorMask)
18031 if (Vec == 0)
18032 Vec = Shuffles.size() - 1;
18033 else
18034 Vec = (Vec - 1) / 2;
18035
18036 // More than one shuffle. Generate a binary tree of blends, e.g. if from
18037 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
18038 // generate:
18039 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
18040 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
18041 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
18042 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
18043 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
18044 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
18045 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
18046
18047 // Make sure the initial size of the shuffle list is even.
18048 if (Shuffles.size() % 2)
18049 Shuffles.push_back(DAG.getUNDEF(VT));
18050
18051 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
18052 if (CurSize % 2) {
18053 Shuffles[CurSize] = DAG.getUNDEF(VT);
18054 CurSize++;
18055 }
18056 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
18057 int Left = 2 * In;
18058 int Right = 2 * In + 1;
18059 SmallVector<int, 8> Mask(NumElems, -1);
18060 for (unsigned i = 0; i != NumElems; ++i) {
18061 if (VectorMask[i] == Left) {
18062 Mask[i] = i;
18063 VectorMask[i] = In;
18064 } else if (VectorMask[i] == Right) {
18065 Mask[i] = i + NumElems;
18066 VectorMask[i] = In;
18067 }
18068 }
18069
18070 Shuffles[In] =
18071 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
18072 }
18073 }
18074 return Shuffles[0];
18075}
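
The blend tree above halves the number of live shuffles each iteration by pairing adjacent vectors. A minimal standalone sketch of the per-pair mask bookkeeping, with plain integers standing in for SDValues (illustrative only, not part of the analyzed file):

  #include <vector>

  // One blend step: pair vectors (2*In, 2*In+1), build a mask that pulls
  // each element from the correct side, and rename both sides to In.
  static std::vector<int> blendStepMask(std::vector<int> &VectorMask,
                                        unsigned In, unsigned NumElems) {
    int Left = 2 * In, Right = 2 * In + 1;
    std::vector<int> Mask(NumElems, -1);
    for (unsigned i = 0; i != NumElems; ++i) {
      if (VectorMask[i] == Left) {
        Mask[i] = i;                // lane i of the left input
        VectorMask[i] = In;
      } else if (VectorMask[i] == Right) {
        Mask[i] = i + NumElems;     // lane i of the right input
        VectorMask[i] = In;
      }
    }
    return Mask;
  }

With four post-shuffle vectors and NumElems = 8, two rounds of this step reproduce exactly the t20/t21/t30 masks shown in the comment above.
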
18076
18077// Try to turn a build vector of zero extends of extract vector elts into a
18078// vector zero extend and possibly an extract subvector.
18079// TODO: Support sign extend?
18080// TODO: Allow undef elements?
18081SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
18082 if (LegalOperations)
18083 return SDValue();
18084
18085 EVT VT = N->getValueType(0);
18086
18087 bool FoundZeroExtend = false;
18088 SDValue Op0 = N->getOperand(0);
18089 auto checkElem = [&](SDValue Op) -> int64_t {
18090 unsigned Opc = Op.getOpcode();
18091 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
18092 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
18093 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18094 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
18095 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
18096 return C->getZExtValue();
18097 return -1;
18098 };
18099
18100 // Make sure the first element matches
18101 // (zext (extract_vector_elt X, C))
18102 int64_t Offset = checkElem(Op0);
18103 if (Offset < 0)
18104 return SDValue();
18105
18106 unsigned NumElems = N->getNumOperands();
18107 SDValue In = Op0.getOperand(0).getOperand(0);
18108 EVT InSVT = In.getValueType().getScalarType();
18109 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
18110
18111 // Don't create an illegal input type after type legalization.
18112 if (LegalTypes && !TLI.isTypeLegal(InVT))
18113 return SDValue();
18114
18115 // Ensure all the elements come from the same vector and are adjacent.
18116 for (unsigned i = 1; i != NumElems; ++i) {
18117 if ((Offset + i) != checkElem(N->getOperand(i)))
18118 return SDValue();
18119 }
18120
18121 SDLoc DL(N);
18122 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
18123 Op0.getOperand(0).getOperand(1));
18124 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
18125 VT, In);
18126}
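
The loop above reduces to a simple contiguity test: element i of the build_vector must extract index Offset + i from one common source. A sketch of that test over plain indices (hypothetical helper, not in this file), where -1 marks an element that failed the (zext/aext (extract_vector_elt X, C)) match:

  #include <vector>

  static bool isContiguousFromOffset(const std::vector<long long> &Indices) {
    if (Indices.empty() || Indices[0] < 0)
      return false;  // first element must match the pattern
    for (size_t i = 1; i != Indices.size(); ++i)
      if (Indices[i] != Indices[0] + (long long)i)
        return false;  // not adjacent to the previous element
    return true;
  }
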
18127
18128SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
18129 EVT VT = N->getValueType(0);
18130
18131 // A vector built entirely of undefs is undef.
18132 if (ISD::allOperandsUndef(N))
18133 return DAG.getUNDEF(VT);
18134
18135 // If this is a splat of a bitcast from another vector, change to a
18136 // concat_vector.
18137 // For example:
18138 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
18139 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
18140 //
18141 // If X is a build_vector itself, the concat can become a larger build_vector.
18142 // TODO: Maybe this is useful for non-splat too?
18143 if (!LegalOperations) {
18144 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18145 Splat = peekThroughBitcasts(Splat);
18146 EVT SrcVT = Splat.getValueType();
18147 if (SrcVT.isVector()) {
18148 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
18149 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
18150 SrcVT.getVectorElementType(), NumElts);
18151 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
18152 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
18153 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
18154 NewVT, Ops);
18155 return DAG.getBitcast(VT, Concat);
18156 }
18157 }
18158 }
18159 }
18160
18161 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
18162 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
18163 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18164 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
18165 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
18166 }
18167
18168 // Check if we can express BUILD VECTOR via subvector extract.
18169 if (!LegalTypes && (N->getNumOperands() > 1)) {
18170 SDValue Op0 = N->getOperand(0);
18171 auto checkElem = [&](SDValue Op) -> uint64_t {
18172 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
18173 (Op0.getOperand(0) == Op.getOperand(0)))
18174 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
18175 return CNode->getZExtValue();
18176 return -1;
18177 };
18178
18179 int Offset = checkElem(Op0);
18180 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
18181 if (Offset + i != checkElem(N->getOperand(i))) {
18182 Offset = -1;
18183 break;
18184 }
18185 }
18186
18187 if ((Offset == 0) &&
18188 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
18189 return Op0.getOperand(0);
18190 if ((Offset != -1) &&
18191 ((Offset % N->getValueType(0).getVectorNumElements()) ==
18192 0)) // IDX must be multiple of output size.
18193 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
18194 Op0.getOperand(0), Op0.getOperand(1));
18195 }
18196
18197 if (SDValue V = convertBuildVecZextToZext(N))
18198 return V;
18199
18200 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
18201 return V;
18202
18203 if (SDValue V = reduceBuildVecTruncToBitCast(N))
18204 return V;
18205
18206 if (SDValue V = reduceBuildVecToShuffle(N))
18207 return V;
18208
18209 return SDValue();
18210}
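
The subvector-extract recognition above accepts either a whole-vector identity (Offset == 0 with matching types) or an offset that is a multiple of the result width. A small worked check of that index rule (assumed helper, for illustration only):

  #include <cassert>

  // W = number of elements in the build_vector result.
  static bool foldsToExtractSubvector(long long Offset, unsigned W) {
    return Offset >= 0 && (Offset % W) == 0;
  }

  int main() {
    assert(foldsToExtractSubvector(4, 4));   // extracts 4..7 -> fold
    assert(!foldsToExtractSubvector(2, 4));  // extracts 2..5 -> no fold
  }
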
18211
18212static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
18213 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18214 EVT OpVT = N->getOperand(0).getValueType();
18215
18216 // If the operands are legal vectors, leave them alone.
18217 if (TLI.isTypeLegal(OpVT))
18218 return SDValue();
18219
18220 SDLoc DL(N);
18221 EVT VT = N->getValueType(0);
18222 SmallVector<SDValue, 8> Ops;
18223
18224 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
18225 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18226
18227 // Keep track of what we encounter.
18228 bool AnyInteger = false;
18229 bool AnyFP = false;
18230 for (const SDValue &Op : N->ops()) {
18231 if (ISD::BITCAST == Op.getOpcode() &&
18232 !Op.getOperand(0).getValueType().isVector())
18233 Ops.push_back(Op.getOperand(0));
18234 else if (ISD::UNDEF == Op.getOpcode())
18235 Ops.push_back(ScalarUndef);
18236 else
18237 return SDValue();
18238
18239 // Note whether we encounter an integer or floating point scalar.
18240 // If it's neither, bail out, it could be something weird like x86mmx.
18241 EVT LastOpVT = Ops.back().getValueType();
18242 if (LastOpVT.isFloatingPoint())
18243 AnyFP = true;
18244 else if (LastOpVT.isInteger())
18245 AnyInteger = true;
18246 else
18247 return SDValue();
18248 }
18249
18250 // If any of the operands is a floating point scalar bitcast to a vector,
18251 // use floating point types throughout, and bitcast everything.
18252 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
18253 if (AnyFP) {
18254 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
18255 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18256 if (AnyInteger) {
18257 for (SDValue &Op : Ops) {
18258 if (Op.getValueType() == SVT)
18259 continue;
18260 if (Op.isUndef())
18261 Op = ScalarUndef;
18262 else
18263 Op = DAG.getBitcast(SVT, Op);
18264 }
18265 }
18266 }
18267
18268 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
18269 VT.getSizeInBits() / SVT.getSizeInBits());
18270 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
18271}
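
The replacement build_vector must tile the concat result exactly; its element count is derived from the bit sizes. A worked instance of that arithmetic (sketch, using an assumed v2i32/i64 example):

  #include <cassert>

  static unsigned newElemCount(unsigned VTBits, unsigned ScalarBits) {
    assert(VTBits % ScalarBits == 0 && "scalars must tile the result");
    return VTBits / ScalarBits;
  }

  int main() {
    // concat(v2i32 (bitcast i64 a), v2i32 (bitcast i64 b)) : VT = v4i32.
    // SVT = i64, so the fold builds a v2i64 and bitcasts it to v4i32.
    assert(newElemCount(/*VTBits=*/128, /*ScalarBits=*/64) == 2);
  }
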
18272
18273// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
18274// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
18275// most two distinct vectors the same size as the result, attempt to turn this
18276// into a legal shuffle.
18277static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
18278 EVT VT = N->getValueType(0);
18279 EVT OpVT = N->getOperand(0).getValueType();
18280 int NumElts = VT.getVectorNumElements();
18281 int NumOpElts = OpVT.getVectorNumElements();
18282
18283 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
18284 SmallVector<int, 8> Mask;
18285
18286 for (SDValue Op : N->ops()) {
18287 Op = peekThroughBitcasts(Op);
18288
18289 // UNDEF nodes convert to UNDEF shuffle mask values.
18290 if (Op.isUndef()) {
18291 Mask.append((unsigned)NumOpElts, -1);
18292 continue;
18293 }
18294
18295 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18296 return SDValue();
18297
18298 // What vector are we extracting the subvector from and at what index?
18299 SDValue ExtVec = Op.getOperand(0);
18300
18301 // We want the EVT of the original extraction to correctly scale the
18302 // extraction index.
18303 EVT ExtVT = ExtVec.getValueType();
18304 ExtVec = peekThroughBitcasts(ExtVec);
18305
18306 // UNDEF nodes convert to UNDEF shuffle mask values.
18307 if (ExtVec.isUndef()) {
18308 Mask.append((unsigned)NumOpElts, -1);
18309 continue;
18310 }
18311
18312 if (!isa<ConstantSDNode>(Op.getOperand(1)))
18313 return SDValue();
18314 int ExtIdx = Op.getConstantOperandVal(1);
18315
18316 // Ensure that we are extracting a subvector from a vector the same
18317 // size as the result.
18318 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
18319 return SDValue();
18320
18321 // Scale the subvector index to account for any bitcast.
18322 int NumExtElts = ExtVT.getVectorNumElements();
18323 if (0 == (NumExtElts % NumElts))
18324 ExtIdx /= (NumExtElts / NumElts);
18325 else if (0 == (NumElts % NumExtElts))
18326 ExtIdx *= (NumElts / NumExtElts);
18327 else
18328 return SDValue();
18329
18330 // At most we can reference 2 inputs in the final shuffle.
18331 if (SV0.isUndef() || SV0 == ExtVec) {
18332 SV0 = ExtVec;
18333 for (int i = 0; i != NumOpElts; ++i)
18334 Mask.push_back(i + ExtIdx);
18335 } else if (SV1.isUndef() || SV1 == ExtVec) {
18336 SV1 = ExtVec;
18337 for (int i = 0; i != NumOpElts; ++i)
18338 Mask.push_back(i + ExtIdx + NumElts);
18339 } else {
18340 return SDValue();
18341 }
18342 }
18343
18344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18345 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
18346 DAG.getBitcast(VT, SV1), Mask, DAG);
18347}
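
The index-scaling step above compensates for a bitcast between the recorded extract type and the peeked-through source. The same divisibility logic in isolation (standalone sketch, not the analyzed code):

  // ExtVT has NumExtElts lanes; the bitcast source type has NumElts lanes.
  // Returns false when neither element count divides the other.
  static bool scaleExtractIndex(int &ExtIdx, int NumExtElts, int NumElts) {
    if (NumExtElts % NumElts == 0)
      ExtIdx /= NumExtElts / NumElts;   // source has wider elements
    else if (NumElts % NumExtElts == 0)
      ExtIdx *= NumElts / NumExtElts;   // source has narrower elements
    else
      return false;
    return true;
  }
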
18348
18349SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
18350 // If we only have one input vector, we don't need to do any concatenation.
18351 if (N->getNumOperands() == 1)
18352 return N->getOperand(0);
18353
18354 // Check if all of the operands are undefs.
18355 EVT VT = N->getValueType(0);
18356 if (ISD::allOperandsUndef(N))
18357 return DAG.getUNDEF(VT);
18358
18359 // Optimize concat_vectors where all but the first of the vectors are undef.
18360 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
18361 return Op.isUndef();
18362 })) {
18363 SDValue In = N->getOperand(0);
18364 assert(In.getValueType().isVector() && "Must concat vectors");
18365
18366 // If the input is a concat_vectors, just make a larger concat by padding
18367 // with smaller undefs.
18368 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
18369 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
18370 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
18371 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
18372 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18373 }
18374
18375 SDValue Scalar = peekThroughOneUseBitcasts(In);
18376
18377 // concat_vectors(scalar_to_vector(scalar), undef) ->
18378 // scalar_to_vector(scalar)
18379 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18380 Scalar.hasOneUse()) {
18381 EVT SVT = Scalar.getValueType().getVectorElementType();
18382 if (SVT == Scalar.getOperand(0).getValueType())
18383 Scalar = Scalar.getOperand(0);
18384 }
18385
18386 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
18387 if (!Scalar.getValueType().isVector()) {
18388 // If the bitcast type isn't legal, it might be a trunc of a legal type;
18389 // look through the trunc so we can still do the transform:
18390 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
18391 if (Scalar->getOpcode() == ISD::TRUNCATE &&
18392 !TLI.isTypeLegal(Scalar.getValueType()) &&
18393 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
18394 Scalar = Scalar->getOperand(0);
18395
18396 EVT SclTy = Scalar.getValueType();
18397
18398 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
18399 return SDValue();
18400
18401 // Bail out if the vector size is not a multiple of the scalar size.
18402 if (VT.getSizeInBits() % SclTy.getSizeInBits())
18403 return SDValue();
18404
18405 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
18406 if (VNTNumElms < 2)
18407 return SDValue();
18408
18409 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
18410 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
18411 return SDValue();
18412
18413 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
18414 return DAG.getBitcast(VT, Res);
18415 }
18416 }
18417
18418 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
18419 // We have already tested above for an UNDEF only concatenation.
18420 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
18421 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
18422 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
18423 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
18424 };
18425 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
18426 SmallVector<SDValue, 8> Opnds;
18427 EVT SVT = VT.getScalarType();
18428
18429 EVT MinVT = SVT;
18430 if (!SVT.isFloatingPoint()) {
18431 // If the BUILD_VECTORs are built from integers, they may have different
18432 // operand types. Get the smallest type and truncate all operands to it.
18433 bool FoundMinVT = false;
18434 for (const SDValue &Op : N->ops())
18435 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
18436 EVT OpSVT = Op.getOperand(0).getValueType();
18437 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
18438 FoundMinVT = true;
18439 }
18440 assert(FoundMinVT && "Concat vector type mismatch");
18441 }
18442
18443 for (const SDValue &Op : N->ops()) {
18444 EVT OpVT = Op.getValueType();
18445 unsigned NumElts = OpVT.getVectorNumElements();
18446
18447 if (ISD::UNDEF == Op.getOpcode())
18448 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
18449
18450 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
18451 if (SVT.isFloatingPoint()) {
18452 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
18453 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
18454 } else {
18455 for (unsigned i = 0; i != NumElts; ++i)
18456 Opnds.push_back(
18457 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
18458 }
18459 }
18460 }
18461
18462 assert(VT.getVectorNumElements() == Opnds.size() &&
18463 "Concat vector type mismatch");
18464 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
18465 }
18466
18467 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
18468 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
18469 return V;
18470
18471 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
18472 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
18473 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
18474 return V;
18475
18476 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
18477 // nodes often generate nop CONCAT_VECTOR nodes.
18478 // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
18479 // place the incoming vectors at the exact same location.
18480 SDValue SingleSource = SDValue();
18481 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
18482
18483 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18484 SDValue Op = N->getOperand(i);
18485
18486 if (Op.isUndef())
18487 continue;
18488
18489 // Check if this is the identity extract:
18490 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18491 return SDValue();
18492
18493 // Find the single incoming vector for the extract_subvector.
18494 if (SingleSource.getNode()) {
18495 if (Op.getOperand(0) != SingleSource)
18496 return SDValue();
18497 } else {
18498 SingleSource = Op.getOperand(0);
18499
18500 // Check the source type is the same as the type of the result.
18501 // If not, this concat may extend the vector, so we cannot
18502 // optimize it away.
18503 if (SingleSource.getValueType() != N->getValueType(0))
18504 return SDValue();
18505 }
18506
18507 auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18508 // The extract index must be constant.
18509 if (!CS)
18510 return SDValue();
18511
18512 // Check that we are reading from the identity index.
18513 unsigned IdentityIndex = i * PartNumElem;
18514 if (CS->getAPIntValue() != IdentityIndex)
18515 return SDValue();
18516 }
18517
18518 if (SingleSource.getNode())
18519 return SingleSource;
18520
18521 return SDValue();
18522}
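
The final scan above recognizes a nop concat: operand i must be an extract_subvector reading PartNumElem lanes at offset i * PartNumElem from one common source of the result type. The identity test, reduced to plain indices (illustrative sketch):

  #include <vector>

  static bool isIdentityConcat(const std::vector<int> &ExtractIndices,
                               unsigned PartNumElem) {
    for (unsigned i = 0; i != ExtractIndices.size(); ++i)
      if (ExtractIndices[i] != (int)(i * PartNumElem))
        return false;  // operand i reads from the wrong offset
    return true;
  }
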
18523
18524// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
18525// if the subvector can be sourced for free.
18526static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
18527 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
18528 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
18529 return V.getOperand(1);
18530 }
18531 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18532 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
18533 V.getOperand(0).getValueType() == SubVT &&
18534 (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
18535 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
18536 return V.getOperand(SubIdx);
18537 }
18538 return SDValue();
18539}
18540
18541static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
18542 SelectionDAG &DAG) {
18543 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18544 SDValue BinOp = Extract->getOperand(0);
18545 unsigned BinOpcode = BinOp.getOpcode();
18546 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
18547 return SDValue();
18548
18549 EVT VecVT = BinOp.getValueType();
18550 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
18551 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
18552 return SDValue();
18553
18554 SDValue Index = Extract->getOperand(1);
18555 EVT SubVT = Extract->getValueType(0);
18556 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
18557 return SDValue();
18558
18559 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
18560 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
18561
18562 // TODO: We could handle the case where only 1 operand is being inserted by
18563 // creating an extract of the other operand, but that requires checking
18564 // number of uses and/or costs.
18565 if (!Sub0 || !Sub1)
18566 return SDValue();
18567
18568 // We are inserting both operands of the wide binop only to extract back
18569 // to the narrow vector size. Eliminate all of the insert/extract:
18570 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
18571 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
18572 BinOp->getFlags());
18573}
18574
18575/// If we are extracting a subvector produced by a wide binary operator try
18576/// to use a narrow binary operator and/or avoid concatenation and extraction.
18577static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
18578 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
18579 // some of these bailouts with other transforms.
18580
18581 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
18582 return V;
18583
18584 // The extract index must be a constant, so we can map it to a concat operand.
18585 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18586 if (!ExtractIndexC)
18587 return SDValue();
18588
18589 // We are looking for an optionally bitcasted wide vector binary operator
18590 // feeding an extract subvector.
18591 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18592 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
18593 unsigned BOpcode = BinOp.getOpcode();
18594 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
18595 return SDValue();
18596
18597 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
18598 // reduced to the unary fneg when it is visited, and we probably want to deal
18599 // with fneg in a target-specific way.
18600 if (BOpcode == ISD::FSUB) {
18601 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
18602 if (C && C->getValueAPF().isNegZero())
18603 return SDValue();
18604 }
18605
18606 // The binop must be a vector type, so we can extract some fraction of it.
18607 EVT WideBVT = BinOp.getValueType();
18608 if (!WideBVT.isVector())
18609 return SDValue();
18610
18611 EVT VT = Extract->getValueType(0);
18612 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
18613 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
18614 "Extract index is not a multiple of the vector length.");
18615
18616 // Bail out if this is not a proper multiple width extraction.
18617 unsigned WideWidth = WideBVT.getSizeInBits();
18618 unsigned NarrowWidth = VT.getSizeInBits();
18619 if (WideWidth % NarrowWidth != 0)
18620 return SDValue();
18621
18622 // Bail out if we are extracting a fraction of a single operation. This can
18623 // occur because we potentially looked through a bitcast of the binop.
18624 unsigned NarrowingRatio = WideWidth / NarrowWidth;
18625 unsigned WideNumElts = WideBVT.getVectorNumElements();
18626 if (WideNumElts % NarrowingRatio != 0)
18627 return SDValue();
18628
18629 // Bail out if the target does not support a narrower version of the binop.
18630 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
18631 WideNumElts / NarrowingRatio);
18632 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
18633 return SDValue();
18634
18635 // If extraction is cheap, we don't need to look at the binop operands
18636 // for concat ops. The narrow binop alone makes this transform profitable.
18637 // We can't just reuse the original extract index operand because we may have
18638 // bitcasted.
18639 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
18640 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
18641 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
18642 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
18643 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
18644 SDLoc DL(Extract);
18645 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
18646 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18647 BinOp.getOperand(0), NewExtIndex);
18648 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18649 BinOp.getOperand(1), NewExtIndex);
18650 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
18651 BinOp.getNode()->getFlags());
18652 return DAG.getBitcast(VT, NarrowBinOp);
18653 }
18654
18655 // Only handle the case where we are doubling and then halving. A larger ratio
18656 // may require more than two narrow binops to replace the wide binop.
18657 if (NarrowingRatio != 2)
18658 return SDValue();
18659
18660 // TODO: The motivating case for this transform is an x86 AVX1 target. That
18661 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
18662 // flavors, but no other 256-bit integer support. This could be extended to
18663 // handle any binop, but that may require fixing/adding other folds to avoid
18664 // codegen regressions.
18665 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
18666 return SDValue();
18667
18668 // We need at least one concatenation operation of a binop operand to make
18669 // this transform worthwhile. The concat must double the input vector sizes.
18670 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
18671 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
18672 return V.getOperand(ConcatOpNum);
18673 return SDValue();
18674 };
18675 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
18676 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
18677
18678 if (SubVecL || SubVecR) {
18679 // If a binop operand was not the result of a concat, we must extract a
18680 // half-sized operand for our new narrow binop:
18681 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
18682 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
18683 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
18684 SDLoc DL(Extract);
18685 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
18686 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
18687 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18688 BinOp.getOperand(0), IndexC);
18689
18690 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
18691 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18692 BinOp.getOperand(1), IndexC);
18693
18694 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
18695 return DAG.getBitcast(VT, NarrowBinOp);
18696 }
18697
18698 return SDValue();
18699}
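
The ConcatOpNum/ExtBOIdx bookkeeping above maps the extract index onto one half of the wide binop. A worked instance for the motivating AVX1-style shapes (the numbers are an assumed example):

  #include <cassert>

  int main() {
    // Wide v8i32 binop, extracting the upper v4i32 (index 4):
    unsigned ExtractIndex = 4, NarrowNumElts = 4;
    unsigned ConcatOpNum = ExtractIndex / NarrowNumElts;  // 1: upper half
    unsigned ExtBOIdx = ConcatOpNum * NarrowNumElts;      // 4: new index
    assert(ConcatOpNum == 1 && ExtBOIdx == 4);
  }
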
18700
18701/// If we are extracting a subvector from a wide vector load, convert to a
18702/// narrow load to eliminate the extraction:
18703/// (extract_subvector (load wide vector)) --> (load narrow vector)
18704static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18705 // TODO: Add support for big-endian. The offset calculation must be adjusted.
18706 if (DAG.getDataLayout().isBigEndian())
18707 return SDValue();
18708
18709 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18710 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18711 if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
18712 !ExtIdx)
18713 return SDValue();
18714
18715 // Allow targets to opt-out.
18716 EVT VT = Extract->getValueType(0);
18717
18718 // We can only create byte sized loads.
18719 if (!VT.isByteSized())
18720 return SDValue();
18721
18722 unsigned Index = ExtIdx->getZExtValue();
18723 unsigned NumElts = VT.getVectorNumElements();
18724
18725 // If the index is a multiple of the extract element count, we can offset the
18726 // address by the store size multiplied by the subvector index. Otherwise if
18727 // the scalar type is byte sized, we can just use the index multiplied by
18728 // the element size in bytes as the offset.
18729 unsigned Offset;
18730 if (Index % NumElts == 0)
18731 Offset = (Index / NumElts) * VT.getStoreSize();
18732 else if (VT.getScalarType().isByteSized())
18733 Offset = Index * VT.getScalarType().getStoreSize();
18734 else
18735 return SDValue();
18736
18737 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18738 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18739 return SDValue();
18740
18741 // The narrow load will be offset from the base address of the old load if
18742 // we are extracting from something besides index 0 (little-endian).
18743 SDLoc DL(Extract);
18744 SDValue BaseAddr = Ld->getBasePtr();
18745
18746 // TODO: Use "BaseIndexOffset" to make this more effective.
18747 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18748 MachineFunction &MF = DAG.getMachineFunction();
18749 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18750 VT.getStoreSize());
18751 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18752 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18753 return NewLd;
18754}
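
The offset rule above prefers whole-subvector steps and falls back to byte-sized element steps. A standalone restatement with a couple of checks (sketch only; all sizes are in bytes):

  #include <cassert>

  static bool narrowLoadOffset(unsigned Index, unsigned NumElts,
                               unsigned SubvecStoreSize,
                               unsigned EltStoreSize, bool EltByteSized,
                               unsigned &Offset) {
    if (Index % NumElts == 0)
      Offset = (Index / NumElts) * SubvecStoreSize;
    else if (EltByteSized)
      Offset = Index * EltStoreSize;
    else
      return false;
    return true;
  }

  int main() {
    unsigned Off;
    // Extract v4i32 (16 bytes) at index 4 from a wide vector load.
    assert(narrowLoadOffset(4, 4, 16, 4, true, Off) && Off == 16);
    // Misaligned index 2: fall back to 2 elements * 4 bytes.
    assert(narrowLoadOffset(2, 4, 16, 4, true, Off) && Off == 8);
  }
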
18755
18756SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18757 EVT NVT = N->getValueType(0);
18758 SDValue V = N->getOperand(0);
18759
18760 // Extract from UNDEF is UNDEF.
18761 if (V.isUndef())
18762 return DAG.getUNDEF(NVT);
18763
18764 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
18765 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18766 return NarrowLoad;
18767
18768 // Combine an extract of an extract into a single extract_subvector.
18769 // ext (ext X, C), 0 --> ext X, C
18770 SDValue Index = N->getOperand(1);
18771 if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18772 V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18773 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
18774 V.getConstantOperandVal(1)) &&
18775 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
18776 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18777 V.getOperand(1));
18778 }
18779 }
18780
18781 // Try to move vector bitcast after extract_subv by scaling extraction index:
18782 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18783 if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18784 V.getOperand(0).getValueType().isVector()) {
18785 SDValue SrcOp = V.getOperand(0);
18786 EVT SrcVT = SrcOp.getValueType();
18787 unsigned SrcNumElts = SrcVT.getVectorNumElements();
18788 unsigned DestNumElts = V.getValueType().getVectorNumElements();
18789 if ((SrcNumElts % DestNumElts) == 0) {
18790 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18791 unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18792 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18793 NewExtNumElts);
18794 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18795 unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18796 SDLoc DL(N);
18797 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
18798 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18799 V.getOperand(0), NewIndex);
18800 return DAG.getBitcast(NVT, NewExtract);
18801 }
18802 }
18803 if ((DestNumElts % SrcNumElts) == 0) {
18804 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
18805 if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
18806 unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
18807 EVT ScalarVT = SrcVT.getScalarType();
18808 if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0) {
18809 SDLoc DL(N);
18810 unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
18811 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
18812 ScalarVT, NewExtNumElts);
18813 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18814 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
18815 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18816 V.getOperand(0), NewIndex);
18817 return DAG.getBitcast(NVT, NewExtract);
18818 }
18819 if (NewExtNumElts == 1 &&
18820 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
18821 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
18822 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
18823 V.getOperand(0), NewIndex);
18824 return DAG.getBitcast(NVT, NewExtract);
18825 }
18826 }
18827 }
18828 }
18829 }
18830
18831 if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
18832 EVT ConcatSrcVT = V.getOperand(0).getValueType();
18833 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
18834 "Concat and extract subvector do not change element type");
18835
18836 unsigned ExtIdx = N->getConstantOperandVal(1);
18837 unsigned ExtNumElts = NVT.getVectorNumElements();
18838 assert(ExtIdx % ExtNumElts == 0 &&
18839 "Extract index is not a multiple of the input vector length.");
18840
18841 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
18842 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
18843
18844 // If the concatenated source types match this extract, it's a direct
18845 // simplification:
18846 // extract_subvec (concat V1, V2, ...), i --> Vi
18847 if (ConcatSrcNumElts == ExtNumElts)
18848 return V.getOperand(ConcatOpIdx);
18849
18850 // If the concatenated source vectors are a multiple length of this extract,
18851 // then extract a fraction of one of those source vectors directly from a
18852 // concat operand. Example:
18853 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
18854 // v2i8 extract_subvec v8i8 Y, 6
18855 if (ConcatSrcNumElts % ExtNumElts == 0) {
18856 SDLoc DL(N);
18857 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
18858 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
18859 "Trying to extract from >1 concat operand?");
18860 assert(NewExtIdx % ExtNumElts == 0 &&
18861 "Extract index is not a multiple of the input vector length.");
18862 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
18863 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
18864 V.getOperand(ConcatOpIdx), NewIndexC);
18865 }
18866 }
18867
18868 V = peekThroughBitcasts(V);
18869
18870 // If the input is a build vector, try to make a smaller build vector.
18871 if (V.getOpcode() == ISD::BUILD_VECTOR) {
18872 if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18873 EVT InVT = V.getValueType();
18874 unsigned ExtractSize = NVT.getSizeInBits();
18875 unsigned EltSize = InVT.getScalarSizeInBits();
18876 // Only do this if we won't split any elements.
18877 if (ExtractSize % EltSize == 0) {
18878 unsigned NumElems = ExtractSize / EltSize;
18879 EVT EltVT = InVT.getVectorElementType();
18880 EVT ExtractVT = NumElems == 1 ? EltVT
18881 : EVT::getVectorVT(*DAG.getContext(),
18882 EltVT, NumElems);
18883 if ((Level < AfterLegalizeDAG ||
18884 (NumElems == 1 ||
18885 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18886 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18887 unsigned IdxVal = IdxC->getZExtValue();
18888 IdxVal *= NVT.getScalarSizeInBits();
18889 IdxVal /= EltSize;
18890
18891 if (NumElems == 1) {
18892 SDValue Src = V->getOperand(IdxVal);
18893 if (EltVT != Src.getValueType())
18894 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18895 return DAG.getBitcast(NVT, Src);
18896 }
18897
18898 // Extract the pieces from the original build_vector.
18899 SDValue BuildVec = DAG.getBuildVector(
18900 ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18901 return DAG.getBitcast(NVT, BuildVec);
18902 }
18903 }
18904 }
18905 }
18906
18907 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18908 // Handle only simple case where vector being inserted and vector
18909 // being extracted are of same size.
18910 EVT SmallVT = V.getOperand(1).getValueType();
18911 if (!NVT.bitsEq(SmallVT))
18912 return SDValue();
18913
18914 // Only handle cases where both indexes are constants.
18915 auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18916 auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18917 if (InsIdx && ExtIdx) {
18918 // Combine:
18919 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18920 // Into:
18921 // indices are equal or bit offsets are equal => V1
18922 // otherwise => (extract_subvec V1, ExtIdx)
18923 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18924 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18925 return DAG.getBitcast(NVT, V.getOperand(1));
18926 return DAG.getNode(
18927 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
18928 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18929 Index);
18930 }
18931 }
18932
18933 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18934 return NarrowBOp;
18935
18936 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18937 return SDValue(N, 0);
18938
18939 return SDValue();
18940}
18941
18942/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18943/// followed by concatenation. Narrow vector ops may have better performance
18944/// than wide ops, and this can unlock further narrowing of other vector ops.
18945/// Targets can invert this transform later if it is not profitable.
18946static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
18947 SelectionDAG &DAG) {
18948 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18949 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18950 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18951 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18952 return SDValue();
18953
18954 // Split the wide shuffle mask into halves. Any mask element that is accessing
18955 // operand 1 is offset down to account for narrowing of the vectors.
18956 ArrayRef<int> Mask = Shuf->getMask();
18957 EVT VT = Shuf->getValueType(0);
18958 unsigned NumElts = VT.getVectorNumElements();
18959 unsigned HalfNumElts = NumElts / 2;
18960 SmallVector<int, 16> Mask0(HalfNumElts, -1);
18961 SmallVector<int, 16> Mask1(HalfNumElts, -1);
18962 for (unsigned i = 0; i != NumElts; ++i) {
18963 if (Mask[i] == -1)
18964 continue;
18965 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18966 if (i < HalfNumElts)
18967 Mask0[i] = M;
18968 else
18969 Mask1[i - HalfNumElts] = M;
18970 }
18971
18972 // Ask the target if this is a valid transform.
18973 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18974 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18975 HalfNumElts);
18976 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18977 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18978 return SDValue();
18979
18980 // shuffle (concat X, undef), (concat Y, undef), Mask -->
18981 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18982 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18983 SDLoc DL(Shuf);
18984 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18985 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18986 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18987}
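
The mask split above only has to renumber lanes that address operand 1, since both concat inputs drop their undef upper halves. The same split in standalone form (sketch, not the analyzed code):

  #include <vector>

  static void splitWideMask(const std::vector<int> &Mask, unsigned NumElts,
                            std::vector<int> &Mask0, std::vector<int> &Mask1) {
    unsigned Half = NumElts / 2;
    Mask0.assign(Half, -1);
    Mask1.assign(Half, -1);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] == -1)
        continue;
      // Lanes >= NumElts pointed into operand 1; shift them down by Half.
      int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)Half;
      (i < Half ? Mask0[i] : Mask1[i - Half]) = M;
    }
  }
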
18988
18989// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18990// or turn a shuffle of a single concat into a simpler shuffle then a concat.
18991static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
18992 EVT VT = N->getValueType(0);
18993 unsigned NumElts = VT.getVectorNumElements();
18994
18995 SDValue N0 = N->getOperand(0);
18996 SDValue N1 = N->getOperand(1);
18997 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18998 ArrayRef<int> Mask = SVN->getMask();
18999
19000 SmallVector<SDValue, 4> Ops;
19001 EVT ConcatVT = N0.getOperand(0).getValueType();
19002 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
19003 unsigned NumConcats = NumElts / NumElemsPerConcat;
19004
19005 auto IsUndefMaskElt = [](int i) { return i == -1; };
19006
19007 // Special case: shuffle(concat(A,B)) can be more efficiently represented
19008 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
19009 // half vector elements.
19010 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
19011 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
19012 IsUndefMaskElt)) {
19013 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
19014 N0.getOperand(1),
19015 Mask.slice(0, NumElemsPerConcat));
19016 N1 = DAG.getUNDEF(ConcatVT);
19017 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
19018 }
19019
19020 // Look at every vector that's inserted. We're looking for exact
19021 // subvector-sized copies from a concatenated vector.
19022 for (unsigned I = 0; I != NumConcats; ++I) {
19023 unsigned Begin = I * NumElemsPerConcat;
19024 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
19025
19026 // Make sure we're dealing with a copy.
19027 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
19028 Ops.push_back(DAG.getUNDEF(ConcatVT));
19029 continue;
19030 }
19031
19032 int OpIdx = -1;
19033 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
19034 if (IsUndefMaskElt(SubMask[i]))
19035 continue;
19036 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
19037 return SDValue();
19038 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
19039 if (0 <= OpIdx && EltOpIdx != OpIdx)
19040 return SDValue();
19041 OpIdx = EltOpIdx;
19042 }
19043 assert(0 <= OpIdx && "Unknown concat_vectors op");
19044
19045 if (OpIdx < (int)N0.getNumOperands())
19046 Ops.push_back(N0.getOperand(OpIdx));
19047 else
19048 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
19049 }
19050
19051 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19052}
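
Each sub-mask window must be a verbatim copy of exactly one concat operand. The per-window test, extracted into a sketch that returns the operand index, -1 for an all-undef window, or -2 on failure (hypothetical helper):

  #include <vector>

  static int matchSubvectorCopy(const std::vector<int> &SubMask,
                                int NumElemsPerConcat) {
    int OpIdx = -1;
    for (int i = 0; i != (int)SubMask.size(); ++i) {
      if (SubMask[i] == -1)
        continue;
      if ((SubMask[i] % NumElemsPerConcat) != i)
        return -2;  // element is permuted, not a plain copy
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      if (OpIdx >= 0 && EltOpIdx != OpIdx)
        return -2;  // window mixes two concat operands
      OpIdx = EltOpIdx;
    }
    return OpIdx;
  }
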
19053
19054// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19055// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19056//
19057// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
19058// a simplification in some sense, but it isn't appropriate in general: some
19059// BUILD_VECTORs are substantially cheaper than others. The general case
19060// of a BUILD_VECTOR requires inserting each element individually (or
19061// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
19062// all constants is a single constant pool load. A BUILD_VECTOR where each
19063// element is identical is a splat. A BUILD_VECTOR where most of the operands
19064// are undef lowers to a small number of element insertions.
19065//
19066// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
19067// We don't fold shuffles where one side is a non-zero constant, and we don't
19068// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
19069// non-constant operands. This seems to work out reasonably well in practice.
19070static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
19071 SelectionDAG &DAG,
19072 const TargetLowering &TLI) {
19073 EVT VT = SVN->getValueType(0);
19074 unsigned NumElts = VT.getVectorNumElements();
19075 SDValue N0 = SVN->getOperand(0);
19076 SDValue N1 = SVN->getOperand(1);
19077
19078 if (!N0->hasOneUse())
19079 return SDValue();
19080
19081 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
19082 // discussed above.
19083 if (!N1.isUndef()) {
19084 if (!N1->hasOneUse())
19085 return SDValue();
19086
19087 bool N0AnyConst = isAnyConstantBuildVector(N0);
19088 bool N1AnyConst = isAnyConstantBuildVector(N1);
19089 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
19090 return SDValue();
19091 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
19092 return SDValue();
19093 }
19094
19095 // If both inputs are splats of the same value then we can safely merge this
19096 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
19097 bool IsSplat = false;
19098 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
19099 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
19100 if (BV0 && BV1)
19101 if (SDValue Splat0 = BV0->getSplatValue())
19102 IsSplat = (Splat0 == BV1->getSplatValue());
19103
19104 SmallVector<SDValue, 8> Ops;
19105 SmallSet<SDValue, 16> DuplicateOps;
19106 for (int M : SVN->getMask()) {
19107 SDValue Op = DAG.getUNDEF(VT.getScalarType());
19108 if (M >= 0) {
19109 int Idx = M < (int)NumElts ? M : M - NumElts;
19110 SDValue &S = (M < (int)NumElts ? N0 : N1);
19111 if (S.getOpcode() == ISD::BUILD_VECTOR) {
19112 Op = S.getOperand(Idx);
19113 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19114 SDValue Op0 = S.getOperand(0);
19115 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
19116 } else {
19117 // Operand can't be combined - bail out.
19118 return SDValue();
19119 }
19120 }
19121
19122 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
19123 // generating a splat; semantically, this is fine, but it's likely to
19124 // generate low-quality code if the target can't reconstruct an appropriate
19125 // shuffle.
19126 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
19127 if (!IsSplat && !DuplicateOps.insert(Op).second)
19128 return SDValue();
19129
19130 Ops.push_back(Op);
19131 }
19132
19133 // BUILD_VECTOR requires all inputs to be of the same type; find the
19134 // maximum type and extend them all.
19135 EVT SVT = VT.getScalarType();
19136 if (SVT.isInteger())
19137 for (SDValue &Op : Ops)
19138 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
19139 if (SVT != VT.getScalarType())
19140 for (SDValue &Op : Ops)
19141 Op = TLI.isZExtFree(Op.getValueType(), SVT)
19142 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
19143 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
19144 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
19145}
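
The duplicate-operand heuristic above can be isolated as follows, ignoring the constant and undef exemptions for brevity (approximate sketch; std::set stands in for SmallSet):

  #include <set>
  #include <vector>

  static bool hasRiskyDuplicates(const std::vector<int> &Scalars,
                                 bool IsSplat) {
    std::set<int> Seen;
    for (int S : Scalars)
      if (!IsSplat && !Seen.insert(S).second)
        return true;  // would duplicate a non-constant operand
    return false;
  }
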
19146
19147// Match shuffles that can be converted to any_vector_extend_in_reg.
19148// This is often generated during legalization.
19149// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19150// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19151static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19152 SelectionDAG &DAG,
19153 const TargetLowering &TLI,
19154 bool LegalOperations) {
19155 EVT VT = SVN->getValueType(0);
19156 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19157
19158 // TODO Add support for big-endian when we have a test case.
19159 if (!VT.isInteger() || IsBigEndian)
19160 return SDValue();
19161
19162 unsigned NumElts = VT.getVectorNumElements();
19163 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19164 ArrayRef<int> Mask = SVN->getMask();
19165 SDValue N0 = SVN->getOperand(0);
19166
19167 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
19168 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19169 for (unsigned i = 0; i != NumElts; ++i) {
19170 if (Mask[i] < 0)
19171 continue;
19172 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19173 continue;
19174 return false;
19175 }
19176 return true;
19177 };
19178
19179 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
19180 // power-of-2 extensions as they are the most likely.
19181 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19182 // Check for non power of 2 vector sizes
19183 if (NumElts % Scale != 0)
19184 continue;
19185 if (!isAnyExtend(Scale))
19186 continue;
19187
19188 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19189 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19190 // Never create an illegal type. Only create unsupported operations if we
19191 // are pre-legalization.
19192 if (TLI.isTypeLegal(OutVT))
19193 if (!LegalOperations ||
19194 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19195 return DAG.getBitcast(VT,
19196 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19197 SDLoc(SVN), OutVT, N0));
19198 }
19199
19200 return SDValue();
19201}
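
The isAnyExtend predicate accepts a mask where every defined lane i sits at a multiple of Scale and selects source element i / Scale. Restated over std::vector (sketch):

  #include <vector>

  static bool isAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
    for (unsigned i = 0; i != Mask.size(); ++i) {
      if (Mask[i] < 0)
        continue;  // undef lanes are free
      if (i % Scale != 0 || Mask[i] != (int)(i / Scale))
        return false;
    }
    return true;
  }
  // e.g. v4i32 mask <0,-1,1,-1> with Scale = 2 matches: v2i64 any-extend.
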
19202
19203// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
19204// each source element of a large type into the lowest elements of a smaller
19205// destination type. This is often generated during legalization.
19206// If the source node itself was a '*_extend_vector_inreg' node then we should
19207// be able to remove it.
19208static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
19209 SelectionDAG &DAG) {
19210 EVT VT = SVN->getValueType(0);
19211 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19212
19213 // TODO Add support for big-endian when we have a test case.
19214 if (!VT.isInteger() || IsBigEndian)
19215 return SDValue();
19216
19217 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
19218
19219 unsigned Opcode = N0.getOpcode();
19220 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
19221 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
19222 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
19223 return SDValue();
19224
19225 SDValue N00 = N0.getOperand(0);
19226 ArrayRef<int> Mask = SVN->getMask();
19227 unsigned NumElts = VT.getVectorNumElements();
19228 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19229 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
19230 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
19231
19232 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
19233 return SDValue();
19234 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
19235
19236 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
19237 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
19238 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
19239 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
19240 for (unsigned i = 0; i != NumElts; ++i) {
19241 if (Mask[i] < 0)
19242 continue;
19243 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
19244 continue;
19245 return false;
19246 }
19247 return true;
19248 };
19249
19250 // At the moment we just handle the case where we've truncated back to the
19251 // same size as before the extension.
19252 // TODO: handle more extension/truncation cases as cases arise.
19253 if (EltSizeInBits != ExtSrcSizeInBits)
19254 return SDValue();
19255
19256 // We can remove *extend_vector_inreg only if the truncation happens at
19257 // the same scale as the extension.
19258 if (isTruncate(ExtScale))
19259 return DAG.getBitcast(VT, N00);
19260
19261 return SDValue();
19262}
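
Dually, isTruncate accepts a mask whose defined lane i selects source element i * Scale while staying in range. The same test over std::vector (sketch):

  #include <vector>

  static bool isTruncateMask(const std::vector<int> &Mask, unsigned Scale) {
    unsigned NumElts = Mask.size();
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;  // undef lanes are free
      if (i * Scale >= NumElts || Mask[i] != (int)(i * Scale))
        return false;
    }
    return true;
  }
  // e.g. v8i16 mask <0,2,4,6,-1,-1,-1,-1> with Scale = 2 matches.
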
19263
19264// Combine shuffles of splat-shuffles of the form:
19265// shuffle (shuffle V, undef, splat-mask), undef, M
19266// If splat-mask contains undef elements, we need to be careful about
19267// introducing undefs in the folded mask that are not the result of composing
19268// the masks of the shuffles.
19269static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
19270 SelectionDAG &DAG) {
19271 if (!Shuf->getOperand(1).isUndef())
19272 return SDValue();
19273 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
19274 if (!Splat || !Splat->isSplat())
19275 return SDValue();
19276
19277 ArrayRef<int> ShufMask = Shuf->getMask();
19278 ArrayRef<int> SplatMask = Splat->getMask();
19279 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
19280
19281 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
19282 // every undef mask element in the splat-shuffle has a corresponding undef
19283 // element in the user-shuffle's mask or if the composition of mask elements
19284 // would result in undef.
19285 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
19286 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
19287 // In this case it is not legal to simplify to the splat-shuffle because we
19288 // may be exposing to the users of the shuffle an undef element at index 1
19289 // that was not there before the combine.
19290 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
19291 // In this case the composition of masks yields SplatMask, so it's ok to
19292 // simplify to the splat-shuffle.
19293 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
19294 // In this case the composed mask includes all undef elements of SplatMask
19295 // and in addition sets element zero to undef. It is safe to simplify to
19296 // the splat-shuffle.
19297 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
19298 ArrayRef<int> SplatMask) {
19299 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
19300 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
19301 SplatMask[UserMask[i]] != -1)
19302 return false;
19303 return true;
19304 };
19305 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
19306 return Shuf->getOperand(0);
19307
19308 // Create a new shuffle with a mask that is composed of the two shuffles'
19309 // masks.
19310 SmallVector<int, 32> NewMask;
19311 for (int Idx : ShufMask)
19312 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
19313
19314 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
19315 Splat->getOperand(0), Splat->getOperand(1),
19316 NewMask);
19317}
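// ---- Added illustration (editor's sketch, not part of the original source).
// The mask composition performed above, over plain std::vector<int> where -1
// denotes undef; composeWithSplatMask is an invented name for this sketch.
#include <vector>
static std::vector<int> composeWithSplatMask(const std::vector<int> &UserMask,
                                             const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  return NewMask;
}
// composeWithSplatMask({0,-1,2,-1}, {2,-1,2,-1}) yields {2,-1,2,-1}, equal to
// SplatMask itself, which is the second example in the comment above.
// ---- End added illustration.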
19318
19319/// Combine shuffle of shuffle of the form:
19320/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
19321static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
19322 SelectionDAG &DAG) {
19323 if (!OuterShuf->getOperand(1).isUndef())
19324 return SDValue();
19325 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
19326 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
19327 return SDValue();
19328
19329 ArrayRef<int> OuterMask = OuterShuf->getMask();
19330 ArrayRef<int> InnerMask = InnerShuf->getMask();
19331 unsigned NumElts = OuterMask.size();
19332 assert(NumElts == InnerMask.size() && "Mask length mismatch");
19333 SmallVector<int, 32> CombinedMask(NumElts, -1);
19334 int SplatIndex = -1;
19335 for (unsigned i = 0; i != NumElts; ++i) {
19336 // Undef lanes remain undef.
19337 int OuterMaskElt = OuterMask[i];
19338 if (OuterMaskElt == -1)
19339 continue;
19340
19341 // Peek through the shuffle masks to get the underlying source element.
19342 int InnerMaskElt = InnerMask[OuterMaskElt];
19343 if (InnerMaskElt == -1)
19344 continue;
19345
19346 // Initialize the splatted element.
19347 if (SplatIndex == -1)
19348 SplatIndex = InnerMaskElt;
19349
19350 // Non-matching index - this is not a splat.
19351 if (SplatIndex != InnerMaskElt)
19352 return SDValue();
19353
19354 CombinedMask[i] = InnerMaskElt;
19355 }
19356 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
19357         getSplatIndex(CombinedMask) != -1) &&
19358        "Expected a splat mask");
19359
19360 // TODO: The transform may be a win even if the mask is not legal.
19361 EVT VT = OuterShuf->getValueType(0);
19362 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
19363 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
19364 return SDValue();
19365
19366 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
19367 InnerShuf->getOperand(1), CombinedMask);
19368}
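// ---- Added illustration (editor's worked example, not part of the source).
// shuf (shuf X, undef, <2,2,1,3>), undef, <0,1,0,0>: the outer mask reads
// inner lanes 0,1,0,0, which hold source elements 2,2,2,2 of X. Every defined
// lane agrees on SplatIndex == 2, so CombinedMask becomes <2,2,2,2> and the
// pair folds to a single splat shuffle of X.
// ---- End added illustration.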
19369
19370/// If the shuffle mask is taking exactly one element from the first vector
19371/// operand and passing through all other elements from the second vector
19372/// operand, return the index of the mask element that is choosing an element
19373/// from the first operand. Otherwise, return -1.
19374static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
19375 int MaskSize = Mask.size();
19376 int EltFromOp0 = -1;
19377 // TODO: This does not match if there are undef elements in the shuffle mask.
19378 // Should we ignore undefs in the shuffle mask instead? The trade-off is
19379 // removing an instruction (a shuffle), but losing the knowledge that some
19380 // vector lanes are not needed.
19381 for (int i = 0; i != MaskSize; ++i) {
19382 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
19383 // We're looking for a shuffle of exactly one element from operand 0.
19384 if (EltFromOp0 != -1)
19385 return -1;
19386 EltFromOp0 = i;
19387 } else if (Mask[i] != i + MaskSize) {
19388 // Nothing from operand 1 can change lanes.
19389 return -1;
19390 }
19391 }
19392 return EltFromOp0;
19393}
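// ---- Added illustration (editor's worked examples, not part of the source).
// With MaskSize == 4 (operand-1 elements are numbered 4..7):
//   {4, 5, 0, 7} -> returns 2: lane 2 takes element 0 of operand 0, and every
//                   other lane passes the same-numbered operand-1 element.
//   {0, 6, 6, 7} -> returns -1: lane 1 reads operand-1 element 2, a lane
//                   change, so the pattern does not match.
//   {0, 5, 2, 7} -> returns -1: two lanes come from operand 0.
// ---- End added illustration.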
19394
19395/// If a shuffle inserts exactly one element from a source vector operand into
19396/// another vector operand and we can access the specified element as a scalar,
19397/// then we can eliminate the shuffle.
19398static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
19399 SelectionDAG &DAG) {
19400 // First, check if we are taking one element of a vector and shuffling that
19401 // element into another vector.
19402 ArrayRef<int> Mask = Shuf->getMask();
19403 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
19404 SDValue Op0 = Shuf->getOperand(0);
19405 SDValue Op1 = Shuf->getOperand(1);
19406 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
19407 if (ShufOp0Index == -1) {
19408 // Commute mask and check again.
19409 ShuffleVectorSDNode::commuteMask(CommutedMask);
19410 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
19411 if (ShufOp0Index == -1)
19412 return SDValue();
19413 // Commute operands to match the commuted shuffle mask.
19414 std::swap(Op0, Op1);
19415 Mask = CommutedMask;
19416 }
19417
19418 // The shuffle inserts exactly one element from operand 0 into operand 1.
19419 // Now see if we can access that element as a scalar via a real insert element
19420 // instruction.
19421 // TODO: We can try harder to locate the element as a scalar. Examples: it
19422 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
19423 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
19424        "Shuffle mask value must be from operand 0");
19425 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
19426 return SDValue();
19427
19428 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
19429 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
19430 return SDValue();
19431
19432 // There's an existing insertelement with constant insertion index, so we
19433 // don't need to check the legality/profitability of a replacement operation
19434 // that differs at most in the constant value. The target should be able to
19435 // lower any of those in a similar way. If not, legalization will expand this
19436 // to a scalar-to-vector plus shuffle.
19437 //
19438 // Note that the shuffle may move the scalar from the position that the insert
19439 // element used. Therefore, our new insert element occurs at the shuffle's
19440 // mask index value, not the insert's index value.
19441 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
19442 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
19443 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
19444 Op1, Op0.getOperand(1), NewInsIndex);
19445}
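// ---- Added illustration (editor's worked example, not part of the source).
// shuffle (insertelt v1, x, 0), v2, <4,5,0,7> keeps lanes 0, 1 and 3 of v2
// and places x (v1's element 0, matching the insert index) in lane 2, so it
// rewrites to insertelt v2, x, 2: the new index is the mask position, not the
// original insertion index.
// ---- End added illustration.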
19446
19447/// If we have a unary shuffle of a shuffle, see if it can be folded away
19448/// completely. This has the potential to lose undef knowledge because the first
19449/// shuffle may not have an undef mask element where the second one does. So
19450/// only call this after doing simplifications based on demanded elements.
19451static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
19452 // shuf (shuf0 X, Y, Mask0), undef, Mask
19453 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
19454 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
19455 return SDValue();
19456
19457 ArrayRef<int> Mask = Shuf->getMask();
19458 ArrayRef<int> Mask0 = Shuf0->getMask();
19459 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
19460 // Ignore undef elements.
19461 if (Mask[i] == -1)
19462 continue;
19463 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
19464
19465 // Is the element of the shuffle operand chosen by this shuffle the same as
19466 // the element chosen by the shuffle operand itself?
19467 if (Mask0[Mask[i]] != Mask0[i])
19468 return SDValue();
19469 }
19470 // Every element of this shuffle is identical to the result of the previous
19471 // shuffle, so we can replace this value.
19472 return Shuf->getOperand(0);
19473}
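// ---- Added illustration (editor's sketch, not part of the original source).
// The redundancy test above in isolation: the outer shuffle may be dropped
// when each defined lane selects an element the inner shuffle already placed
// identically. outerShuffleIsRedundant is an invented name for this sketch.
#include <vector>
static bool outerShuffleIsRedundant(const std::vector<int> &Mask,
                                    const std::vector<int> &Mask0) {
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    if (Mask[i] == -1)
      continue; // Undef lanes of the outer shuffle impose no constraint.
    if (Mask0[Mask[i]] != Mask0[i])
      return false;
  }
  return true;
}
// outerShuffleIsRedundant({1,0}, {3,3}) == true: both inner lanes hold source
// element 3, so swapping them is a no-op and the outer shuffle folds away.
// ---- End added illustration.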
19474
19475SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
19476 EVT VT = N->getValueType(0);
19477 unsigned NumElts = VT.getVectorNumElements();
19478
19479 SDValue N0 = N->getOperand(0);
19480 SDValue N1 = N->getOperand(1);
19481
19482 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
19483
19484 // Canonicalize shuffle undef, undef -> undef
19485 if (N0.isUndef() && N1.isUndef())
19486 return DAG.getUNDEF(VT);
19487
19488 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
19489
19490 // Canonicalize shuffle v, v -> v, undef
19491 if (N0 == N1) {
19492 SmallVector<int, 8> NewMask;
19493 for (unsigned i = 0; i != NumElts; ++i) {
19494 int Idx = SVN->getMaskElt(i);
19495 if (Idx >= (int)NumElts) Idx -= NumElts;
19496 NewMask.push_back(Idx);
19497 }
19498 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
19499 }
19500
19501 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
19502 if (N0.isUndef())
19503 return DAG.getCommutedVectorShuffle(*SVN);
19504
19505 // Remove references to rhs if it is undef
19506 if (N1.isUndef()) {
19507 bool Changed = false;
19508 SmallVector<int, 8> NewMask;
19509 for (unsigned i = 0; i != NumElts; ++i) {
19510 int Idx = SVN->getMaskElt(i);
19511 if (Idx >= (int)NumElts) {
19512 Idx = -1;
19513 Changed = true;
19514 }
19515 NewMask.push_back(Idx);
19516 }
19517 if (Changed)
19518 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
19519 }
19520
19521 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
19522 return InsElt;
19523
19524 // A shuffle of a single vector that is a splatted value can always be folded.
19525 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
19526 return V;
19527
19528 if (SDValue V = formSplatFromShuffles(SVN, DAG))
19529 return V;
19530
19531 // If it is a splat, check if the argument vector is another splat or a
19532 // build_vector.
19533 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
19534 int SplatIndex = SVN->getSplatIndex();
19535 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
19536 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
19537 // splat (vector_bo L, R), Index -->
19538 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
19539 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
19540 SDLoc DL(N);
19541 EVT EltVT = VT.getScalarType();
19542 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
19543 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
19544 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
19545 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
19546 N0.getNode()->getFlags());
19547 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
19548 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
19549 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
19550 }
19551
19552 // If this is a bit convert that changes the element type of the vector but
19553 // not the number of vector elements, look through it. Be careful not to
19554 // look through conversions that change things like v4f32 to v2f64.
19555 SDNode *V = N0.getNode();
19556 if (V->getOpcode() == ISD::BITCAST) {
19557 SDValue ConvInput = V->getOperand(0);
19558 if (ConvInput.getValueType().isVector() &&
19559 ConvInput.getValueType().getVectorNumElements() == NumElts)
19560 V = ConvInput.getNode();
19561 }
19562
19563 if (V->getOpcode() == ISD::BUILD_VECTOR) {
19564 assert(V->getNumOperands() == NumElts &&
19565        "BUILD_VECTOR has wrong number of operands");
19566 SDValue Base;
19567 bool AllSame = true;
19568 for (unsigned i = 0; i != NumElts; ++i) {
19569 if (!V->getOperand(i).isUndef()) {
19570 Base = V->getOperand(i);
19571 break;
19572 }
19573 }
19574 // Splat of <u, u, u, u>, return <u, u, u, u>
19575 if (!Base.getNode())
19576 return N0;
19577 for (unsigned i = 0; i != NumElts; ++i) {
19578 if (V->getOperand(i) != Base) {
19579 AllSame = false;
19580 break;
19581 }
19582 }
19583 // Splat of <x, x, x, x>, return <x, x, x, x>
19584 if (AllSame)
19585 return N0;
19586
19587 // Canonicalize any other splat as a build_vector.
19588 SDValue Splatted = V->getOperand(SplatIndex);
19589 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
19590 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
19591
19592 // We may have jumped through bitcasts, so the type of the
19593 // BUILD_VECTOR may not match the type of the shuffle.
19594 if (V->getValueType(0) != VT)
19595 NewBV = DAG.getBitcast(VT, NewBV);
19596 return NewBV;
19597 }
19598 }
19599
19600 // Simplify source operands based on shuffle mask.
19601 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19602 return SDValue(N, 0);
19603
19604 // This is intentionally placed after demanded elements simplification because
19605 // it could eliminate knowledge of undef elements created by this shuffle.
19606 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
19607 return ShufOp;
19608
19609 // Match shuffles that can be converted to any_vector_extend_in_reg.
19610 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
19611 return V;
19612
19613 // Combine "truncate_vector_in_reg" style shuffles.
19614 if (SDValue V = combineTruncationShuffle(SVN, DAG))
19615 return V;
19616
19617 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
19618 Level < AfterLegalizeVectorOps &&
19619 (N1.isUndef() ||
19620 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
19621 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
19622 if (SDValue V = partitionShuffleOfConcats(N, DAG))
19623 return V;
19624 }
19625
19626 // A shuffle of a concat of the same narrow vector can be reduced to use
19627 // only low-half elements of a concat with undef:
19628 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
19629 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
19630 N0.getNumOperands() == 2 &&
19631 N0.getOperand(0) == N0.getOperand(1)) {
19632 int HalfNumElts = (int)NumElts / 2;
19633 SmallVector<int, 8> NewMask;
19634 for (unsigned i = 0; i != NumElts; ++i) {
19635 int Idx = SVN->getMaskElt(i);
19636 if (Idx >= HalfNumElts) {
19637 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
19638 Idx -= HalfNumElts;
19639 }
19640 NewMask.push_back(Idx);
19641 }
19642 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
19643 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
19644 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
19645 N0.getOperand(0), UndefVec);
19646 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
19647 }
19648 }
19649
19650 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19651 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19652 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
19653 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
19654 return Res;
19655
19656 // If this shuffle only has a single input that is a bitcasted shuffle,
19657 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
19658 // back to their original types.
19659 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
19660 N1.isUndef() && Level < AfterLegalizeVectorOps &&
19661 TLI.isTypeLegal(VT)) {
19662 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
19663 if (Scale == 1)
19664 return SmallVector<int, 8>(Mask.begin(), Mask.end());
19665
19666 SmallVector<int, 8> NewMask;
19667 for (int M : Mask)
19668 for (int s = 0; s != Scale; ++s)
19669 NewMask.push_back(M < 0 ? -1 : Scale * M + s);
19670 return NewMask;
19671 };
19672
19673 SDValue BC0 = peekThroughOneUseBitcasts(N0);
19674 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
19675 EVT SVT = VT.getScalarType();
19676 EVT InnerVT = BC0->getValueType(0);
19677 EVT InnerSVT = InnerVT.getScalarType();
19678
19679 // Determine which shuffle works with the smaller scalar type.
19680 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
19681 EVT ScaleSVT = ScaleVT.getScalarType();
19682
19683 if (TLI.isTypeLegal(ScaleVT) &&
19684 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19685 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19686 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19687 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19688
19689 // Scale the shuffle masks to the smaller scalar type.
19690 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19691 SmallVector<int, 8> InnerMask =
19692 ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19693 SmallVector<int, 8> OuterMask =
19694 ScaleShuffleMask(SVN->getMask(), OuterScale);
19695
19696 // Merge the shuffle masks.
19697 SmallVector<int, 8> NewMask;
19698 for (int M : OuterMask)
19699 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
19700
19701 // Test for shuffle mask legality over both commutations.
19702 SDValue SV0 = BC0->getOperand(0);
19703 SDValue SV1 = BC0->getOperand(1);
19704 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19705 if (!LegalMask) {
19706 std::swap(SV0, SV1);
19707 ShuffleVectorSDNode::commuteMask(NewMask);
19708 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19709 }
19710
19711 if (LegalMask) {
19712 SV0 = DAG.getBitcast(ScaleVT, SV0);
19713 SV1 = DAG.getBitcast(ScaleVT, SV1);
19714 return DAG.getBitcast(
19715 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19716 }
19717 }
19718 }
19719 }
19720
19721 // Canonicalize shuffles according to rules:
19722 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19723 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19724 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19725 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19726 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
19727 TLI.isTypeLegal(VT)) {
19728 // The incoming shuffle must be of the same type as the result of the
19729 // current shuffle.
19730 assert(N1->getOperand(0).getValueType() == VT &&
19731        "Shuffle types don't match");
19732
19733 SDValue SV0 = N1->getOperand(0);
19734 SDValue SV1 = N1->getOperand(1);
19735 bool HasSameOp0 = N0 == SV0;
19736 bool IsSV1Undef = SV1.isUndef();
19737 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
19738 // Commute the operands of this shuffle so that next rule
19739 // will trigger.
19740 return DAG.getCommutedVectorShuffle(*SVN);
19741 }
19742
19743 // Try to fold according to rules:
19744 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19745 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19746 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19747 // Don't try to fold shuffles with illegal type.
19748 // Only fold if this shuffle is the only user of the other shuffle.
19749 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
19750 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
19751 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19752
19753 // Don't try to fold splats; they're likely to simplify somehow, or they
19754 // might be free.
19755 if (OtherSV->isSplat())
19756 return SDValue();
19757
19758 // The incoming shuffle must be of the same type as the result of the
19759 // current shuffle.
19760 assert(OtherSV->getOperand(0).getValueType() == VT &&
19761        "Shuffle types don't match");
19762
19763 SDValue SV0, SV1;
19764 SmallVector<int, 4> Mask;
19765 // Compute the combined shuffle mask for a shuffle with SV0 as the first
19766 // operand, and SV1 as the second operand.
19767 for (unsigned i = 0; i != NumElts; ++i) {
19768 int Idx = SVN->getMaskElt(i);
19769 if (Idx < 0) {
19770 // Propagate Undef.
19771 Mask.push_back(Idx);
19772 continue;
19773 }
19774
19775 SDValue CurrentVec;
19776 if (Idx < (int)NumElts) {
19777 // This shuffle index refers to the inner shuffle N0. Lookup the inner
19778 // shuffle mask to identify which vector is actually referenced.
19779 Idx = OtherSV->getMaskElt(Idx);
19780 if (Idx < 0) {
19781 // Propagate Undef.
19782 Mask.push_back(Idx);
19783 continue;
19784 }
19785
19786 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
19787 : OtherSV->getOperand(1);
19788 } else {
19789 // This shuffle index references an element within N1.
19790 CurrentVec = N1;
19791 }
19792
19793 // Simple case where 'CurrentVec' is UNDEF.
19794 if (CurrentVec.isUndef()) {
19795 Mask.push_back(-1);
19796 continue;
19797 }
19798
19799 // Canonicalize the shuffle index. We don't know yet if CurrentVec
19800 // will be the first or second operand of the combined shuffle.
19801 Idx = Idx % NumElts;
19802 if (!SV0.getNode() || SV0 == CurrentVec) {
19803 // Ok. CurrentVec is the left hand side.
19804 // Update the mask accordingly.
19805 SV0 = CurrentVec;
19806 Mask.push_back(Idx);
19807 continue;
19808 }
19809
19810 // Bail out if we cannot convert the shuffle pair into a single shuffle.
19811 if (SV1.getNode() && SV1 != CurrentVec)
19812 return SDValue();
19813
19814 // Ok. CurrentVec is the right hand side.
19815 // Update the mask accordingly.
19816 SV1 = CurrentVec;
19817 Mask.push_back(Idx + NumElts);
19818 }
19819
19820 // Check if all indices in Mask are Undef. If so, propagate Undef.
19821 bool isUndefMask = true;
19822 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19823 isUndefMask &= Mask[i] < 0;
19824
19825 if (isUndefMask)
19826 return DAG.getUNDEF(VT);
19827
19828 if (!SV0.getNode())
19829 SV0 = DAG.getUNDEF(VT);
19830 if (!SV1.getNode())
19831 SV1 = DAG.getUNDEF(VT);
19832
19833 // Avoid introducing shuffles with illegal mask.
19834 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19835 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19836 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19837 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19838 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19839 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19840 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
19841 }
19842
19843 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19844 return V;
19845
19846 return SDValue();
19847}
19848
19849SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
19850 SDValue InVal = N->getOperand(0);
19851 EVT VT = N->getValueType(0);
19852
19853 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
19854 // with a VECTOR_SHUFFLE and possible truncate.
19855 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19856 SDValue InVec = InVal->getOperand(0);
19857 SDValue EltNo = InVal->getOperand(1);
19858 auto InVecT = InVec.getValueType();
19859 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
19860 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
19861 int Elt = C0->getZExtValue();
19862 NewMask[0] = Elt;
19863 // If we have an implicit truncate, do the truncate here as long as it's
19864 // legal; if it's not legal, leave the node alone.
19865 if (VT.getScalarType() != InVal.getValueType() &&
19866 InVal.getValueType().isScalarInteger() &&
19867 isTypeLegal(VT.getScalarType())) {
19868 SDValue Val =
19869 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
19870 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
19871 }
19872 if (VT.getScalarType() == InVecT.getScalarType() &&
19873 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
19874 SDValue LegalShuffle =
19875 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
19876 DAG.getUNDEF(InVecT), NewMask, DAG);
19877 if (LegalShuffle) {
19878 // If the initial vector is the correct size this shuffle is a
19879 // valid result.
19880 if (VT == InVecT)
19881 return LegalShuffle;
19882 // If not we must truncate the vector.
19883 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
19884 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
19885 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
19886 InVecT.getVectorElementType(),
19887 VT.getVectorNumElements());
19888 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
19889 LegalShuffle, ZeroIdx);
19890 }
19891 }
19892 }
19893 }
19894 }
19895
19896 return SDValue();
19897}
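// ---- Added illustration (editor's worked example, not part of the source).
// With V : v4i32 and C0 == 2, scalar_to_vector (extract_vector_elt V, 2)
// becomes vector_shuffle V, undef, <2,-1,-1,-1>: element 2 moves to lane 0
// and all other lanes are undef, matching scalar_to_vector's semantics.
// ---- End added illustration.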
19898
19899SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
19900 EVT VT = N->getValueType(0);
19901 SDValue N0 = N->getOperand(0);
19902 SDValue N1 = N->getOperand(1);
19903 SDValue N2 = N->getOperand(2);
19904
19905 // If inserting an UNDEF, just return the original vector.
19906 if (N1.isUndef())
19907 return N0;
19908
19909 // If this is an insert of an extracted vector into an undef vector, we can
19910 // just use the input to the extract.
19911 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19912 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
19913 return N1.getOperand(0);
19914
19915 // If we are inserting a bitcast value into an undef, with the same
19916 // number of elements, just use the bitcast input of the extract.
19917 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
19918 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
19919 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
19920 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19921 N1.getOperand(0).getOperand(1) == N2 &&
19922 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
19923 VT.getVectorNumElements() &&
19924 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
19925 VT.getSizeInBits()) {
19926 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
19927 }
19928
19929 // If both N0 and N1 are bitcast values on which insert_subvector
19930 // would make sense, pull the bitcast through.
19931 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
19932 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
19933 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
19934 SDValue CN0 = N0.getOperand(0);
19935 SDValue CN1 = N1.getOperand(0);
19936 EVT CN0VT = CN0.getValueType();
19937 EVT CN1VT = CN1.getValueType();
19938 if (CN0VT.isVector() && CN1VT.isVector() &&
19939 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
19940 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
19941 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
19942 CN0.getValueType(), CN0, CN1, N2);
19943 return DAG.getBitcast(VT, NewINSERT);
19944 }
19945 }
19946
19947 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
19948 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
19949 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
19950 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
19951 N0.getOperand(1).getValueType() == N1.getValueType() &&
19952 N0.getOperand(2) == N2)
19953 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
19954 N1, N2);
19955
19956 // Eliminate an intermediate insert into an undef vector:
19957 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
19958 // insert_subvector undef, X, N2
19959 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
19960 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
19961 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
19962 N1.getOperand(1), N2);
19963
19964 if (!isa<ConstantSDNode>(N2))
19965 return SDValue();
19966
19967 uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
19968
19969 // Push subvector bitcasts to the output, adjusting the index as we go.
19970 // insert_subvector(bitcast(v), bitcast(s), c1)
19971 // -> bitcast(insert_subvector(v, s, c2))
19972 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
19973 N1.getOpcode() == ISD::BITCAST) {
19974 SDValue N0Src = peekThroughBitcasts(N0);
19975 SDValue N1Src = peekThroughBitcasts(N1);
19976 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
19977 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
19978 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
19979 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
19980 EVT NewVT;
19981 SDLoc DL(N);
19982 SDValue NewIdx;
19983 LLVMContext &Ctx = *DAG.getContext();
19984 unsigned NumElts = VT.getVectorNumElements();
19985 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19986 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
19987 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
19988 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
19989 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
19990 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
19991 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
19992 if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
19993 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
19994 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
19995 }
19996 }
19997 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
19998 SDValue Res = DAG.getBitcast(NewVT, N0Src);
19999 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
20000 return DAG.getBitcast(VT, Res);
20001 }
20002 }
20003 }
20004
20005 // Canonicalize insert_subvector dag nodes.
20006 // Example:
20007 // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
20008 // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0) if Idx1 < Idx0
20009 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
20010 N1.getValueType() == N0.getOperand(1).getValueType() &&
20011 isa<ConstantSDNode>(N0.getOperand(2))) {
20012 unsigned OtherIdx = N0.getConstantOperandVal(2);
20013 if (InsIdx < OtherIdx) {
20014 // Swap nodes.
20015 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
20016 N0.getOperand(0), N1, N2);
20017 AddToWorklist(NewOp.getNode());
20018 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
20019 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
20020 }
20021 }
20022
20023 // If the input vector is a concatenation, and the insert replaces
20024 // one of the pieces, we can optimize into a single concat_vectors.
20025 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
20026 N0.getOperand(0).getValueType() == N1.getValueType()) {
20027 unsigned Factor = N1.getValueType().getVectorNumElements();
20028
20029 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
20030 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
20031
20032 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20033 }
20034
20035 // Simplify source operands based on insertion.
20036 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20037 return SDValue(N, 0);
20038
20039 return SDValue();
20040}
20041
20042SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
20043 SDValue N0 = N->getOperand(0);
20044
20045 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
20046 if (N0->getOpcode() == ISD::FP16_TO_FP)
20047 return N0->getOperand(0);
20048
20049 return SDValue();
20050}
20051
20052SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
20053 SDValue N0 = N->getOperand(0);
20054
20055 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
20056 if (N0->getOpcode() == ISD::AND) {
20057 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
20058 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
20059 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
20060 N0.getOperand(0));
20061 }
20062 }
20063
20064 return SDValue();
20065}
20066
20067SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
20068 SDValue N0 = N->getOperand(0);
20069 EVT VT = N0.getValueType();
20070 unsigned Opcode = N->getOpcode();
20071
20072 // VECREDUCE over 1-element vector is just an extract.
20073 if (VT.getVectorNumElements() == 1) {
20074 SDLoc dl(N);
20075 SDValue Res =
20076 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
20077 DAG.getVectorIdxConstant(0, dl));
20078 if (Res.getValueType() != N->getValueType(0))
20079 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
20080 return Res;
20081 }
20082
20083 // On a boolean vector an and/or reduction is the same as a umin/umax
20084 // reduction. Convert them if the latter is legal while the former isn't.
20085 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
20086 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
20087 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
20088 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
20089 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
20090 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
20091 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
20092 }
20093
20094 return SDValue();
20095}
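// ---- Added illustration (editor's note, not part of the original source).
// On a vector whose elements are all 0 or all-ones (which is what the
// ComputeNumSignBits == bit-width check above guarantees), AND-reduce equals
// UMIN-reduce (any 0 forces 0, since all-ones is the unsigned maximum) and
// OR-reduce equals UMAX-reduce (any all-ones element forces all-ones).
// ---- End added illustration.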
20096
20097/// Returns a vector_shuffle if it is able to transform an AND into a
20098/// vector_shuffle with the destination vector and a zero vector.
20099/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
20100/// vector_shuffle V, Zero, <0, 4, 2, 4>
20101SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
20102 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
20103
20104 EVT VT = N->getValueType(0);
20105 SDValue LHS = N->getOperand(0);
20106 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
20107 SDLoc DL(N);
20108
20109 // Make sure we're not running after operation legalization where it
20110 // may have custom lowered the vector shuffles.
20111 if (LegalOperations)
20112 return SDValue();
20113
20114 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20115 return SDValue();
20116
20117 EVT RVT = RHS.getValueType();
20118 unsigned NumElts = RHS.getNumOperands();
20119
20120 // Attempt to create a valid clear mask, splitting the mask into
20121 // sub-elements and checking to see if each is all zeros or all ones,
20122 // making it suitable for shuffle masking.
20123 auto BuildClearMask = [&](int Split) {
20124 int NumSubElts = NumElts * Split;
20125 int NumSubBits = RVT.getScalarSizeInBits() / Split;
20126
20127 SmallVector<int, 8> Indices;
20128 for (int i = 0; i != NumSubElts; ++i) {
20129 int EltIdx = i / Split;
20130 int SubIdx = i % Split;
20131 SDValue Elt = RHS.getOperand(EltIdx);
20132 // X & undef --> 0 (not undef). So this lane must be converted to choose
20133 // from the zero constant vector (same as if the element had all 0-bits).
20134 if (Elt.isUndef()) {
20135 Indices.push_back(i + NumSubElts);
20136 continue;
20137 }
20138
20139 APInt Bits;
20140 if (isa<ConstantSDNode>(Elt))
20141 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
20142 else if (isa<ConstantFPSDNode>(Elt))
20143 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
20144 else
20145 return SDValue();
20146
20147 // Extract the sub element from the constant bit mask.
20148 if (DAG.getDataLayout().isBigEndian())
20149 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
20150 else
20151 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
20152
20153 if (Bits.isAllOnesValue())
20154 Indices.push_back(i);
20155 else if (Bits == 0)
20156 Indices.push_back(i + NumSubElts);
20157 else
20158 return SDValue();
20159 }
20160
20161 // Let's see if the target supports this vector_shuffle.
20162 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
20163 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
20164 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
20165 return SDValue();
20166
20167 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
20168 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
20169 DAG.getBitcast(ClearVT, LHS),
20170 Zero, Indices));
20171 };
20172
20173 // Determine maximum split level (byte level masking).
20174 int MaxSplit = 1;
20175 if (RVT.getScalarSizeInBits() % 8 == 0)
20176 MaxSplit = RVT.getScalarSizeInBits() / 8;
20177
20178 for (int Split = 1; Split <= MaxSplit; ++Split)
20179 if (RVT.getScalarSizeInBits() % Split == 0)
20180 if (SDValue S = BuildClearMask(Split))
20181 return S;
20182
20183 return SDValue();
20184}
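// ---- Added illustration (editor's worked example, not part of the source).
// For Split == 1 and AND v4i32 V, <0xffffffff, 0, 0xffffffff, undef>, the
// loop above emits index i for all-ones lanes and i + NumSubElts for zero or
// undef lanes, producing vector_shuffle V, zero, <0, 5, 2, 7>.
// ---- End added illustration.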
20185
20186/// If a vector binop is performed on splat values, it may be profitable to
20187/// extract, scalarize, and insert/splat.
20188static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
20189 SDValue N0 = N->getOperand(0);
20190 SDValue N1 = N->getOperand(1);
20191 unsigned Opcode = N->getOpcode();
20192 EVT VT = N->getValueType(0);
20193 EVT EltVT = VT.getVectorElementType();
20194 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20195
20196 // TODO: Remove/replace the extract cost check? If the elements are available
20197 // as scalars, then there may be no extract cost. Should we ask if
20198 // inserting a scalar back into a vector is cheap instead?
20199 int Index0, Index1;
20200 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
20201 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
20202 if (!Src0 || !Src1 || Index0 != Index1 ||
20203 Src0.getValueType().getVectorElementType() != EltVT ||
20204 Src1.getValueType().getVectorElementType() != EltVT ||
20205 !TLI.isExtractVecEltCheap(VT, Index0) ||
20206 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
20207 return SDValue();
20208
20209 SDLoc DL(N);
20210 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
20211 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
20212 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
20213 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
20214
20215 // If all lanes but 1 are undefined, no need to splat the scalar result.
20216 // TODO: Keep track of undefs and use that info in the general case.
20217 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
20218 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
20219 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
20220 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
20221 // build_vec ..undef, (bo X, Y), undef...
20222 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
20223 Ops[Index0] = ScalarBO;
20224 return DAG.getBuildVector(VT, DL, Ops);
20225 }
20226
20227 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
20228 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
20229 return DAG.getBuildVector(VT, DL, Ops);
20230}
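// ---- Added illustration (editor's worked example, not part of the source).
// add (splat of lane Index of X), (splat of lane Index of Y) becomes: extract
// lane Index from each operand, one scalar add, then a build_vector splatting
// the scalar sum, trading a full-width vector add for one scalar operation.
// ---- End added illustration.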
20231
20232/// Visit a binary vector operation, like ADD.
20233SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
20234 assert(N->getValueType(0).isVector() &&
20235        "SimplifyVBinOp only works on vectors!");
20236
20237 SDValue LHS = N->getOperand(0);
20238 SDValue RHS = N->getOperand(1);
20239 SDValue Ops[] = {LHS, RHS};
20240 EVT VT = N->getValueType(0);
20241 unsigned Opcode = N->getOpcode();
20242
20243 // See if we can constant fold the vector operation.
20244 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
20245 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
20246 return Fold;
20247
20248 // Move unary shuffles with identical masks after a vector binop:
20249 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
20250 // --> shuffle (VBinOp A, B), Undef, Mask
20251 // This does not require type legality checks because we are creating the
20252 // same types of operations that are in the original sequence. We do have to
20253 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
20254 // though. This code is adapted from the identical transform in instcombine.
20255 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
20256 Opcode != ISD::UREM && Opcode != ISD::SREM &&
20257 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
20258 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
20259 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
20260 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
20261 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
20262 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
20263 SDLoc DL(N);
20264 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
20265 RHS.getOperand(0), N->getFlags());
20266 SDValue UndefV = LHS.getOperand(1);
20267 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
20268 }
20269 }
20270
20271 // The following pattern is likely to emerge with vector reduction ops. Moving
20272 // the binary operation ahead of insertion may allow using a narrower vector
20273 // instruction that has better performance than the wide version of the op:
20274 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
20275 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
20276 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
20277 LHS.getOperand(2) == RHS.getOperand(2) &&
20278 (LHS.hasOneUse() || RHS.hasOneUse())) {
20279 SDValue X = LHS.getOperand(1);
20280 SDValue Y = RHS.getOperand(1);
20281 SDValue Z = LHS.getOperand(2);
20282 EVT NarrowVT = X.getValueType();
20283 if (NarrowVT == Y.getValueType() &&
20284 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20285 // (binop undef, undef) may not return undef, so compute that result.
20286 SDLoc DL(N);
20287 SDValue VecC =
20288 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
20289 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
20290 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
20291 }
20292 }
20293
20294 // Make sure all but the first op are undef or constant.
20295 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
20296 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
20297 std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
20298 [](const SDValue &Op) {
20299 return Op.isUndef() ||
20300 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
20301 });
20302 };
20303
20304 // The following pattern is likely to emerge with vector reduction ops. Moving
20305 // the binary operation ahead of the concat may allow using a narrower vector
20306 // instruction that has better performance than the wide version of the op:
20307 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
20308 // concat (VBinOp X, Y), VecC
20309 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
20310 (LHS.hasOneUse() || RHS.hasOneUse())) {
20311 EVT NarrowVT = LHS.getOperand(0).getValueType();
20312 if (NarrowVT == RHS.getOperand(0).getValueType() &&
20313 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20314 SDLoc DL(N);
20315 unsigned NumOperands = LHS.getNumOperands();
20316 SmallVector<SDValue, 4> ConcatOps;
20317 for (unsigned i = 0; i != NumOperands; ++i) {
20318 // This constant folds for operands 1 and up.
20319 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
20320 RHS.getOperand(i)));
20321 }
20322
20323 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20324 }
20325 }
20326
20327 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
20328 return V;
20329
20330 return SDValue();
20331}
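// ---- Added illustration (editor's worked example, not part of the source).
// For the concat fold above: add (concat X, C0), (concat Y, C1), where C0 and
// C1 are constant or undef, becomes concat (add X, Y), fold(add C0, C1): the
// trailing operand pairs constant-fold away, leaving only the narrow leading
// add.
// ---- End added illustration.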
20332
20333SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
20334 SDValue N2) {
20335 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
20336
20337 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
20338 cast<CondCodeSDNode>(N0.getOperand(2))->get());
20339
20340 // If we got a simplified select_cc node back from SimplifySelectCC, then
20341 // break it down into a new SETCC node, and a new SELECT node, and then return
20342 // the SELECT node, since we were called with a SELECT node.
20343 if (SCC.getNode()) {
20344 // Check to see if we got a select_cc back (to turn into setcc/select).
20345 // Otherwise, just return whatever node we got back, like fabs.
20346 if (SCC.getOpcode() == ISD::SELECT_CC) {
20347 const SDNodeFlags Flags = N0.getNode()->getFlags();
20348 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
20349 N0.getValueType(),
20350 SCC.getOperand(0), SCC.getOperand(1),
20351 SCC.getOperand(4), Flags);
20352 AddToWorklist(SETCC.getNode());
20353 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
20354 SCC.getOperand(2), SCC.getOperand(3));
20355 SelectNode->setFlags(Flags);
20356 return SelectNode;
20357 }
20358
20359 return SCC;
20360 }
20361 return SDValue();
20362}
20363
20364/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
20365/// being selected between, see if we can simplify the select. Callers of this
20366/// should assume that TheSelect is deleted if this returns true. As such, they
20367/// should return the appropriate thing (e.g. the node) back to the top-level of
20368/// the DAG combiner loop to avoid it being looked at.
20369bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
20370 SDValue RHS) {
20371 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20372 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
20373 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
20374 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
20375 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
20376 SDValue Sqrt = RHS;
20377 ISD::CondCode CC;
20378 SDValue CmpLHS;
20379 const ConstantFPSDNode *Zero = nullptr;
20380
20381 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
20382 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
20383 CmpLHS = TheSelect->getOperand(0);
20384 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
20385 } else {
20386 // SELECT or VSELECT
20387 SDValue Cmp = TheSelect->getOperand(0);
20388 if (Cmp.getOpcode() == ISD::SETCC) {
20389 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
20390 CmpLHS = Cmp.getOperand(0);
20391 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
20392 }
20393 }
20394 if (Zero && Zero->isZero() &&
20395 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
20396 CC == ISD::SETULT || CC == ISD::SETLT)) {
20397 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20398 CombineTo(TheSelect, Sqrt);
20399 return true;
20400 }
20401 }
20402 }
20403 // Cannot simplify select with vector condition
20404 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
20405
20406 // If this is a select from two identical things, try to pull the operation
20407 // through the select.
20408 if (LHS.getOpcode() != RHS.getOpcode() ||
20409 !LHS.hasOneUse() || !RHS.hasOneUse())
20410 return false;
20411
20412 // If this is a load and the token chain is identical, replace the select
20413 // of two loads with a load through a select of the address to load from.
20414 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
20415 // constants have been dropped into the constant pool.
20416 if (LHS.getOpcode() == ISD::LOAD) {
20417 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
20418 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
20419
20420 // Token chains must be identical.
20421 if (LHS.getOperand(0) != RHS.getOperand(0) ||
20422 // Do not let this transformation reduce the number of volatile loads.
20423 // Be conservative for atomics for the moment
20424 // TODO: This does appear to be legal for unordered atomics (see D66309)
20425 !LLD->isSimple() || !RLD->isSimple() ||
20426 // FIXME: If either is a pre/post inc/dec load,
20427 // we'd need to split out the address adjustment.
20428 LLD->isIndexed() || RLD->isIndexed() ||
20429 // If this is an EXTLOAD, the VT's must match.
20430 LLD->getMemoryVT() != RLD->getMemoryVT() ||
20431 // If this is an EXTLOAD, the kind of extension must match.
20432 (LLD->getExtensionType() != RLD->getExtensionType() &&
20433 // The only exception is if one of the extensions is anyext.
20434 LLD->getExtensionType() != ISD::EXTLOAD &&
20435 RLD->getExtensionType() != ISD::EXTLOAD) ||
20436 // FIXME: this discards src value information. This is
20437 // over-conservative. It would be beneficial to be able to remember
20438 // both potential memory locations. Since we are discarding
20439 // src value info, don't do the transformation if the memory
20440 // locations are not in the default address space.
20441 LLD->getPointerInfo().getAddrSpace() != 0 ||
20442 RLD->getPointerInfo().getAddrSpace() != 0 ||
20443 // We can't produce a CMOV of a TargetFrameIndex since we won't
20444 // generate the address generation required.
20445 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
20446 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
20447 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
20448 LLD->getBasePtr().getValueType()))
20449 return false;
20450
20451 // The loads must not depend on one another.
20452 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
20453 return false;
20454
20455 // Check that the select condition doesn't reach either load. If so,
20456 // folding this will induce a cycle into the DAG. If not, this is safe to
20457 // xform, so create a select of the addresses.
20458
20459 SmallPtrSet<const SDNode *, 32> Visited;
20460 SmallVector<const SDNode *, 16> Worklist;
20461
20462 // Always fail if LLD and RLD are not independent. TheSelect is a
20463 // predecessor to all Nodes in question so we need not search past it.
20464
20465 Visited.insert(TheSelect);
20466 Worklist.push_back(LLD);
20467 Worklist.push_back(RLD);
20468
20469 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
20470 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
20471 return false;
20472
20473 SDValue Addr;
20474 if (TheSelect->getOpcode() == ISD::SELECT) {
20475 // We cannot do this optimization if any pair of {RLD, LLD} is a
20476 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
20477 // Loads, we only need to check if CondNode is a successor to one of the
20478 // loads. We can further avoid this if there's no use of their chain
20479 // value.
20480 SDNode *CondNode = TheSelect->getOperand(0).getNode();
20481 Worklist.push_back(CondNode);
20482
20483 if ((LLD->hasAnyUseOfValue(1) &&
20484 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
20485 (RLD->hasAnyUseOfValue(1) &&
20486 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
20487 return false;
20488
20489 Addr = DAG.getSelect(SDLoc(TheSelect),
20490 LLD->getBasePtr().getValueType(),
20491 TheSelect->getOperand(0), LLD->getBasePtr(),
20492 RLD->getBasePtr());
20493 } else { // Otherwise SELECT_CC
20494 // We cannot do this optimization if any pair of {RLD, LLD} is a
20495 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
20496 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
20497 // one of the loads. We can further avoid this if there's no use of their
20498 // chain value.
20499
20500 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
20501 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
20502 Worklist.push_back(CondLHS);
20503 Worklist.push_back(CondRHS);
20504
20505 if ((LLD->hasAnyUseOfValue(1) &&
20506 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
20507 (RLD->hasAnyUseOfValue(1) &&
20508 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
20509 return false;
20510
20511 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
20512 LLD->getBasePtr().getValueType(),
20513 TheSelect->getOperand(0),
20514 TheSelect->getOperand(1),
20515 LLD->getBasePtr(), RLD->getBasePtr(),
20516 TheSelect->getOperand(4));
20517 }
20518
20519 SDValue Load;
20520 // It is safe to replace the two loads if they have different alignments,
20521 // but the new load must use the minimum (most restrictive) alignment of the
20522 // inputs.
20523 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
20524 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
20525 if (!RLD->isInvariant())
20526 MMOFlags &= ~MachineMemOperand::MOInvariant;
20527 if (!RLD->isDereferenceable())
20528 MMOFlags &= ~MachineMemOperand::MODereferenceable;
20529 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
20530 // FIXME: Discards pointer and AA info.
20531 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
20532 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
20533 MMOFlags);
20534 } else {
20535 // FIXME: Discards pointer and AA info.
20536 Load = DAG.getExtLoad(
20537 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
20538 : LLD->getExtensionType(),
20539 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
20540 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
20541 }
20542
20543 // Users of the select now use the result of the load.
20544 CombineTo(TheSelect, Load);
20545
20546 // Users of the old loads now use the new load's chain. We know the
20547 // old-load value is dead now.
20548 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
20549 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
20550 return true;
20551 }
20552
20553 return false;
20554}
20555
20556/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
20557/// bitwise 'and'.
20558SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
20559 SDValue N1, SDValue N2, SDValue N3,
20560 ISD::CondCode CC) {
20561 // If this is a select where the false operand is zero and the compare is a
20562 // check of the sign bit, see if we can perform the "gzip trick":
20563 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
20564 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
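// [Editorial example, not part of the original source] For i32 with
// CC == SETLT: (sra X, 31) is all-ones when X < 0 and zero otherwise, so
// (and (sra X, 31), A) produces A exactly when X < 0 and 0 otherwise --
// precisely the value of the select_cc.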
20565 EVT XType = N0.getValueType();
20566 EVT AType = N2.getValueType();
20567 if (!isNullConstant(N3) || !XType.bitsGE(AType))
20568 return SDValue();
20569
20570 // If the comparison is testing for a positive value, we have to invert
20571 // the sign bit mask, so only do that transform if the target has a bitwise
20572 // 'and not' instruction (the invert is free).
20573 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
20574 // (X > -1) ? A : 0
20575 // (X > 0) ? X : 0 <-- This is canonical signed max.
20576 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
20577 return SDValue();
20578 } else if (CC == ISD::SETLT) {
20579 // (X < 0) ? A : 0
20580 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
20581 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
20582 return SDValue();
20583 } else {
20584 return SDValue();
20585 }
20586
20587 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
20588 // constant.
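// [Editorial example, not part of the original source] With XType == i32 and
// A == 16 (only bit 4 set), ShCt is 32 - 4 - 1 == 27: (srl X, 27) moves the
// sign bit into bit 4, so the AND with 16 yields 16 when X < 0 and 0 otherwise.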
20589 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
20590 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
20591 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
20592 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
20593 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
20594 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
20595 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
20596 AddToWorklist(Shift.getNode());
20597
20598 if (XType.bitsGT(AType)) {
20599 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
20600 AddToWorklist(Shift.getNode());
20601 }
20602
20603 if (CC == ISD::SETGT)
20604 Shift = DAG.getNOT(DL, Shift, AType);
20605
20606 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
20607 }
20608 }
20609
20610 unsigned ShCt = XType.getSizeInBits() - 1;
20611 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
20612 return SDValue();
20613
20614 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
20615 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
20616 AddToWorklist(Shift.getNode());
20617
20618 if (XType.bitsGT(AType)) {
20619 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
20620 AddToWorklist(Shift.getNode());
20621 }
20622
20623 if (CC == ISD::SETGT)
20624 Shift = DAG.getNOT(DL, Shift, AType);
20625
20626 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
20627}
20628
20629/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
20630/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
20631/// in it. This may be a win when the constant is not otherwise available
20632/// because it replaces two constant pool loads with one.
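/// For example (illustrative, mirroring the code below): with f32 elements the
/// pool holds {FV, TV}, EltSize is 4, and the emitted DAG is roughly
/// (load (add CPIdx, (select (setcc N0, N1, CC), 4, 0))).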
20633SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
20634 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
20635 ISD::CondCode CC) {
20636 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
20637 return SDValue();
20638
20639 // If we are before legalize types, we want the other legalization to happen
20640 // first (for example, to avoid messing with soft float).
20641 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
20642 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
20643 EVT VT = N2.getValueType();
20644 if (!TV || !FV || !TLI.isTypeLegal(VT))
20645 return SDValue();
20646
20647 // If a constant can be materialized without loads, this does not make sense.
20648 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
20649 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
20650 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
20651 return SDValue();
20652
20653 // If both constants have multiple uses, then we won't need to do an extra
20654 // load. The values are likely around in registers for other users.
20655 if (!TV->hasOneUse() && !FV->hasOneUse())
20656 return SDValue();
20657
20658 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
20659 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
20660 Type *FPTy = Elts[0]->getType();
20661 const DataLayout &TD = DAG.getDataLayout();
20662
20663 // Create a ConstantArray of the two constants.
20664 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
20665 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
20666 TD.getPrefTypeAlignment(FPTy));
20667 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
20668
20669 // Get offsets to the 0 and 1 elements of the array, so we can select between
20670 // them.
20671 SDValue Zero = DAG.getIntPtrConstant(0, DL);
20672 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
20673 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
20674 SDValue Cond =
20675 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
20676 AddToWorklist(Cond.getNode());
20677 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
20678 AddToWorklist(CstOffset.getNode());
20679 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
20680 AddToWorklist(CPIdx.getNode());
20681 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
20682 MachinePointerInfo::getConstantPool(
20683 DAG.getMachineFunction()), Alignment);
20684}
20685
20686/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
20687/// where 'cond' is the comparison specified by CC.
20688SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
20689 SDValue N2, SDValue N3, ISD::CondCode CC,
20690 bool NotExtCompare) {
20691 // (x ? y : y) -> y.
20692 if (N2 == N3) return N2;
20693
20694 EVT CmpOpVT = N0.getValueType();
20695 EVT CmpResVT = getSetCCResultType(CmpOpVT);
20696 EVT VT = N2.getValueType();
20697 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
20698 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
20699 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
20700
20701 // Determine if the condition we're dealing with is constant.
20702 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
20703 AddToWorklist(SCC.getNode());
20704 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
20705 // fold select_cc true, x, y -> x
20706 // fold select_cc false, x, y -> y
20707 return !(SCCC->isNullValue()) ? N2 : N3;
20708 }
20709 }
20710
20711 if (SDValue V =
20712 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
20713 return V;
20714
20715 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
20716 return V;
20717
20718 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
20719 // where y has a single bit set.
20720 // In plain terms, we can turn the SELECT_CC into an AND
20721 // when the condition can be materialized as an all-ones register. Any
20722 // single bit-test can be materialized as an all-ones register with
20723 // shift-left and shift-right-arith.
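// [Editorial example, not part of the original source] With VT == i32 and
// y == 0x8 (bit 3): the mask has 28 leading zeros, so (shl x, 28) parks the
// tested bit in the sign position and (sra ..., 31) smears it into all-ones
// when the bit was set; ANDing with N3 then yields N3 exactly when
// (x & y) != 0, matching the select_cc.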
20724 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
20725 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
20726 SDValue AndLHS = N0->getOperand(0);
20727 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
20728 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
20729 // Shift the tested bit over the sign bit.
20730 const APInt &AndMask = ConstAndRHS->getAPIntValue();
20731 unsigned ShCt = AndMask.getBitWidth() - 1;
20732 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
20733 SDValue ShlAmt =
20734 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
20735 getShiftAmountTy(AndLHS.getValueType()));
20736 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
20737
20738 // Now arithmetic right shift it all the way over, so the result is
20739 // either all-ones, or zero.
20740 SDValue ShrAmt =
20741 DAG.getConstant(ShCt, SDLoc(Shl),
20742 getShiftAmountTy(Shl.getValueType()));
20743 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
20744
20745 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
20746 }
20747 }
20748 }
20749
20750 // fold select C, 16, 0 -> shl C, 4
20751 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
20752 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
20753
20754 if ((Fold || Swap) &&
20755 TLI.getBooleanContents(CmpOpVT) ==
20756 TargetLowering::ZeroOrOneBooleanContent &&
20757 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
20758
20759 if (Swap) {
20760 CC = ISD::getSetCCInverse(CC, CmpOpVT);
20761 std::swap(N2C, N3C);
20762 }
20763
20764 // If the caller doesn't want us to simplify this into a zext of a compare,
20765 // don't do it.
20766 if (NotExtCompare && N2C->isOne())
20767 return SDValue();
20768
20769 SDValue Temp, SCC;
20770 // zext (setcc n0, n1)
20771 if (LegalTypes) {
20772 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
20773 if (VT.bitsLT(SCC.getValueType()))
20774 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
20775 else
20776 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20777 } else {
20778 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
20779 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20780 }
20781
20782 AddToWorklist(SCC.getNode());
20783 AddToWorklist(Temp.getNode());
20784
20785 if (N2C->isOne())
20786 return Temp;
20787
20788 unsigned ShCt = N2C->getAPIntValue().logBase2();
20789 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
20790 return SDValue();
20791
20792 // shl setcc result by log2 n2c
20793 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
20794 DAG.getConstant(ShCt, SDLoc(Temp),
20795 getShiftAmountTy(Temp.getValueType())));
20796 }
20797
20798 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
20799 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
20800 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
20801 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
20802 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
20803 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
20804 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
20805 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
20806 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
20807 SDValue ValueOnZero = N2;
20808 SDValue Count = N3;
20809 // If the condition is NE instead of E, swap the operands.
20810 if (CC == ISD::SETNE)
20811 std::swap(ValueOnZero, Count);
20812 // Check if the value on zero is a constant equal to the bits in the type.
20813 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
20814 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
20815 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
20816 // legal, combine to just cttz.
20817 if ((Count.getOpcode() == ISD::CTTZ ||
20818 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20819 N0 == Count.getOperand(0) &&
20820 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
20821 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20822 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20823 // legal, combine to just ctlz.
20824 if ((Count.getOpcode() == ISD::CTLZ ||
20825 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20826 N0 == Count.getOperand(0) &&
20827 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
20828 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20829 }
20830 }
20831 }
20832
20833 return SDValue();
20834}
20835
20836/// This is a stub for TargetLowering::SimplifySetCC.
20837SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20838 ISD::CondCode Cond, const SDLoc &DL,
20839 bool foldBooleans) {
20840 TargetLowering::DAGCombinerInfo
20841 DagCombineInfo(DAG, Level, false, this);
20842 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20843}
20844
20845/// Given an ISD::SDIV node expressing a divide by constant, return
20846/// a DAG expression to select that will generate the same value by multiplying
20847/// by a magic number.
20848/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20849SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20850 // When optimizing for minimum size, we don't want to expand a div to a mul
20851 // and a shift.
20852 if (DAG.getMachineFunction().getFunction().hasMinSize())
20853 return SDValue();
20854
20855 SmallVector<SDNode *, 8> Built;
20856 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20857 for (SDNode *N : Built)
20858 AddToWorklist(N);
20859 return S;
20860 }
20861
20862 return SDValue();
20863}
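// [Editorial sketch, not part of the original source] A scalar model of the
// magic-number expansion TLI.BuildSDIV emits, assuming i32 and a divisor of 7
// (constants per "Hacker's Delight"; the helper name is hypothetical):
static int32_t SDiv7Sketch(int32_t N) {
  const int64_t Magic = (int32_t)0x92492493; // signed value of the magic number
  int32_t Q = (int32_t)((Magic * N) >> 32);  // mulhs(N, Magic)
  Q += N;                                    // magic is "negative", so add N back
  Q >>= 2;                                   // arithmetic shift right by s == 2
  Q += (int32_t)((uint32_t)N >> 31);         // +1 for negative N (round to zero)
  return Q;                                  // == N / 7 for every int32_t N
}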
20864
20865/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20866/// DAG expression that will generate the same value by right shifting.
20867SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20868 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20869 if (!C)
20870 return SDValue();
20871
20872 // Avoid division by zero.
20873 if (C->isNullValue())
20874 return SDValue();
20875
20876 SmallVector<SDNode *, 8> Built;
20877 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20878 for (SDNode *N : Built)
20879 AddToWorklist(N);
20880 return S;
20881 }
20882
20883 return SDValue();
20884}
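// [Editorial sketch, not part of the original source] The shift-based
// expansion in scalar form, assuming i32 and a divisor of 8 (helper name
// hypothetical): bias a negative dividend by d-1 so the shift truncates
// toward zero rather than toward negative infinity.
static int32_t SDivPow2Sketch(int32_t N) {
  int32_t Sign = N >> 31;                               // all-ones iff N < 0
  int32_t Bias = (int32_t)((uint32_t)Sign >> (32 - 3)); // 7 iff N < 0, else 0
  return (N + Bias) >> 3;                               // truncating N / 8
}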
20885
20886/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20887/// expression that will generate the same value by multiplying by a magic
20888/// number.
20889/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20890SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20891 // When optimizing for minimum size, we don't want to expand a div to a mul
20892 // and a shift.
20893 if (DAG.getMachineFunction().getFunction().hasMinSize())
20894 return SDValue();
20895
20896 SmallVector<SDNode *, 8> Built;
20897 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20898 for (SDNode *N : Built)
20899 AddToWorklist(N);
20900 return S;
20901 }
20902
20903 return SDValue();
20904}
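// [Editorial sketch, not part of the original source] The unsigned analogue,
// assuming i32 and a divisor of 5 (constant per "Hacker's Delight"; helper
// name hypothetical): a mulhu by the magic followed by a logical shift.
static uint32_t UDiv5Sketch(uint32_t N) {
  uint32_t Hi = (uint32_t)(((uint64_t)N * 0xCCCCCCCDu) >> 32); // mulhu
  return Hi >> 2;                                              // == N / 5
}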
20905
20906/// Determines the LogBase2 value for a non-null input value using the
20907/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
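/// For example, with 32-bit elements and V == 16: ctlz(16) == 27 and
/// (32 - 1) - 27 == 4 == log2(16).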
20908SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20909 EVT VT = V.getValueType();
20910 unsigned EltBits = VT.getScalarSizeInBits();
20911 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20912 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20913 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20914 return LogBase2;
20915}
20916
20917/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20918/// For the reciprocal, we need to find the zero of the function:
20919/// F(X) = A X - 1 [which has a zero at X = 1/A]
20920/// =>
20921/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20922/// does not require additional intermediate precision]
20923/// For the last iteration, put numerator N into it to gain more precision:
20924/// Result = N X_i + X_i (N - N A X_i)
20925SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
20926 SDNodeFlags Flags) {
20927 if (LegalDAG)
20928 return SDValue();
20929
20930 // TODO: Handle half and/or extended types?
20931 EVT VT = Op.getValueType();
20932 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20933 return SDValue();
20934
20935 // If estimates are explicitly disabled for this function, we're done.
20936 MachineFunction &MF = DAG.getMachineFunction();
20937 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20938 if (Enabled == TLI.ReciprocalEstimate::Disabled)
20939 return SDValue();
20940
20941 // Estimates may be explicitly enabled for this type with a custom number of
20942 // refinement steps.
20943 int Iterations = TLI.getDivRefinementSteps(VT, MF);
20944 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20945 AddToWorklist(Est.getNode());
20946
20947 SDLoc DL(Op);
20948 if (Iterations) {
20949 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20950
20951 // Newton iterations: Est = Est + Est (N - Arg * Est)
20952 // If this is the last iteration, also multiply by the numerator.
20953 for (int i = 0; i < Iterations; ++i) {
20954 SDValue MulEst = Est;
20955
20956 if (i == Iterations - 1) {
20957 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
20958 AddToWorklist(MulEst.getNode());
20959 }
20960
20961 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
20962 AddToWorklist(NewEst.getNode());
20963
20964 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
20965 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
20966 AddToWorklist(NewEst.getNode());
20967
20968 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20969 AddToWorklist(NewEst.getNode());
20970
20971 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
20972 AddToWorklist(Est.getNode());
20973 }
20974 } else {
20975 // If no iterations are available, multiply with N.
20976 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
20977 AddToWorklist(Est.getNode());
20978 }
20979
20980 return Est;
20981 }
20982
20983 return SDValue();
20984}
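// [Editorial sketch, not part of the original source] The last-iteration
// algebra above in scalar f32 form, assuming one refinement step and that Est
// already approximates 1/A (helper name hypothetical):
static float DivEstSketch(float N, float A, float Est) {
  float MulEst = N * Est;        // fold the numerator into the final step
  float NewEst = N - A * MulEst; // N - N*A*Est
  NewEst = Est * NewEst;         // Est * (N - N*A*Est)
  return MulEst + NewEst;        // N*Est + Est*(N - N*A*Est), approximately N/A
}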
20985
20986/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20987/// For the reciprocal sqrt, we need to find the zero of the function:
20988/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20989/// =>
20990/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20991/// As a result, we precompute A/2 prior to the iteration loop.
20992SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20993 unsigned Iterations,
20994 SDNodeFlags Flags, bool Reciprocal) {
20995 EVT VT = Arg.getValueType();
20996 SDLoc DL(Arg);
20997 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20998
20999 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
21000 // this entire sequence requires only one FP constant.
21001 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
21002 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
21003
21004 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
21005 for (unsigned i = 0; i < Iterations; ++i) {
21006 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
21007 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
21008 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
21009 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21010 }
21011
21012 // If non-reciprocal square root is requested, multiply the result by Arg.
21013 if (!Reciprocal)
21014 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
21015
21016 return Est;
21017}
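// [Editorial sketch, not part of the original source] The one-constant
// iteration above in scalar f32 form (helper name hypothetical); note that
// 0.5*A is formed as 1.5*A - A, so 1.5 is the only constant materialized:
static float RsqrtOneConstSketch(float A, float Est, unsigned Iterations) {
  float HalfA = 1.5f * A - A;               // 0.5 * A without a second constant
  for (unsigned i = 0; i < Iterations; ++i)
    Est = Est * (1.5f - HalfA * Est * Est); // Newton step toward 1/sqrt(A)
  return Est;                               // caller multiplies by A for sqrt
}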
21018
21019/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21020/// For the reciprocal sqrt, we need to find the zero of the function:
21021/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21022/// =>
21023/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
21024SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
21025 unsigned Iterations,
21026 SDNodeFlags Flags, bool Reciprocal) {
21027 EVT VT = Arg.getValueType();
21028 SDLoc DL(Arg);
21029 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
21030 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
21031
21032 // This routine must enter the loop below to work correctly
21033 // when (Reciprocal == false).
21034 assert(Iterations > 0);
21035
21036 // Newton iterations for reciprocal square root:
21037 // E = (E * -0.5) * ((A * E) * E + -3.0)
21038 for (unsigned i = 0; i < Iterations; ++i) {
21039 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
21040 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
21041 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
21042
21043 // When calculating a square root at the last iteration build:
21044 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
21045 // (notice a common subexpression)
21046 SDValue LHS;
21047 if (Reciprocal || (i + 1) < Iterations) {
21048 // RSQRT: LHS = (E * -0.5)
21049 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
21050 } else {
21051 // SQRT: LHS = (A * E) * -0.5
21052 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
21053 }
21054
21055 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
21056 }
21057
21058 return Est;
21059}
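// [Editorial example, not part of the original source] Sanity check with
// A == 4 and an exact Est == 0.5: AE == 2, AEE == 1, RHS == 1 + -3 == -2; on
// the last non-reciprocal step LHS == AE * -0.5 == -1, so the result is
// (-1) * (-2) == 2 == sqrt(4), as expected.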
21060
21061/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
21062/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
21063/// Op can be zero.
21064SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
21065 bool Reciprocal) {
21066 if (LegalDAG)
21067 return SDValue();
21068
21069 // TODO: Handle half and/or extended types?
21070 EVT VT = Op.getValueType();
21071 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21072 return SDValue();
21073
21074 // If estimates are explicitly disabled for this function, we're done.
21075 MachineFunction &MF = DAG.getMachineFunction();
21076 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
21077 if (Enabled == TLI.ReciprocalEstimate::Disabled)
21078 return SDValue();
21079
21080 // Estimates may be explicitly enabled for this type with a custom number of
21081 // refinement steps.
21082 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
21083
21084 bool UseOneConstNR = false;
21085 if (SDValue Est =
21086 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
21087 Reciprocal)) {
21088 AddToWorklist(Est.getNode());
21089
21090 if (Iterations) {
21091 Est = UseOneConstNR
21092 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
21093 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
21094
21095 if (!Reciprocal) {
21096 // The estimate is now completely wrong if the input was exactly 0.0 or
21097 // possibly a denormal. Force the answer to 0.0 for those cases.
21098 SDLoc DL(Op);
21099 EVT CCVT = getSetCCResultType(VT);
21100 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
21101 DenormalMode DenormMode = DAG.getDenormalMode(VT);
21102 if (DenormMode.Input == DenormalMode::IEEE) {
21103 // This is specifically a check for the handling of denormal inputs,
21104 // not the result.
21105
21106 // fabs(X) < SmallestNormal ? 0.0 : Est
21107 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
21108 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
21109 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
21110 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21111 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
21112 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
21113 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
21114 } else {
21115 // X == 0.0 ? 0.0 : Est
21116 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21117 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
21118 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
21119 }
21120 }
21121 }
21122 return Est;
21123 }
21124
21125 return SDValue();
21126}
21127
21128SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21129 return buildSqrtEstimateImpl(Op, Flags, true);
21130}
21131
21132SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21133 return buildSqrtEstimateImpl(Op, Flags, false);
21134}
21135
21136/// Return true if there is any possibility that the two addresses overlap.
21137bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
21138
21139 struct MemUseCharacteristics {
21140 bool IsVolatile;
21141 bool IsAtomic;
21142 SDValue BasePtr;
21143 int64_t Offset;
21144 Optional<int64_t> NumBytes;
21145 MachineMemOperand *MMO;
21146 };
21147
21148 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
21149 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
21150 int64_t Offset = 0;
21151 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
21152 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
21153 ? C->getSExtValue()
21154 : (LSN->getAddressingMode() == ISD::PRE_DEC)
21155 ? -1 * C->getSExtValue()
21156 : 0;
21157 uint64_t Size =
21158 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
21159 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
21160 Offset /*base offset*/,
21161 Optional<int64_t>(Size),
21162 LSN->getMemOperand()};
21163 }
21164 if (const auto *LN = cast<LifetimeSDNode>(N))
21165 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
21166 (LN->hasOffset()) ? LN->getOffset() : 0,
21167 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
21168 : Optional<int64_t>(),
21169 (MachineMemOperand *)nullptr};
21170 // Default.
21171 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
21172 (int64_t)0 /*offset*/,
21173 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
21174 };
21175
21176 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
21177 MUC1 = getCharacteristics(Op1);
21178
21179 // If they are to the same address, then they must be aliases.
21180 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
21181 MUC0.Offset == MUC1.Offset)
21182 return true;
21183
21184 // If they are both volatile then they cannot be reordered.
21185 if (MUC0.IsVolatile && MUC1.IsVolatile)
21186 return true;
21187
21188 // Be conservative about atomics for the moment
21189 // TODO: This is way overconservative for unordered atomics (see D66309)
21190 if (MUC0.IsAtomic && MUC1.IsAtomic)
21191 return true;
21192
21193 if (MUC0.MMO && MUC1.MMO) {
21194 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21195 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21196 return false;
21197 }
21198
21199 // Try to prove that there is aliasing, or that there is no aliasing. Either
21200 // way, we can return now. If nothing can be proved, proceed with more tests.
21201 bool IsAlias;
21202 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
21203 DAG, IsAlias))
21204 return IsAlias;
21205
21206 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
21207 // either are not known.
21208 if (!MUC0.MMO || !MUC1.MMO)
21209 return true;
21210
21211 // If one operation reads from invariant memory and the other may store, they
21212 // cannot alias. These should really be checking the equivalent of mayWrite,
21213 // but it only matters for memory nodes other than load/store.
21214 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21215 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21216 return false;
21217
21218 // If we know SrcValue1 and SrcValue2 have relatively large
21219 // alignment compared to the size and offset of the access, we may be able
21220 // to prove they do not alias. This check is conservative for now to catch
21221 // cases created by splitting vector types; it only works when the offsets
21222 // are multiples of the size of the data.
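// [Editorial example, not part of the original source] With two 4-byte
// accesses whose MMOs share an 8-byte base alignment and have offsets 0 and
// 4: OffAlign0 == 0, OffAlign1 == 4, and OffAlign0 + 4 <= OffAlign1, so the
// accesses occupy disjoint slots and no-alias is returned.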
21223 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
21224 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
21225 unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
21226 unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
21227 auto &Size0 = MUC0.NumBytes;
21228 auto &Size1 = MUC1.NumBytes;
21229 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
21230 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
21231 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
21232 SrcValOffset1 % *Size1 == 0) {
21233 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
21234 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
21235
21236 // There is no overlap between these relatively aligned accesses of
21237 // similar size. Return no alias.
21238 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
21239 return false;
21240 }
21241
21242 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
21243 ? CombinerGlobalAA
21244 : DAG.getSubtarget().useAA();
21245#ifndef NDEBUG
21246 if (CombinerAAOnlyFunc.getNumOccurrences() &&
21247 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
21248 UseAA = false;
21249#endif
21250
21251 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
21252 Size0.hasValue() && Size1.hasValue()) {
21253 // Use alias analysis information.
21254 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
21255 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
21256 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
21257 AliasResult AAResult = AA->alias(
21258 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
21259 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
21260 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
21261 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
21262 if (AAResult == NoAlias)
21263 return false;
21264 }
21265
21266 // Otherwise we have to assume they alias.
21267 return true;
21268}
21269
21270/// Walk up chain skipping non-aliasing memory nodes,
21271/// looking for aliasing nodes and adding them to the Aliases vector.
21272void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
21273 SmallVectorImpl<SDValue> &Aliases) {
21274 SmallVector<SDValue, 8> Chains; // List of chains to visit.
21275 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
21276
21277 // Get alias information for node.
21278 // TODO: relax aliasing for unordered atomics (see D66309)
21279 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
21280
21281 // Starting off.
21282 Chains.push_back(OriginalChain);
21283 unsigned Depth = 0;
21284
21285 // Attempt to improve chain by a single step
21286 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
21287 switch (C.getOpcode()) {
21288 case ISD::EntryToken:
21289 // No need to mark EntryToken.
21290 C = SDValue();
21291 return true;
21292 case ISD::LOAD:
21293 case ISD::STORE: {
21294 // Get alias information for C.
21295 // TODO: Relax aliasing for unordered atomics (see D66309)
21296 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
21297 cast<LSBaseSDNode>(C.getNode())->isSimple();
21298 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
21299 // Look further up the chain.
21300 C = C.getOperand(0);
21301 return true;
21302 }
21303 // Alias, so stop here.
21304 return false;
21305 }
21306
21307 case ISD::CopyFromReg:
21308 // Always forward past CopyFromReg.
21309 C = C.getOperand(0);
21310 return true;
21311
21312 case ISD::LIFETIME_START:
21313 case ISD::LIFETIME_END: {
21314 // We can forward past any lifetime start/end that can be proven not to
21315 // alias the memory access.
21316 if (!isAlias(N, C.getNode())) {
21317 // Look further up the chain.
21318 C = C.getOperand(0);
21319 return true;
21320 }
21321 return false;
21322 }
21323 default:
21324 return false;
21325 }
21326 };
21327
21328 // Look at each chain and determine if it is an alias. If so, add it to the
21329 // aliases list. If not, then continue up the chain looking for the next
21330 // candidate.
21331 while (!Chains.empty()) {
21332 SDValue Chain = Chains.pop_back_val();
21333
21334 // Don't bother if we've seen Chain before.
21335 if (!Visited.insert(Chain.getNode()).second)
21336 continue;
21337
21338 // For TokenFactor nodes, look at each operand and only continue up the
21339 // chain until we reach the depth limit.
21340 //
21341 // FIXME: The depth check could be made to return the last non-aliasing
21342 // chain we found before we hit a tokenfactor rather than the original
21343 // chain.
21344 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
21345 Aliases.clear();
21346 Aliases.push_back(OriginalChain);
21347 return;
21348 }
21349
21350 if (Chain.getOpcode() == ISD::TokenFactor) {
21351 // We have to check each of the operands of the token factor for "small"
21352 // token factors, so we queue them up. Adding the operands to the queue
21353 // (stack) in reverse order maintains the original order and increases the
21354 // likelihood that getNode will find a matching token factor (CSE).
21355 if (Chain.getNumOperands() > 16) {
21356 Aliases.push_back(Chain);
21357 continue;
21358 }
21359 for (unsigned n = Chain.getNumOperands(); n;)
21360 Chains.push_back(Chain.getOperand(--n));
21361 ++Depth;
21362 continue;
21363 }
21364 // Everything else
21365 if (ImproveChain(Chain)) {
21366 // Updated Chain Found, Consider new chain if one exists.
21367 if (Chain.getNode())
21368 Chains.push_back(Chain);
21369 ++Depth;
21370 continue;
21371 }
21372 // No Improved Chain Possible, treat as Alias.
21373 Aliases.push_back(Chain);
21374 }
21375}
21376
21377/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
21378/// (aliasing node).
21379SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
21380 if (OptLevel == CodeGenOpt::None)
21381 return OldChain;
21382
21383 // Ops for replacing token factor.
21384 SmallVector<SDValue, 8> Aliases;
21385
21386 // Accumulate all the aliases to this node.
21387 GatherAllAliases(N, OldChain, Aliases);
21388
21389 // If no operands then chain to entry token.
21390 if (Aliases.size() == 0)
21391 return DAG.getEntryNode();
21392
21393 // If a single operand then chain to it. We don't need to revisit it.
21394 if (Aliases.size() == 1)
21395 return Aliases[0];
21396
21397 // Construct a custom tailored token factor.
21398 return DAG.getTokenFactor(SDLoc(N), Aliases);
21399}
21400
21401namespace {
21402// TODO: Replace with std::monostate when we move to C++17.
21403struct UnitT { } Unit;
21404bool operator==(const UnitT &, const UnitT &) { return true; }
21405bool operator!=(const UnitT &, const UnitT &) { return false; }
21406} // namespace
21407
21408// This function tries to collect a bunch of potentially interesting
21409// nodes to improve the chains of, all at once. This might seem
21410// redundant, as this function gets called when visiting every store
21411// node, so why not let the work be done on each store as it's visited?
21412//
21413// I believe this is mainly important because MergeConsecutiveStores
21414// is unable to deal with merging stores of different sizes, so unless
21415// we improve the chains of all the potential candidates up-front
21416// before running MergeConsecutiveStores, it might only see some of
21417// the nodes that will eventually be candidates, and then not be able
21418// to go from a partially-merged state to the desired final
21419// fully-merged state.
21420
21421bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
21422 SmallVector<StoreSDNode *, 8> ChainedStores;
21423 StoreSDNode *STChain = St;
21424 // Intervals records which offsets from BaseIndex have been covered. In
21425 // the common case, every store writes to an address adjacent to the previous
21426 // one and is thus merged with the previous interval at insertion time.
21427
21428 using IMap =
21429 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
21430 IMap::Allocator A;
21431 IMap Intervals(A);
21432
21433 // This holds the base pointer, index, and the offset in bytes from the base
21434 // pointer.
21435 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21436
21437 // We must have a base and an offset.
21438 if (!BasePtr.getBase().getNode())
21439 return false;
21440
21441 // Do not handle stores to undef base pointers.
21442 if (BasePtr.getBase().isUndef())
21443 return false;
21444
21445 // BaseIndexOffset assumes that offsets are fixed-size, which
21446 // is not valid for scalable vectors where the offsets are
21447 // scaled by `vscale`, so bail out early.
21448 if (St->getMemoryVT().isScalableVector())
21449 return false;
21450
21451 // Add ST's interval.
21452 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
21453
21454 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
21455 // If the chain has more than one use, then we can't reorder the mem ops.
21456 if (!SDValue(Chain, 0)->hasOneUse())
21457 break;
21458 // TODO: Relax for unordered atomics (see D66309)
21459 if (!Chain->isSimple() || Chain->isIndexed())
21460 break;
21461
21462 // Find the base pointer and offset for this memory node.
21463 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
21464 // Check that the base pointer is the same as the original one.
21465 int64_t Offset;
21466 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
21467 break;
21468 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
21469 // Make sure we don't overlap with other intervals by checking the ones to
21470 // the left or right before inserting.
21471 auto I = Intervals.find(Offset);
21472 // If there's a next interval, we should end before it.
21473 if (I != Intervals.end() && I.start() < (Offset + Length))
21474 break;
21475 // If there's a previous interval, we should start after it.
21476 if (I != Intervals.begin() && (--I).stop() <= Offset)
21477 break;
21478 Intervals.insert(Offset, Offset + Length, Unit);
21479
21480 ChainedStores.push_back(Chain);
21481 STChain = Chain;
21482 }
21483
21484 // If we didn't find a chained store, exit.
21485 if (ChainedStores.size() == 0)
21486 return false;
21487
21488 // Improve all chained stores (St and ChainedStores members) starting from
21489 // where the store chain ended and return single TokenFactor.
21490 SDValue NewChain = STChain->getChain();
21491 SmallVector<SDValue, 8> TFOps;
21492 for (unsigned I = ChainedStores.size(); I;) {
21493 StoreSDNode *S = ChainedStores[--I];
21494 SDValue BetterChain = FindBetterChain(S, NewChain);
21495 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
21496 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
21497 TFOps.push_back(SDValue(S, 0));
21498 ChainedStores[I] = S;
21499 }
21500
21501 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
21502 SDValue BetterChain = FindBetterChain(St, NewChain);
21503 SDValue NewST;
21504 if (St->isTruncatingStore())
21505 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
21506 St->getBasePtr(), St->getMemoryVT(),
21507 St->getMemOperand());
21508 else
21509 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
21510 St->getBasePtr(), St->getMemOperand());
21511
21512 TFOps.push_back(NewST);
21513
21514 // If we improved every element of TFOps, then we've lost the dependence on
21515 // NewChain to successors of St and we need to add it back to TFOps. Do so at
21516 // the beginning to keep relative order consistent with FindBetterChains.
21517 auto hasImprovedChain = [&](SDValue ST) -> bool {
21518 return ST->getOperand(0) != NewChain;
21519 };
21520 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
21521 if (AddNewChain)
21522 TFOps.insert(TFOps.begin(), NewChain);
21523
21524 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
21525 CombineTo(St, TF);
21526
21527 // Add TF and its operands to the worklist.
21528 AddToWorklist(TF.getNode());
21529 for (const SDValue &Op : TF->ops())
21530 AddToWorklist(Op.getNode());
21531 AddToWorklist(STChain);
21532 return true;
21533}
21534
21535bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
21536 if (OptLevel == CodeGenOpt::None)
21537 return false;
21538
21539 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21540
21541 // We must have a base and an offset.
21542 if (!BasePtr.getBase().getNode())
21543 return false;
21544
21545 // Do not handle stores to undef base pointers.
21546 if (BasePtr.getBase().isUndef())
21547 return false;
21548
21549 // Directly improve a chain of disjoint stores starting at St.
21550 if (parallelizeChainedStores(St))
21551 return true;
21552
21553 // Improve St's chain.
21554 SDValue BetterChain = FindBetterChain(St, St->getChain());
21555 if (St->getChain() != BetterChain) {
21556 replaceStoreChain(St, BetterChain);
21557 return true;
21558 }
21559 return false;
21560}
21561
21562/// This is the entry point for the file.
21563void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
21564 CodeGenOpt::Level OptLevel) {
21565 /// This is the main entry point to this class.
21566 DAGCombiner(*this, AA, OptLevel).Run(Level);
21567}

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/Metadata.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/Support/AlignOf.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/MachineValueType.h"
45#include "llvm/Support/TypeSize.h"
46#include <algorithm>
47#include <cassert>
48#include <climits>
49#include <cstddef>
50#include <cstdint>
51#include <cstring>
52#include <iterator>
53#include <string>
54#include <tuple>
55
56namespace llvm {
57
58class APInt;
59class Constant;
60template <typename T> struct DenseMapInfo;
61class GlobalValue;
62class MachineBasicBlock;
63class MachineConstantPoolValue;
64class MCSymbol;
65class raw_ostream;
66class SDNode;
67class SelectionDAG;
68class Type;
69class Value;
70
71void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
72 bool force = false);
73
74/// This represents a list of ValueType's that has been intern'd by
75/// a SelectionDAG. Instances of this simple value class are returned by
76/// SelectionDAG::getVTList(...).
77///
78struct SDVTList {
79 const EVT *VTs;
80 unsigned int NumVTs;
81};
82
83namespace ISD {
84
85 /// Node predicates
86
87 /// If N is a BUILD_VECTOR node whose elements are all the same constant or
88 /// undefined, return true and return the constant value in \p SplatValue.
89 bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
90
91 /// Return true if the specified node is a BUILD_VECTOR where all of the
92 /// elements are ~0 or undef.
93 bool isBuildVectorAllOnes(const SDNode *N);
94
95 /// Return true if the specified node is a BUILD_VECTOR where all of the
96 /// elements are 0 or undef.
97 bool isBuildVectorAllZeros(const SDNode *N);
98
99 /// Return true if the specified node is a BUILD_VECTOR node of all
100 /// ConstantSDNode or undef.
101 bool isBuildVectorOfConstantSDNodes(const SDNode *N);
102
103 /// Return true if the specified node is a BUILD_VECTOR node of all
104 /// ConstantFPSDNode or undef.
105 bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
106
107 /// Return true if the node has at least one operand and all operands of the
108 /// specified node are ISD::UNDEF.
109 bool allOperandsUndef(const SDNode *N);
110
111} // end namespace ISD
112
113//===----------------------------------------------------------------------===//
114/// Unlike LLVM values, Selection DAG nodes may return multiple
115/// values as the result of a computation. Many nodes return multiple values,
116/// from loads (which define a token and a return value) to ADDC (which returns
117/// a result and a carry value), to calls (which may return an arbitrary number
118/// of values).
119///
120/// As such, each use of a SelectionDAG computation must indicate the node that
121/// computes it as well as which return value to use from that node. This pair
122/// of information is represented with the SDValue value type.
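/// For example (illustrative): given a LoadSDNode *L, SDValue(L, 0) names the
/// loaded value and SDValue(L, 1) names the load's output chain token.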
123///
124class SDValue {
125 friend struct DenseMapInfo<SDValue>;
126
127 SDNode *Node = nullptr; // The node defining the value we are using.
128 unsigned ResNo = 0; // Which return value of the node we are using.
129
130public:
131 SDValue() = default;
132 SDValue(SDNode *node, unsigned resno);
133
134 /// get the index which selects a specific result in the SDNode
135 unsigned getResNo() const { return ResNo; }
136
137 /// get the SDNode which holds the desired result
138 SDNode *getNode() const { return Node; }
139
140 /// set the SDNode
141 void setNode(SDNode *N) { Node = N; }
142
143 inline SDNode *operator->() const { return Node; }
144
145 bool operator==(const SDValue &O) const {
146 return Node == O.Node && ResNo == O.ResNo;
147 }
148 bool operator!=(const SDValue &O) const {
149 return !operator==(O);
150 }
151 bool operator<(const SDValue &O) const {
152 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
153 }
154 explicit operator bool() const {
155 return Node != nullptr;
156 }
157
158 SDValue getValue(unsigned R) const {
159 return SDValue(Node, R);
160 }
161
162 /// Return true if this node is an operand of N.
163 bool isOperandOf(const SDNode *N) const;
164
165 /// Return the ValueType of the referenced return value.
166 inline EVT getValueType() const;
167
168 /// Return the simple ValueType of the referenced return value.
169 MVT getSimpleValueType() const {
170 return getValueType().getSimpleVT();
171 }
172
173 /// Returns the size of the value in bits.
174 ///
175 /// If the value type is a scalable vector type, the scalable property will
176 /// be set and the runtime size will be a positive integer multiple of the
177 /// base size.
178 TypeSize getValueSizeInBits() const {
179 return getValueType().getSizeInBits();
180 }
181
182 TypeSize getScalarValueSizeInBits() const {
183 return getValueType().getScalarType().getSizeInBits();
184 }
185
186 // Forwarding methods - These forward to the corresponding methods in SDNode.
187 inline unsigned getOpcode() const;
188 inline unsigned getNumOperands() const;
189 inline const SDValue &getOperand(unsigned i) const;
190 inline uint64_t getConstantOperandVal(unsigned i) const;
191 inline const APInt &getConstantOperandAPInt(unsigned i) const;
192 inline bool isTargetMemoryOpcode() const;
193 inline bool isTargetOpcode() const;
194 inline bool isMachineOpcode() const;
195 inline bool isUndef() const;
196 inline unsigned getMachineOpcode() const;
197 inline const DebugLoc &getDebugLoc() const;
198 inline void dump() const;
199 inline void dump(const SelectionDAG *G) const;
200 inline void dumpr() const;
201 inline void dumpr(const SelectionDAG *G) const;
202
203 /// Return true if this operand (which must be a chain) reaches the
204 /// specified operand without crossing any side-effecting instructions.
205 /// In practice, this looks through token factors and non-volatile loads.
206 /// In order to remain efficient, this only
207/// looks a couple of nodes in; it does not do an exhaustive search.
208 bool reachesChainWithoutSideEffects(SDValue Dest,
209 unsigned Depth = 2) const;
210
211 /// Return true if there are no nodes using value ResNo of Node.
212 inline bool use_empty() const;
213
214 /// Return true if there is exactly one node using value ResNo of Node.
215 inline bool hasOneUse() const;
216};
217
218template<> struct DenseMapInfo<SDValue> {
219 static inline SDValue getEmptyKey() {
220 SDValue V;
221 V.ResNo = -1U;
222 return V;
223 }
224
225 static inline SDValue getTombstoneKey() {
226 SDValue V;
227 V.ResNo = -2U;
228 return V;
229 }
230
231 static unsigned getHashValue(const SDValue &Val) {
232 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
233 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
234 }
235
236 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
237 return LHS == RHS;
238 }
239};
240
241/// Allow casting operators to work directly on
242/// SDValues as if they were SDNode*'s.
243template<> struct simplify_type<SDValue> {
244 using SimpleType = SDNode *;
245
246 static SimpleType getSimplifiedValue(SDValue &Val) {
247 return Val.getNode();
248 }
249};
250template<> struct simplify_type<const SDValue> {
251 using SimpleType = /*const*/ SDNode *;
252
253 static SimpleType getSimplifiedValue(const SDValue &Val) {
254 return Val.getNode();
255 }
256};
257
258/// Represents a use of a SDNode. This class holds an SDValue,
259/// which records the SDNode being used and the result number, a
260/// pointer to the SDNode using the value, and Next and Prev pointers,
261/// which link together all the uses of an SDNode.
262///
263class SDUse {
264 /// Val - The value being used.
265 SDValue Val;
266 /// User - The user of this value.
267 SDNode *User = nullptr;
268 /// Prev, Next - Pointers to the uses list of the SDNode referred by
269 /// this operand.
270 SDUse **Prev = nullptr;
271 SDUse *Next = nullptr;
272
273public:
274 SDUse() = default;
275 SDUse(const SDUse &U) = delete;
276 SDUse &operator=(const SDUse &) = delete;
277
278 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
279 operator const SDValue&() const { return Val; }
280
281 /// If implicit conversion to SDValue doesn't work, the get() method returns
282 /// the SDValue.
283 const SDValue &get() const { return Val; }
284
285 /// This returns the SDNode that contains this Use.
286 SDNode *getUser() { return User; }
287
288 /// Get the next SDUse in the use list.
289 SDUse *getNext() const { return Next; }
290
291 /// Convenience function for get().getNode().
292 SDNode *getNode() const { return Val.getNode(); }
293 /// Convenience function for get().getResNo().
294 unsigned getResNo() const { return Val.getResNo(); }
295 /// Convenience function for get().getValueType().
296 EVT getValueType() const { return Val.getValueType(); }
297
298 /// Convenience function for get().operator==
299 bool operator==(const SDValue &V) const {
300 return Val == V;
301 }
302
303 /// Convenience function for get().operator!=
304 bool operator!=(const SDValue &V) const {
305 return Val != V;
306 }
307
308 /// Convenience function for get().operator<
309 bool operator<(const SDValue &V) const {
310 return Val < V;
311 }
312
313private:
314 friend class SelectionDAG;
315 friend class SDNode;
316 // TODO: unfriend HandleSDNode once we fix its operand handling.
317 friend class HandleSDNode;
318
319 void setUser(SDNode *p) { User = p; }
320
321 /// Remove this use from its existing use list, assign it the
322 /// given value, and add it to the new value's node's use list.
323 inline void set(const SDValue &V);
324 /// Like set, but only supports initializing a newly-allocated
325 /// SDUse with a non-null value.
326 inline void setInitial(const SDValue &V);
327 /// Like set, but only sets the Node portion of the value,
328 /// leaving the ResNo portion unmodified.
329 inline void setNode(SDNode *N);
330
331 void addToList(SDUse **List) {
332 Next = *List;
333 if (Next) Next->Prev = &Next;
334 Prev = List;
335 *List = this;
336 }
337
338 void removeFromList() {
339 *Prev = Next;
340 if (Next) Next->Prev = Prev;
341 }
342};
343
344/// simplify_type specializations - Allow casting operators to work directly on
345/// SDValues as if they were SDNode*'s.
346template<> struct simplify_type<SDUse> {
347 using SimpleType = SDNode *;
348
349 static SimpleType getSimplifiedValue(SDUse &Val) {
350 return Val.getNode();
351 }
352};
353
354/// These are IR-level optimization flags that may be propagated to SDNodes.
355/// TODO: This data structure should be shared by the IR optimizer and
356/// the backend.
357struct SDNodeFlags {
358private:
359 // This bit is used to determine if the flags are in a defined state.
360 // Flag bits can only be masked out during intersection if the masking flags
361 // are defined.
362 bool AnyDefined : 1;
363
364 bool NoUnsignedWrap : 1;
365 bool NoSignedWrap : 1;
366 bool Exact : 1;
367 bool NoNaNs : 1;
368 bool NoInfs : 1;
369 bool NoSignedZeros : 1;
370 bool AllowReciprocal : 1;
371 bool VectorReduction : 1;
372 bool AllowContract : 1;
373 bool ApproximateFuncs : 1;
374 bool AllowReassociation : 1;
375
376 // We assume instructions do not raise floating-point exceptions by default,
377 // and only those marked explicitly may do so. We could choose to represent
378// this via a positive "FPExcept" flag like on the MI level, but having a
379 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
380 // intersection logic more straightforward.
381 bool NoFPExcept : 1;
382
383public:
384 /// Default constructor turns off all optimization flags.
385 SDNodeFlags()
386 : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
387 Exact(false), NoNaNs(false), NoInfs(false),
388 NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
389 AllowContract(false), ApproximateFuncs(false),
390 AllowReassociation(false), NoFPExcept(false) {}
391
392 /// Propagate the fast-math-flags from an IR FPMathOperator.
393 void copyFMF(const FPMathOperator &FPMO) {
394 setNoNaNs(FPMO.hasNoNaNs());
395 setNoInfs(FPMO.hasNoInfs());
396 setNoSignedZeros(FPMO.hasNoSignedZeros());
397 setAllowReciprocal(FPMO.hasAllowReciprocal());
398 setAllowContract(FPMO.hasAllowContract());
399 setApproximateFuncs(FPMO.hasApproxFunc());
400 setAllowReassociation(FPMO.hasAllowReassoc());
401 }
402
403 /// Sets the state of the flags to the defined state.
404 void setDefined() { AnyDefined = true; }
405 /// Returns true if the flags are in a defined state.
406 bool isDefined() const { return AnyDefined; }
407
408 // These are mutators for each flag.
409 void setNoUnsignedWrap(bool b) {
410 setDefined();
411 NoUnsignedWrap = b;
412 }
413 void setNoSignedWrap(bool b) {
414 setDefined();
415 NoSignedWrap = b;
416 }
417 void setExact(bool b) {
418 setDefined();
419 Exact = b;
420 }
421 void setNoNaNs(bool b) {
422 setDefined();
423 NoNaNs = b;
424 }
425 void setNoInfs(bool b) {
426 setDefined();
427 NoInfs = b;
428 }
429 void setNoSignedZeros(bool b) {
430 setDefined();
431 NoSignedZeros = b;
432 }
433 void setAllowReciprocal(bool b) {
434 setDefined();
435 AllowReciprocal = b;
436 }
437 void setVectorReduction(bool b) {
438 setDefined();
439 VectorReduction = b;
440 }
441 void setAllowContract(bool b) {
442 setDefined();
443 AllowContract = b;
444 }
445 void setApproximateFuncs(bool b) {
446 setDefined();
447 ApproximateFuncs = b;
448 }
449 void setAllowReassociation(bool b) {
450 setDefined();
451 AllowReassociation = b;
452 }
453 void setNoFPExcept(bool b) {
454 setDefined();
455 NoFPExcept = b;
456 }
457
458 // These are accessors for each flag.
459 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
460 bool hasNoSignedWrap() const { return NoSignedWrap; }
461 bool hasExact() const { return Exact; }
462 bool hasNoNaNs() const { return NoNaNs; }
463 bool hasNoInfs() const { return NoInfs; }
464 bool hasNoSignedZeros() const { return NoSignedZeros; }
465 bool hasAllowReciprocal() const { return AllowReciprocal; }
466 bool hasVectorReduction() const { return VectorReduction; }
467 bool hasAllowContract() const { return AllowContract; }
468 bool hasApproximateFuncs() const { return ApproximateFuncs; }
469 bool hasAllowReassociation() const { return AllowReassociation; }
470 bool hasNoFPExcept() const { return NoFPExcept; }
471
472 /// Clear any flags in this flag set that aren't also set in Flags.
473 /// If the given Flags are undefined then don't do anything.
474 void intersectWith(const SDNodeFlags Flags) {
475 if (!Flags.isDefined())
476 return;
477 NoUnsignedWrap &= Flags.NoUnsignedWrap;
478 NoSignedWrap &= Flags.NoSignedWrap;
479 Exact &= Flags.Exact;
480 NoNaNs &= Flags.NoNaNs;
481 NoInfs &= Flags.NoInfs;
482 NoSignedZeros &= Flags.NoSignedZeros;
483 AllowReciprocal &= Flags.AllowReciprocal;
484 VectorReduction &= Flags.VectorReduction;
485 AllowContract &= Flags.AllowContract;
486 ApproximateFuncs &= Flags.ApproximateFuncs;
487 AllowReassociation &= Flags.AllowReassociation;
488 NoFPExcept &= Flags.NoFPExcept;
489 }
490};
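
A short usage sketch (assuming only the SDNodeFlags members declared above): each setter marks the set as defined, and intersectWith can only clear flags, never set them; intersecting with a never-touched (undefined) flag set is a no-op.

    SDNodeFlags A, B;
    A.setNoNaNs(true);
    A.setNoInfs(true);

    A.intersectWith(B); // no effect: B was never set, so !B.isDefined()

    B.setNoNaNs(true);  // B is now defined, with only NoNaNs set
    A.intersectWith(B); // A keeps NoNaNs, loses NoInfs
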
491
492/// Represents one node in the SelectionDAG.
493///
494class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
495private:
496 /// The operation that this node performs.
497 int16_t NodeType;
498
499protected:
500 // We define a set of mini-helper classes to help us interpret the bits in our
501 // SubclassData. These are designed to fit within a uint16_t so they pack
502 // with NodeType.
503
504#if defined(_AIX) && (!defined(__GNUC__) || defined(__ibmxl__))
505// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
506// and give the `pack` pragma push semantics.
507#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
508#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
509#else
510#define BEGIN_TWO_BYTE_PACK()
511#define END_TWO_BYTE_PACK()
512#endif
513
514BEGIN_TWO_BYTE_PACK()
515 class SDNodeBitfields {
516 friend class SDNode;
517 friend class MemIntrinsicSDNode;
518 friend class MemSDNode;
519 friend class SelectionDAG;
520
521 uint16_t HasDebugValue : 1;
522 uint16_t IsMemIntrinsic : 1;
523 uint16_t IsDivergent : 1;
524 };
525 enum { NumSDNodeBits = 3 };
526
527 class ConstantSDNodeBitfields {
528 friend class ConstantSDNode;
529
530 uint16_t : NumSDNodeBits;
531
532 uint16_t IsOpaque : 1;
533 };
534
535 class MemSDNodeBitfields {
536 friend class MemSDNode;
537 friend class MemIntrinsicSDNode;
538 friend class AtomicSDNode;
539
540 uint16_t : NumSDNodeBits;
541
542 uint16_t IsVolatile : 1;
543 uint16_t IsNonTemporal : 1;
544 uint16_t IsDereferenceable : 1;
545 uint16_t IsInvariant : 1;
546 };
547 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
548
549 class LSBaseSDNodeBitfields {
550 friend class LSBaseSDNode;
551 friend class MaskedLoadStoreSDNode;
552 friend class MaskedGatherScatterSDNode;
553
554 uint16_t : NumMemSDNodeBits;
555
556 // This storage is shared between disparate class hierarchies to hold an
557 // enumeration specific to the class hierarchy in use.
558 // LSBaseSDNode => enum ISD::MemIndexedMode
559 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
560 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
561 uint16_t AddressingMode : 3;
562 };
563 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
564
565 class LoadSDNodeBitfields {
566 friend class LoadSDNode;
567 friend class MaskedLoadSDNode;
568
569 uint16_t : NumLSBaseSDNodeBits;
570
571 uint16_t ExtTy : 2; // enum ISD::LoadExtType
572 uint16_t IsExpanding : 1;
573 };
574
575 class StoreSDNodeBitfields {
576 friend class StoreSDNode;
577 friend class MaskedStoreSDNode;
578
579 uint16_t : NumLSBaseSDNodeBits;
580
581 uint16_t IsTruncating : 1;
582 uint16_t IsCompressing : 1;
583 };
584
585 union {
586 char RawSDNodeBits[sizeof(uint16_t)];
587 SDNodeBitfields SDNodeBits;
588 ConstantSDNodeBitfields ConstantSDNodeBits;
589 MemSDNodeBitfields MemSDNodeBits;
590 LSBaseSDNodeBitfields LSBaseSDNodeBits;
591 LoadSDNodeBitfields LoadSDNodeBits;
592 StoreSDNodeBitfields StoreSDNodeBits;
593 };
594END_TWO_BYTE_PACK()
595#undef BEGIN_TWO_BYTE_PACK
596#undef END_TWO_BYTE_PACK
597
598 // RawSDNodeBits must cover the entirety of the union. This means that all of
599 // the union's members must have size <= RawSDNodeBits. We write the RHS as
600 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
601 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
602 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
603 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
604 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
605 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
606 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
607
608private:
609 friend class SelectionDAG;
610 // TODO: unfriend HandleSDNode once we fix its operand handling.
611 friend class HandleSDNode;
612
613 /// Unique id per SDNode in the DAG.
614 int NodeId = -1;
615
616 /// The values that are used by this operation.
617 SDUse *OperandList = nullptr;
618
619 /// The types of the values this node defines. SDNode's may
620 /// define multiple values simultaneously.
621 const EVT *ValueList;
622
623 /// List of uses for this SDNode.
624 SDUse *UseList = nullptr;
625
626 /// The number of entries in the Operand/Value list.
627 unsigned short NumOperands = 0;
628 unsigned short NumValues;
629
630 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
631 // original LLVM instructions.
632 // This is used for turning off scheduling, because we'll forgo
633 // the normal scheduling algorithms and output the instructions according to
634 // this ordering.
635 unsigned IROrder;
636
637 /// Source line information.
638 DebugLoc debugLoc;
639
640 /// Return a pointer to the specified value type.
641 static const EVT *getValueTypeList(EVT VT);
642
643 SDNodeFlags Flags;
644
645public:
646 /// Unique and persistent id per SDNode in the DAG.
647 /// Used for debug printing.
648 uint16_t PersistentId;
649
650 //===--------------------------------------------------------------------===//
651 // Accessors
652 //
653
654 /// Return the SelectionDAG opcode value for this node. For
655 /// pre-isel nodes (those for which isMachineOpcode returns false), these
656 /// are the opcode values in the ISD and <target>ISD namespaces. For
657 /// post-isel opcodes, see getMachineOpcode.
658 unsigned getOpcode() const { return (unsigned short)NodeType; }
659
660 /// Test if this node has a target-specific opcode (in the
661 /// \<target\>ISD namespace).
662 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
663
664 /// Test if this node has a target-specific opcode that may raise
665 /// FP exceptions (in the \<target\>ISD namespace and greater than
666 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
667/// opcodes are currently automatically considered to possibly raise
668 /// FP exceptions as well.
669 bool isTargetStrictFPOpcode() const {
670 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
671 }
672
673 /// Test if this node has a target-specific
674 /// memory-referencing opcode (in the \<target\>ISD namespace and
675 /// greater than FIRST_TARGET_MEMORY_OPCODE).
676 bool isTargetMemoryOpcode() const {
677 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
678 }
679
680 /// Return true if this node's opcode is ISD::UNDEF.
681 bool isUndef() const { return NodeType == ISD::UNDEF; }
682
683 /// Test if this node is a memory intrinsic (with valid pointer information).
684 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
685 /// non-memory intrinsics (with chains) that are not really instances of
686 /// MemSDNode. For such nodes, we need some extra state to determine the
687 /// proper classof relationship.
688 bool isMemIntrinsic() const {
689 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
690 NodeType == ISD::INTRINSIC_VOID) &&
691 SDNodeBits.IsMemIntrinsic;
692 }
693
694 /// Test if this node is a strict floating point pseudo-op.
695 bool isStrictFPOpcode() {
696 switch (NodeType) {
697 default:
698 return false;
699 case ISD::STRICT_FP16_TO_FP:
700 case ISD::STRICT_FP_TO_FP16:
701#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
702 case ISD::STRICT_##DAGN:
703#include "llvm/IR/ConstrainedOps.def"
704 return true;
705 }
706 }
707
708 /// Test if this node has a post-isel opcode, directly
709 /// corresponding to a MachineInstr opcode.
710 bool isMachineOpcode() const { return NodeType < 0; }
711
712 /// This may only be called if isMachineOpcode returns
713 /// true. It returns the MachineInstr opcode value that the node's opcode
714 /// corresponds to.
715 unsigned getMachineOpcode() const {
716 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
717 return ~NodeType;
718 }
719
720 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
721 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
722
723 bool isDivergent() const { return SDNodeBits.IsDivergent; }
724
725 /// Return true if there are no uses of this node.
726 bool use_empty() const { return UseList == nullptr; }
727
728 /// Return true if there is exactly one use of this node.
729 bool hasOneUse() const {
730 return !use_empty() && std::next(use_begin()) == use_end();
731 }
732
733 /// Return the number of uses of this node. This method takes
734 /// time proportional to the number of uses.
735 size_t use_size() const { return std::distance(use_begin(), use_end()); }
736
737 /// Return the unique node id.
738 int getNodeId() const { return NodeId; }
739
740 /// Set unique node id.
741 void setNodeId(int Id) { NodeId = Id; }
742
743 /// Return the node ordering.
744 unsigned getIROrder() const { return IROrder; }
745
746 /// Set the node ordering.
747 void setIROrder(unsigned Order) { IROrder = Order; }
748
749 /// Return the source location info.
750 const DebugLoc &getDebugLoc() const { return debugLoc; }
751
752 /// Set source location info. Try to avoid this, putting
753 /// it in the constructor is preferable.
754 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
755
756 /// This class provides iterator support for SDUse
757 /// operands that use a specific SDNode.
758 class use_iterator
759 : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
760 friend class SDNode;
761
762 SDUse *Op = nullptr;
763
764 explicit use_iterator(SDUse *op) : Op(op) {}
765
766 public:
767 using reference = std::iterator<std::forward_iterator_tag,
768 SDUse, ptrdiff_t>::reference;
769 using pointer = std::iterator<std::forward_iterator_tag,
770 SDUse, ptrdiff_t>::pointer;
771
772 use_iterator() = default;
773 use_iterator(const use_iterator &I) : Op(I.Op) {}
774
775 bool operator==(const use_iterator &x) const {
776 return Op == x.Op;
777 }
778 bool operator!=(const use_iterator &x) const {
779 return !operator==(x);
780 }
781
782 /// Return true if this iterator is at the end of uses list.
783 bool atEnd() const { return Op == nullptr; }
784
785 // Iterator traversal: forward iteration only.
786 use_iterator &operator++() { // Preincrement
787 assert(Op && "Cannot increment end iterator!");
788 Op = Op->getNext();
789 return *this;
790 }
791
792 use_iterator operator++(int) { // Postincrement
793 use_iterator tmp = *this; ++*this; return tmp;
794 }
795
796 /// Retrieve a pointer to the current user node.
797 SDNode *operator*() const {
798 assert(Op && "Cannot dereference end iterator!");
799 return Op->getUser();
800 }
801
802 SDNode *operator->() const { return operator*(); }
803
804 SDUse &getUse() const { return *Op; }
805
806 /// Retrieve the operand # of this use in its user.
807 unsigned getOperandNo() const {
808 assert(Op && "Cannot dereference end iterator!");
809 return (unsigned)(Op - Op->getUser()->OperandList);
810 }
811 };
812
813 /// Provide iteration support to walk over all uses of an SDNode.
814 use_iterator use_begin() const {
815 return use_iterator(UseList);
816 }
817
818 static use_iterator use_end() { return use_iterator(nullptr); }
819
820 inline iterator_range<use_iterator> uses() {
821 return make_range(use_begin(), use_end());
822 }
823 inline iterator_range<use_iterator> uses() const {
824 return make_range(use_begin(), use_end());
825 }
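
A sketch of the two common ways to walk a node's uses with this iterator (N is a hypothetical non-null SDNode*); dereferencing yields the using node, while getUse()/getOperandNo() recover which operand of that user refers to this node:

    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;                // the node using N
      unsigned OpNo = UI.getOperandNo(); // which operand of User is N
      (void)User; (void)OpNo;
    }

    for (SDNode *User : N->uses()) { // range form of the same walk
      (void)User;
    }
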
826
827 /// Return true if there are exactly NUSES uses of the indicated value.
828 /// This method ignores uses of other values defined by this operation.
829 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
830
831 /// Return true if there are any uses of the indicated value.
832 /// This method ignores uses of other values defined by this operation.
833 bool hasAnyUseOfValue(unsigned Value) const;
834
835 /// Return true if this node is the only use of N.
836 bool isOnlyUserOf(const SDNode *N) const;
837
838 /// Return true if this node is an operand of N.
839 bool isOperandOf(const SDNode *N) const;
840
841 /// Return true if this node is a predecessor of N.
842 /// NOTE: Implemented on top of hasPredecessor and every bit as
843 /// expensive. Use carefully.
844 bool isPredecessorOf(const SDNode *N) const {
845 return N->hasPredecessor(this);
846 }
847
848 /// Return true if N is a predecessor of this node.
849 /// N is either an operand of this node, or can be reached by recursively
850 /// traversing up the operands.
851 /// NOTE: This is an expensive method. Use it carefully.
852 bool hasPredecessor(const SDNode *N) const;
853
854 /// Returns true if N is a predecessor of any node in Worklist. This
855 /// helper keeps the Visited and Worklist sets external so that searches
856 /// can share work across queries: results are cached, and nodes can be
857 /// added to Worklist incrementally. The search stops early once N is
858 /// found but can be resumed on a later call. Remember to clear Visited
859 /// and Worklist if the DAG changes. MaxSteps gives a maximum number of
860 /// nodes to visit before giving up. The TopologicalPrune flag signals
861 /// that positive NodeIds are topologically ordered (operands have
862 /// strictly smaller node ids) and the search can be pruned using this.
863 static bool hasPredecessorHelper(const SDNode *N,
864 SmallPtrSetImpl<const SDNode *> &Visited,
865 SmallVectorImpl<const SDNode *> &Worklist,
866 unsigned int MaxSteps = 0,
867 bool TopologicalPrune = false) {
868 SmallVector<const SDNode *, 8> DeferredNodes;
869 if (Visited.count(N))
870 return true;
871
872 // Node Ids are assigned in three places: as a topological
873 // ordering (> 0), during legalization (values set to 0), and
874 // for new nodes (set to -1). If N has a topological id then we
875 // know that all nodes with smaller ids cannot be
876 // successors and we need not check them. We filter out all nodes
877 // that cannot be matches, but add them to the worklist before exit
878 // in case of multiple calls. Note that during selection the topological
879 // ids may be violated if a node's predecessor is selected before it; we
880 // mark this at selection by negating the ids of unselected successors,
881 // and restrict topological pruning to positive ids.
882
883 int NId = N->getNodeId();
884 // If the id was invalidated, reconstruct the original NId.
885 if (NId < -1)
886 NId = -(NId + 1);
887
888 bool Found = false;
889 while (!Worklist.empty()) {
890 const SDNode *M = Worklist.pop_back_val();
891 int MId = M->getNodeId();
892 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
893 (MId > 0) && (MId < NId)) {
894 DeferredNodes.push_back(M);
895 continue;
896 }
897 for (const SDValue &OpV : M->op_values()) {
898 SDNode *Op = OpV.getNode();
899 if (Visited.insert(Op).second)
900 Worklist.push_back(Op);
901 if (Op == N)
902 Found = true;
903 }
904 if (Found)
905 break;
906 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
907 break;
908 }
909 // Push deferred nodes back on worklist.
910 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
911 // If we bailed early, conservatively return found.
912 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
913 return true;
914 return Found;
915 }
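
A hedged usage sketch (Root and N hypothetical): seed Worklist with the starting node, and reuse Visited/Worklist across queries to share the traversal cost. With a nonzero MaxSteps, a true result may be conservative, as noted above.

    SmallPtrSet<const SDNode *, 16> Visited;
    SmallVector<const SDNode *, 8> Worklist;
    Worklist.push_back(Root);

    bool MayBePred =
        SDNode::hasPredecessorHelper(N, Visited, Worklist, /*MaxSteps=*/8192);
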
916
917 /// Return true if all the users of N are contained in Nodes.
918 /// NOTE: Requires at least one match, but doesn't require them all.
919 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
920
921 /// Return the number of values used by this operation.
922 unsigned getNumOperands() const { return NumOperands; }
923
924 /// Return the maximum number of operands that a SDNode can hold.
925 static constexpr size_t getMaxNumOperands() {
926 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
927 }
928
929 /// Helper method returns the integer value of a ConstantSDNode operand.
930 inline uint64_t getConstantOperandVal(unsigned Num) const;
931
932 /// Helper method returns the APInt of a ConstantSDNode operand.
933 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
934
935 const SDValue &getOperand(unsigned Num) const {
936 assert(Num < NumOperands && "Invalid child # of SDNode!");
937 return OperandList[Num];
938 }
939
940 using op_iterator = SDUse *;
941
942 op_iterator op_begin() const { return OperandList; }
943 op_iterator op_end() const { return OperandList+NumOperands; }
944 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
945
946 /// Iterator for directly iterating over the operand SDValue's.
947 struct value_op_iterator
948 : iterator_adaptor_base<value_op_iterator, op_iterator,
949 std::random_access_iterator_tag, SDValue,
950 ptrdiff_t, value_op_iterator *,
951 value_op_iterator *> {
952 explicit value_op_iterator(SDUse *U = nullptr)
953 : iterator_adaptor_base(U) {}
954
955 const SDValue &operator*() const { return I->get(); }
956 };
957
958 iterator_range<value_op_iterator> op_values() const {
959 return make_range(value_op_iterator(op_begin()),
960 value_op_iterator(op_end()));
961 }
962
963 SDVTList getVTList() const {
964 SDVTList X = { ValueList, NumValues };
965 return X;
966 }
967
968 /// If this node has a glue operand, return the node
969 /// to which the glue operand points. Otherwise return NULL.
970 SDNode *getGluedNode() const {
971 if (getNumOperands() != 0 &&
972 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
973 return getOperand(getNumOperands()-1).getNode();
974 return nullptr;
975 }
976
977 /// If this node has a glue value with a user, return
978 /// the user (there is at most one). Otherwise return NULL.
979 SDNode *getGluedUser() const {
980 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
981 if (UI.getUse().get().getValueType() == MVT::Glue)
982 return *UI;
983 return nullptr;
984 }
985
986 const SDNodeFlags getFlags() const { return Flags; }
987 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
988
989 /// Clear any flags in this node that aren't also set in Flags.
990 /// If Flags is not in a defined state then this has no effect.
991 void intersectFlagsWith(const SDNodeFlags Flags);
992
993 /// Return the number of values defined/returned by this operator.
994 unsigned getNumValues() const { return NumValues; }
995
996 /// Return the type of a specified result.
997 EVT getValueType(unsigned ResNo) const {
998 assert(ResNo < NumValues && "Illegal result number!");
999 return ValueList[ResNo];
1000 }
1001
1002 /// Return the type of a specified result as a simple type.
1003 MVT getSimpleValueType(unsigned ResNo) const {
1004 return getValueType(ResNo).getSimpleVT();
1005 }
1006
1007 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
1008 ///
1009 /// If the value type is a scalable vector type, the scalable property will
1010 /// be set and the runtime size will be a positive integer multiple of the
1011 /// base size.
1012 TypeSize getValueSizeInBits(unsigned ResNo) const {
1013 return getValueType(ResNo).getSizeInBits();
1014 }
1015
1016 using value_iterator = const EVT *;
1017
1018 value_iterator value_begin() const { return ValueList; }
1019 value_iterator value_end() const { return ValueList+NumValues; }
1020 iterator_range<value_iterator> values() const {
1021 return llvm::make_range(value_begin(), value_end());
1022 }
1023
1024 /// Return the opcode of this operation for printing.
1025 std::string getOperationName(const SelectionDAG *G = nullptr) const;
1026 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
1027 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
1028 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
1029 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1030 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1031
1032 /// Print a SelectionDAG node and all children down to
1033 /// the leaves. The given SelectionDAG allows target-specific nodes
1034 /// to be printed in human-readable form. Unlike printr, this will
1035 /// print the whole DAG, including children that appear multiple
1036 /// times.
1037 ///
1038 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1039
1040 /// Print a SelectionDAG node and children up to
1041 /// depth "depth." The given SelectionDAG allows target-specific
1042 /// nodes to be printed in human-readable form. Unlike printr, this
1043 /// will print children that appear multiple times wherever they are
1044 /// used.
1045 ///
1046 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1047 unsigned depth = 100) const;
1048
1049 /// Dump this node, for debugging.
1050 void dump() const;
1051
1052 /// Dump (recursively) this node and its use-def subgraph.
1053 void dumpr() const;
1054
1055 /// Dump this node, for debugging.
1056 /// The given SelectionDAG allows target-specific nodes to be printed
1057 /// in human-readable form.
1058 void dump(const SelectionDAG *G) const;
1059
1060 /// Dump (recursively) this node and its use-def subgraph.
1061 /// The given SelectionDAG allows target-specific nodes to be printed
1062 /// in human-readable form.
1063 void dumpr(const SelectionDAG *G) const;
1064
1065 /// printrFull to dbgs(). The given SelectionDAG allows
1066 /// target-specific nodes to be printed in human-readable form.
1067 /// Unlike dumpr, this will print the whole DAG, including children
1068 /// that appear multiple times.
1069 void dumprFull(const SelectionDAG *G = nullptr) const;
1070
1071 /// printrWithDepth to dbgs(). The given
1072 /// SelectionDAG allows target-specific nodes to be printed in
1073 /// human-readable form. Unlike dumpr, this will print children
1074 /// that appear multiple times wherever they are used.
1075 ///
1076 void dumprWithDepth(const SelectionDAG *G = nullptr,
1077 unsigned depth = 100) const;
1078
1079 /// Gather unique data for the node.
1080 void Profile(FoldingSetNodeID &ID) const;
1081
1082 /// This method should only be used by the SDUse class.
1083 void addUse(SDUse &U) { U.addToList(&UseList); }
1084
1085protected:
1086 static SDVTList getSDVTList(EVT VT) {
1087 SDVTList Ret = { getValueTypeList(VT), 1 };
1088 return Ret;
1089 }
1090
1091 /// Create an SDNode.
1092 ///
1093 /// SDNodes are created without any operands, and never own the operand
1094 /// storage. To add operands, see SelectionDAG::createOperands.
1095 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1096 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1097 IROrder(Order), debugLoc(std::move(dl)) {
1098 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1099 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1100 assert(NumValues == VTs.NumVTs &&
1101        "NumValues wasn't wide enough for its operands!");
1102 }
1103
1104 /// Release the operands and set this node to have zero operands.
1105 void DropOperands();
1106};
1107
1108/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1109/// into SDNode creation functions.
1110/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1111/// from the original Instruction, and IROrder is the ordinal position of
1112/// the instruction.
1113/// When an SDNode is created after the DAG is being built, both DebugLoc and
1114/// the IROrder are propagated from the original SDNode.
1115/// So SDLoc class provides two constructors besides the default one, one to
1116/// be used by the DAGBuilder, the other to be used by others.
1117class SDLoc {
1118private:
1119 DebugLoc DL;
1120 int IROrder = 0;
1121
1122public:
1123 SDLoc() = default;
1124 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1125 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1126 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1127 assert(Order >= 0 && "bad IROrder");
1128 if (I)
1129 DL = I->getDebugLoc();
1130 }
1131
1132 unsigned getIROrder() const { return IROrder; }
1133 const DebugLoc &getDebugLoc() const { return DL; }
1134};
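
A sketch of the construction paths described above (N, V, I, and Order hypothetical):

    SDLoc DL1(N);        // from an SDNode: reuses its DebugLoc and IROrder
    SDLoc DL2(V);        // from an SDValue: same as SDLoc(V.getNode())
    SDLoc DL3(I, Order); // DAG builder: IR Instruction plus its ordinal
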
1135
1136// Define inline functions from the SDValue class.
1137
1138inline SDValue::SDValue(SDNode *node, unsigned resno)
1139 : Node(node), ResNo(resno) {
1140 // Explicitly check for !ResNo to avoid use-after-free, because there are
1141 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1142 // combines.
1143 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1144        "Invalid result number for the given node!");
1145 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1146}
1147
1148inline unsigned SDValue::getOpcode() const {
1149 return Node->getOpcode();
1150}
1151
1152inline EVT SDValue::getValueType() const {
1153 return Node->getValueType(ResNo);
1154}
1155
1156inline unsigned SDValue::getNumOperands() const {
1157 return Node->getNumOperands();
1158}
1159
1160inline const SDValue &SDValue::getOperand(unsigned i) const {
1161 return Node->getOperand(i);
1162}
1163
1164inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1165 return Node->getConstantOperandVal(i);
1166}
1167
1168inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1169 return Node->getConstantOperandAPInt(i);
1170}
1171
1172inline bool SDValue::isTargetOpcode() const {
1173 return Node->isTargetOpcode();
1174}
1175
1176inline bool SDValue::isTargetMemoryOpcode() const {
1177 return Node->isTargetMemoryOpcode();
1178}
1179
1180inline bool SDValue::isMachineOpcode() const {
1181 return Node->isMachineOpcode();
1182}
1183
1184inline unsigned SDValue::getMachineOpcode() const {
1185 return Node->getMachineOpcode();
1186}
1187
1188inline bool SDValue::isUndef() const {
1189 return Node->isUndef();
1190}
1191
1192inline bool SDValue::use_empty() const {
1193 return !Node->hasAnyUseOfValue(ResNo);
1194}
1195
1196inline bool SDValue::hasOneUse() const {
1197 return Node->hasNUsesOfValue(1, ResNo);
1198}
1199
1200inline const DebugLoc &SDValue::getDebugLoc() const {
1201 return Node->getDebugLoc();
1202}
1203
1204inline void SDValue::dump() const {
1205 return Node->dump();
1206}
1207
1208inline void SDValue::dump(const SelectionDAG *G) const {
1209 return Node->dump(G);
1210}
1211
1212inline void SDValue::dumpr() const {
1213 return Node->dumpr();
1214}
1215
1216inline void SDValue::dumpr(const SelectionDAG *G) const {
1217 return Node->dumpr(G);
1218}
1219
1220// Define inline functions from the SDUse class.
1221
1222inline void SDUse::set(const SDValue &V) {
1223 if (Val.getNode()) removeFromList();
1224 Val = V;
1225 if (V.getNode()) V.getNode()->addUse(*this);
1226}
1227
1228inline void SDUse::setInitial(const SDValue &V) {
1229 Val = V;
1230 V.getNode()->addUse(*this);
1231}
1232
1233inline void SDUse::setNode(SDNode *N) {
1234 if (Val.getNode()) removeFromList();
1235 Val.setNode(N);
1236 if (N) N->addUse(*this);
1237}
1238
1239/// This class is used to form a handle around another node that
1240/// is persistent and is updated across invocations of replaceAllUsesWith on its
1241/// operand. This node should be directly created by end-users and not added to
1242/// the AllNodes list.
1243class HandleSDNode : public SDNode {
1244 SDUse Op;
1245
1246public:
1247 explicit HandleSDNode(SDValue X)
1248 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1249 // HandleSDNodes are never inserted into the DAG, so they won't be
1250 // auto-numbered. Use ID 65535 as a sentinel.
1251 PersistentId = 0xffff;
1252
1253 // Manually set up the operand list. This node type is special in that it's
1254 // always stack allocated and SelectionDAG does not manage its operands.
1255 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1256 // be so special.
1257 Op.setUser(this);
1258 Op.setInitial(X);
1259 NumOperands = 1;
1260 OperandList = &Op;
1261 }
1262 ~HandleSDNode();
1263
1264 const SDValue &getValue() const { return Op; }
1265};
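
A sketch of the intended use (X hypothetical): a stack-allocated handle keeps a value alive, and because replaceAllUsesWith updates the handle's operand like any other use, the current value can be read back afterwards.

    HandleSDNode Handle(X);
    // ... transformations that may ReplaceAllUsesWith(X, ...) run here ...
    SDValue Current = Handle.getValue(); // the possibly-replaced value
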
1266
1267class AddrSpaceCastSDNode : public SDNode {
1268private:
1269 unsigned SrcAddrSpace;
1270 unsigned DestAddrSpace;
1271
1272public:
1273 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1274 unsigned SrcAS, unsigned DestAS);
1275
1276 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1277 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1278
1279 static bool classof(const SDNode *N) {
1280 return N->getOpcode() == ISD::ADDRSPACECAST;
1281 }
1282};
1283
1284/// This is an abstract virtual class for memory operations.
1285class MemSDNode : public SDNode {
1286private:
1287 // VT of in-memory value.
1288 EVT MemoryVT;
1289
1290protected:
1291 /// Memory reference information.
1292 MachineMemOperand *MMO;
1293
1294public:
1295 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1296 EVT memvt, MachineMemOperand *MMO);
1297
1298 bool readMem() const { return MMO->isLoad(); }
1299 bool writeMem() const { return MMO->isStore(); }
1300
1301 /// Returns alignment and volatility of the memory access
1302 unsigned getOriginalAlignment() const {
1303 return MMO->getBaseAlignment();
1304 }
1305 unsigned getAlignment() const {
1306 return MMO->getAlignment();
9. Called C++ object pointer is null
1307 }
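
Step 9 above is the reported defect: on the analyzed path through DAGCombiner.cpp, getAlignment() is reached with MMO == nullptr. A hedged sketch (hypothetical accessor, not the checked-in code) that makes the non-null invariant explicit at the dereference:

    unsigned getAlignmentChecked() const {
      assert(MMO && "MemSDNode requires a non-null MachineMemOperand");
      return MMO->getAlignment();
    }
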
1308
1309 /// Return the SubclassData value, without HasDebugValue. This contains an
1310 /// encoding of the volatile flag, as well as bits used by subclasses. This
1311 /// function should only be used to compute a FoldingSetNodeID value.
1312 /// The HasDebugValue bit is masked out because CSE map needs to match
1313 /// nodes with debug info with nodes without debug info. Same is about
1314 /// isDivergent bit.
1315 unsigned getRawSubclassData() const {
1316 uint16_t Data;
1317 union {
1318 char RawSDNodeBits[sizeof(uint16_t)];
1319 SDNodeBitfields SDNodeBits;
1320 };
1321 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1322 SDNodeBits.HasDebugValue = 0;
1323 SDNodeBits.IsDivergent = false;
1324 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1325 return Data;
1326 }
1327
1328 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1329 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1330 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1331 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1332
1333 // Returns the offset from the location of the access.
1334 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1335
1336 /// Returns the AA info that describes the dereference.
1337 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1338
1339 /// Returns the Ranges that describes the dereference.
1340 const MDNode *getRanges() const { return MMO->getRanges(); }
1341
1342 /// Returns the synchronization scope ID for this memory operation.
1343 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1344
1345 /// Return the atomic ordering requirements for this memory operation. For
1346 /// cmpxchg atomic operations, return the atomic ordering requirements when
1347 /// the store occurs.
1348 AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
1349
1350 /// Return true if the memory operation ordering is Unordered or higher.
1351 bool isAtomic() const { return MMO->isAtomic(); }
1352
1353 /// Returns true if the memory operation doesn't imply any ordering
1354 /// constraints on surrounding memory operations beyond the normal memory
1355 /// aliasing rules.
1356 bool isUnordered() const { return MMO->isUnordered(); }
1357
1358 /// Returns true if the memory operation is neither atomic nor volatile.
1359 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1360
1361 /// Return the type of the in-memory value.
1362 EVT getMemoryVT() const { return MemoryVT; }
1363
1364 /// Return a MachineMemOperand object describing the memory
1365 /// reference performed by operation.
1366 MachineMemOperand *getMemOperand() const { return MMO; }
1367
1368 const MachinePointerInfo &getPointerInfo() const {
1369 return MMO->getPointerInfo();
1370 }
1371
1372 /// Return the address space for the associated pointer
1373 unsigned getAddressSpace() const {
1374 return getPointerInfo().getAddrSpace();
1375 }
1376
1377 /// Update this MemSDNode's MachineMemOperand information
1378 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1379 /// This must only be used when the new alignment applies to all users of
1380 /// this MachineMemOperand.
1381 void refineAlignment(const MachineMemOperand *NewMMO) {
1382 MMO->refineAlignment(NewMMO);
1383 }
1384
1385 const SDValue &getChain() const { return getOperand(0); }
1386 const SDValue &getBasePtr() const {
1387 return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
1388 }
1389
1390 // Methods to support isa and dyn_cast
1391 static bool classof(const SDNode *N) {
1392 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1393 // with either an intrinsic or a target opcode.
1394 return N->getOpcode() == ISD::LOAD ||
1395 N->getOpcode() == ISD::STORE ||
1396 N->getOpcode() == ISD::PREFETCH ||
1397 N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1398 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1399 N->getOpcode() == ISD::ATOMIC_SWAP ||
1400 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1401 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1402 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1403 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1404 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1405 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1406 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1407 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1408 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1409 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1410 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1411 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1412 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1413 N->getOpcode() == ISD::ATOMIC_LOAD ||
1414 N->getOpcode() == ISD::ATOMIC_STORE ||
1415 N->getOpcode() == ISD::MLOAD ||
1416 N->getOpcode() == ISD::MSTORE ||
1417 N->getOpcode() == ISD::MGATHER ||
1418 N->getOpcode() == ISD::MSCATTER ||
1419 N->isMemIntrinsic() ||
1420 N->isTargetMemoryOpcode();
1421 }
1422};
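
Since classof drives LLVM's isa<>/dyn_cast<> machinery, memory nodes can be recognized without virtual dispatch. A sketch (helper name hypothetical):

    static bool isSimpleMemAccess(const SDNode *N) {
      if (const auto *Mem = dyn_cast<MemSDNode>(N))
        return Mem->isSimple(); // neither atomic nor volatile
      return false;
    }
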
1423
1424/// This is an SDNode representing atomic operations.
1425class AtomicSDNode : public MemSDNode {
1426public:
1427 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1428 EVT MemVT, MachineMemOperand *MMO)
1429 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1430 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1431         MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1432 }
1433
1434 const SDValue &getBasePtr() const { return getOperand(1); }
1435 const SDValue &getVal() const { return getOperand(2); }
1436
1437 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1438 /// otherwise.
1439 bool isCompareAndSwap() const {
1440 unsigned Op = getOpcode();
1441 return Op == ISD::ATOMIC_CMP_SWAP ||
1442 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1443 }
1444
1445 /// For cmpxchg atomic operations, return the atomic ordering requirements
1446 /// when the store does not occur.
1447 AtomicOrdering getFailureOrdering() const {
1448 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1449 return MMO->getFailureOrdering();
1450 }
1451
1452 // Methods to support isa and dyn_cast
1453 static bool classof(const SDNode *N) {
1454 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1455 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1456 N->getOpcode() == ISD::ATOMIC_SWAP ||
1457 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1458 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1459 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1460 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1461 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1462 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1463 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1464 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1465 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1466 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1467 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1468 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1469 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1470 N->getOpcode() == ISD::ATOMIC_LOAD ||
1471 N->getOpcode() == ISD::ATOMIC_STORE;
1472 }
1473};
1474
1475/// This SDNode is used for target intrinsics that touch
1476/// memory and need an associated MachineMemOperand. Its opcode may be
1477/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1478/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1479class MemIntrinsicSDNode : public MemSDNode {
1480public:
1481 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1482 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1483 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1484 SDNodeBits.IsMemIntrinsic = true;
1485 }
1486
1487 // Methods to support isa and dyn_cast
1488 static bool classof(const SDNode *N) {
1489 // We lower some target intrinsics to their target opcode
1490 // early, so a node with a target opcode can be of this class.
1491 return N->isMemIntrinsic() ||
1492 N->getOpcode() == ISD::PREFETCH ||
1493 N->isTargetMemoryOpcode();
1494 }
1495};
1496
1497/// This SDNode is used to implement the code generator
1498/// support for the llvm IR shufflevector instruction. It combines elements
1499/// from two input vectors into a new input vector, with the selection and
1500/// ordering of elements determined by an array of integers, referred to as
1501/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1502/// refer to elements from the LHS input, and indices from N to 2N-1 refer to the RHS.
1503/// An index of -1 is treated as undef, such that the code generator may put
1504/// any value in the corresponding element of the result.
1505class ShuffleVectorSDNode : public SDNode {
1506 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1507 // is freed when the SelectionDAG object is destroyed.
1508 const int *Mask;
1509
1510protected:
1511 friend class SelectionDAG;
1512
1513 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1514 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1515
1516public:
1517 ArrayRef<int> getMask() const {
1518 EVT VT = getValueType(0);
1519 return makeArrayRef(Mask, VT.getVectorNumElements());
1520 }
1521
1522 int getMaskElt(unsigned Idx) const {
1523 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1524 return Mask[Idx];
1525 }
1526
1527 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1528
1529 int getSplatIndex() const {
1530 assert(isSplat() && "Cannot get splat index for non-splat!");
1531 EVT VT = getValueType(0);
1532 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1533 if (Mask[i] >= 0)
1534 return Mask[i];
1535
1536 // We can choose any index value here and be correct because all elements
1537 // are undefined. Return 0 for better potential for callers to simplify.
1538 return 0;
1539 }
1540
1541 static bool isSplatMask(const int *Mask, EVT VT);
1542
1543 /// Change values in a shuffle permute mask assuming
1544 /// the two vector operands have swapped position.
1545 static void commuteMask(MutableArrayRef<int> Mask) {
1546 unsigned NumElems = Mask.size();
1547 for (unsigned i = 0; i != NumElems; ++i) {
1548 int idx = Mask[i];
1549 if (idx < 0)
1550 continue;
1551 else if (idx < (int)NumElems)
1552 Mask[i] = idx + NumElems;
1553 else
1554 Mask[i] = idx - NumElems;
1555 }
1556 }
1557
1558 static bool classof(const SDNode *N) {
1559 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1560 }
1561};
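
A worked example of commuteMask with NumElems == 4: indices 0..3 select from the LHS and 4..7 from the RHS, so swapping the operands adds or subtracts NumElems while undef lanes (-1) are left alone.

    int Mask[4] = {0, 5, -1, 3}; // LHS[0], RHS[1], undef, LHS[3]
    ShuffleVectorSDNode::commuteMask(Mask);
    // Mask is now {4, 1, -1, 7}: same lanes, read from swapped operands.
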
1562
1563class ConstantSDNode : public SDNode {
1564 friend class SelectionDAG;
1565
1566 const ConstantInt *Value;
1567
1568 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1569 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1570 getSDVTList(VT)),
1571 Value(val) {
1572 ConstantSDNodeBits.IsOpaque = isOpaque;
1573 }
1574
1575public:
1576 const ConstantInt *getConstantIntValue() const { return Value; }
1577 const APInt &getAPIntValue() const { return Value->getValue(); }
1578 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1579 int64_t getSExtValue() const { return Value->getSExtValue(); }
1580 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1581 return Value->getLimitedValue(Limit);
1582 }
1583
1584 bool isOne() const { return Value->isOne(); }
1585 bool isNullValue() const { return Value->isZero(); }
1586 bool isAllOnesValue() const { return Value->isMinusOne(); }
1587
1588 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1589
1590 static bool classof(const SDNode *N) {
1591 return N->getOpcode() == ISD::Constant ||
1592 N->getOpcode() == ISD::TargetConstant;
1593 }
1594};
1595
1596uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1597 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1598}
1599
1600const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1601 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1602}
1603
1604class ConstantFPSDNode : public SDNode {
1605 friend class SelectionDAG;
1606
1607 const ConstantFP *Value;
1608
1609 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1610 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1611 DebugLoc(), getSDVTList(VT)),
1612 Value(val) {}
1613
1614public:
1615 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1616 const ConstantFP *getConstantFPValue() const { return Value; }
1617
1618 /// Return true if the value is positive or negative zero.
1619 bool isZero() const { return Value->isZero(); }
1620
1621 /// Return true if the value is a NaN.
1622 bool isNaN() const { return Value->isNaN(); }
1623
1624 /// Return true if the value is an infinity
1625 bool isInfinity() const { return Value->isInfinity(); }
1626
1627 /// Return true if the value is negative.
1628 bool isNegative() const { return Value->isNegative(); }
1629
1630 /// We don't rely on operator== working on double values, as
1631 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1632 /// As such, this method can be used to do an exact bit-for-bit comparison of
1633 /// two floating point values.
1634
1635 /// We leave the version with the double argument here because it's just so
1636 /// convenient to write "2.0" and the like. Without this function we'd
1637 /// have to duplicate its logic everywhere it's called.
1638 bool isExactlyValue(double V) const {
1639 return Value->getValueAPF().isExactlyValue(V);
1640 }
1641 bool isExactlyValue(const APFloat& V) const;
1642
1643 static bool isValueValidForType(EVT VT, const APFloat& Val);
1644
1645 static bool classof(const SDNode *N) {
1646 return N->getOpcode() == ISD::ConstantFP ||
1647 N->getOpcode() == ISD::TargetConstantFP;
1648 }
1649};
1650
1651/// Returns true if \p V is a constant integer zero.
1652bool isNullConstant(SDValue V);
1653
1654/// Returns true if \p V is an FP constant with a value of positive zero.
1655bool isNullFPConstant(SDValue V);
1656
1657/// Returns true if \p V is an integer constant with all bits set.
1658bool isAllOnesConstant(SDValue V);
1659
1660/// Returns true if \p V is a constant integer one.
1661bool isOneConstant(SDValue V);
1662
1663/// Return the non-bitcasted source operand of \p V if it exists.
1664/// If \p V is not a bitcasted value, it is returned as-is.
1665SDValue peekThroughBitcasts(SDValue V);
1666
1667/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1668/// If \p V is not a bitcasted one-use value, it is returned as-is.
1669SDValue peekThroughOneUseBitcasts(SDValue V);
1670
1671/// Return the non-extracted vector source operand of \p V if it exists.
1672/// If \p V is not an extracted subvector, it is returned as-is.
1673SDValue peekThroughExtractSubvectors(SDValue V);
1674
1675/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1676/// constant is canonicalized to be operand 1.
1677bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1678
1679/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1680ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1681 bool AllowTruncation = false);
1682
1683/// Returns the SDNode if it is a demanded constant splat BuildVector or
1684/// constant int.
1685ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1686 bool AllowUndefs = false,
1687 bool AllowTruncation = false);
1688
1689/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1690ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1691
1692/// Returns the SDNode if it is a demanded constant splat BuildVector or
1693/// constant float.
1694ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1695 bool AllowUndefs = false);
1696
1697/// Return true if the value is a constant 0 integer or a splatted vector of
1698/// a constant 0 integer (with no undefs by default).
1699/// Build vector implicit truncation is not an issue for null values.
1700bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1701
1702/// Return true if the value is a constant 1 integer or a splatted vector of a
1703/// constant 1 integer (with no undefs).
1704/// Does not permit build vector implicit truncation.
1705bool isOneOrOneSplat(SDValue V);
1706
1707/// Return true if the value is a constant -1 integer or a splatted vector of a
1708/// constant -1 integer (with no undefs).
1709/// Does not permit build vector implicit truncation.
1710bool isAllOnesOrAllOnesSplat(SDValue V);
1711
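These matchers are the usual way DAG combines test for scalar-or-splat constants. A hedged sketch of a typical pattern inside a combine (N and foldToShift hypothetical):

    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

    if (isNullOrNullSplat(N1)) // (sub x, 0) -> x, scalar or splat vector
      return N0;

    if (ConstantSDNode *C = isConstOrConstSplat(N1))
      if (C->getAPIntValue().isPowerOf2())
        return foldToShift(N0, C); // hypothetical strength reduction
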
1712class GlobalAddressSDNode : public SDNode {
1713 friend class SelectionDAG;
1714
1715 const GlobalValue *TheGlobal;
1716 int64_t Offset;
1717 unsigned TargetFlags;
1718
1719 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1720 const GlobalValue *GA, EVT VT, int64_t o,
1721 unsigned TF);
1722
1723public:
1724 const GlobalValue *getGlobal() const { return TheGlobal; }
1725 int64_t getOffset() const { return Offset; }
1726 unsigned getTargetFlags() const { return TargetFlags; }
1727 // Return the address space this GlobalAddress belongs to.
1728 unsigned getAddressSpace() const;
1729
1730 static bool classof(const SDNode *N) {
1731 return N->getOpcode() == ISD::GlobalAddress ||
1732 N->getOpcode() == ISD::TargetGlobalAddress ||
1733 N->getOpcode() == ISD::GlobalTLSAddress ||
1734 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1735 }
1736};
1737
1738class FrameIndexSDNode : public SDNode {
1739 friend class SelectionDAG;
1740
1741 int FI;
1742
1743 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1744 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1745 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1746 }
1747
1748public:
1749 int getIndex() const { return FI; }
1750
1751 static bool classof(const SDNode *N) {
1752 return N->getOpcode() == ISD::FrameIndex ||
1753 N->getOpcode() == ISD::TargetFrameIndex;
1754 }
1755};
1756
1757/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1758/// the offset and size that are started/ended in the underlying FrameIndex.
1759class LifetimeSDNode : public SDNode {
1760 friend class SelectionDAG;
1761 int64_t Size;
1762 int64_t Offset; // -1 if offset is unknown.
1763
1764 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1765 SDVTList VTs, int64_t Size, int64_t Offset)
1766 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1767public:
1768 int64_t getFrameIndex() const {
1769 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1770 }
1771
1772 bool hasOffset() const { return Offset >= 0; }
1773 int64_t getOffset() const {
1774    assert(hasOffset() && "offset is unknown");
1775 return Offset;
1776 }
1777 int64_t getSize() const {
1778    assert(hasOffset() && "offset is unknown");
1779 return Size;
1780 }
1781
1782 // Methods to support isa and dyn_cast
1783 static bool classof(const SDNode *N) {
1784 return N->getOpcode() == ISD::LIFETIME_START ||
1785 N->getOpcode() == ISD::LIFETIME_END;
1786 }
1787};
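// A guarded-access sketch (assuming `N` is an SDNode* from the same DAG);
// both getOffset() and getSize() assert hasOffset(), hence the guard:
//
//   if (const auto *LT = dyn_cast<LifetimeSDNode>(N))
//     if (LT->hasOffset()) {
//       int64_t Begin = LT->getOffset();        // offset into the FI
//       int64_t End   = Begin + LT->getSize();  // one past the range
//     }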
1788
1789class JumpTableSDNode : public SDNode {
1790 friend class SelectionDAG;
1791
1792 int JTI;
1793 unsigned TargetFlags;
1794
1795 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1796 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1797 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1798 }
1799
1800public:
1801 int getIndex() const { return JTI; }
1802 unsigned getTargetFlags() const { return TargetFlags; }
1803
1804 static bool classof(const SDNode *N) {
1805 return N->getOpcode() == ISD::JumpTable ||
1806 N->getOpcode() == ISD::TargetJumpTable;
1807 }
1808};
1809
1810class ConstantPoolSDNode : public SDNode {
1811 friend class SelectionDAG;
1812
1813 union {
1814 const Constant *ConstVal;
1815 MachineConstantPoolValue *MachineCPVal;
1816 } Val;
1817 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1818 unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
1819 unsigned TargetFlags;
1820
1821 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1822 unsigned Align, unsigned TF)
1823 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1824 DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
1825 TargetFlags(TF) {
1826    assert(Offset >= 0 && "Offset is too large");
1827 Val.ConstVal = c;
1828 }
1829
1830 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
1831 EVT VT, int o, unsigned Align, unsigned TF)
1832 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1833 DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
1834 TargetFlags(TF) {
1835    assert(Offset >= 0 && "Offset is too large");
1836 Val.MachineCPVal = v;
1837    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1838 }
1839
1840public:
1841 bool isMachineConstantPoolEntry() const {
1842 return Offset < 0;
1843 }
1844
1845 const Constant *getConstVal() const {
1846    assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1847 return Val.ConstVal;
1848 }
1849
1850 MachineConstantPoolValue *getMachineCPVal() const {
1851    assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1852 return Val.MachineCPVal;
1853 }
1854
1855 int getOffset() const {
1856    return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1857 }
1858
1859 // Return the alignment of this constant pool object, which is either 0 (for
1860 // default alignment) or the desired value.
1861 unsigned getAlignment() const { return Alignment; }
1862 unsigned getTargetFlags() const { return TargetFlags; }
1863
1864 Type *getType() const;
1865
1866 static bool classof(const SDNode *N) {
1867 return N->getOpcode() == ISD::ConstantPool ||
1868 N->getOpcode() == ISD::TargetConstantPool;
1869 }
1870};
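// How the Offset tag above works, as a sketch (assuming the common case of
// a 32-bit unsigned, where sizeof(unsigned)*CHAR_BIT-1 == 31):
//
//   Offset |= 1 << 31;                    // tag: MachineConstantPoolValue
//   bool IsMachineCP = Offset < 0;        // what isMachineConstantPoolEntry()
//                                         // tests: the tag is the sign bit
//                                         // of the signed int member
//   int RealOffset = Offset & ~(1 << 31); // what getOffset() returns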
1871
1872/// Completely target-dependent object reference.
1873class TargetIndexSDNode : public SDNode {
1874 friend class SelectionDAG;
1875
1876 unsigned TargetFlags;
1877 int Index;
1878 int64_t Offset;
1879
1880public:
1881 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1882 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1883 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1884
1885 unsigned getTargetFlags() const { return TargetFlags; }
1886 int getIndex() const { return Index; }
1887 int64_t getOffset() const { return Offset; }
1888
1889 static bool classof(const SDNode *N) {
1890 return N->getOpcode() == ISD::TargetIndex;
1891 }
1892};
1893
1894class BasicBlockSDNode : public SDNode {
1895 friend class SelectionDAG;
1896
1897 MachineBasicBlock *MBB;
1898
1899 /// Debug info is meaningful and potentially useful here, but we create
1900 /// blocks out of order when they're jumped to, which makes it a bit
1901 /// harder. Let's see if we need it first.
1902 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1903 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1904 {}
1905
1906public:
1907 MachineBasicBlock *getBasicBlock() const { return MBB; }
1908
1909 static bool classof(const SDNode *N) {
1910 return N->getOpcode() == ISD::BasicBlock;
1911 }
1912};
1913
1914/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1915class BuildVectorSDNode : public SDNode {
1916public:
1917 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1918 explicit BuildVectorSDNode() = delete;
1919
1920 /// Check if this is a constant splat, and if so, find the
1921 /// smallest element size that splats the vector. If MinSplatBits is
1922 /// nonzero, the element size must be at least that large. Note that the
1923 /// splat element may be the entire vector (i.e., a one element vector).
1924 /// Returns the splat element value in SplatValue. Any undefined bits in
1925 /// that value are zero, and the corresponding bits in the SplatUndef mask
1926 /// are set. The SplatBitSize value is set to the splat element size in
1927 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1928 /// undefined. isBigEndian describes the endianness of the target.
1929 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1930 unsigned &SplatBitSize, bool &HasAnyUndefs,
1931 unsigned MinSplatBits = 0,
1932 bool isBigEndian = false) const;
1933
1934 /// Returns the demanded splatted value or a null value if this is not a
1935 /// splat.
1936 ///
1937 /// The DemandedElts mask indicates the elements that must be in the splat.
1938 /// If passed a non-null UndefElements bitvector, it will resize it to match
1939 /// the vector width and set the bits where elements are undef.
1940 SDValue getSplatValue(const APInt &DemandedElts,
1941 BitVector *UndefElements = nullptr) const;
1942
1943 /// Returns the splatted value or a null value if this is not a splat.
1944 ///
1945 /// If passed a non-null UndefElements bitvector, it will resize it to match
1946 /// the vector width and set the bits where elements are undef.
1947 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1948
1949 /// Returns the demanded splatted constant or null if this is not a constant
1950 /// splat.
1951 ///
1952 /// The DemandedElts mask indicates the elements that must be in the splat.
1953 /// If passed a non-null UndefElements bitvector, it will resize it to match
1954 /// the vector width and set the bits where elements are undef.
1955 ConstantSDNode *
1956 getConstantSplatNode(const APInt &DemandedElts,
1957 BitVector *UndefElements = nullptr) const;
1958
1959 /// Returns the splatted constant or null if this is not a constant
1960 /// splat.
1961 ///
1962 /// If passed a non-null UndefElements bitvector, it will resize it to match
1963 /// the vector width and set the bits where elements are undef.
1964 ConstantSDNode *
1965 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
1966
1967 /// Returns the demanded splatted constant FP or null if this is not a
1968 /// constant FP splat.
1969 ///
1970 /// The DemandedElts mask indicates the elements that must be in the splat.
1971 /// If passed a non-null UndefElements bitvector, it will resize it to match
1972 /// the vector width and set the bits where elements are undef.
1973 ConstantFPSDNode *
1974 getConstantFPSplatNode(const APInt &DemandedElts,
1975 BitVector *UndefElements = nullptr) const;
1976
1977 /// Returns the splatted constant FP or null if this is not a constant
1978 /// FP splat.
1979 ///
1980 /// If passed a non-null UndefElements bitvector, it will resize it to match
1981 /// the vector width and set the bits where elements are undef.
1982 ConstantFPSDNode *
1983 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
1984
1985 /// If this is a constant FP splat and the splatted constant FP is an
1986/// exact power of 2, return the log base 2 integer value. Otherwise,
1987 /// return -1.
1988 ///
1989 /// The BitWidth specifies the necessary bit precision.
1990 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
1991 uint32_t BitWidth) const;
1992
1993 bool isConstant() const;
1994
1995 static bool classof(const SDNode *N) {
1996 return N->getOpcode() == ISD::BUILD_VECTOR;
1997 }
1998};
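// A splat-query sketch (assuming `BV` is a BuildVectorSDNode* and the
// caller knows the target's endianness):
//
//   APInt SplatBits, SplatUndef;
//   unsigned SplatBitSize;
//   bool HasAnyUndefs;
//   if (BV->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
//                           /*MinSplatBits=*/8, /*isBigEndian=*/false))
//     ; // SplatBits holds the splatted element, SplatBitSize its width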
1999
2000/// An SDNode that holds an arbitrary LLVM IR Value. This is
2001/// used when the SelectionDAG needs to make a simple reference to something
2002/// in the LLVM IR representation.
2003///
2004class SrcValueSDNode : public SDNode {
2005 friend class SelectionDAG;
2006
2007 const Value *V;
2008
2009 /// Create a SrcValue for a general value.
2010 explicit SrcValueSDNode(const Value *v)
2011 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2012
2013public:
2014 /// Return the contained Value.
2015 const Value *getValue() const { return V; }
2016
2017 static bool classof(const SDNode *N) {
2018 return N->getOpcode() == ISD::SRCVALUE;
2019 }
2020};
2021
2022class MDNodeSDNode : public SDNode {
2023 friend class SelectionDAG;
2024
2025 const MDNode *MD;
2026
2027 explicit MDNodeSDNode(const MDNode *md)
2028 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2029 {}
2030
2031public:
2032 const MDNode *getMD() const { return MD; }
2033
2034 static bool classof(const SDNode *N) {
2035 return N->getOpcode() == ISD::MDNODE_SDNODE;
2036 }
2037};
2038
2039class RegisterSDNode : public SDNode {
2040 friend class SelectionDAG;
2041
2042 unsigned Reg;
2043
2044 RegisterSDNode(unsigned reg, EVT VT)
2045 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2046
2047public:
2048 unsigned getReg() const { return Reg; }
2049
2050 static bool classof(const SDNode *N) {
2051 return N->getOpcode() == ISD::Register;
2052 }
2053};
2054
2055class RegisterMaskSDNode : public SDNode {
2056 friend class SelectionDAG;
2057
2058 // The memory for RegMask is not owned by the node.
2059 const uint32_t *RegMask;
2060
2061 RegisterMaskSDNode(const uint32_t *mask)
2062 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2063 RegMask(mask) {}
2064
2065public:
2066 const uint32_t *getRegMask() const { return RegMask; }
2067
2068 static bool classof(const SDNode *N) {
2069 return N->getOpcode() == ISD::RegisterMask;
2070 }
2071};
2072
2073class BlockAddressSDNode : public SDNode {
2074 friend class SelectionDAG;
2075
2076 const BlockAddress *BA;
2077 int64_t Offset;
2078 unsigned TargetFlags;
2079
2080 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2081 int64_t o, unsigned Flags)
2082 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2083 BA(ba), Offset(o), TargetFlags(Flags) {}
2084
2085public:
2086 const BlockAddress *getBlockAddress() const { return BA; }
2087 int64_t getOffset() const { return Offset; }
2088 unsigned getTargetFlags() const { return TargetFlags; }
2089
2090 static bool classof(const SDNode *N) {
2091 return N->getOpcode() == ISD::BlockAddress ||
2092 N->getOpcode() == ISD::TargetBlockAddress;
2093 }
2094};
2095
2096class LabelSDNode : public SDNode {
2097 friend class SelectionDAG;
2098
2099 MCSymbol *Label;
2100
2101 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2102 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2103    assert(LabelSDNode::classof(this) && "not a label opcode");
2104 }
2105
2106public:
2107 MCSymbol *getLabel() const { return Label; }
2108
2109 static bool classof(const SDNode *N) {
2110 return N->getOpcode() == ISD::EH_LABEL ||
2111 N->getOpcode() == ISD::ANNOTATION_LABEL;
2112 }
2113};
2114
2115class ExternalSymbolSDNode : public SDNode {
2116 friend class SelectionDAG;
2117
2118 const char *Symbol;
2119 unsigned TargetFlags;
2120
2121 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2122 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2123 DebugLoc(), getSDVTList(VT)),
2124 Symbol(Sym), TargetFlags(TF) {}
2125
2126public:
2127 const char *getSymbol() const { return Symbol; }
2128 unsigned getTargetFlags() const { return TargetFlags; }
2129
2130 static bool classof(const SDNode *N) {
2131 return N->getOpcode() == ISD::ExternalSymbol ||
2132 N->getOpcode() == ISD::TargetExternalSymbol;
2133 }
2134};
2135
2136class MCSymbolSDNode : public SDNode {
2137 friend class SelectionDAG;
2138
2139 MCSymbol *Symbol;
2140
2141 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2142 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2143
2144public:
2145 MCSymbol *getMCSymbol() const { return Symbol; }
2146
2147 static bool classof(const SDNode *N) {
2148 return N->getOpcode() == ISD::MCSymbol;
2149 }
2150};
2151
2152class CondCodeSDNode : public SDNode {
2153 friend class SelectionDAG;
2154
2155 ISD::CondCode Condition;
2156
2157 explicit CondCodeSDNode(ISD::CondCode Cond)
2158 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2159 Condition(Cond) {}
2160
2161public:
2162 ISD::CondCode get() const { return Condition; }
2163
2164 static bool classof(const SDNode *N) {
2165 return N->getOpcode() == ISD::CONDCODE;
2166 }
2167};
2168
2169/// This class is used to represent EVT's, which are used
2170/// to parameterize some operations.
2171class VTSDNode : public SDNode {
2172 friend class SelectionDAG;
2173
2174 EVT ValueType;
2175
2176 explicit VTSDNode(EVT VT)
2177 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2178 ValueType(VT) {}
2179
2180public:
2181 EVT getVT() const { return ValueType; }
2182
2183 static bool classof(const SDNode *N) {
2184 return N->getOpcode() == ISD::VALUETYPE;
2185 }
2186};
2187
2188/// Base class for LoadSDNode and StoreSDNode
2189class LSBaseSDNode : public MemSDNode {
2190public:
2191 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2192 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2193 MachineMemOperand *MMO)
2194 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2195 LSBaseSDNodeBits.AddressingMode = AM;
2196    assert(getAddressingMode() == AM && "Value truncated");
2197 }
2198
2199 const SDValue &getOffset() const {
2200 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2201 }
2202
2203 /// Return the addressing mode for this load or store:
2204 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2205 ISD::MemIndexedMode getAddressingMode() const {
2206 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2207 }
2208
2209 /// Return true if this is a pre/post inc/dec load/store.
2210 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2211
2212 /// Return true if this is NOT a pre/post inc/dec load/store.
2213 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2214
2215 static bool classof(const SDNode *N) {
2216 return N->getOpcode() == ISD::LOAD ||
2217 N->getOpcode() == ISD::STORE;
2218 }
2219};
2220
2221/// This class is used to represent ISD::LOAD nodes.
2222class LoadSDNode : public LSBaseSDNode {
2223 friend class SelectionDAG;
2224
2225 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2226 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2227 MachineMemOperand *MMO)
2228 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2229 LoadSDNodeBits.ExtTy = ETy;
2230    assert(readMem() && "Load MachineMemOperand is not a load!");
2231    assert(!writeMem() && "Load MachineMemOperand is a store!");
2232 }
2233
2234public:
2235 /// Return whether this is a plain node,
2236 /// or one of the varieties of value-extending loads.
2237 ISD::LoadExtType getExtensionType() const {
2238 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2239 }
2240
2241 const SDValue &getBasePtr() const { return getOperand(1); }
2242 const SDValue &getOffset() const { return getOperand(2); }
2243
2244 static bool classof(const SDNode *N) {
2245 return N->getOpcode() == ISD::LOAD;
2246 }
2247};
2248
2249/// This class is used to represent ISD::STORE nodes.
2250class StoreSDNode : public LSBaseSDNode {
2251 friend class SelectionDAG;
2252
2253 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2254 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2255 MachineMemOperand *MMO)
2256 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2257 StoreSDNodeBits.IsTruncating = isTrunc;
2258    assert(!readMem() && "Store MachineMemOperand is a load!");
2259    assert(writeMem() && "Store MachineMemOperand is not a store!");
2260 }
2261
2262public:
2263 /// Return true if the op does a truncation before store.
2264 /// For integers this is the same as doing a TRUNCATE and storing the result.
2265 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2266 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2267 void setTruncatingStore(bool Truncating) {
2268 StoreSDNodeBits.IsTruncating = Truncating;
2269 }
2270
2271 const SDValue &getValue() const { return getOperand(1); }
2272 const SDValue &getBasePtr() const { return getOperand(2); }
2273 const SDValue &getOffset() const { return getOperand(3); }
2274
2275 static bool classof(const SDNode *N) {
2276 return N->getOpcode() == ISD::STORE;
2277 }
2278};
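// An accessor sketch for the two leaf classes (assuming `N` is an SDNode*);
// note the base pointer sits at a different operand index in each:
//
//   if (auto *LD = dyn_cast<LoadSDNode>(N)) {
//     SDValue Ptr = LD->getBasePtr();                          // operand 1
//     bool Extends = LD->getExtensionType() != ISD::NON_EXTLOAD;
//   } else if (auto *ST = dyn_cast<StoreSDNode>(N)) {
//     SDValue Val = ST->getValue();                            // operand 1
//     SDValue Ptr = ST->getBasePtr();                          // operand 2
//   }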
2279
2280/// This base class is used to represent MLOAD and MSTORE nodes
2281class MaskedLoadStoreSDNode : public MemSDNode {
2282public:
2283 friend class SelectionDAG;
2284
2285 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2286 const DebugLoc &dl, SDVTList VTs,
2287 ISD::MemIndexedMode AM, EVT MemVT,
2288 MachineMemOperand *MMO)
2289 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2290 LSBaseSDNodeBits.AddressingMode = AM;
2291    assert(getAddressingMode() == AM && "Value truncated");
2292 }
2293
2294 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2295 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2296 // Mask is a vector of i1 elements
2297 const SDValue &getBasePtr() const {
2298 return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
2299 }
2300 const SDValue &getOffset() const {
2301 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2302 }
2303 const SDValue &getMask() const {
2304 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2305 }
2306
2307 /// Return the addressing mode for this load or store:
2308 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2309 ISD::MemIndexedMode getAddressingMode() const {
2310 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2311 }
2312
2313 /// Return true if this is a pre/post inc/dec load/store.
2314 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2315
2316 /// Return true if this is NOT a pre/post inc/dec load/store.
2317 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2318
2319 static bool classof(const SDNode *N) {
2320 return N->getOpcode() == ISD::MLOAD ||
2321 N->getOpcode() == ISD::MSTORE;
2322 }
2323};
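// A sketch of why the getters above branch on the opcode: the same logical
// operand sits at different indices in MLOAD and MSTORE (assuming `N` is an
// SDNode*):
//
//   if (auto *MLS = dyn_cast<MaskedLoadStoreSDNode>(N)) {
//     SDValue Mask = MLS->getMask();     // operand 3 (MLOAD) or 4 (MSTORE)
//     SDValue Ptr  = MLS->getBasePtr();  // operand 1 (MLOAD) or 2 (MSTORE)
//   }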
2324
2325/// This class is used to represent an MLOAD node
2326class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2327public:
2328 friend class SelectionDAG;
2329
2330 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2331 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2332 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2333 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2334 LoadSDNodeBits.ExtTy = ETy;
2335 LoadSDNodeBits.IsExpanding = IsExpanding;
2336 }
2337
2338 ISD::LoadExtType getExtensionType() const {
2339 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2340 }
2341
2342 const SDValue &getBasePtr() const { return getOperand(1); }
2343 const SDValue &getOffset() const { return getOperand(2); }
2344 const SDValue &getMask() const { return getOperand(3); }
2345 const SDValue &getPassThru() const { return getOperand(4); }
2346
2347 static bool classof(const SDNode *N) {
2348 return N->getOpcode() == ISD::MLOAD;
2349 }
2350
2351 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2352};
2353
2354/// This class is used to represent an MSTORE node
2355class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2356public:
2357 friend class SelectionDAG;
2358
2359 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2360 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2361 EVT MemVT, MachineMemOperand *MMO)
2362 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2363 StoreSDNodeBits.IsTruncating = isTrunc;
2364 StoreSDNodeBits.IsCompressing = isCompressing;
2365 }
2366
2367 /// Return true if the op does a truncation before store.
2368 /// For integers this is the same as doing a TRUNCATE and storing the result.
2369 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2370 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2371
2372 /// Returns true if the op does a compression to the vector before storing.
2373 /// The node contiguously stores the active elements (integers or floats)
2374 /// in src (those with their respective bit set in writemask k) to unaligned
2375 /// memory at base_addr.
2376 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2377
2378 const SDValue &getValue() const { return getOperand(1); }
2379 const SDValue &getBasePtr() const { return getOperand(2); }
2380 const SDValue &getOffset() const { return getOperand(3); }
2381 const SDValue &getMask() const { return getOperand(4); }
2382
2383 static bool classof(const SDNode *N) {
2384 return N->getOpcode() == ISD::MSTORE;
2385 }
2386};
2387
2388/// This is a base class used to represent
2389/// MGATHER and MSCATTER nodes
2390///
2391class MaskedGatherScatterSDNode : public MemSDNode {
2392public:
2393 friend class SelectionDAG;
2394
2395 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2396 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2397 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2398 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2399 LSBaseSDNodeBits.AddressingMode = IndexType;
2400    assert(getIndexType() == IndexType && "Value truncated");
2401 }
2402
2403 /// How is Index applied to BasePtr when computing addresses.
2404 ISD::MemIndexType getIndexType() const {
2405 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2406 }
2407 bool isIndexScaled() const {
2408 return (getIndexType() == ISD::SIGNED_SCALED) ||
2409 (getIndexType() == ISD::UNSIGNED_SCALED);
2410 }
2411 bool isIndexSigned() const {
2412 return (getIndexType() == ISD::SIGNED_SCALED) ||
2413 (getIndexType() == ISD::SIGNED_UNSCALED);
2414 }
2415
2416  // In both nodes the base address is operand 3 and the mask is operand 2:
2417 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2418 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2419 // Mask is a vector of i1 elements
2420 const SDValue &getBasePtr() const { return getOperand(3); }
2421 const SDValue &getIndex() const { return getOperand(4); }
2422 const SDValue &getMask() const { return getOperand(2); }
2423 const SDValue &getScale() const { return getOperand(5); }
2424
2425 static bool classof(const SDNode *N) {
2426 return N->getOpcode() == ISD::MGATHER ||
2427 N->getOpcode() == ISD::MSCATTER;
2428 }
2429};
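// The addressing implied by getIndexType(), as a sketch (ScaleAmt denotes
// the constant value carried by getScale()):
//
//   lane address = BasePtr + Index[i] * (isIndexScaled() ? ScaleAmt : 1)
//   where Index[i] is interpreted as signed iff isIndexSigned()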
2430
2431/// This class is used to represent an MGATHER node
2432///
2433class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2434public:
2435 friend class SelectionDAG;
2436
2437 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2438 EVT MemVT, MachineMemOperand *MMO,
2439 ISD::MemIndexType IndexType)
2440 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2441 IndexType) {}
2442
2443 const SDValue &getPassThru() const { return getOperand(1); }
2444
2445 static bool classof(const SDNode *N) {
2446 return N->getOpcode() == ISD::MGATHER;
2447 }
2448};
2449
2450/// This class is used to represent an MSCATTER node
2451///
2452class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2453public:
2454 friend class SelectionDAG;
2455
2456 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2457 EVT MemVT, MachineMemOperand *MMO,
2458 ISD::MemIndexType IndexType)
2459 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2460 IndexType) {}
2461
2462 const SDValue &getValue() const { return getOperand(1); }
2463
2464 static bool classof(const SDNode *N) {
2465 return N->getOpcode() == ISD::MSCATTER;
2466 }
2467};
2468
2469/// An SDNode that represents everything that will be needed
2470/// to construct a MachineInstr. These nodes are created during the
2471/// instruction selection proper phase.
2472///
2473/// Note that the only supported way to set the `memoperands` is by calling the
2474/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2475/// inside the DAG rather than in the node.
2476class MachineSDNode : public SDNode {
2477private:
2478 friend class SelectionDAG;
2479
2480 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2481 : SDNode(Opc, Order, DL, VTs) {}
2482
2483 // We use a pointer union between a single `MachineMemOperand` pointer and
2484 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2485 // the number of these is zero, the single pointer variant used when the
2486 // number is one, and the array is used for larger numbers.
2487 //
2488 // The array is allocated via the `SelectionDAG`'s allocator and so will
2489 // always live until the DAG is cleaned up and doesn't require ownership here.
2490 //
2491 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2492 // subclasses aren't managed in a conforming C++ manner. See the comments on
2493 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2494 // constraint here is that these don't manage memory with their constructor or
2495 // destructor and can be initialized to a good state even if they start off
2496 // uninitialized.
2497 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2498
2499 // Note that this could be folded into the above `MemRefs` member if doing so
2500 // is advantageous at some point. We don't need to store this in most cases.
2501 // However, at the moment this doesn't appear to make the allocation any
2502 // smaller and makes the code somewhat simpler to read.
2503 int NumMemRefs = 0;
2504
2505public:
2506 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2507
2508 ArrayRef<MachineMemOperand *> memoperands() const {
2509 // Special case the common cases.
2510 if (NumMemRefs == 0)
2511 return {};
2512 if (NumMemRefs == 1)
2513 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2514
2515 // Otherwise we have an actual array.
2516 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2517 }
2518 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2519 mmo_iterator memoperands_end() const { return memoperands().end(); }
2520 bool memoperands_empty() const { return memoperands().empty(); }
2521
2522 /// Clear out the memory reference descriptor list.
2523 void clearMemRefs() {
2524 MemRefs = nullptr;
2525 NumMemRefs = 0;
2526 }
2527
2528 static bool classof(const SDNode *N) {
2529 return N->isMachineOpcode();
2530 }
2531};
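// A memoperand-walk sketch (assuming `MN` is a MachineSDNode*); the same
// ArrayRef view covers all three storage states described above:
//
//   for (MachineMemOperand *MMO : MN->memoperands())
//     if (MMO->isVolatile())
//       ; // treat this instruction conservatively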
2532
2533class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
2534 SDNode, ptrdiff_t> {
2535 const SDNode *Node;
2536 unsigned Operand;
2537
2538 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2539
2540public:
2541 bool operator==(const SDNodeIterator& x) const {
2542 return Operand == x.Operand;
2543 }
2544 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2545
2546 pointer operator*() const {
2547 return Node->getOperand(Operand).getNode();
2548 }
2549 pointer operator->() const { return operator*(); }
2550
2551 SDNodeIterator& operator++() { // Preincrement
2552 ++Operand;
2553 return *this;
2554 }
2555 SDNodeIterator operator++(int) { // Postincrement
2556 SDNodeIterator tmp = *this; ++*this; return tmp;
2557 }
2558 size_t operator-(SDNodeIterator Other) const {
2559    assert(Node == Other.Node &&
2560           "Cannot compare iterators of two different nodes!");
2561 return Operand - Other.Operand;
2562 }
2563
2564 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2565 static SDNodeIterator end (const SDNode *N) {
2566 return SDNodeIterator(N, N->getNumOperands());
2567 }
2568
2569 unsigned getOperand() const { return Operand; }
2570 const SDNode *getNode() const { return Node; }
2571};
2572
2573template <> struct GraphTraits<SDNode*> {
2574 using NodeRef = SDNode *;
2575 using ChildIteratorType = SDNodeIterator;
2576
2577 static NodeRef getEntryNode(SDNode *N) { return N; }
2578
2579 static ChildIteratorType child_begin(NodeRef N) {
2580 return SDNodeIterator::begin(N);
2581 }
2582
2583 static ChildIteratorType child_end(NodeRef N) {
2584 return SDNodeIterator::end(N);
2585 }
2586};
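// With this GraphTraits specialization the generic graph utilities apply
// directly (a sketch; assumes llvm/ADT/DepthFirstIterator.h is available):
//
//   for (SDNode *M : depth_first(Root))  // Root is an SDNode*
//     ; // visits Root and, transitively, the nodes it uses as operands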
2587
2588/// A representation of the largest SDNode, for use in sizeof().
2589///
2590/// This needs to be a union because the largest node differs on 32 bit systems
2591/// with 4 and 8 byte pointer alignment, respectively.
2592using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2593 BlockAddressSDNode,
2594 GlobalAddressSDNode>;
2595
2596/// The SDNode class with the greatest alignment requirement.
2597using MostAlignedSDNode = GlobalAddressSDNode;
2598
2599namespace ISD {
2600
2601 /// Returns true if the specified node is a non-extending and unindexed load.
2602 inline bool isNormalLoad(const SDNode *N) {
2603 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2604 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2605 Ld->getAddressingMode() == ISD::UNINDEXED;
2606 }
2607
2608 /// Returns true if the specified node is a non-extending load.
2609 inline bool isNON_EXTLoad(const SDNode *N) {
2610 return isa<LoadSDNode>(N) &&
2611 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2612 }
2613
2614 /// Returns true if the specified node is a EXTLOAD.
2615 inline bool isEXTLoad(const SDNode *N) {
2616 return isa<LoadSDNode>(N) &&
2617 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2618 }
2619
2620 /// Returns true if the specified node is a SEXTLOAD.
2621 inline bool isSEXTLoad(const SDNode *N) {
2622 return isa<LoadSDNode>(N) &&
2623 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2624 }
2625
2626 /// Returns true if the specified node is a ZEXTLOAD.
2627 inline bool isZEXTLoad(const SDNode *N) {
2628 return isa<LoadSDNode>(N) &&
2629 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2630 }
2631
2632 /// Returns true if the specified node is an unindexed load.
2633 inline bool isUNINDEXEDLoad(const SDNode *N) {
2634 return isa<LoadSDNode>(N) &&
2635 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2636 }
2637
2638 /// Returns true if the specified node is a non-truncating
2639 /// and unindexed store.
2640 inline bool isNormalStore(const SDNode *N) {
2641 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2642 return St && !St->isTruncatingStore() &&
2643 St->getAddressingMode() == ISD::UNINDEXED;
2644 }
2645
2646 /// Returns true if the specified node is a non-truncating store.
2647 inline bool isNON_TRUNCStore(const SDNode *N) {
2648 return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
2649 }
2650
2651 /// Returns true if the specified node is a truncating store.
2652 inline bool isTRUNCStore(const SDNode *N) {
2653 return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
2654 }
2655
2656 /// Returns true if the specified node is an unindexed store.
2657 inline bool isUNINDEXEDStore(const SDNode *N) {
2658 return isa<StoreSDNode>(N) &&
2659 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2660 }
2661
2662 /// Attempt to match a unary predicate against a scalar/splat constant or
2663 /// every element of a constant BUILD_VECTOR.
2664/// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2665 bool matchUnaryPredicate(SDValue Op,
2666 std::function<bool(ConstantSDNode *)> Match,
2667 bool AllowUndefs = false);
2668
2669 /// Attempt to match a binary predicate against a pair of scalar/splat
2670 /// constants or every element of a pair of constant BUILD_VECTORs.
2671/// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
2672 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2673 bool matchBinaryPredicate(
2674 SDValue LHS, SDValue RHS,
2675 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2676 bool AllowUndefs = false, bool AllowTypeMismatch = false);
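// A predicate-matching sketch (Op assumed to be a constant scalar or a
// constant BUILD_VECTOR); the null check decides how UNDEF lanes, which
// reach the lambda as nullptr when AllowUndefs is true, are treated:
//
//   bool AllPow2 = matchUnaryPredicate(
//       Op,
//       [](ConstantSDNode *C) {
//         return !C || C->getAPIntValue().isPowerOf2();  // nullptr == UNDEF
//       },
//       /*AllowUndefs=*/true);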
2677
2678 /// Returns true if the specified value is the overflow result from one
2679 /// of the overflow intrinsic nodes.
2680 inline bool isOverflowIntrOpRes(SDValue Op) {
2681 unsigned Opc = Op.getOpcode();
2682 return (Op.getResNo() == 1 &&
2683 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2684 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2685 }
2686
2687} // end namespace ISD
2688
2689} // end namespace llvm
2690
2691#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H